Index: projects/powernv/arm/arm/hdmi_if.m =================================================================== --- projects/powernv/arm/arm/hdmi_if.m (revision 290990) +++ projects/powernv/arm/arm/hdmi_if.m (revision 290991) @@ -1,57 +1,59 @@ #- # Copyright (c) 2015 Oleksandr Tymoshenko # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # #include #include #include INTERFACE hdmi; HEADER { #include - typedef void (*hdmi_event_hook)(void *, int); + typedef void (*hdmi_event_hook)(void *, device_t, int); EVENTHANDLER_DECLARE(hdmi_event, hdmi_event_hook); + + #define HDMI_EVENT_CONNECTED 0 } # # Get EDID info # METHOD int get_edid { device_t dev; uint8_t **edid; uint32_t *edid_length; }; # # Set videomode # METHOD int set_videomode { device_t dev; const struct videomode *videomode; }; Index: projects/powernv/arm/arm/pl310.c =================================================================== --- projects/powernv/arm/arm/pl310.c (revision 290990) +++ projects/powernv/arm/arm/pl310.c (revision 290991) @@ -1,549 +1,549 @@ /*- * Copyright (c) 2012 Olivier Houchard * Copyright (c) 2011 * Ben Gray . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BEN GRAY ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL BEN GRAY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * Define this if you need to disable PL310 for debugging purposes
 * Spec:
 * http://infocenter.arm.com/help/topic/com.arm.doc.ddi0246e/DDI0246E_l2c310_r3p1_trm.pdf
 */

/*
 * Hardcode errata for now
 * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0246b/pr01s02s02.html
 */
#define PL310_ERRATA_588369
#define PL310_ERRATA_753970
#define PL310_ERRATA_727915

#define PL310_LOCK(sc) do {		\
	mtx_lock_spin(&(sc)->sc_mtx);	\
} while(0)
#define PL310_UNLOCK(sc) do {		\
	mtx_unlock_spin(&(sc)->sc_mtx);	\
} while(0)

static int pl310_enabled = 1;
TUNABLE_INT("hw.pl310.enabled", &pl310_enabled);

static uint32_t g_l2cache_way_mask;
static const uint32_t g_l2cache_line_size = 32;
static const uint32_t g_l2cache_align_mask = (32 - 1);
static uint32_t g_l2cache_size;
static uint32_t g_way_size;
static uint32_t g_ways_assoc;

static struct pl310_softc *pl310_softc;

static struct ofw_compat_data compat_data[] = {
	{"arm,pl310", true}, /* Non-standard, FreeBSD. */
	{"arm,pl310-cache", true},
	{NULL, false}
};

-void
+static void
 pl310_print_config(struct pl310_softc *sc)
{
	uint32_t aux, prefetch;
	const char *dis = "disabled";
	const char *ena = "enabled";

	aux = pl310_read4(sc, PL310_AUX_CTRL);
	prefetch = pl310_read4(sc, PL310_PREFETCH_CTRL);

	device_printf(sc->sc_dev, "Early BRESP response: %s\n",
	    (aux & AUX_CTRL_EARLY_BRESP) ? ena : dis);
	device_printf(sc->sc_dev, "Instruction prefetch: %s\n",
	    (aux & AUX_CTRL_INSTR_PREFETCH) ? ena : dis);
	device_printf(sc->sc_dev, "Data prefetch: %s\n",
	    (aux & AUX_CTRL_DATA_PREFETCH) ? ena : dis);
	device_printf(sc->sc_dev, "Non-secure interrupt control: %s\n",
	    (aux & AUX_CTRL_NS_INT_CTRL) ? ena : dis);
	device_printf(sc->sc_dev, "Non-secure lockdown: %s\n",
	    (aux & AUX_CTRL_NS_LOCKDOWN) ? ena : dis);
	device_printf(sc->sc_dev, "Share override: %s\n",
	    (aux & AUX_CTRL_SHARE_OVERRIDE) ? ena : dis);
	device_printf(sc->sc_dev, "Double linefill: %s\n",
	    (prefetch & PREFETCH_CTRL_DL) ? ena : dis);
	device_printf(sc->sc_dev, "Instruction prefetch: %s\n",
	    (prefetch & PREFETCH_CTRL_INSTR_PREFETCH) ? ena : dis);
	device_printf(sc->sc_dev, "Data prefetch: %s\n",
	    (prefetch & PREFETCH_CTRL_DATA_PREFETCH) ? ena : dis);
	device_printf(sc->sc_dev, "Double linefill on WRAP request: %s\n",
	    (prefetch & PREFETCH_CTRL_DL_ON_WRAP) ? ena : dis);
	device_printf(sc->sc_dev, "Prefetch drop: %s\n",
	    (prefetch & PREFETCH_CTRL_PREFETCH_DROP) ? ena : dis);
	device_printf(sc->sc_dev, "Incr double Linefill: %s\n",
	    (prefetch & PREFETCH_CTRL_INCR_DL) ? ena : dis);
	device_printf(sc->sc_dev, "Not same ID on exclusive sequence: %s\n",
	    (prefetch & PREFETCH_CTRL_NOTSAMEID) ?
ena : dis); device_printf(sc->sc_dev, "Prefetch offset: %d\n", (prefetch & PREFETCH_CTRL_OFFSET_MASK)); } void pl310_set_ram_latency(struct pl310_softc *sc, uint32_t which_reg, uint32_t read, uint32_t write, uint32_t setup) { uint32_t v; KASSERT(which_reg == PL310_TAG_RAM_CTRL || which_reg == PL310_DATA_RAM_CTRL, ("bad pl310 ram latency register address")); v = pl310_read4(sc, which_reg); if (setup != 0) { KASSERT(setup <= 8, ("bad pl310 setup latency: %d", setup)); v &= ~RAM_CTRL_SETUP_MASK; v |= (setup - 1) << RAM_CTRL_SETUP_SHIFT; } if (read != 0) { KASSERT(read <= 8, ("bad pl310 read latency: %d", read)); v &= ~RAM_CTRL_READ_MASK; v |= (read - 1) << RAM_CTRL_READ_SHIFT; } if (write != 0) { KASSERT(write <= 8, ("bad pl310 write latency: %d", write)); v &= ~RAM_CTRL_WRITE_MASK; v |= (write - 1) << RAM_CTRL_WRITE_SHIFT; } pl310_write4(sc, which_reg, v); } static int pl310_filter(void *arg) { struct pl310_softc *sc = arg; uint32_t intr; intr = pl310_read4(sc, PL310_INTR_MASK); if (!sc->sc_enabled && (intr & INTR_MASK_ECNTR)) { /* * This is for debug purpose, so be blunt about it * We disable PL310 only when something fishy is going * on and we need to make sure L2 cache is 100% disabled */ panic("pl310: caches disabled but cache event detected\n"); } return (FILTER_HANDLED); } static __inline void pl310_wait_background_op(uint32_t off, uint32_t mask) { while (pl310_read4(pl310_softc, off) & mask) continue; } /** * pl310_cache_sync - performs a cache sync operation * * According to the TRM: * * "Before writing to any other register you must perform an explicit * Cache Sync operation. This is particularly important when the cache is * enabled and changes to how the cache allocates new lines are to be made." * * */ static __inline void pl310_cache_sync(void) { if ((pl310_softc == NULL) || !pl310_softc->sc_enabled) return; #ifdef PL310_ERRATA_753970 if (pl310_softc->sc_rtl_revision == CACHE_ID_RELEASE_r3p0) /* Write uncached PL310 register */ pl310_write4(pl310_softc, 0x740, 0xffffffff); else #endif pl310_write4(pl310_softc, PL310_CACHE_SYNC, 0xffffffff); } static void pl310_wbinv_all(void) { if ((pl310_softc == NULL) || !pl310_softc->sc_enabled) return; PL310_LOCK(pl310_softc); #ifdef PL310_ERRATA_727915 if (pl310_softc->sc_rtl_revision == CACHE_ID_RELEASE_r2p0) { int i, j; for (i = 0; i < g_ways_assoc; i++) { for (j = 0; j < g_way_size / g_l2cache_line_size; j++) { pl310_write4(pl310_softc, PL310_CLEAN_INV_LINE_IDX, (i << 28 | j << 5)); } } pl310_cache_sync(); PL310_UNLOCK(pl310_softc); return; } if (pl310_softc->sc_rtl_revision == CACHE_ID_RELEASE_r3p0) platform_pl310_write_debug(pl310_softc, 3); #endif pl310_write4(pl310_softc, PL310_CLEAN_INV_WAY, g_l2cache_way_mask); pl310_wait_background_op(PL310_CLEAN_INV_WAY, g_l2cache_way_mask); pl310_cache_sync(); #ifdef PL310_ERRATA_727915 if (pl310_softc->sc_rtl_revision == CACHE_ID_RELEASE_r3p0) platform_pl310_write_debug(pl310_softc, 0); #endif PL310_UNLOCK(pl310_softc); } static void pl310_wbinv_range(vm_paddr_t start, vm_size_t size) { if ((pl310_softc == NULL) || !pl310_softc->sc_enabled) return; PL310_LOCK(pl310_softc); if (start & g_l2cache_align_mask) { size += start & g_l2cache_align_mask; start &= ~g_l2cache_align_mask; } if (size & g_l2cache_align_mask) { size &= ~g_l2cache_align_mask; size += g_l2cache_line_size; } #ifdef PL310_ERRATA_727915 platform_pl310_write_debug(pl310_softc, 3); #endif while (size > 0) { #ifdef PL310_ERRATA_588369 if (pl310_softc->sc_rtl_revision <= CACHE_ID_RELEASE_r1p0) { /* * Errata 588369 says that clean + 
inv may keep the
			 * cache line if it was clean, the recommended
			 * workaround is to clean then invalidate the cache
			 * line, with write-back and cache linefill disabled.
			 */
			pl310_write4(pl310_softc, PL310_CLEAN_LINE_PA, start);
			pl310_write4(pl310_softc, PL310_INV_LINE_PA, start);
		} else
#endif
			pl310_write4(pl310_softc, PL310_CLEAN_INV_LINE_PA, start);

		start += g_l2cache_line_size;
		size -= g_l2cache_line_size;
	}
#ifdef PL310_ERRATA_727915
	platform_pl310_write_debug(pl310_softc, 0);
#endif
	pl310_cache_sync();
	PL310_UNLOCK(pl310_softc);
}

static void
pl310_wb_range(vm_paddr_t start, vm_size_t size)
{

	if ((pl310_softc == NULL) || !pl310_softc->sc_enabled)
		return;
	PL310_LOCK(pl310_softc);
	if (start & g_l2cache_align_mask) {
		size += start & g_l2cache_align_mask;
		start &= ~g_l2cache_align_mask;
	}
	if (size & g_l2cache_align_mask) {
		size &= ~g_l2cache_align_mask;
		size += g_l2cache_line_size;
	}
	while (size > 0) {
		pl310_write4(pl310_softc, PL310_CLEAN_LINE_PA, start);
		start += g_l2cache_line_size;
		size -= g_l2cache_line_size;
	}
	pl310_cache_sync();
	PL310_UNLOCK(pl310_softc);
}

static void
pl310_inv_range(vm_paddr_t start, vm_size_t size)
{

	if ((pl310_softc == NULL) || !pl310_softc->sc_enabled)
		return;
	PL310_LOCK(pl310_softc);
	if (start & g_l2cache_align_mask) {
		size += start & g_l2cache_align_mask;
		start &= ~g_l2cache_align_mask;
	}
	if (size & g_l2cache_align_mask) {
		size &= ~g_l2cache_align_mask;
		size += g_l2cache_line_size;
	}
	while (size > 0) {
		pl310_write4(pl310_softc, PL310_INV_LINE_PA, start);
		start += g_l2cache_line_size;
		size -= g_l2cache_line_size;
	}
	pl310_cache_sync();
	PL310_UNLOCK(pl310_softc);
}

static void
pl310_drain_writebuf(void)
{

	if ((pl310_softc == NULL) || !pl310_softc->sc_enabled)
		return;
	PL310_LOCK(pl310_softc);
	pl310_cache_sync();
	PL310_UNLOCK(pl310_softc);
}

static void
pl310_set_way_sizes(struct pl310_softc *sc)
{
	uint32_t aux_value;

	aux_value = pl310_read4(sc, PL310_AUX_CTRL);
	g_way_size = (aux_value & AUX_CTRL_WAY_SIZE_MASK) >>
	    AUX_CTRL_WAY_SIZE_SHIFT;
	g_way_size = 1 << (g_way_size + 13);
	if (aux_value & (1 << AUX_CTRL_ASSOCIATIVITY_SHIFT))
		g_ways_assoc = 16;
	else
		g_ways_assoc = 8;
	g_l2cache_way_mask = (1 << g_ways_assoc) - 1;
	g_l2cache_size = g_way_size * g_ways_assoc;
}

/*
 * Setup interrupt handling. This is done only if the cache controller is
 * disabled, for debugging. We set counters so when a cache event happens
 * we'll get interrupted and be warned that something is wrong, because no
 * cache events should happen if we're disabled.
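 * (Counter 0 is configured below to count cache-line evictions and
 * counter 1 data-read requests; any increment raises the ECNTR interrupt
 * and pl310_filter() panics to flag the stray L2 activity.)
 */

/*
 * Illustrative sketch only (hypothetical, not part of this commit): reading
 * the two event counters armed by pl310_config_intr() back out of the
 * controller would look like this.
 */
#ifdef notyet
static uint32_t
pl310_read_event_counters(struct pl310_softc *sc, uint32_t *c1)
{

	/* Counter 1 value, then counter 0 value. */
	*c1 = pl310_read4(sc, PL310_EVENT_COUNTER1_VAL);
	return (pl310_read4(sc, PL310_EVENT_COUNTER0_VAL));
}
#endif

/*
 * The config_intrhook handler below arms the counters once interrupt
 * delivery is available: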
*/ static void pl310_config_intr(void *arg) { struct pl310_softc * sc; sc = arg; /* activate the interrupt */ bus_setup_intr(sc->sc_dev, sc->sc_irq_res, INTR_TYPE_MISC | INTR_MPSAFE, pl310_filter, NULL, sc, &sc->sc_irq_h); /* Cache Line Eviction for Counter 0 */ pl310_write4(sc, PL310_EVENT_COUNTER0_CONF, EVENT_COUNTER_CONF_INCR | EVENT_COUNTER_CONF_CO); /* Data Read Request for Counter 1 */ pl310_write4(sc, PL310_EVENT_COUNTER1_CONF, EVENT_COUNTER_CONF_INCR | EVENT_COUNTER_CONF_DRREQ); /* Enable and clear pending interrupts */ pl310_write4(sc, PL310_INTR_CLEAR, INTR_MASK_ECNTR); pl310_write4(sc, PL310_INTR_MASK, INTR_MASK_ALL); /* Enable counters and reset C0 and C1 */ pl310_write4(sc, PL310_EVENT_COUNTER_CTRL, EVENT_COUNTER_CTRL_ENABLED | EVENT_COUNTER_CTRL_C0_RESET | EVENT_COUNTER_CTRL_C1_RESET); config_intrhook_disestablish(sc->sc_ich); free(sc->sc_ich, M_DEVBUF); sc->sc_ich = NULL; } static int pl310_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_search_compatible(dev, compat_data)->ocd_data) return (ENXIO); device_set_desc(dev, "PL310 L2 cache controller"); return (0); } static int pl310_attach(device_t dev) { struct pl310_softc *sc = device_get_softc(dev); int rid; uint32_t cache_id, debug_ctrl; sc->sc_dev = dev; rid = 0; sc->sc_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->sc_mem_res == NULL) panic("%s: Cannot map registers", device_get_name(dev)); /* Allocate an IRQ resource */ rid = 0; sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | RF_SHAREABLE); if (sc->sc_irq_res == NULL) { device_printf(dev, "cannot allocate IRQ, not using interrupt\n"); } pl310_softc = sc; mtx_init(&sc->sc_mtx, "pl310lock", NULL, MTX_SPIN); cache_id = pl310_read4(sc, PL310_CACHE_ID); sc->sc_rtl_revision = (cache_id >> CACHE_ID_RELEASE_SHIFT) & CACHE_ID_RELEASE_MASK; device_printf(dev, "Part number: 0x%x, release: 0x%x\n", (cache_id >> CACHE_ID_PARTNUM_SHIFT) & CACHE_ID_PARTNUM_MASK, (cache_id >> CACHE_ID_RELEASE_SHIFT) & CACHE_ID_RELEASE_MASK); /* * If L2 cache is already enabled then something has violated the rules, * because caches are supposed to be off at kernel entry. The cache * must be disabled to write the configuration registers without * triggering an access error (SLVERR), but there's no documented safe * procedure for disabling the L2 cache in the manual. So we'll try to * invent one: * - Use the debug register to force write-through mode and prevent * linefills (allocation of new lines on read); now anything we do * will not cause new data to come into the L2 cache. * - Writeback and invalidate the current contents. * - Disable the controller. * - Restore the original debug settings. 
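	 * (Forcing write-through with linefills disabled is the same
	 * debug-register trick the errata 727915 workaround above relies on
	 * when it passes 3, i.e. DEBUG_CTRL_DISABLE_WRITEBACK |
	 * DEBUG_CTRL_DISABLE_LINEFILL, to platform_pl310_write_debug().)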
*/ if (pl310_read4(sc, PL310_CTRL) & CTRL_ENABLED) { device_printf(dev, "Warning: L2 Cache should not already be " "active; trying to de-activate and re-initialize...\n"); sc->sc_enabled = 1; debug_ctrl = pl310_read4(sc, PL310_DEBUG_CTRL); platform_pl310_write_debug(sc, debug_ctrl | DEBUG_CTRL_DISABLE_WRITEBACK | DEBUG_CTRL_DISABLE_LINEFILL); pl310_set_way_sizes(sc); pl310_wbinv_all(); platform_pl310_write_ctrl(sc, CTRL_DISABLED); platform_pl310_write_debug(sc, debug_ctrl); } sc->sc_enabled = pl310_enabled; if (sc->sc_enabled) { platform_pl310_init(sc); pl310_set_way_sizes(sc); /* platform init might change these */ pl310_write4(pl310_softc, PL310_INV_WAY, 0xffff); pl310_wait_background_op(PL310_INV_WAY, 0xffff); platform_pl310_write_ctrl(sc, CTRL_ENABLED); device_printf(dev, "L2 Cache enabled: %uKB/%dB %d ways\n", (g_l2cache_size / 1024), g_l2cache_line_size, g_ways_assoc); if (bootverbose) pl310_print_config(sc); } else { if (sc->sc_irq_res != NULL) { sc->sc_ich = malloc(sizeof(*sc->sc_ich), M_DEVBUF, M_WAITOK); sc->sc_ich->ich_func = pl310_config_intr; sc->sc_ich->ich_arg = sc; if (config_intrhook_establish(sc->sc_ich) != 0) { device_printf(dev, "config_intrhook_establish failed\n"); free(sc->sc_ich, M_DEVBUF); return(ENXIO); } } device_printf(dev, "L2 Cache disabled\n"); } /* Set the l2 functions in the set of cpufuncs */ cpufuncs.cf_l2cache_wbinv_all = pl310_wbinv_all; cpufuncs.cf_l2cache_wbinv_range = pl310_wbinv_range; cpufuncs.cf_l2cache_inv_range = pl310_inv_range; cpufuncs.cf_l2cache_wb_range = pl310_wb_range; cpufuncs.cf_l2cache_drain_writebuf = pl310_drain_writebuf; return (0); } static device_method_t pl310_methods[] = { DEVMETHOD(device_probe, pl310_probe), DEVMETHOD(device_attach, pl310_attach), DEVMETHOD_END }; static driver_t pl310_driver = { "l2cache", pl310_methods, sizeof(struct pl310_softc), }; static devclass_t pl310_devclass; EARLY_DRIVER_MODULE(pl310, simplebus, pl310_driver, pl310_devclass, 0, 0, BUS_PASS_CPU + BUS_PASS_ORDER_MIDDLE); Index: projects/powernv/arm/freescale/imx/imx6_ccm.c =================================================================== --- projects/powernv/arm/freescale/imx/imx6_ccm.c (revision 290990) +++ projects/powernv/arm/freescale/imx/imx6_ccm.c (revision 290991) @@ -1,356 +1,384 @@ /*- * Copyright (c) 2013 Ian Lepore * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Clocks and power control driver for Freescale i.MX6 family of SoCs. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef CCGR_CLK_MODE_ALWAYS #define CCGR_CLK_MODE_OFF 0 #define CCGR_CLK_MODE_RUNMODE 1 #define CCGR_CLK_MODE_ALWAYS 3 #endif struct ccm_softc { device_t dev; struct resource *mem_res; }; static struct ccm_softc *ccm_sc; static inline uint32_t RD4(struct ccm_softc *sc, bus_size_t off) { return (bus_read_4(sc->mem_res, off)); } static inline void WR4(struct ccm_softc *sc, bus_size_t off, uint32_t val) { bus_write_4(sc->mem_res, off, val); } /* * Until we have a fully functional ccm driver which implements the fdt_clock * interface, use the age-old workaround of unconditionally enabling the clocks * for devices we might need to use. The SoC defaults to most clocks enabled, * but the rom boot code and u-boot disable a few of them. We turn on only * what's needed to run the chip plus devices we have drivers for, and turn off * devices we don't yet have drivers for. (Note that USB is not turned on here * because that is one we do when the driver asks for it.) */ static void ccm_init_gates(struct ccm_softc *sc) { - /* Turns on... 
*/ - WR4(sc, CCM_CCGR0, 0x0000003f); /* ahpbdma, aipstz 1 & 2 busses */ - WR4(sc, CCM_CCGR1, 0x00300c00); /* gpt, enet */ - WR4(sc, CCM_CCGR2, 0x0fffffc0); /* ipmux & ipsync (bridges), iomux, i2c */ - WR4(sc, CCM_CCGR3, 0x3ff00000); /* DDR memory controller */ - WR4(sc, CCM_CCGR4, 0x0000f300); /* pl301 bus crossbar */ - WR4(sc, CCM_CCGR5, 0x0ffc00c0); /* uarts, ssi, sdma */ - WR4(sc, CCM_CCGR6, 0x000003ff); /* usdhc 1-4, usboh3 */ + uint32_t reg; + + /* ahpbdma, aipstz 1 & 2 busses */ + reg = CCGR0_AIPS_TZ1 | CCGR0_AIPS_TZ2 | CCGR0_ABPHDMA; + WR4(sc, CCM_CCGR0, reg); + + /* gpt, enet */ + reg = CCGR1_ENET | CCGR1_GPT; + WR4(sc, CCM_CCGR1, reg); + + /* ipmux & ipsync (bridges), iomux, i2c */ + reg = CCGR2_I2C1 | CCGR2_I2C2 | CCGR2_I2C3 | CCGR2_IIM | + CCGR2_IOMUX_IPT | CCGR2_IPMUX1 | CCGR2_IPMUX2 | CCGR2_IPMUX3 | + CCGR2_IPSYNC_IP2APB_TZASC1 | CCGR2_IPSYNC_IP2APB_TZASC2 | + CCGR2_IPSYNC_VDOA; + WR4(sc, CCM_CCGR2, reg); + + /* DDR memory controller */ + reg = CCGR3_OCRAM | CCGR3_MMDC_CORE_IPG | + CCGR3_MMDC_CORE_ACLK_FAST | CCGR3_CG11 | CCGR3_CG13; + WR4(sc, CCM_CCGR3, reg); + + /* pl301 bus crossbar */ + reg = CCGR4_PL301_MX6QFAST1_S133 | + CCGR4_PL301_MX6QPER1_BCH | CCGR4_PL301_MX6QPER2_MAIN; + WR4(sc, CCM_CCGR4, reg); + + /* uarts, ssi, sdma */ + reg = CCGR5_SDMA | CCGR5_SSI1 | CCGR5_SSI2 | CCGR5_SSI3 | + CCGR5_UART | CCGR5_UART_SERIAL; + WR4(sc, CCM_CCGR5, reg); + + /* usdhc 1-4, usboh3 */ + reg = CCGR6_USBOH3 | CCGR6_USDHC1 | CCGR6_USDHC2 | + CCGR6_USDHC3 | CCGR6_USDHC4; + WR4(sc, CCM_CCGR6, reg); } static int ccm_detach(device_t dev) { struct ccm_softc *sc; sc = device_get_softc(dev); if (sc->mem_res != NULL) bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->mem_res); return (0); } static int ccm_attach(device_t dev) { struct ccm_softc *sc; int err, rid; uint32_t reg; sc = device_get_softc(dev); err = 0; /* Allocate bus_space resources. */ rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res == NULL) { device_printf(dev, "Cannot allocate memory resources\n"); err = ENXIO; goto out; } ccm_sc = sc; /* * Configure the Low Power Mode setting to leave the ARM core power on * when a WFI instruction is executed. This lets the MPCore timers and * GIC continue to run, which is helpful when the only thing that can * wake you up is an MPCore Private Timer interrupt delivered via GIC. * * XXX Based on the docs, setting CCM_CGPR_INT_MEM_CLK_LPM shouldn't be * required when the LPM bits are set to LPM_RUN. But experimentally * I've experienced a fairly rare lockup when not setting it. I was * unable to prove conclusively that the lockup was related to power * management or that this definitively fixes it. Revisit this. */ reg = RD4(sc, CCM_CGPR); reg |= CCM_CGPR_INT_MEM_CLK_LPM; WR4(sc, CCM_CGPR, reg); reg = RD4(sc, CCM_CLPCR); reg = (reg & ~CCM_CLPCR_LPM_MASK) | CCM_CLPCR_LPM_RUN; WR4(sc, CCM_CLPCR, reg); ccm_init_gates(sc); err = 0; out: if (err != 0) ccm_detach(dev); return (err); } static int ccm_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_is_compatible(dev, "fsl,imx6q-ccm") == 0) return (ENXIO); device_set_desc(dev, "Freescale i.MX6 Clock Control Module"); return (BUS_PROBE_DEFAULT); } void imx_ccm_ssi_configure(device_t _ssidev) { struct ccm_softc *sc; uint32_t reg; sc = ccm_sc; /* * Select PLL4 (Audio PLL) clock multiplexer as source. * PLL output frequency = Fref * (DIV_SELECT + NUM/DENOM). 
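	 * As an illustrative example (values are hypothetical, not from this
	 * commit): with the usual 24 MHz reference and DIV_SELECT = 32,
	 * NUM = 0, PLL4 runs at 24 MHz * 32 = 768 MHz; the PRED (/4) and
	 * PODF (/2) dividers programmed below then take that down to 96 MHz.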
*/ reg = RD4(sc, CCM_CSCMR1); reg &= ~(SSI_CLK_SEL_M << SSI1_CLK_SEL_S); reg |= (SSI_CLK_SEL_PLL4 << SSI1_CLK_SEL_S); reg &= ~(SSI_CLK_SEL_M << SSI2_CLK_SEL_S); reg |= (SSI_CLK_SEL_PLL4 << SSI2_CLK_SEL_S); reg &= ~(SSI_CLK_SEL_M << SSI3_CLK_SEL_S); reg |= (SSI_CLK_SEL_PLL4 << SSI3_CLK_SEL_S); WR4(sc, CCM_CSCMR1, reg); /* * Ensure we have set hardware-default values * for pre and post dividers. */ /* SSI1 and SSI3 */ reg = RD4(sc, CCM_CS1CDR); /* Divide by 2 */ reg &= ~(SSI_CLK_PODF_MASK << SSI1_CLK_PODF_SHIFT); reg &= ~(SSI_CLK_PODF_MASK << SSI3_CLK_PODF_SHIFT); reg |= (0x1 << SSI1_CLK_PODF_SHIFT); reg |= (0x1 << SSI3_CLK_PODF_SHIFT); /* Divide by 4 */ reg &= ~(SSI_CLK_PRED_MASK << SSI1_CLK_PRED_SHIFT); reg &= ~(SSI_CLK_PRED_MASK << SSI3_CLK_PRED_SHIFT); reg |= (0x3 << SSI1_CLK_PRED_SHIFT); reg |= (0x3 << SSI3_CLK_PRED_SHIFT); WR4(sc, CCM_CS1CDR, reg); /* SSI2 */ reg = RD4(sc, CCM_CS2CDR); /* Divide by 2 */ reg &= ~(SSI_CLK_PODF_MASK << SSI2_CLK_PODF_SHIFT); reg |= (0x1 << SSI2_CLK_PODF_SHIFT); /* Divide by 4 */ reg &= ~(SSI_CLK_PRED_MASK << SSI2_CLK_PRED_SHIFT); reg |= (0x3 << SSI2_CLK_PRED_SHIFT); WR4(sc, CCM_CS2CDR, reg); } void imx_ccm_usb_enable(device_t _usbdev) { /* * For imx6, the USBOH3 clock gate is bits 0-1 of CCGR6, so no need for * shifting and masking here, just set the low-order two bits to ALWAYS. */ WR4(ccm_sc, CCM_CCGR6, RD4(ccm_sc, CCM_CCGR6) | CCGR_CLK_MODE_ALWAYS); } void imx_ccm_usbphy_enable(device_t _phydev) { /* * XXX Which unit? * Right now it's not clear how to figure from fdt data which phy unit * we're supposed to operate on. Until this is worked out, just enable * both PHYs. */ #if 0 int phy_num, regoff; phy_num = 0; /* XXX */ switch (phy_num) { case 0: regoff = 0; break; case 1: regoff = 0x10; break; default: device_printf(ccm_sc->dev, "Bad PHY number %u,\n", phy_num); return; } imx6_anatop_write_4(IMX6_ANALOG_CCM_PLL_USB1 + regoff, IMX6_ANALOG_CCM_PLL_USB_ENABLE | IMX6_ANALOG_CCM_PLL_USB_POWER | IMX6_ANALOG_CCM_PLL_USB_EN_USB_CLKS); #else imx6_anatop_write_4(IMX6_ANALOG_CCM_PLL_USB1 + 0, IMX6_ANALOG_CCM_PLL_USB_ENABLE | IMX6_ANALOG_CCM_PLL_USB_POWER | IMX6_ANALOG_CCM_PLL_USB_EN_USB_CLKS); imx6_anatop_write_4(IMX6_ANALOG_CCM_PLL_USB1 + 0x10, IMX6_ANALOG_CCM_PLL_USB_ENABLE | IMX6_ANALOG_CCM_PLL_USB_POWER | IMX6_ANALOG_CCM_PLL_USB_EN_USB_CLKS); #endif } uint32_t imx_ccm_ipg_hz(void) { return (66000000); } uint32_t imx_ccm_perclk_hz(void) { return (66000000); } uint32_t imx_ccm_sdhci_hz(void) { return (200000000); } uint32_t imx_ccm_uart_hz(void) { return (80000000); } uint32_t imx_ccm_ahb_hz(void) { return (132000000); } uint32_t imx_ccm_get_cacrr(void) { return (RD4(ccm_sc, CCM_CACCR)); } void imx_ccm_set_cacrr(uint32_t divisor) { WR4(ccm_sc, CCM_CACCR, divisor); } static device_method_t ccm_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ccm_probe), DEVMETHOD(device_attach, ccm_attach), DEVMETHOD(device_detach, ccm_detach), DEVMETHOD_END }; static driver_t ccm_driver = { "ccm", ccm_methods, sizeof(struct ccm_softc) }; static devclass_t ccm_devclass; EARLY_DRIVER_MODULE(ccm, simplebus, ccm_driver, ccm_devclass, 0, 0, BUS_PASS_CPU + BUS_PASS_ORDER_EARLY); Index: projects/powernv/arm/freescale/imx/imx6_ccmreg.h =================================================================== --- projects/powernv/arm/freescale/imx/imx6_ccmreg.h (revision 290990) +++ projects/powernv/arm/freescale/imx/imx6_ccmreg.h (revision 290991) @@ -1,69 +1,114 @@ /*- * Copyright (c) 2013 Ian Lepore * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef IMX6_CCMREG_H #define IMX6_CCMREG_H #define CCM_CACCR 0x010 #define CCM_CSCMR1 0x01C #define SSI1_CLK_SEL_S 10 #define SSI2_CLK_SEL_S 12 #define SSI3_CLK_SEL_S 14 #define SSI_CLK_SEL_M 0x3 #define SSI_CLK_SEL_508_PFD 0 #define SSI_CLK_SEL_454_PFD 1 #define SSI_CLK_SEL_PLL4 2 #define CCM_CSCMR2 0x020 #define CCM_CS1CDR 0x028 #define SSI1_CLK_PODF_SHIFT 0 #define SSI1_CLK_PRED_SHIFT 6 #define SSI3_CLK_PODF_SHIFT 16 #define SSI3_CLK_PRED_SHIFT 22 #define SSI_CLK_PODF_MASK 0x3f #define SSI_CLK_PRED_MASK 0x7 #define CCM_CS2CDR 0x02C #define SSI2_CLK_PODF_SHIFT 0 #define SSI2_CLK_PRED_SHIFT 6 #define CCM_CSCDR2 0x038 #define CCM_CLPCR 0x054 #define CCM_CLPCR_LPM_MASK 0x03 #define CCM_CLPCR_LPM_RUN 0x00 #define CCM_CLPCR_LPM_WAIT 0x01 #define CCM_CLPCR_LPM_STOP 0x02 #define CCM_CGPR 0x064 #define CCM_CGPR_INT_MEM_CLK_LPM (1 << 17) #define CCM_CCGR0 0x068 +#define CCGR0_AIPS_TZ1 (0x3 << 0) +#define CCGR0_AIPS_TZ2 (0x3 << 2) +#define CCGR0_ABPHDMA (0x3 << 4) #define CCM_CCGR1 0x06C +#define CCGR1_ENET (0x3 << 10) +#define CCGR1_GPT (0x3 << 20) #define CCM_CCGR2 0x070 +#define CCGR2_HDMI_TX (0x3 << 0) +#define CCGR2_HDMI_TX_ISFR (0x3 << 4) +#define CCGR2_I2C1 (0x3 << 6) +#define CCGR2_I2C2 (0x3 << 8) +#define CCGR2_I2C3 (0x3 << 10) +#define CCGR2_IIM (0x3 << 12) +#define CCGR2_IOMUX_IPT (0x3 << 14) +#define CCGR2_IPMUX1 (0x3 << 16) +#define CCGR2_IPMUX2 (0x3 << 18) +#define CCGR2_IPMUX3 (0x3 << 20) +#define CCGR2_IPSYNC_IP2APB_TZASC1 (0x3 << 22) +#define CCGR2_IPSYNC_IP2APB_TZASC2 (0x3 << 24) +#define CCGR2_IPSYNC_VDOA (0x3 << 26) #define CCM_CCGR3 0x074 +#define CCGR3_IPU1_IPU (0x3 << 0) +#define CCGR3_IPU1_DI0 (0x3 << 2) +#define CCGR3_IPU1_DI1 (0x3 << 4) +#define CCGR3_IPU2_IPU (0x3 << 6) +#define CCGR3_IPU2_DI0 (0x3 << 8) +#define CCGR3_IPU2_DI1 (0x3 << 10) +#define CCGR3_LDB_DI0 (0x3 << 12) +#define CCGR3_LDB_DI1 (0x3 << 14) +#define CCGR3_MMDC_CORE_ACLK_FAST (0x3 << 20) +#define CCGR3_CG11 (0x3 << 22) +#define CCGR3_MMDC_CORE_IPG (0x3 << 24) +#define CCGR3_CG13 (0x3 << 26) +#define CCGR3_OCRAM (0x3 << 28) #define CCM_CCGR4 0x078 +#define CCGR4_PL301_MX6QFAST1_S133 (0x3 << 8) +#define CCGR4_PL301_MX6QPER1_BCH (0x3 << 12) +#define CCGR4_PL301_MX6QPER2_MAIN (0x3 << 14) #define CCM_CCGR5 0x07C +#define CCGR5_SDMA (0x3 << 6) +#define 
CCGR5_SSI1 (0x3 << 18)
+#define CCGR5_SSI2 (0x3 << 20)
+#define CCGR5_SSI3 (0x3 << 22)
+#define CCGR5_UART (0x3 << 24)
+#define CCGR5_UART_SERIAL (0x3 << 26)
#define CCM_CCGR6 0x080
+#define CCGR6_USBOH3 (0x3 << 0)
+#define CCGR6_USDHC1 (0x3 << 2)
+#define CCGR6_USDHC2 (0x3 << 4)
+#define CCGR6_USDHC3 (0x3 << 6)
+#define CCGR6_USDHC4 (0x3 << 8)
#define CCM_CMEOR 0x088

#endif

Index: projects/powernv/arm/include/pcb.h
===================================================================
--- projects/powernv/arm/include/pcb.h	(revision 290990)
+++ projects/powernv/arm/include/pcb.h	(revision 290991)
@@ -1,85 +1,88 @@
/* $NetBSD: pcb.h,v 1.10 2003/10/13 21:46:39 scw Exp $ */

/*-
 * Copyright (c) 2001 Matt Thomas .
 * Copyright (c) 1994 Mark Brinicombe.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by the RiscBSD team.
 * 4. The name "RiscBSD" nor the name of the author may be used to
 *    endorse or promote products derived from this software without specific
 *    prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY RISCBSD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL RISCBSD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _MACHINE_PCB_H_
#define _MACHINE_PCB_H_

#include
#include

/*
 * WARNING!
 * Keep pcb_regs first for faster access in switch.S
 */
struct pcb {
	struct switchframe pcb_regs;	/* CPU state */
	u_int	pcb_flags;
#define PCB_OWNFPU	0x00000001
#define PCB_NOALIGNFLT	0x00000002
	caddr_t	pcb_onfault;		/* On fault handler */
-#ifdef ARM_NEW_PMAP
-	uint32_t pcb_pagedir;		/* TTB0 value */
-#else
-	vm_offset_t pcb_pagedir;	/* PT hooks */
+	vm_offset_t pcb_pagedir;	/* TTB0 value */
+	/*
+	 * XXX:
+	 * Variables pcb_pl1vec, pcb_l1vec, pcb_dacr are used solely
+	 * by old PMAP. Keep them here for PCB binary compatibility
+	 * between old and new PMAP.
+	 */
	uint32_t *pcb_pl1vec;		/* PTR to vector_base L1 entry*/
	uint32_t pcb_l1vec;		/* Value to stuff on ctx sw */
	u_int	pcb_dacr;		/* Domain Access Control Reg */
-#endif
+
	struct vfp_state pcb_vfpstate;	/* VFP/NEON state */
	u_int	pcb_vfpcpu;		/* VFP/NEON last cpu */
} __aligned(8);
/*
 * We need the PCB to be aligned on 8 bytes, as we may
 * access it using ldrd/strd, and the ARM ABI requires it
 * to be aligned on 8 bytes.
 */

/*
 * No additional data for core dumps.
 */
*/ struct md_coredump { int md_empty; }; void makectx(struct trapframe *tf, struct pcb *pcb); #ifdef _KERNEL void savectx(struct pcb *) __returns_twice; #endif /* _KERNEL */ #endif /* !_MACHINE_PCB_H_ */ Index: projects/powernv/arm/include/pl310.h =================================================================== --- projects/powernv/arm/include/pl310.h (revision 290990) +++ projects/powernv/arm/include/pl310.h (revision 290991) @@ -1,188 +1,187 @@ /*- * Copyright (c) 2012 Olivier Houchard. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * $FreeBSD$ */ #ifndef PL310_H_ #define PL310_H_ /** * PL310 - L2 Cache Controller register offsets. 
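 * (Register map as documented in the ARM L2C-310 TRM, DDI 0246.)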
* */ #define PL310_CACHE_ID 0x000 #define CACHE_ID_RELEASE_SHIFT 0 #define CACHE_ID_RELEASE_MASK 0x3f #define CACHE_ID_RELEASE_r0p0 0x00 #define CACHE_ID_RELEASE_r1p0 0x02 #define CACHE_ID_RELEASE_r2p0 0x04 #define CACHE_ID_RELEASE_r3p0 0x05 #define CACHE_ID_RELEASE_r3p1 0x06 #define CACHE_ID_RELEASE_r3p2 0x08 #define CACHE_ID_RELEASE_r3p3 0x09 #define CACHE_ID_PARTNUM_SHIFT 6 #define CACHE_ID_PARTNUM_MASK 0xf #define CACHE_ID_PARTNUM_VALUE 0x3 #define PL310_CACHE_TYPE 0x004 #define PL310_CTRL 0x100 #define CTRL_ENABLED 0x01 #define CTRL_DISABLED 0x00 #define PL310_AUX_CTRL 0x104 #define AUX_CTRL_MASK 0xc0000fff #define AUX_CTRL_ASSOCIATIVITY_SHIFT 16 #define AUX_CTRL_WAY_SIZE_SHIFT 17 #define AUX_CTRL_WAY_SIZE_MASK (0x7 << 17) #define AUX_CTRL_SHARE_OVERRIDE (1 << 22) #define AUX_CTRL_NS_LOCKDOWN (1 << 26) #define AUX_CTRL_NS_INT_CTRL (1 << 27) #define AUX_CTRL_DATA_PREFETCH (1 << 28) #define AUX_CTRL_INSTR_PREFETCH (1 << 29) #define AUX_CTRL_EARLY_BRESP (1 << 30) #define PL310_TAG_RAM_CTRL 0x108 #define PL310_DATA_RAM_CTRL 0x10C #define RAM_CTRL_WRITE_SHIFT 8 #define RAM_CTRL_WRITE_MASK (0x7 << 8) #define RAM_CTRL_READ_SHIFT 4 #define RAM_CTRL_READ_MASK (0x7 << 4) #define RAM_CTRL_SETUP_SHIFT 0 #define RAM_CTRL_SETUP_MASK (0x7 << 0) #define PL310_EVENT_COUNTER_CTRL 0x200 #define EVENT_COUNTER_CTRL_ENABLED (1 << 0) #define EVENT_COUNTER_CTRL_C0_RESET (1 << 1) #define EVENT_COUNTER_CTRL_C1_RESET (1 << 2) #define PL310_EVENT_COUNTER1_CONF 0x204 #define PL310_EVENT_COUNTER0_CONF 0x208 #define EVENT_COUNTER_CONF_NOINTR 0 #define EVENT_COUNTER_CONF_INCR 1 #define EVENT_COUNTER_CONF_OVFW 2 #define EVENT_COUNTER_CONF_NOEV (0 << 2) #define EVENT_COUNTER_CONF_CO (1 << 2) #define EVENT_COUNTER_CONF_DRHIT (2 << 2) #define EVENT_COUNTER_CONF_DRREQ (3 << 2) #define EVENT_COUNTER_CONF_DWHIT (4 << 2) #define EVENT_COUNTER_CONF_DWREQ (5 << 2) #define EVENT_COUNTER_CONF_DWTREQ (6 << 2) #define EVENT_COUNTER_CONF_DIRHIT (7 << 2) #define EVENT_COUNTER_CONF_DIRREQ (8 << 2) #define EVENT_COUNTER_CONF_WA (9 << 2) #define PL310_EVENT_COUNTER1_VAL 0x20C #define PL310_EVENT_COUNTER0_VAL 0x210 #define PL310_INTR_MASK 0x214 #define PL310_MASKED_INTR_STAT 0x218 #define PL310_RAW_INTR_STAT 0x21C #define PL310_INTR_CLEAR 0x220 #define INTR_MASK_ALL ((1 << 9) - 1) #define INTR_MASK_ECNTR (1 << 0) #define INTR_MASK_PARRT (1 << 1) #define INTR_MASK_PARRD (1 << 2) #define INTR_MASK_ERRWT (1 << 3) #define INTR_MASK_ERRWD (1 << 4) #define INTR_MASK_ERRRT (1 << 5) #define INTR_MASK_ERRRD (1 << 6) #define INTR_MASK_SLVERR (1 << 7) #define INTR_MASK_DECERR (1 << 8) #define PL310_CACHE_SYNC 0x730 #define PL310_INV_LINE_PA 0x770 #define PL310_INV_WAY 0x77C #define PL310_CLEAN_LINE_PA 0x7B0 #define PL310_CLEAN_LINE_IDX 0x7B8 #define PL310_CLEAN_WAY 0x7BC #define PL310_CLEAN_INV_LINE_PA 0x7F0 #define PL310_CLEAN_INV_LINE_IDX 0x7F8 #define PL310_CLEAN_INV_WAY 0x7FC #define PL310_LOCKDOWN_D_WAY(x) (0x900 + ((x) * 8)) #define PL310_LOCKDOWN_I_WAY(x) (0x904 + ((x) * 8)) #define PL310_LOCKDOWN_LINE_ENABLE 0x950 #define PL310_UNLOCK_ALL_LINES_WAY 0x954 #define PL310_ADDR_FILTER_STAR 0xC00 #define PL310_ADDR_FILTER_END 0xC04 #define PL310_DEBUG_CTRL 0xF40 #define DEBUG_CTRL_DISABLE_LINEFILL (1 << 0) #define DEBUG_CTRL_DISABLE_WRITEBACK (1 << 1) #define DEBUG_CTRL_SPNIDEN (1 << 2) #define PL310_PREFETCH_CTRL 0xF60 #define PREFETCH_CTRL_OFFSET_MASK (0x1f) #define PREFETCH_CTRL_NOTSAMEID (1 << 21) #define PREFETCH_CTRL_INCR_DL (1 << 23) #define PREFETCH_CTRL_PREFETCH_DROP (1 << 24) #define PREFETCH_CTRL_DL_ON_WRAP (1 << 27) #define 
PREFETCH_CTRL_DATA_PREFETCH (1 << 28)
#define PREFETCH_CTRL_INSTR_PREFETCH (1 << 29)
#define PREFETCH_CTRL_DL (1 << 30)
#define PL310_POWER_CTRL 0xF80
#define POWER_CTRL_ENABLE_GATING (1 << 0)
#define POWER_CTRL_ENABLE_STANDBY (1 << 1)

struct intr_config_hook;

struct pl310_softc {
	device_t	sc_dev;
	struct resource	*sc_mem_res;
	struct resource	*sc_irq_res;
	void*		sc_irq_h;
	int		sc_enabled;
	struct mtx	sc_mtx;
	u_int		sc_rtl_revision;
	struct intr_config_hook *sc_ich;
};

/**
 * pl310_read4 - read a 32-bit value from the PL310 registers
 * pl310_write4 - write a 32-bit value to the PL310 registers
 * @off: byte offset within the register set to read from
 * @val: the value to write into the register
 *
 * LOCKING:
 * None
 *
 * RETURNS:
 * Nothing for the write function; the read function returns the value read.
 */
static __inline uint32_t
pl310_read4(struct pl310_softc *sc, bus_size_t off)
{

	return bus_read_4(sc->sc_mem_res, off);
}

static __inline void
pl310_write4(struct pl310_softc *sc, bus_size_t off, uint32_t val)
{

	bus_write_4(sc->sc_mem_res, off, val);
}

-void pl310_print_config(struct pl310_softc *sc);
 void pl310_set_ram_latency(struct pl310_softc *sc, uint32_t which_reg,
    uint32_t read, uint32_t write, uint32_t setup);

void platform_pl310_init(struct pl310_softc *);
void platform_pl310_write_ctrl(struct pl310_softc *, uint32_t);
void platform_pl310_write_debug(struct pl310_softc *, uint32_t);

#endif /* PL310_H_ */

Index: projects/powernv/arm/samsung/exynos/exynos5_ehci.c
===================================================================
--- projects/powernv/arm/samsung/exynos/exynos5_ehci.c	(revision 290990)
+++ projects/powernv/arm/samsung/exynos/exynos5_ehci.c	(revision 290991)
@@ -1,394 +1,394 @@
/*-
 * Copyright (c) 2013-2014 Ruslan Bukin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
*/ #include __FBSDID("$FreeBSD$"); #include "opt_bus.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gpio_if.h" #include "opt_platform.h" /* GPIO control */ #define GPIO_OUTPUT 1 #define GPIO_INPUT 0 #define PIN_USB 161 /* SYSREG */ #define EXYNOS5_SYSREG_USB2_PHY 0x0 #define USB2_MODE_HOST 0x1 /* USB HOST */ #define HOST_CTRL_CLK_24MHZ (5 << 16) #define HOST_CTRL_CLK_MASK (7 << 16) #define HOST_CTRL_SIDDQ (1 << 6) #define HOST_CTRL_SLEEP (1 << 5) #define HOST_CTRL_SUSPEND (1 << 4) #define HOST_CTRL_RESET_LINK (1 << 1) #define HOST_CTRL_RESET_PHY (1 << 0) #define HOST_CTRL_RESET_PHY_ALL (1U << 31) /* Forward declarations */ static int exynos_ehci_attach(device_t dev); static int exynos_ehci_detach(device_t dev); static int exynos_ehci_probe(device_t dev); struct exynos_ehci_softc { device_t dev; ehci_softc_t base; struct resource *res[4]; bus_space_tag_t host_bst; bus_space_tag_t sysreg_bst; bus_space_handle_t host_bsh; bus_space_handle_t sysreg_bsh; }; static struct resource_spec exynos_ehci_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, { SYS_RES_MEMORY, 1, RF_ACTIVE }, { SYS_RES_MEMORY, 2, RF_ACTIVE }, { SYS_RES_IRQ, 0, RF_ACTIVE }, { -1, 0 } }; static device_method_t ehci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, exynos_ehci_probe), DEVMETHOD(device_attach, exynos_ehci_attach), DEVMETHOD(device_detach, exynos_ehci_detach), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), DEVMETHOD(device_shutdown, bus_generic_shutdown), /* Bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), { 0, 0 } }; /* kobj_class definition */ static driver_t ehci_driver = { "ehci", ehci_methods, - sizeof(ehci_softc_t) + sizeof(struct exynos_ehci_softc) }; static devclass_t ehci_devclass; DRIVER_MODULE(ehci, simplebus, ehci_driver, ehci_devclass, 0, 0); MODULE_DEPEND(ehci, usb, 1, 1, 1); /* * Public methods */ static int exynos_ehci_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_is_compatible(dev, "exynos,usb-ehci") == 0) return (ENXIO); device_set_desc(dev, "Exynos integrated USB controller"); return (BUS_PROBE_DEFAULT); } static int gpio_ctrl(struct exynos_ehci_softc *esc, int dir, int power) { device_t gpio_dev; /* Get the GPIO device, we need this to give power to USB */ gpio_dev = devclass_get_device(devclass_find("gpio"), 0); if (gpio_dev == NULL) { device_printf(esc->dev, "cant find gpio_dev\n"); return (1); } if (power) GPIO_PIN_SET(gpio_dev, PIN_USB, GPIO_PIN_HIGH); else GPIO_PIN_SET(gpio_dev, PIN_USB, GPIO_PIN_LOW); if (dir) GPIO_PIN_SETFLAGS(gpio_dev, PIN_USB, GPIO_PIN_OUTPUT); else GPIO_PIN_SETFLAGS(gpio_dev, PIN_USB, GPIO_PIN_INPUT); return (0); } static int reset_hsic_hub(struct exynos_ehci_softc *esc, phandle_t hub) { device_t gpio_dev; pcell_t pin; /* TODO: check that hub is compatible with "smsc,usb3503" */ if (!OF_hasprop(hub, "freebsd,reset-gpio")) { return (1); } if (OF_getencprop(hub, "freebsd,reset-gpio", &pin, sizeof(pin)) < 0) { device_printf(esc->dev, "failed to decode reset GPIO pin number for HSIC hub\n"); return (1); } /* Get the GPIO device, we need this to give power to USB */ gpio_dev = devclass_get_device(devclass_find("gpio"), 0); if (gpio_dev == NULL) { device_printf(esc->dev, "Cant find gpio device\n"); return (1); } GPIO_PIN_SET(gpio_dev, pin, GPIO_PIN_LOW); DELAY(100); GPIO_PIN_SET(gpio_dev, pin, 
GPIO_PIN_HIGH);

	return (0);
}

static int
phy_init(struct exynos_ehci_softc *esc)
{
	int reg;
	phandle_t hub;

	gpio_ctrl(esc, GPIO_INPUT, 1);

	/* set USB HOST mode */
	bus_space_write_4(esc->sysreg_bst, esc->sysreg_bsh,
	    EXYNOS5_SYSREG_USB2_PHY, USB2_MODE_HOST);

	/* Power ON phy */
	usb2_phy_power_on();

	reg = bus_space_read_4(esc->host_bst, esc->host_bsh, 0x0);
	reg &= ~(HOST_CTRL_CLK_MASK |
	    HOST_CTRL_RESET_PHY |
	    HOST_CTRL_RESET_PHY_ALL |
	    HOST_CTRL_SIDDQ |
	    HOST_CTRL_SUSPEND |
	    HOST_CTRL_SLEEP);
	reg |= (HOST_CTRL_CLK_24MHZ |
	    HOST_CTRL_RESET_LINK);
	bus_space_write_4(esc->host_bst, esc->host_bsh, 0x0, reg);

	DELAY(10);

	reg = bus_space_read_4(esc->host_bst, esc->host_bsh, 0x0);
	reg &= ~(HOST_CTRL_RESET_LINK);
	bus_space_write_4(esc->host_bst, esc->host_bsh, 0x0, reg);

	if ((hub = OF_finddevice("/hsichub")) != -1) {
		reset_hsic_hub(esc, hub);
	}

	gpio_ctrl(esc, GPIO_OUTPUT, 1);

	return (0);
}

static int
exynos_ehci_attach(device_t dev)
{
	struct exynos_ehci_softc *esc;
	ehci_softc_t *sc;
	bus_space_handle_t bsh;
	int err;

	esc = device_get_softc(dev);
	esc->dev = dev;

	sc = &esc->base;
	sc->sc_bus.parent = dev;
	sc->sc_bus.devices = sc->sc_devices;
	sc->sc_bus.devices_max = EHCI_MAX_DEVICES;
	sc->sc_bus.dma_bits = 32;

	if (bus_alloc_resources(dev, exynos_ehci_spec, esc->res)) {
		device_printf(dev, "could not allocate resources\n");
		return (ENXIO);
	}

	/* EHCI registers */
	sc->sc_io_tag = rman_get_bustag(esc->res[0]);
	bsh = rman_get_bushandle(esc->res[0]);
	sc->sc_io_size = rman_get_size(esc->res[0]);

	/* EHCI HOST ctrl registers */
	esc->host_bst = rman_get_bustag(esc->res[1]);
	esc->host_bsh = rman_get_bushandle(esc->res[1]);

	/* SYSREG */
	esc->sysreg_bst = rman_get_bustag(esc->res[2]);
	esc->sysreg_bsh = rman_get_bushandle(esc->res[2]);

	/* get all DMA memory */
	if (usb_bus_mem_alloc_all(&sc->sc_bus, USB_GET_DMA_TAG(dev),
	    &ehci_iterate_hw_softc))
		return (ENXIO);

	/*
	 * Set handle to USB related registers subregion used by
	 * generic EHCI driver.
	 */
	err = bus_space_subregion(sc->sc_io_tag, bsh, 0x0,
	    sc->sc_io_size, &sc->sc_io_hdl);
	if (err != 0)
		return (ENXIO);

	phy_init(esc);

	/* Setup interrupt handler */
	err = bus_setup_intr(dev, esc->res[3], INTR_TYPE_BIO | INTR_MPSAFE,
	    NULL, (driver_intr_t *)ehci_interrupt, sc, &sc->sc_intr_hdl);
	if (err) {
		device_printf(dev, "Could not setup irq, "
		    "%d\n", err);
		return (1);
	}

	/* Add USB device */
	sc->sc_bus.bdev = device_add_child(dev, "usbus", -1);
	if (!sc->sc_bus.bdev) {
		device_printf(dev, "Could not add USB device\n");
		err = bus_teardown_intr(dev, esc->res[3],
		    sc->sc_intr_hdl);
		if (err)
			device_printf(dev, "Could not tear down irq,"
			    " %d\n", err);
		return (1);
	}

	device_set_ivars(sc->sc_bus.bdev, &sc->sc_bus);
	strlcpy(sc->sc_vendor, "Samsung", sizeof(sc->sc_vendor));

	err = ehci_init(sc);
	if (!err) {
		sc->sc_flags |= EHCI_SCFLG_DONEINIT;
		err = device_probe_and_attach(sc->sc_bus.bdev);
	} else {
		device_printf(dev, "USB init failed err=%d\n", err);
		device_delete_child(dev, sc->sc_bus.bdev);
		sc->sc_bus.bdev = NULL;
		err = bus_teardown_intr(dev, esc->res[3],
		    sc->sc_intr_hdl);
		if (err)
			device_printf(dev, "Could not tear down irq,"
			    " %d\n", err);
		return (1);
	}

	return (0);
}

static int
exynos_ehci_detach(device_t dev)
{
	struct exynos_ehci_softc *esc;
	ehci_softc_t *sc;
	int err;

	esc = device_get_softc(dev);
	sc = &esc->base;

	/*
	 * only call ehci_detach() after ehci_init()
	 */
	if (sc->sc_flags & EHCI_SCFLG_DONEINIT) {
		ehci_detach(sc);
		sc->sc_flags &= ~EHCI_SCFLG_DONEINIT;
	}

	/*
	 * Disable interrupts that might have been switched on in
	 * ehci_init.
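	 * (ehci_init() unmasks these in the EHCI_USBINTR register; writing
	 * zero below masks every interrupt source again.)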
*/ if (sc->sc_io_tag && sc->sc_io_hdl) bus_space_write_4(sc->sc_io_tag, sc->sc_io_hdl, EHCI_USBINTR, 0); if (esc->res[3] && sc->sc_intr_hdl) { err = bus_teardown_intr(dev, esc->res[3], sc->sc_intr_hdl); if (err) { device_printf(dev, "Could not tear down irq," " %d\n", err); return (err); } sc->sc_intr_hdl = NULL; } if (sc->sc_bus.bdev) { device_delete_child(dev, sc->sc_bus.bdev); sc->sc_bus.bdev = NULL; } /* During module unload there are lots of children leftover */ device_delete_children(dev); bus_release_resources(dev, exynos_ehci_spec, esc->res); return (0); } Index: projects/powernv/arm/ti/am335x/am335x_lcd.c =================================================================== --- projects/powernv/arm/ti/am335x/am335x_lcd.c (revision 290990) +++ projects/powernv/arm/ti/am335x/am335x_lcd.c (revision 290991) @@ -1,1047 +1,1047 @@ /*- * Copyright 2013 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_syscons.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_SC #include #else /* VT */ #include #endif #include #include #include "am335x_lcd.h" #include "am335x_pwm.h" #include "fb_if.h" #include "hdmi_if.h" #define LCD_PID 0x00 #define LCD_CTRL 0x04 #define CTRL_DIV_MASK 0xff #define CTRL_DIV_SHIFT 8 #define CTRL_AUTO_UFLOW_RESTART (1 << 1) #define CTRL_RASTER_MODE 1 #define CTRL_LIDD_MODE 0 #define LCD_LIDD_CTRL 0x0C #define LCD_LIDD_CS0_CONF 0x10 #define LCD_LIDD_CS0_ADDR 0x14 #define LCD_LIDD_CS0_DATA 0x18 #define LCD_LIDD_CS1_CONF 0x1C #define LCD_LIDD_CS1_ADDR 0x20 #define LCD_LIDD_CS1_DATA 0x24 #define LCD_RASTER_CTRL 0x28 #define RASTER_CTRL_TFT24_UNPACKED (1 << 26) #define RASTER_CTRL_TFT24 (1 << 25) #define RASTER_CTRL_STN565 (1 << 24) #define RASTER_CTRL_TFTPMAP (1 << 23) #define RASTER_CTRL_NIBMODE (1 << 22) #define RASTER_CTRL_PALMODE_SHIFT 20 #define PALETTE_PALETTE_AND_DATA 0x00 #define PALETTE_PALETTE_ONLY 0x01 #define PALETTE_DATA_ONLY 0x02 #define RASTER_CTRL_REQDLY_SHIFT 12 #define RASTER_CTRL_MONO8B (1 << 9) #define RASTER_CTRL_RBORDER (1 << 8) #define RASTER_CTRL_LCDTFT (1 << 7) #define RASTER_CTRL_LCDBW (1 << 1) #define RASTER_CTRL_LCDEN (1 << 0) #define LCD_RASTER_TIMING_0 0x2C #define RASTER_TIMING_0_HBP_SHIFT 24 #define RASTER_TIMING_0_HFP_SHIFT 16 #define RASTER_TIMING_0_HSW_SHIFT 10 #define RASTER_TIMING_0_PPLLSB_SHIFT 4 #define RASTER_TIMING_0_PPLMSB_SHIFT 3 #define LCD_RASTER_TIMING_1 0x30 #define RASTER_TIMING_1_VBP_SHIFT 24 #define RASTER_TIMING_1_VFP_SHIFT 16 #define RASTER_TIMING_1_VSW_SHIFT 10 #define RASTER_TIMING_1_LPP_SHIFT 0 #define LCD_RASTER_TIMING_2 0x34 #define RASTER_TIMING_2_HSWHI_SHIFT 27 #define RASTER_TIMING_2_LPP_B10_SHIFT 26 #define RASTER_TIMING_2_PHSVS (1 << 25) #define RASTER_TIMING_2_PHSVS_RISE (1 << 24) #define RASTER_TIMING_2_PHSVS_FALL (0 << 24) #define RASTER_TIMING_2_IOE (1 << 23) #define RASTER_TIMING_2_IPC (1 << 22) #define RASTER_TIMING_2_IHS (1 << 21) #define RASTER_TIMING_2_IVS (1 << 20) #define RASTER_TIMING_2_ACBI_SHIFT 16 #define RASTER_TIMING_2_ACB_SHIFT 8 #define RASTER_TIMING_2_HBPHI_SHIFT 4 #define RASTER_TIMING_2_HFPHI_SHIFT 0 #define LCD_RASTER_SUBPANEL 0x38 #define LCD_RASTER_SUBPANEL2 0x3C #define LCD_LCDDMA_CTRL 0x40 #define LCDDMA_CTRL_DMA_MASTER_PRIO_SHIFT 16 #define LCDDMA_CTRL_TH_FIFO_RDY_SHIFT 8 #define LCDDMA_CTRL_BURST_SIZE_SHIFT 4 #define LCDDMA_CTRL_BYTES_SWAP (1 << 3) #define LCDDMA_CTRL_BE (1 << 1) #define LCDDMA_CTRL_FB0_ONLY 0 #define LCDDMA_CTRL_FB0_FB1 (1 << 0) #define LCD_LCDDMA_FB0_BASE 0x44 #define LCD_LCDDMA_FB0_CEILING 0x48 #define LCD_LCDDMA_FB1_BASE 0x4C #define LCD_LCDDMA_FB1_CEILING 0x50 #define LCD_SYSCONFIG 0x54 #define SYSCONFIG_STANDBY_FORCE (0 << 4) #define SYSCONFIG_STANDBY_NONE (1 << 4) #define SYSCONFIG_STANDBY_SMART (2 << 4) #define SYSCONFIG_IDLE_FORCE (0 << 2) #define SYSCONFIG_IDLE_NONE (1 << 2) #define SYSCONFIG_IDLE_SMART (2 << 2) #define LCD_IRQSTATUS_RAW 0x58 #define LCD_IRQSTATUS 0x5C #define LCD_IRQENABLE_SET 0x60 #define LCD_IRQENABLE_CLEAR 0x64 #define IRQ_EOF1 (1 << 9) #define IRQ_EOF0 (1 << 8) #define IRQ_PL (1 << 6) #define IRQ_FUF (1 << 5) #define IRQ_ACB (1 << 3) #define IRQ_SYNC_LOST (1 << 2) #define IRQ_RASTER_DONE (1 << 1) #define IRQ_FRAME_DONE (1 << 0) #define LCD_END_OF_INT_IND 0x68 #define LCD_CLKC_ENABLE 0x6C #define CLKC_ENABLE_DMA (1 
<< 2)
#define CLKC_ENABLE_LDID (1 << 1)
#define CLKC_ENABLE_CORE (1 << 0)
#define LCD_CLKC_RESET 0x70
#define CLKC_RESET_MAIN (1 << 3)
#define CLKC_RESET_DMA (1 << 2)
#define CLKC_RESET_LDID (1 << 1)
#define CLKC_RESET_CORE (1 << 0)

#define LCD_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx)
#define LCD_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx)
#define LCD_LOCK_INIT(_sc) mtx_init(&(_sc)->sc_mtx, \
    device_get_nameunit(_sc->sc_dev), "am335x_lcd", MTX_DEF)
#define LCD_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_mtx)

#define LCD_READ4(_sc, reg) bus_read_4((_sc)->sc_mem_res, reg)
#define LCD_WRITE4(_sc, reg, value) \
    bus_write_4((_sc)->sc_mem_res, reg, value)

/* Backlight is controlled by eCAP interface on PWM unit 0 */
#define PWM_UNIT 0
#define PWM_PERIOD 100

#define MODE_HBP(mode) ((mode)->htotal - (mode)->hsync_end)
#define MODE_HFP(mode) ((mode)->hsync_start - (mode)->hdisplay)
#define MODE_HSW(mode) ((mode)->hsync_end - (mode)->hsync_start)
#define MODE_VBP(mode) ((mode)->vtotal - (mode)->vsync_end)
#define MODE_VFP(mode) ((mode)->vsync_start - (mode)->vdisplay)
#define MODE_VSW(mode) ((mode)->vsync_end - (mode)->vsync_start)

#define MAX_PIXEL_CLOCK 126000
#define MAX_BANDWIDTH (1280*1024*60)

struct am335x_lcd_softc {
	device_t	sc_dev;
	struct fb_info	sc_fb_info;
	struct resource	*sc_mem_res;
	struct resource	*sc_irq_res;
	void		*sc_intr_hl;
	struct mtx	sc_mtx;
	int		sc_backlight;
	struct sysctl_oid *sc_oid;

	struct panel_info sc_panel;

	/* Framebuffer */
	bus_dma_tag_t	sc_dma_tag;
	bus_dmamap_t	sc_dma_map;
	size_t		sc_fb_size;
	bus_addr_t	sc_fb_phys;
	uint8_t		*sc_fb_base;

	/* HDMI framer */
	phandle_t	sc_hdmi_framer;
	eventhandler_tag sc_hdmi_evh;
};

static void
am335x_fb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
{
	bus_addr_t *addr;

	if (err)
		return;

	addr = (bus_addr_t*)arg;
	*addr = segs[0].ds_addr;
}

static uint32_t
am335x_lcd_calc_divisor(uint32_t reference, uint32_t freq)
{
	uint32_t div, i;
	uint32_t delta, min_delta;

	min_delta = freq;
	div = 255;

	/* Raster mode case: divisors are in range from 2 to 255 */
	for (i = 2; i < 255; i++) {
		delta = abs(reference/i - freq);
		if (delta < min_delta) {
			div = i;
			min_delta = delta;
		}
	}

	return (div);
}

static int
am335x_lcd_sysctl_backlight(SYSCTL_HANDLER_ARGS)
{
	struct am335x_lcd_softc *sc = (struct am335x_lcd_softc*)arg1;
	int error;
	int backlight;

	backlight = sc->sc_backlight;
	error = sysctl_handle_int(oidp, &backlight, 0, req);

	if (error != 0 || req->newptr == NULL)
		return (error);

	if (backlight < 0)
		backlight = 0;
	if (backlight > 100)
		backlight = 100;

	LCD_LOCK(sc);
	error = am335x_pwm_config_ecap(PWM_UNIT, PWM_PERIOD,
	    backlight*PWM_PERIOD/100);
	if (error == 0)
		sc->sc_backlight = backlight;
	LCD_UNLOCK(sc);

	return (error);
}

static uint32_t
am335x_mode_vrefresh(const struct videomode *mode)
{
	uint32_t refresh;

	/* Calculate vertical refresh rate */
	refresh = (mode->dot_clock * 1000 / mode->htotal);
	refresh = (refresh + mode->vtotal / 2) / mode->vtotal;

	if (mode->flags & VID_INTERLACE)
		refresh *= 2;
	if (mode->flags & VID_DBLSCAN)
		refresh /= 2;

	return refresh;
}

static int
am335x_mode_is_valid(const struct videomode *mode)
{
	uint32_t hbp, hfp, hsw;
	uint32_t vbp, vfp, vsw;

	if (mode->dot_clock > MAX_PIXEL_CLOCK)
		return (0);

	if (mode->hdisplay & 0xf)
		return (0);

	if (mode->vdisplay > 2048)
		return (0);

	/* Check ranges for timing parameters */
	hbp = MODE_HBP(mode) - 1;
	hfp = MODE_HFP(mode) - 1;
	hsw = MODE_HSW(mode) - 1;
	vbp = MODE_VBP(mode);
	vfp = MODE_VFP(mode);
	vsw = MODE_VSW(mode) - 1;

	if (hbp > 0x3ff)
		return (0);
	if (hfp > 0x3ff)
		return (0);
	if (hsw > 0x3ff)
		return (0);
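
	/*
	 * The three horizontal fields above live in 10-bit registers; the
	 * vertical porches below are 8 bits wide and the vsync width only
	 * 6 bits, hence the different limits.
	 */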
if (vbp > 0xff) return (0); if (vfp > 0xff) return (0); if (vsw > 0x3f) return (0); if (mode->vdisplay*mode->hdisplay*am335x_mode_vrefresh(mode) > MAX_BANDWIDTH) return (0); return (1); } static void am335x_read_hdmi_property(device_t dev) { phandle_t node; phandle_t hdmi_xref; struct am335x_lcd_softc *sc; sc = device_get_softc(dev); node = ofw_bus_get_node(dev); if (OF_getencprop(node, "hdmi", &hdmi_xref, sizeof(hdmi_xref)) == -1) sc->sc_hdmi_framer = 0; else sc->sc_hdmi_framer = hdmi_xref; } static int am335x_read_property(device_t dev, phandle_t node, const char *name, uint32_t *val) { pcell_t cell; if ((OF_getprop(node, name, &cell, sizeof(cell))) <= 0) { device_printf(dev, "missing '%s' attribute in LCD panel info\n", name); return (ENXIO); } *val = fdt32_to_cpu(cell); return (0); } static int am335x_read_timing(device_t dev, phandle_t node, struct panel_info *panel) { int error; phandle_t timings_node, timing_node, native; timings_node = ofw_bus_find_child(node, "display-timings"); if (timings_node == 0) { device_printf(dev, "no \"display-timings\" node\n"); return (-1); } if (OF_searchencprop(timings_node, "native-mode", &native, sizeof(native)) == -1) { device_printf(dev, "no \"native-mode\" reference in \"timings\" node\n"); return (-1); } timing_node = OF_node_from_xref(native); error = 0; if ((error = am335x_read_property(dev, timing_node, "hactive", &panel->panel_width))) goto out; if ((error = am335x_read_property(dev, timing_node, "vactive", &panel->panel_height))) goto out; if ((error = am335x_read_property(dev, timing_node, "hfront-porch", &panel->panel_hfp))) goto out; if ((error = am335x_read_property(dev, timing_node, "hback-porch", &panel->panel_hbp))) goto out; if ((error = am335x_read_property(dev, timing_node, "hsync-len", &panel->panel_hsw))) goto out; if ((error = am335x_read_property(dev, timing_node, "vfront-porch", &panel->panel_vfp))) goto out; if ((error = am335x_read_property(dev, timing_node, "vback-porch", &panel->panel_vbp))) goto out; if ((error = am335x_read_property(dev, timing_node, "vsync-len", &panel->panel_vsw))) goto out; if ((error = am335x_read_property(dev, timing_node, "clock-frequency", &panel->panel_pxl_clk))) goto out; if ((error = am335x_read_property(dev, timing_node, "pixelclk-active", &panel->pixelclk_active))) goto out; if ((error = am335x_read_property(dev, timing_node, "hsync-active", &panel->hsync_active))) goto out; if ((error = am335x_read_property(dev, timing_node, "vsync-active", &panel->vsync_active))) goto out; out: return (error); } static int am335x_read_panel_info(device_t dev, phandle_t node, struct panel_info *panel) { phandle_t panel_info_node; panel_info_node = ofw_bus_find_child(node, "panel-info"); if (panel_info_node == 0) return (-1); am335x_read_property(dev, panel_info_node, "ac-bias", &panel->ac_bias); am335x_read_property(dev, panel_info_node, "ac-bias-intrpt", &panel->ac_bias_intrpt); am335x_read_property(dev, panel_info_node, "dma-burst-sz", &panel->dma_burst_sz); am335x_read_property(dev, panel_info_node, "bpp", &panel->bpp); am335x_read_property(dev, panel_info_node, "fdd", &panel->fdd); am335x_read_property(dev, panel_info_node, "sync-edge", &panel->sync_edge); am335x_read_property(dev, panel_info_node, "sync-ctrl", &panel->sync_ctrl); return (0); } static void am335x_lcd_intr(void *arg) { struct am335x_lcd_softc *sc = arg; uint32_t reg; reg = LCD_READ4(sc, LCD_IRQSTATUS); LCD_WRITE4(sc, LCD_IRQSTATUS, reg); /* Read value back to make sure it reached the hardware */ reg = LCD_READ4(sc, LCD_IRQSTATUS); if 
(reg & IRQ_SYNC_LOST) { reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg &= ~RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg |= RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); goto done; } if (reg & IRQ_PL) { reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg &= ~RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg |= RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); goto done; } if (reg & IRQ_EOF0) { LCD_WRITE4(sc, LCD_LCDDMA_FB0_BASE, sc->sc_fb_phys); LCD_WRITE4(sc, LCD_LCDDMA_FB0_CEILING, sc->sc_fb_phys + sc->sc_fb_size - 1); reg &= ~IRQ_EOF0; } if (reg & IRQ_EOF1) { LCD_WRITE4(sc, LCD_LCDDMA_FB1_BASE, sc->sc_fb_phys); LCD_WRITE4(sc, LCD_LCDDMA_FB1_CEILING, sc->sc_fb_phys + sc->sc_fb_size - 1); reg &= ~IRQ_EOF1; } if (reg & IRQ_FUF) { /* TODO: Handle FUF */ } if (reg & IRQ_ACB) { /* TODO: Handle ACB */ } done: LCD_WRITE4(sc, LCD_END_OF_INT_IND, 0); /* Read value back to make sure it reached the hardware */ reg = LCD_READ4(sc, LCD_END_OF_INT_IND); } static const struct videomode * am335x_lcd_pick_mode(struct edid_info *ei) { const struct videomode *videomode; const struct videomode *m; int n; /* Nothing picked yet; standard VGA is the later fallback */ videomode = NULL; /* * Pick a mode. */ if (ei->edid_preferred_mode != NULL) { if (am335x_mode_is_valid(ei->edid_preferred_mode)) videomode = ei->edid_preferred_mode; } if (videomode == NULL) { m = ei->edid_modes; sort_modes(ei->edid_modes, &ei->edid_preferred_mode, ei->edid_nmodes); for (n = 0; n < ei->edid_nmodes; n++) if (am335x_mode_is_valid(&m[n])) { videomode = &m[n]; break; } } return videomode; } static int am335x_lcd_configure(struct am335x_lcd_softc *sc) { int div; uint32_t reg, timing0, timing1, timing2; uint32_t burst_log; size_t dma_size; uint32_t hbp, hfp, hsw; uint32_t vbp, vfp, vsw; uint32_t width, height; unsigned int ref_freq; int err; /* * Try to adjust the clock to get double the requested frequency; * HDMI/DVI displays are very sensitive to error in the frequency value. */ if (ti_prcm_clk_set_source_freq(LCDC_CLK, sc->sc_panel.panel_pxl_clk*2)) { device_printf(sc->sc_dev, "can't set source frequency\n"); return (ENXIO); } if (ti_prcm_clk_get_source_freq(LCDC_CLK, &ref_freq)) { device_printf(sc->sc_dev, "can't get reference frequency\n"); return (ENXIO); } /* Panel initialization */ dma_size = round_page(sc->sc_panel.panel_width*sc->sc_panel.panel_height*sc->sc_panel.bpp/8); /* * Now allocate framebuffer memory */ err = bus_dma_tag_create( bus_get_dma_tag(sc->sc_dev), 4, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dma_size, 1, /* maxsize, nsegments */ dma_size, 0, /* maxsegsize, flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->sc_dma_tag); if (err) goto done; err = bus_dmamem_alloc(sc->sc_dma_tag, (void **)&sc->sc_fb_base, BUS_DMA_COHERENT, &sc->sc_dma_map); if (err) { device_printf(sc->sc_dev, "cannot allocate framebuffer\n"); goto done; } err = bus_dmamap_load(sc->sc_dma_tag, sc->sc_dma_map, sc->sc_fb_base, dma_size, am335x_fb_dmamap_cb, &sc->sc_fb_phys, BUS_DMA_NOWAIT); if (err) { device_printf(sc->sc_dev, "cannot load DMA map\n"); goto done; } /* Make sure it's blank */ memset(sc->sc_fb_base, 0x0, dma_size); /* Calculate actual FB Size */ sc->sc_fb_size = sc->sc_panel.panel_width*sc->sc_panel.panel_height*sc->sc_panel.bpp/8; /* Only raster mode is supported */ reg = CTRL_RASTER_MODE; div = am335x_lcd_calc_divisor(ref_freq, sc->sc_panel.panel_pxl_clk); reg |= (div <<
CTRL_DIV_SHIFT); LCD_WRITE4(sc, LCD_CTRL, reg); /* Set timing */ timing0 = timing1 = timing2 = 0; hbp = sc->sc_panel.panel_hbp - 1; hfp = sc->sc_panel.panel_hfp - 1; hsw = sc->sc_panel.panel_hsw - 1; vbp = sc->sc_panel.panel_vbp; vfp = sc->sc_panel.panel_vfp; vsw = sc->sc_panel.panel_vsw - 1; height = sc->sc_panel.panel_height - 1; width = sc->sc_panel.panel_width - 1; /* Horizontal back porch */ timing0 |= (hbp & 0xff) << RASTER_TIMING_0_HBP_SHIFT; timing2 |= ((hbp >> 8) & 3) << RASTER_TIMING_2_HBPHI_SHIFT; /* Horizontal front porch */ timing0 |= (hfp & 0xff) << RASTER_TIMING_0_HFP_SHIFT; timing2 |= ((hfp >> 8) & 3) << RASTER_TIMING_2_HFPHI_SHIFT; /* Horizontal sync width */ timing0 |= (hsw & 0x3f) << RASTER_TIMING_0_HSW_SHIFT; timing2 |= ((hsw >> 6) & 0xf) << RASTER_TIMING_2_HSWHI_SHIFT; /* Vertical back porch, front porch, sync width */ timing1 |= (vbp & 0xff) << RASTER_TIMING_1_VBP_SHIFT; timing1 |= (vfp & 0xff) << RASTER_TIMING_1_VFP_SHIFT; timing1 |= (vsw & 0x3f) << RASTER_TIMING_1_VSW_SHIFT; /* Pixels per line */ timing0 |= ((width >> 10) & 1) << RASTER_TIMING_0_PPLMSB_SHIFT; timing0 |= ((width >> 4) & 0x3f) << RASTER_TIMING_0_PPLLSB_SHIFT; /* Lines per panel */ timing1 |= (height & 0x3ff) << RASTER_TIMING_1_LPP_SHIFT; timing2 |= ((height >> 10 ) & 1) << RASTER_TIMING_2_LPP_B10_SHIFT; /* clock signal settings */ if (sc->sc_panel.sync_ctrl) timing2 |= RASTER_TIMING_2_PHSVS; if (sc->sc_panel.sync_edge) timing2 |= RASTER_TIMING_2_PHSVS_RISE; else timing2 |= RASTER_TIMING_2_PHSVS_FALL; if (sc->sc_panel.hsync_active == 0) timing2 |= RASTER_TIMING_2_IHS; if (sc->sc_panel.vsync_active == 0) timing2 |= RASTER_TIMING_2_IVS; if (sc->sc_panel.pixelclk_active == 0) timing2 |= RASTER_TIMING_2_IPC; /* AC bias */ timing2 |= (sc->sc_panel.ac_bias << RASTER_TIMING_2_ACB_SHIFT); timing2 |= (sc->sc_panel.ac_bias_intrpt << RASTER_TIMING_2_ACBI_SHIFT); LCD_WRITE4(sc, LCD_RASTER_TIMING_0, timing0); LCD_WRITE4(sc, LCD_RASTER_TIMING_1, timing1); LCD_WRITE4(sc, LCD_RASTER_TIMING_2, timing2); /* DMA settings */ reg = LCDDMA_CTRL_FB0_FB1; /* Find power of 2 for current burst size */ switch (sc->sc_panel.dma_burst_sz) { case 1: burst_log = 0; break; case 2: burst_log = 1; break; case 4: burst_log = 2; break; case 8: burst_log = 3; break; case 16: default: burst_log = 4; break; } reg |= (burst_log << LCDDMA_CTRL_BURST_SIZE_SHIFT); /* XXX: FIFO TH */ reg |= (0 << LCDDMA_CTRL_TH_FIFO_RDY_SHIFT); LCD_WRITE4(sc, LCD_LCDDMA_CTRL, reg); LCD_WRITE4(sc, LCD_LCDDMA_FB0_BASE, sc->sc_fb_phys); LCD_WRITE4(sc, LCD_LCDDMA_FB0_CEILING, sc->sc_fb_phys + sc->sc_fb_size - 1); LCD_WRITE4(sc, LCD_LCDDMA_FB1_BASE, sc->sc_fb_phys); LCD_WRITE4(sc, LCD_LCDDMA_FB1_CEILING, sc->sc_fb_phys + sc->sc_fb_size - 1); /* Enable LCD */ reg = RASTER_CTRL_LCDTFT; reg |= (sc->sc_panel.fdd << RASTER_CTRL_REQDLY_SHIFT); reg |= (PALETTE_DATA_ONLY << RASTER_CTRL_PALMODE_SHIFT); if (sc->sc_panel.bpp >= 24) reg |= RASTER_CTRL_TFT24; if (sc->sc_panel.bpp == 32) reg |= RASTER_CTRL_TFT24_UNPACKED; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); LCD_WRITE4(sc, LCD_CLKC_ENABLE, CLKC_ENABLE_DMA | CLKC_ENABLE_LDID | CLKC_ENABLE_CORE); LCD_WRITE4(sc, LCD_CLKC_RESET, CLKC_RESET_MAIN); DELAY(100); LCD_WRITE4(sc, LCD_CLKC_RESET, 0); reg = IRQ_EOF1 | IRQ_EOF0 | IRQ_FUF | IRQ_PL | IRQ_ACB | IRQ_SYNC_LOST | IRQ_RASTER_DONE | IRQ_FRAME_DONE; LCD_WRITE4(sc, LCD_IRQENABLE_SET, reg); reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg |= RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); LCD_WRITE4(sc, LCD_SYSCONFIG, SYSCONFIG_STANDBY_SMART | SYSCONFIG_IDLE_SMART); 
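/*
 * Note: FB0 and FB1 were pointed at the same physical range above, so the
 * controller scans out of a single buffer; the EOF0/EOF1 handlers in
 * am335x_lcd_intr() simply re-arm the same base/ceiling pair.
 */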
sc->sc_fb_info.fb_name = device_get_nameunit(sc->sc_dev); sc->sc_fb_info.fb_vbase = (intptr_t)sc->sc_fb_base; sc->sc_fb_info.fb_pbase = sc->sc_fb_phys; sc->sc_fb_info.fb_size = sc->sc_fb_size; sc->sc_fb_info.fb_bpp = sc->sc_fb_info.fb_depth = sc->sc_panel.bpp; sc->sc_fb_info.fb_stride = sc->sc_panel.panel_width*sc->sc_panel.bpp / 8; sc->sc_fb_info.fb_width = sc->sc_panel.panel_width; sc->sc_fb_info.fb_height = sc->sc_panel.panel_height; #ifdef DEV_SC err = (sc_attach_unit(device_get_unit(sc->sc_dev), device_get_flags(sc->sc_dev) | SC_AUTODETECT_KBD)); if (err) { device_printf(sc->sc_dev, "failed to attach syscons\n"); goto done; } am335x_lcd_syscons_setup((vm_offset_t)sc->sc_fb_base, sc->sc_fb_phys, &sc->sc_panel); #else /* VT */ device_t fbd = device_add_child(sc->sc_dev, "fbd", device_get_unit(sc->sc_dev)); if (fbd != NULL) { if (device_probe_and_attach(fbd) != 0) device_printf(sc->sc_dev, "failed to attach fbd device\n"); } else device_printf(sc->sc_dev, "failed to add fbd child\n"); #endif done: return (err); } static void -am335x_lcd_hdmi_event(void *arg) +am335x_lcd_hdmi_event(void *arg, device_t hdmi, int event) { struct am335x_lcd_softc *sc; const struct videomode *videomode; struct videomode hdmi_mode; device_t hdmi_dev; uint8_t *edid; uint32_t edid_len; struct edid_info ei; sc = arg; /* Nothing to work with */ if (!sc->sc_hdmi_framer) { device_printf(sc->sc_dev, "HDMI event without HDMI framer set\n"); return; } hdmi_dev = OF_device_from_xref(sc->sc_hdmi_framer); if (!hdmi_dev) { device_printf(sc->sc_dev, "no actual device for \"hdmi\" property\n"); return; } edid = NULL; edid_len = 0; if (HDMI_GET_EDID(hdmi_dev, &edid, &edid_len) != 0) { device_printf(sc->sc_dev, "failed to get EDID info from HDMI framer\n"); return; } videomode = NULL; if (edid_parse(edid, &ei) == 0) { edid_print(&ei); videomode = am335x_lcd_pick_mode(&ei); } else device_printf(sc->sc_dev, "failed to parse EDID\n"); /* Use standard VGA as fallback */ if (videomode == NULL) videomode = pick_mode_by_ref(640, 480, 60); if (videomode == NULL) { device_printf(sc->sc_dev, "failed to find usable videomode\n"); return; } device_printf(sc->sc_dev, "detected videomode: %dx%d @ %dHz\n", videomode->hdisplay, videomode->vdisplay, am335x_mode_vrefresh(videomode)); sc->sc_panel.panel_width = videomode->hdisplay; sc->sc_panel.panel_height = videomode->vdisplay; sc->sc_panel.panel_hfp = videomode->hsync_start - videomode->hdisplay; sc->sc_panel.panel_hbp = videomode->htotal - videomode->hsync_end; sc->sc_panel.panel_hsw = videomode->hsync_end - videomode->hsync_start; sc->sc_panel.panel_vfp = videomode->vsync_start - videomode->vdisplay; sc->sc_panel.panel_vbp = videomode->vtotal - videomode->vsync_end; sc->sc_panel.panel_vsw = videomode->vsync_end - videomode->vsync_start; sc->sc_panel.pixelclk_active = 1; /* The polarity logic for HSYNC is reversed */ if (videomode->flags & VID_NHSYNC) sc->sc_panel.hsync_active = 1; else sc->sc_panel.hsync_active = 0; if (videomode->flags & VID_NVSYNC) sc->sc_panel.vsync_active = 0; else sc->sc_panel.vsync_active = 1; sc->sc_panel.panel_pxl_clk = videomode->dot_clock * 1000; am335x_lcd_configure(sc); memcpy(&hdmi_mode, videomode, sizeof(hdmi_mode)); hdmi_mode.hskew = videomode->hsync_end - videomode->hsync_start; hdmi_mode.flags |= VID_HSKEW; HDMI_SET_VIDEOMODE(hdmi_dev, &hdmi_mode); } static int am335x_lcd_probe(device_t dev) { #ifdef DEV_SC int err; #endif if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "ti,am33xx-tilcdc")) return (ENXIO); device_set_desc(dev, "AM335x LCD
controller"); #ifdef DEV_SC err = sc_probe_unit(device_get_unit(dev), device_get_flags(dev) | SC_AUTODETECT_KBD); if (err != 0) return (err); #endif return (BUS_PROBE_DEFAULT); } static int am335x_lcd_attach(device_t dev) { struct am335x_lcd_softc *sc; int err; int rid; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; phandle_t root, panel_node; err = 0; sc = device_get_softc(dev); sc->sc_dev = dev; am335x_read_hdmi_property(dev); root = OF_finddevice("/"); if (root == 0) { device_printf(dev, "failed to get FDT root node\n"); return (ENXIO); } sc->sc_panel.ac_bias = 255; sc->sc_panel.ac_bias_intrpt = 0; sc->sc_panel.dma_burst_sz = 16; sc->sc_panel.bpp = 16; sc->sc_panel.fdd = 128; sc->sc_panel.sync_edge = 0; sc->sc_panel.sync_ctrl = 1; panel_node = fdt_find_compatible(root, "ti,tilcdc,panel", 1); if (panel_node != 0) { device_printf(dev, "using static panel info\n"); if (am335x_read_panel_info(dev, panel_node, &sc->sc_panel)) { device_printf(dev, "failed to read panel info\n"); return (ENXIO); } if (am335x_read_timing(dev, panel_node, &sc->sc_panel)) { device_printf(dev, "failed to read timings\n"); return (ENXIO); } } ti_prcm_clk_enable(LCDC_CLK); rid = 0; sc->sc_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!sc->sc_mem_res) { device_printf(dev, "cannot allocate memory window\n"); return (ENXIO); } rid = 0; sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (!sc->sc_irq_res) { bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem_res); device_printf(dev, "cannot allocate interrupt\n"); return (ENXIO); } if (bus_setup_intr(dev, sc->sc_irq_res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, am335x_lcd_intr, sc, &sc->sc_intr_hl) != 0) { bus_release_resource(dev, SYS_RES_IRQ, rid, sc->sc_irq_res); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->sc_mem_res); device_printf(dev, "Unable to setup the irq handler.\n"); return (ENXIO); } LCD_LOCK_INIT(sc); /* Init backlight interface */ ctx = device_get_sysctl_ctx(sc->sc_dev); tree = device_get_sysctl_tree(sc->sc_dev); sc->sc_oid = SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "backlight", CTLTYPE_INT | CTLFLAG_RW, sc, 0, am335x_lcd_sysctl_backlight, "I", "LCD backlight"); sc->sc_backlight = 0; /* Check if eCAS interface is available at this point */ if (am335x_pwm_config_ecap(PWM_UNIT, PWM_PERIOD, PWM_PERIOD) == 0) sc->sc_backlight = 100; if (panel_node != 0) am335x_lcd_configure(sc); else sc->sc_hdmi_evh = EVENTHANDLER_REGISTER(hdmi_event, - am335x_lcd_hdmi_event, sc, 0); + am335x_lcd_hdmi_event, sc, EVENTHANDLER_PRI_ANY); return (0); } static int am335x_lcd_detach(device_t dev) { /* Do not let unload driver */ return (EBUSY); } static struct fb_info * am335x_lcd_fb_getinfo(device_t dev) { struct am335x_lcd_softc *sc; sc = device_get_softc(dev); return (&sc->sc_fb_info); } static device_method_t am335x_lcd_methods[] = { DEVMETHOD(device_probe, am335x_lcd_probe), DEVMETHOD(device_attach, am335x_lcd_attach), DEVMETHOD(device_detach, am335x_lcd_detach), /* Framebuffer service methods */ DEVMETHOD(fb_getinfo, am335x_lcd_fb_getinfo), DEVMETHOD_END }; static driver_t am335x_lcd_driver = { "fb", am335x_lcd_methods, sizeof(struct am335x_lcd_softc), }; static devclass_t am335x_lcd_devclass; DRIVER_MODULE(am335x_lcd, simplebus, am335x_lcd_driver, am335x_lcd_devclass, 0, 0); MODULE_VERSION(am335x_lcd, 1); MODULE_DEPEND(am335x_lcd, simplebus, 1, 1, 1); Index: projects/powernv/arm/ti/am335x/tda19988.c =================================================================== --- 
projects/powernv/arm/ti/am335x/tda19988.c (revision 290990) +++ projects/powernv/arm/ti/am335x/tda19988.c (revision 290991) @@ -1,809 +1,809 @@ /*- * Copyright (c) 2015 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * NXP TDA19988 HDMI encoder */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "iicbus_if.h" #include "hdmi_if.h" #define MKREG(page, addr) (((page) << 8) | (addr)) #define REGPAGE(reg) (((reg) >> 8) & 0xff) #define REGADDR(reg) ((reg) & 0xff) #define TDA_VERSION MKREG(0x00, 0x00) #define TDA_MAIN_CNTRL0 MKREG(0x00, 0x01) #define MAIN_CNTRL0_SR (1 << 0) #define TDA_VERSION_MSB MKREG(0x00, 0x02) #define TDA_SOFTRESET MKREG(0x00, 0x0a) #define SOFTRESET_I2C (1 << 1) #define SOFTRESET_AUDIO (1 << 0) #define TDA_DDC_CTRL MKREG(0x00, 0x0b) #define DDC_ENABLE 0 #define TDA_CCLK MKREG(0x00, 0x0c) #define CCLK_ENABLE 1 #define TDA_INT_FLAGS_2 MKREG(0x00, 0x11) #define INT_FLAGS_2_EDID_BLK_RD (1 << 1) #define TDA_VIP_CNTRL_0 MKREG(0x00, 0x20) #define TDA_VIP_CNTRL_1 MKREG(0x00, 0x21) #define TDA_VIP_CNTRL_2 MKREG(0x00, 0x22) #define TDA_VIP_CNTRL_3 MKREG(0x00, 0x23) #define VIP_CNTRL_3_SYNC_HS (2 << 4) #define VIP_CNTRL_3_V_TGL (1 << 2) #define VIP_CNTRL_3_H_TGL (1 << 1) #define TDA_VIP_CNTRL_4 MKREG(0x00, 0x24) #define VIP_CNTRL_4_BLANKIT_NDE (0 << 2) #define VIP_CNTRL_4_BLANKIT_HS_VS (1 << 2) #define VIP_CNTRL_4_BLANKIT_NHS_VS (2 << 2) #define VIP_CNTRL_4_BLANKIT_HE_VE (3 << 2) #define VIP_CNTRL_4_BLC_NONE (0 << 0) #define VIP_CNTRL_4_BLC_RGB444 (1 << 0) #define VIP_CNTRL_4_BLC_YUV444 (2 << 0) #define VIP_CNTRL_4_BLC_YUV422 (3 << 0) #define TDA_VIP_CNTRL_5 MKREG(0x00, 0x25) #define VIP_CNTRL_5_SP_CNT(n) (((n) & 3) << 1) #define TDA_MUX_VP_VIP_OUT MKREG(0x00, 0x27) #define TDA_MAT_CONTRL MKREG(0x00, 0x80) #define MAT_CONTRL_MAT_BP (1 << 2) #define TDA_VIDFORMAT MKREG(0x00, 0xa0) #define TDA_REFPIX_MSB MKREG(0x00, 0xa1) #define TDA_REFPIX_LSB MKREG(0x00, 0xa2) #define TDA_REFLINE_MSB MKREG(0x00, 0xa3) #define TDA_REFLINE_LSB MKREG(0x00, 0xa4) #define TDA_NPIX_MSB MKREG(0x00, 0xa5) #define TDA_NPIX_LSB MKREG(0x00, 0xa6) #define TDA_NLINE_MSB MKREG(0x00, 0xa7) #define TDA_NLINE_LSB MKREG(0x00, 0xa8) 
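/*
 * A note on this register map: the TDA19988 exposes its registers in
 * pages, so MKREG() packs the page number into bits 15:8 and the in-page
 * offset into bits 7:0, and REGPAGE()/REGADDR() split them apart again.
 * For example, TDA_PLL_SERIAL_2 is MKREG(0x02, 0x01) == 0x0201: the
 * accessors below first select page 0x02 via TDA_CURPAGE_ADDR (when
 * sc_current_page differs) and then address register 0x01 in that page.
 */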
#define TDA_VS_LINE_STRT_1_MSB MKREG(0x00, 0xa9) #define TDA_VS_LINE_STRT_1_LSB MKREG(0x00, 0xaa) #define TDA_VS_PIX_STRT_1_MSB MKREG(0x00, 0xab) #define TDA_VS_PIX_STRT_1_LSB MKREG(0x00, 0xac) #define TDA_VS_LINE_END_1_MSB MKREG(0x00, 0xad) #define TDA_VS_LINE_END_1_LSB MKREG(0x00, 0xae) #define TDA_VS_PIX_END_1_MSB MKREG(0x00, 0xaf) #define TDA_VS_PIX_END_1_LSB MKREG(0x00, 0xb0) #define TDA_VS_LINE_STRT_2_MSB MKREG(0x00, 0xb1) #define TDA_VS_LINE_STRT_2_LSB MKREG(0x00, 0xb2) #define TDA_VS_PIX_STRT_2_MSB MKREG(0x00, 0xb3) #define TDA_VS_PIX_STRT_2_LSB MKREG(0x00, 0xb4) #define TDA_VS_LINE_END_2_MSB MKREG(0x00, 0xb5) #define TDA_VS_LINE_END_2_LSB MKREG(0x00, 0xb6) #define TDA_VS_PIX_END_2_MSB MKREG(0x00, 0xb7) #define TDA_VS_PIX_END_2_LSB MKREG(0x00, 0xb8) #define TDA_HS_PIX_START_MSB MKREG(0x00, 0xb9) #define TDA_HS_PIX_START_LSB MKREG(0x00, 0xba) #define TDA_HS_PIX_STOP_MSB MKREG(0x00, 0xbb) #define TDA_HS_PIX_STOP_LSB MKREG(0x00, 0xbc) #define TDA_VWIN_START_1_MSB MKREG(0x00, 0xbd) #define TDA_VWIN_START_1_LSB MKREG(0x00, 0xbe) #define TDA_VWIN_END_1_MSB MKREG(0x00, 0xbf) #define TDA_VWIN_END_1_LSB MKREG(0x00, 0xc0) #define TDA_VWIN_START_2_MSB MKREG(0x00, 0xc1) #define TDA_VWIN_START_2_LSB MKREG(0x00, 0xc2) #define TDA_VWIN_END_2_MSB MKREG(0x00, 0xc3) #define TDA_VWIN_END_2_LSB MKREG(0x00, 0xc4) #define TDA_DE_START_MSB MKREG(0x00, 0xc5) #define TDA_DE_START_LSB MKREG(0x00, 0xc6) #define TDA_DE_STOP_MSB MKREG(0x00, 0xc7) #define TDA_DE_STOP_LSB MKREG(0x00, 0xc8) #define TDA_TBG_CNTRL_0 MKREG(0x00, 0xca) #define TBG_CNTRL_0_SYNC_ONCE (1 << 7) #define TBG_CNTRL_0_SYNC_MTHD (1 << 6) #define TDA_TBG_CNTRL_1 MKREG(0x00, 0xcb) #define TBG_CNTRL_1_DWIN_DIS (1 << 6) #define TBG_CNTRL_1_TGL_EN (1 << 2) #define TBG_CNTRL_1_V_TGL (1 << 1) #define TBG_CNTRL_1_H_TGL (1 << 0) #define TDA_HVF_CNTRL_0 MKREG(0x00, 0xe4) #define HVF_CNTRL_0_PREFIL_NONE (0 << 2) #define HVF_CNTRL_0_INTPOL_BYPASS (0 << 0) #define TDA_HVF_CNTRL_1 MKREG(0x00, 0xe5) #define HVF_CNTRL_1_VQR(x) (((x) & 3) << 2) #define HVF_CNTRL_1_VQR_FULL HVF_CNTRL_1_VQR(0) #define TDA_ENABLE_SPACE MKREG(0x00, 0xd6) #define TDA_RPT_CNTRL MKREG(0x00, 0xf0) #define TDA_PLL_SERIAL_1 MKREG(0x02, 0x00) #define PLL_SERIAL_1_SRL_MAN_IP (1 << 6) #define TDA_PLL_SERIAL_2 MKREG(0x02, 0x01) #define PLL_SERIAL_2_SRL_PR(x) (((x) & 0xf) << 4) #define PLL_SERIAL_2_SRL_NOSC(x) (((x) & 0x3) << 0) #define TDA_PLL_SERIAL_3 MKREG(0x02, 0x02) #define PLL_SERIAL_3_SRL_PXIN_SEL (1 << 4) #define PLL_SERIAL_3_SRL_DE (1 << 2) #define PLL_SERIAL_3_SRL_CCIR (1 << 0) #define TDA_SERIALIZER MKREG(0x02, 0x03) #define TDA_BUFFER_OUT MKREG(0x02, 0x04) #define TDA_PLL_SCG1 MKREG(0x02, 0x05) #define TDA_PLL_SCG2 MKREG(0x02, 0x06) #define TDA_PLL_SCGN1 MKREG(0x02, 0x07) #define TDA_PLL_SCGN2 MKREG(0x02, 0x08) #define TDA_PLL_SCGR1 MKREG(0x02, 0x09) #define TDA_PLL_SCGR2 MKREG(0x02, 0x0a) #define TDA_SEL_CLK MKREG(0x02, 0x11) #define SEL_CLK_ENA_SC_CLK (1 << 3) #define SEL_CLK_SEL_VRF_CLK(x) (((x) & 3) << 1) #define SEL_CLK_SEL_CLK1 (1 << 0) #define TDA_ANA_GENERAL MKREG(0x02, 0x12) #define TDA_EDID_DATA0 MKREG(0x09, 0x00) #define TDA_EDID_CTRL MKREG(0x09, 0xfa) #define TDA_DDC_ADDR MKREG(0x09, 0xfb) #define TDA_DDC_OFFS MKREG(0x09, 0xfc) #define TDA_DDC_SEGM_ADDR MKREG(0x09, 0xfd) #define TDA_DDC_SEGM MKREG(0x09, 0xfe) #define TDA_IF_VSP MKREG(0x10, 0x20) #define TDA_IF_AVI MKREG(0x10, 0x40) #define TDA_IF_SPD MKREG(0x10, 0x60) #define TDA_IF_AUD MKREG(0x10, 0x80) #define TDA_IF_MPS MKREG(0x10, 0xa0) #define TDA_ENC_CNTRL MKREG(0x11, 0x0d) #define ENC_CNTRL_DVI_MODE (0 << 2) 
#define ENC_CNTRL_HDMI_MODE (1 << 2) #define TDA_DIP_IF_FLAGS MKREG(0x11, 0x0f) #define DIP_IF_FLAGS_IF5 (1 << 5) #define DIP_IF_FLAGS_IF4 (1 << 4) #define DIP_IF_FLAGS_IF3 (1 << 3) #define DIP_IF_FLAGS_IF2 (1 << 2) /* AVI IF on page 10h */ #define DIP_IF_FLAGS_IF1 (1 << 1) #define TDA_TX3 MKREG(0x12, 0x9a) #define TDA_TX4 MKREG(0x12, 0x9b) #define TX4_PD_RAM (1 << 1) #define TDA_HDCP_TX33 MKREG(0x12, 0xb8) #define HDCP_TX33_HDMI (1 << 1) #define TDA_CURPAGE_ADDR 0xff #define TDA_CEC_ENAMODS 0xff #define ENAMODS_RXSENS (1 << 2) #define ENAMODS_HDMI (1 << 1) #define TDA_CEC_FRO_IM_CLK_CTRL 0xfb #define CEC_FRO_IM_CLK_CTRL_GHOST_DIS (1 << 7) #define CEC_FRO_IM_CLK_CTRL_IMCLK_SEL (1 << 1) /* EDID reading */ #define EDID_LENGTH 0x80 #define MAX_READ_ATTEMPTS 100 /* EDID fields */ #define EDID_MODES0 35 #define EDID_MODES1 36 #define EDID_TIMING_START 38 #define EDID_TIMING_END 54 #define EDID_TIMING_X(v) (((v) + 31) * 8) #define EDID_FREQ(v) (((v) & 0x3f) + 60) #define EDID_RATIO(v) (((v) >> 6) & 0x3) #define EDID_RATIO_10x16 0 #define EDID_RATIO_3x4 1 #define EDID_RATIO_4x5 2 #define EDID_RATIO_9x16 3 #define TDA19988 0x0301 struct tda19988_softc { device_t sc_dev; uint32_t sc_addr; uint32_t sc_cec_addr; uint16_t sc_version; struct intr_config_hook enum_hook; int sc_current_page; uint8_t *sc_edid; uint32_t sc_edid_len; }; static int tda19988_set_page(struct tda19988_softc *sc, uint8_t page) { uint8_t addr = TDA_CURPAGE_ADDR; uint8_t cmd[2]; int result; struct iic_msg msg[] = { { sc->sc_addr, IIC_M_WR, 2, cmd }, }; cmd[0] = addr; cmd[1] = page; result = (iicbus_transfer(sc->sc_dev, msg, 1)); if (result) printf("tda19988_set_page failed: %d\n", result); else sc->sc_current_page = page; return (result); } static int tda19988_cec_read(struct tda19988_softc *sc, uint8_t addr, uint8_t *data) { int result; struct iic_msg msg[] = { { sc->sc_cec_addr, IIC_M_WR, 1, &addr }, { sc->sc_cec_addr, IIC_M_RD, 1, data }, }; result = iicbus_transfer(sc->sc_dev, msg, 2); if (result) printf("tda19988_cec_read failed: %d\n", result); return (result); } static int tda19988_cec_write(struct tda19988_softc *sc, uint8_t address, uint8_t data) { uint8_t cmd[2]; int result; struct iic_msg msg[] = { { sc->sc_cec_addr, IIC_M_WR, 2, cmd }, }; cmd[0] = address; cmd[1] = data; result = iicbus_transfer(sc->sc_dev, msg, 1); if (result) printf("tda19988_cec_write failed: %d\n", result); return (result); } static int tda19988_block_read(struct tda19988_softc *sc, uint16_t addr, uint8_t *data, int len) { uint8_t reg; int result; struct iic_msg msg[] = { { sc->sc_addr, IIC_M_WR, 1, &reg }, { sc->sc_addr, IIC_M_RD, len, data }, }; reg = REGADDR(addr); if (sc->sc_current_page != REGPAGE(addr)) tda19988_set_page(sc, REGPAGE(addr)); result = (iicbus_transfer(sc->sc_dev, msg, 2)); if (result) device_printf(sc->sc_dev, "tda19988_block_read failed: %d\n", result); return (result); } static int tda19988_reg_read(struct tda19988_softc *sc, uint16_t addr, uint8_t *data) { uint8_t reg; int result; struct iic_msg msg[] = { { sc->sc_addr, IIC_M_WR, 1, &reg }, { sc->sc_addr, IIC_M_RD, 1, data }, }; reg = REGADDR(addr); if (sc->sc_current_page != REGPAGE(addr)) tda19988_set_page(sc, REGPAGE(addr)); result = (iicbus_transfer(sc->sc_dev, msg, 2)); if (result) device_printf(sc->sc_dev, "tda19988_reg_read failed: %d\n", result); return (result); } static int tda19988_reg_write(struct tda19988_softc *sc, uint16_t address, uint8_t data) { uint8_t cmd[2]; int result; struct iic_msg msg[] = { { sc->sc_addr, IIC_M_WR, 2, cmd }, }; cmd[0] = REGADDR(address);
cmd[1] = data; if (sc->sc_current_page != REGPAGE(address)) tda19988_set_page(sc, REGPAGE(address)); result = iicbus_transfer(sc->sc_dev, msg, 1); if (result) device_printf(sc->sc_dev, "tda19988_reg_write failed: %d\n", result); return (result); } static int tda19988_reg_write2(struct tda19988_softc *sc, uint16_t address, uint16_t data) { uint8_t cmd[3]; int result; struct iic_msg msg[] = { { sc->sc_addr, IIC_M_WR, 3, cmd }, }; cmd[0] = REGADDR(address); cmd[1] = (data >> 8); cmd[2] = (data & 0xff); if (sc->sc_current_page != REGPAGE(address)) tda19988_set_page(sc, REGPAGE(address)); result = iicbus_transfer(sc->sc_dev, msg, 1); if (result) device_printf(sc->sc_dev, "tda19988_reg_write2 failed: %d\n", result); return (result); } static void tda19988_reg_set(struct tda19988_softc *sc, uint16_t addr, uint8_t flags) { uint8_t data; tda19988_reg_read(sc, addr, &data); data |= flags; tda19988_reg_write(sc, addr, data); } static void tda19988_reg_clear(struct tda19988_softc *sc, uint16_t addr, uint8_t flags) { uint8_t data; tda19988_reg_read(sc, addr, &data); data &= ~flags; tda19988_reg_write(sc, addr, data); } static int tda19988_probe(device_t dev) { if (!ofw_bus_is_compatible(dev, "nxp,tda998x")) return (ENXIO); return (BUS_PROBE_DEFAULT); } static void tda19988_init_encoder(struct tda19988_softc *sc, const struct videomode *mode) { uint16_t ref_pix, ref_line, n_pix, n_line; uint16_t hs_pix_start, hs_pix_stop; uint16_t vs1_pix_start, vs1_pix_stop; uint16_t vs1_line_start, vs1_line_end; uint16_t vs2_pix_start, vs2_pix_stop; uint16_t vs2_line_start, vs2_line_end; uint16_t vwin1_line_start, vwin1_line_end; uint16_t vwin2_line_start, vwin2_line_end; uint16_t de_start, de_stop; uint8_t reg, div; n_pix = mode->htotal; n_line = mode->vtotal; hs_pix_stop = mode->hsync_end - mode->hdisplay; hs_pix_start = mode->hsync_start - mode->hdisplay; de_stop = mode->htotal; de_start = mode->htotal - mode->hdisplay; ref_pix = hs_pix_start + 3; if (mode->flags & VID_HSKEW) ref_pix += mode->hskew; if ((mode->flags & VID_INTERLACE) == 0) { ref_line = 1 + mode->vsync_start - mode->vdisplay; vwin1_line_start = mode->vtotal - mode->vdisplay - 1; vwin1_line_end = vwin1_line_start + mode->vdisplay; vs1_pix_start = vs1_pix_stop = hs_pix_start; vs1_line_start = mode->vsync_start - mode->vdisplay; vs1_line_end = vs1_line_start + mode->vsync_end - mode->vsync_start; vwin2_line_start = vwin2_line_end = 0; vs2_pix_start = vs2_pix_stop = 0; vs2_line_start = vs2_line_end = 0; } else { ref_line = 1 + (mode->vsync_start - mode->vdisplay)/2; vwin1_line_start = (mode->vtotal - mode->vdisplay)/2; vwin1_line_end = vwin1_line_start + mode->vdisplay/2; vs1_pix_start = vs1_pix_stop = hs_pix_start; vs1_line_start = (mode->vsync_start - mode->vdisplay)/2; vs1_line_end = vs1_line_start + (mode->vsync_end - mode->vsync_start)/2; vwin2_line_start = vwin1_line_start + mode->vtotal/2; vwin2_line_end = vwin2_line_start + mode->vdisplay/2; vs2_pix_start = vs2_pix_stop = hs_pix_start + mode->htotal/2; vs2_line_start = vs1_line_start + mode->vtotal/2 ; vs2_line_end = vs2_line_start + (mode->vsync_end - mode->vsync_start)/2; } div = 148500 / mode->dot_clock; if (div != 0) { div--; if (div > 3) div = 3; } /* set HDMI HDCP mode off */ tda19988_reg_set(sc, TDA_TBG_CNTRL_1, TBG_CNTRL_1_DWIN_DIS); tda19988_reg_clear(sc, TDA_HDCP_TX33, HDCP_TX33_HDMI); tda19988_reg_write(sc, TDA_ENC_CNTRL, ENC_CNTRL_DVI_MODE); /* no pre-filter or interpolator */ tda19988_reg_write(sc, TDA_HVF_CNTRL_0, HVF_CNTRL_0_INTPOL_BYPASS | HVF_CNTRL_0_PREFIL_NONE); 
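/*
 * The divisor computed above is (148500 / dot_clock) - 1, clamped to the
 * 0..3 range that PLL_SERIAL_2_SRL_NOSC() accepts (a 74.25 MHz mode, for
 * instance, yields div == 1); it is written to TDA_PLL_SERIAL_2 below.
 */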
tda19988_reg_write(sc, TDA_VIP_CNTRL_5, VIP_CNTRL_5_SP_CNT(0)); tda19988_reg_write(sc, TDA_VIP_CNTRL_4, VIP_CNTRL_4_BLANKIT_NDE | VIP_CNTRL_4_BLC_NONE); tda19988_reg_clear(sc, TDA_PLL_SERIAL_3, PLL_SERIAL_3_SRL_CCIR); tda19988_reg_clear(sc, TDA_PLL_SERIAL_1, PLL_SERIAL_1_SRL_MAN_IP); tda19988_reg_clear(sc, TDA_PLL_SERIAL_3, PLL_SERIAL_3_SRL_DE); tda19988_reg_write(sc, TDA_SERIALIZER, 0); tda19988_reg_write(sc, TDA_HVF_CNTRL_1, HVF_CNTRL_1_VQR_FULL); tda19988_reg_write(sc, TDA_RPT_CNTRL, 0); tda19988_reg_write(sc, TDA_SEL_CLK, SEL_CLK_SEL_VRF_CLK(0) | SEL_CLK_SEL_CLK1 | SEL_CLK_ENA_SC_CLK); tda19988_reg_write(sc, TDA_PLL_SERIAL_2, PLL_SERIAL_2_SRL_NOSC(div) | PLL_SERIAL_2_SRL_PR(0)); tda19988_reg_set(sc, TDA_MAT_CONTRL, MAT_CONTRL_MAT_BP); tda19988_reg_write(sc, TDA_ANA_GENERAL, 0x09); tda19988_reg_clear(sc, TDA_TBG_CNTRL_0, TBG_CNTRL_0_SYNC_MTHD); /* * Sync on rising HSYNC/VSYNC */ reg = VIP_CNTRL_3_SYNC_HS; if (mode->flags & VID_NHSYNC) reg |= VIP_CNTRL_3_H_TGL; if (mode->flags & VID_NVSYNC) reg |= VIP_CNTRL_3_V_TGL; tda19988_reg_write(sc, TDA_VIP_CNTRL_3, reg); reg = TBG_CNTRL_1_TGL_EN; if (mode->flags & VID_NHSYNC) reg |= TBG_CNTRL_1_H_TGL; if (mode->flags & VID_NVSYNC) reg |= TBG_CNTRL_1_V_TGL; tda19988_reg_write(sc, TDA_TBG_CNTRL_1, reg); /* Program timing */ tda19988_reg_write(sc, TDA_VIDFORMAT, 0x00); tda19988_reg_write2(sc, TDA_REFPIX_MSB, ref_pix); tda19988_reg_write2(sc, TDA_REFLINE_MSB, ref_line); tda19988_reg_write2(sc, TDA_NPIX_MSB, n_pix); tda19988_reg_write2(sc, TDA_NLINE_MSB, n_line); tda19988_reg_write2(sc, TDA_VS_LINE_STRT_1_MSB, vs1_line_start); tda19988_reg_write2(sc, TDA_VS_PIX_STRT_1_MSB, vs1_pix_start); tda19988_reg_write2(sc, TDA_VS_LINE_END_1_MSB, vs1_line_end); tda19988_reg_write2(sc, TDA_VS_PIX_END_1_MSB, vs1_pix_stop); tda19988_reg_write2(sc, TDA_VS_LINE_STRT_2_MSB, vs2_line_start); tda19988_reg_write2(sc, TDA_VS_PIX_STRT_2_MSB, vs2_pix_start); tda19988_reg_write2(sc, TDA_VS_LINE_END_2_MSB, vs2_line_end); tda19988_reg_write2(sc, TDA_VS_PIX_END_2_MSB, vs2_pix_stop); tda19988_reg_write2(sc, TDA_HS_PIX_START_MSB, hs_pix_start); tda19988_reg_write2(sc, TDA_HS_PIX_STOP_MSB, hs_pix_stop); tda19988_reg_write2(sc, TDA_VWIN_START_1_MSB, vwin1_line_start); tda19988_reg_write2(sc, TDA_VWIN_END_1_MSB, vwin1_line_end); tda19988_reg_write2(sc, TDA_VWIN_START_2_MSB, vwin2_line_start); tda19988_reg_write2(sc, TDA_VWIN_END_2_MSB, vwin2_line_end); tda19988_reg_write2(sc, TDA_DE_START_MSB, de_start); tda19988_reg_write2(sc, TDA_DE_STOP_MSB, de_stop); if (sc->sc_version == TDA19988) tda19988_reg_write(sc, TDA_ENABLE_SPACE, 0x00); /* must be last register set */ tda19988_reg_clear(sc, TDA_TBG_CNTRL_0, TBG_CNTRL_0_SYNC_ONCE); } static int tda19988_read_edid_block(struct tda19988_softc *sc, uint8_t *buf, int block) { int attempt, err; uint8_t data; err = 0; tda19988_reg_set(sc, TDA_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD); /* Block 0 */ tda19988_reg_write(sc, TDA_DDC_ADDR, 0xa0); tda19988_reg_write(sc, TDA_DDC_OFFS, (block % 2) ? 
128 : 0); tda19988_reg_write(sc, TDA_DDC_SEGM_ADDR, 0x60); tda19988_reg_write(sc, TDA_DDC_SEGM, block / 2); tda19988_reg_write(sc, TDA_EDID_CTRL, 1); tda19988_reg_write(sc, TDA_EDID_CTRL, 0); data = 0; for (attempt = 0; attempt < MAX_READ_ATTEMPTS; attempt++) { tda19988_reg_read(sc, TDA_INT_FLAGS_2, &data); if (data & INT_FLAGS_2_EDID_BLK_RD) break; pause("EDID", 1); } if (attempt == MAX_READ_ATTEMPTS) { err = -1; goto done; } if (tda19988_block_read(sc, TDA_EDID_DATA0, buf, EDID_LENGTH) != 0) { err = -1; goto done; } done: tda19988_reg_clear(sc, TDA_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD); return (err); } static int tda19988_read_edid(struct tda19988_softc *sc) { int err; int blocks, i; uint8_t *buf; err = 0; if (sc->sc_version == TDA19988) tda19988_reg_clear(sc, TDA_TX4, TX4_PD_RAM); err = tda19988_read_edid_block(sc, sc->sc_edid, 0); if (err) goto done; blocks = sc->sc_edid[0x7e]; if (blocks > 0) { sc->sc_edid = realloc(sc->sc_edid, EDID_LENGTH*(blocks+1), M_DEVBUF, M_WAITOK); sc->sc_edid_len = EDID_LENGTH*(blocks+1); for (i = 0; i < blocks; i++) { /* TODO: check validity */ buf = sc->sc_edid + EDID_LENGTH*(i+1); err = tda19988_read_edid_block(sc, buf, i); if (err) goto done; } } - EVENTHANDLER_INVOKE(hdmi_event, 0); + EVENTHANDLER_INVOKE(hdmi_event, sc->sc_dev, HDMI_EVENT_CONNECTED); done: if (sc->sc_version == TDA19988) tda19988_reg_set(sc, TDA_TX4, TX4_PD_RAM); return (err); } static void tda19988_start(void *xdev) { struct tda19988_softc *sc; device_t dev = (device_t)xdev; uint8_t data; uint16_t version; sc = device_get_softc(dev); tda19988_cec_write(sc, TDA_CEC_ENAMODS, ENAMODS_RXSENS | ENAMODS_HDMI); DELAY(1000); tda19988_cec_read(sc, 0xfe, &data); /* Reset core */ tda19988_reg_set(sc, TDA_SOFTRESET, 3); DELAY(100); tda19988_reg_clear(sc, TDA_SOFTRESET, 3); DELAY(100); /* reset transmitter: */ tda19988_reg_set(sc, TDA_MAIN_CNTRL0, MAIN_CNTRL0_SR); tda19988_reg_clear(sc, TDA_MAIN_CNTRL0, MAIN_CNTRL0_SR); /* PLL registers common configuration */ tda19988_reg_write(sc, TDA_PLL_SERIAL_1, 0x00); tda19988_reg_write(sc, TDA_PLL_SERIAL_2, PLL_SERIAL_2_SRL_NOSC(1)); tda19988_reg_write(sc, TDA_PLL_SERIAL_3, 0x00); tda19988_reg_write(sc, TDA_SERIALIZER, 0x00); tda19988_reg_write(sc, TDA_BUFFER_OUT, 0x00); tda19988_reg_write(sc, TDA_PLL_SCG1, 0x00); tda19988_reg_write(sc, TDA_SEL_CLK, SEL_CLK_SEL_CLK1 | SEL_CLK_ENA_SC_CLK); tda19988_reg_write(sc, TDA_PLL_SCGN1, 0xfa); tda19988_reg_write(sc, TDA_PLL_SCGN2, 0x00); tda19988_reg_write(sc, TDA_PLL_SCGR1, 0x5b); tda19988_reg_write(sc, TDA_PLL_SCGR2, 0x00); tda19988_reg_write(sc, TDA_PLL_SCG2, 0x10); /* Write the default value MUX register */ tda19988_reg_write(sc, TDA_MUX_VP_VIP_OUT, 0x24); version = 0; tda19988_reg_read(sc, TDA_VERSION, &data); version |= data; tda19988_reg_read(sc, TDA_VERSION_MSB, &data); version |= (data << 8); /* Clear feature bits */ sc->sc_version = version & ~0x30; switch (sc->sc_version) { case TDA19988: device_printf(dev, "TDA19988\n"); break; default: device_printf(dev, "Unknown device: %04x\n", sc->sc_version); goto done; } tda19988_reg_write(sc, TDA_DDC_CTRL, DDC_ENABLE); tda19988_reg_write(sc, TDA_TX3, 39); tda19988_cec_write(sc, TDA_CEC_FRO_IM_CLK_CTRL, CEC_FRO_IM_CLK_CTRL_GHOST_DIS | CEC_FRO_IM_CLK_CTRL_IMCLK_SEL); if (tda19988_read_edid(sc) < 0) { device_printf(dev, "failed to read EDID\n"); goto done; } /* Default values for RGB 4:4:4 mapping */ tda19988_reg_write(sc, TDA_VIP_CNTRL_0, 0x23); tda19988_reg_write(sc, TDA_VIP_CNTRL_1, 0x01); tda19988_reg_write(sc, TDA_VIP_CNTRL_2, 0x45); done: 
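/* Always release the config intrhook, even on failure, so boot can proceed. */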
config_intrhook_disestablish(&sc->enum_hook); } static int tda19988_attach(device_t dev) { struct tda19988_softc *sc; phandle_t node; sc = device_get_softc(dev); sc->sc_dev = dev; sc->sc_addr = iicbus_get_addr(dev); sc->sc_cec_addr = (0x34 << 1); /* hardcoded */ sc->sc_edid = malloc(EDID_LENGTH, M_DEVBUF, M_WAITOK | M_ZERO); sc->sc_edid_len = EDID_LENGTH; device_set_desc(dev, "NXP TDA19988 HDMI transmitter"); sc->enum_hook.ich_func = tda19988_start; sc->enum_hook.ich_arg = dev; if (config_intrhook_establish(&sc->enum_hook) != 0) return (ENOMEM); node = ofw_bus_get_node(dev); OF_device_register_xref(OF_xref_from_node(node), dev); return (0); } static int tda19988_detach(device_t dev) { /* XXX: Do not let the driver unload */ return (EBUSY); } static int tda19988_get_edid(device_t dev, uint8_t **edid, uint32_t *edid_len) { struct tda19988_softc *sc; sc = device_get_softc(dev); if (sc->sc_edid) { *edid = sc->sc_edid; *edid_len = sc->sc_edid_len; } else return (ENXIO); return (0); } static int tda19988_set_videomode(device_t dev, const struct videomode *mode) { struct tda19988_softc *sc; sc = device_get_softc(dev); tda19988_init_encoder(sc, mode); return (0); } static device_method_t tda_methods[] = { DEVMETHOD(device_probe, tda19988_probe), DEVMETHOD(device_attach, tda19988_attach), DEVMETHOD(device_detach, tda19988_detach), /* HDMI methods */ DEVMETHOD(hdmi_get_edid, tda19988_get_edid), DEVMETHOD(hdmi_set_videomode, tda19988_set_videomode), {0, 0}, }; static driver_t tda_driver = { "tda", tda_methods, sizeof(struct tda19988_softc), }; static devclass_t tda_devclass; DRIVER_MODULE(tda, iicbus, tda_driver, tda_devclass, 0, 0); MODULE_VERSION(tda, 1); MODULE_DEPEND(tda, iicbus, 1, 1, 1); Index: projects/powernv/conf/kern.pre.mk =================================================================== --- projects/powernv/conf/kern.pre.mk (revision 290990) +++ projects/powernv/conf/kern.pre.mk (revision 290991) @@ -1,261 +1,261 @@ # $FreeBSD$ # Part of a unified Makefile for building kernels. This part contains all # of the definitions that need to be before %BEFORE_DEPEND. # Allow user to configure things that only affect src tree builds. # Note: This is duplicated from src.sys.mk to ensure that we include # /etc/src.conf when building the kernel. Kernels can be built without # the rest of /usr/src, but they still always process SRCCONF even though # the normal mechanisms to prevent that (compiling out of tree) won't # work. To ensure they do work, we have to duplicate these few lines here.
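# (Illustrative: this include is why a kernel-only build still honors
# /etc/src.conf; a WITHOUT_* knob set there, e.g. WITHOUT_CDDL, can affect
# the module build even when the rest of /usr/src is absent.)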
SRCCONF?= /etc/src.conf .if (exists(${SRCCONF}) || ${SRCCONF} != "/etc/src.conf") && !target(_srcconf_included_) .include "${SRCCONF}" _srcconf_included_: .endif .include .include .include "kern.opts.mk" # Can be overridden by makeoptions or /etc/make.conf KERNEL_KO?= kernel KERNEL?= kernel KODIR?= /boot/${KERNEL} LDSCRIPT_NAME?= ldscript.$M LDSCRIPT?= $S/conf/${LDSCRIPT_NAME} M= ${MACHINE} AWK?= awk CP?= cp LINT?= lint NM?= nm OBJCOPY?= objcopy SIZE?= size .if defined(DEBUG) _MINUS_O= -O CTFFLAGS+= -g .else .if ${MACHINE_CPUARCH} == "powerpc" _MINUS_O= -O # gcc miscompiles some code at -O2 .else _MINUS_O= -O2 .endif .endif .if ${MACHINE_CPUARCH} == "amd64" .if ${COMPILER_TYPE} == "clang" COPTFLAGS?=-O2 -pipe .else COPTFLAGS?=-O2 -frename-registers -pipe .endif .else COPTFLAGS?=${_MINUS_O} -pipe .endif .if !empty(COPTFLAGS:M-O[23s]) && empty(COPTFLAGS:M-fno-strict-aliasing) COPTFLAGS+= -fno-strict-aliasing .endif .if !defined(NO_CPU_COPTFLAGS) COPTFLAGS+= ${_CPUCFLAGS} .endif NOSTDINC= -nostdinc INCLUDES= ${NOSTDINC} ${INCLMAGIC} -I. -I$S .if make(depend) || make(kernel-depend) # This hack lets us use the ipfilter code without spamming a new # include path into contrib'ed source files. INCLUDES+= -I$S/contrib/ipfilter # ... and the same for ath INCLUDES+= -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal # ... and the same for the NgATM stuff INCLUDES+= -I$S/contrib/ngatm # ... and the same for vchiq INCLUDES+= -I$S/contrib/vchiq # ... and the same for twa INCLUDES+= -I$S/dev/twa # ... and the same for cxgb and cxgbe INCLUDES+= -I$S/dev/cxgb -I$S/dev/cxgbe .endif CFLAGS= ${COPTFLAGS} ${DEBUG} CFLAGS+= ${INCLUDES} -D_KERNEL -DHAVE_KERNEL_OPTION_HEADERS -include opt_global.h CFLAGS_PARAM_INLINE_UNIT_GROWTH?=100 CFLAGS_PARAM_LARGE_FUNCTION_GROWTH?=1000 .if ${MACHINE_CPUARCH} == "mips" CFLAGS_ARCH_PARAMS?=--param max-inline-insns-single=1000 .endif CFLAGS.gcc+= -fno-common -fms-extensions -finline-limit=${INLINE_LIMIT} CFLAGS.gcc+= --param inline-unit-growth=${CFLAGS_PARAM_INLINE_UNIT_GROWTH} CFLAGS.gcc+= --param large-function-growth=${CFLAGS_PARAM_LARGE_FUNCTION_GROWTH} .if defined(CFLAGS_ARCH_PARAMS) CFLAGS.gcc+=${CFLAGS_ARCH_PARAMS} .endif WERROR?= -Werror # XXX LOCORE means "don't declare C stuff" not "for locore.s". ASM_CFLAGS= -x assembler-with-cpp -DLOCORE ${CFLAGS} ${ASM_CFLAGS.${.IMPSRC:T}} .if defined(PROFLEVEL) && ${PROFLEVEL} >= 1 CFLAGS+= -DGPROF CFLAGS.gcc+= -falign-functions=16 .if ${PROFLEVEL} >= 2 CFLAGS+= -DGPROF4 -DGUPROF PROF= -pg .if ${COMPILER_TYPE} == "gcc" PROF+= -mprofiler-epilogue .endif .else PROF= -pg .endif .endif DEFINED_PROF= ${PROF} # Put configuration-specific C flags last (except for ${PROF}) so that they # can override the others. CFLAGS+= ${CONF_CFLAGS} # Optional linting. This can be overridden in /etc/make.conf. 
LINTFLAGS= ${LINTOBJKERNFLAGS} NORMAL_C= ${CC} -c ${CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} NORMAL_S= ${CC:N${CCACHE_BIN}} -c ${ASM_CFLAGS} ${WERROR} ${.IMPSRC} PROFILE_C= ${CC} -c ${CFLAGS} ${WERROR} ${.IMPSRC} NORMAL_C_NOWERROR= ${CC} -c ${CFLAGS} ${PROF} ${.IMPSRC} NORMAL_M= ${AWK} -f $S/tools/makeobjops.awk ${.IMPSRC} -c ; \ ${CC} -c ${CFLAGS} ${WERROR} ${PROF} ${.PREFIX}.c NORMAL_FW= uudecode -o ${.TARGET} ${.ALLSRC} NORMAL_FWO= ${LD} -b binary --no-warn-mismatch -d -warn-common -r \ -o ${.TARGET} ${.ALLSRC:M*.fw} # Common for dtrace / zfs CDDL_CFLAGS= -DFREEBSD_NAMECACHE -nostdinc -I$S/cddl/compat/opensolaris -I$S/cddl/contrib/opensolaris/uts/common -I$S -I$S/cddl/contrib/opensolaris/common ${CFLAGS} -Wno-unknown-pragmas -Wno-missing-prototypes -Wno-undef -Wno-strict-prototypes -Wno-cast-qual -Wno-parentheses -Wno-redundant-decls -Wno-missing-braces -Wno-uninitialized -Wno-unused -Wno-inline -Wno-switch -Wno-pointer-arith -Wno-unknown-pragmas CDDL_CFLAGS+= -include $S/cddl/compat/opensolaris/sys/debug_compat.h CDDL_C= ${CC} -c ${CDDL_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} # Special flags for managing the compat compiles for ZFS ZFS_CFLAGS= -DBUILDING_ZFS -I$S/cddl/contrib/opensolaris/uts/common/fs/zfs -I$S/cddl/contrib/opensolaris/uts/common/zmod -I$S/cddl/contrib/opensolaris/common/zfs ${CDDL_CFLAGS} ZFS_ASM_CFLAGS= -x assembler-with-cpp -DLOCORE ${ZFS_CFLAGS} ZFS_C= ${CC} -c ${ZFS_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} ZFS_S= ${CC} -c ${ZFS_ASM_CFLAGS} ${WERROR} ${.IMPSRC} # Special flags for managing the compat compiles for DTrace DTRACE_CFLAGS= -DBUILDING_DTRACE ${CDDL_CFLAGS} -I$S/cddl/dev/dtrace -I$S/cddl/dev/dtrace/${MACHINE_CPUARCH} .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" DTRACE_CFLAGS+= -I$S/cddl/contrib/opensolaris/uts/intel -I$S/cddl/dev/dtrace/x86 .endif DTRACE_CFLAGS+= -I$S/cddl/contrib/opensolaris/common/util -I$S -DDIS_MEM -DSMP DTRACE_ASM_CFLAGS= -x assembler-with-cpp -DLOCORE ${DTRACE_CFLAGS} -DTRACE_C= ${CC} -c ${DTRACE_CFLAGS} ${CDDL_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} -DTRACE_S= ${CC} -c ${DTRACE_ASM_CFLAGS} ${CDDL_CFLAGS} ${WERROR} ${.IMPSRC} +DTRACE_C= ${CC} -c ${DTRACE_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} +DTRACE_S= ${CC} -c ${DTRACE_ASM_CFLAGS} ${WERROR} ${.IMPSRC} # Special flags for managing the compat compiles for DTrace/FBT FBT_CFLAGS= -DBUILDING_DTRACE -nostdinc -I$S/cddl/dev/fbt/${MACHINE_CPUARCH} -I$S/cddl/dev/fbt -I$S/cddl/compat/opensolaris -I$S/cddl/contrib/opensolaris/uts/common -I$S ${CDDL_CFLAGS} .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" FBT_CFLAGS+= -I$S/cddl/dev/fbt/x86 .endif -FBT_C= ${CC} -c ${FBT_CFLAGS} ${CDDL_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} +FBT_C= ${CC} -c ${FBT_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} .if ${MK_CTF} != "no" NORMAL_CTFCONVERT= ${CTFCONVERT} ${CTFFLAGS} ${.TARGET} .elif ${MAKE_VERSION} >= 5201111300 NORMAL_CTFCONVERT= .else NORMAL_CTFCONVERT= @: .endif NORMAL_LINT= ${LINT} ${LINTFLAGS} ${CFLAGS:M-[DIU]*} ${.IMPSRC} # Linux Kernel Programming Interface C-flags LINUXKPI_INCLUDES= -I$S/compat/linuxkpi/common/include LINUXKPI_C= ${NORMAL_C} ${LINUXKPI_INCLUDES} # Infiniband C flags. Correct include paths and omit errors that linux # does not honor. 
OFEDINCLUDES= -I$S/ofed/include ${LINUXKPI_INCLUDES} OFEDNOERR= -Wno-cast-qual -Wno-pointer-arith OFEDCFLAGS= ${CFLAGS:N-I*} ${OFEDINCLUDES} ${CFLAGS:M-I*} ${OFEDNOERR} OFED_C_NOIMP= ${CC} -c -o ${.TARGET} ${OFEDCFLAGS} ${WERROR} ${PROF} OFED_C= ${OFED_C_NOIMP} ${.IMPSRC} GEN_CFILES= $S/$M/$M/genassym.c ${MFILES:T:S/.m$/.c/} SYSTEM_CFILES= config.c env.c hints.c vnode_if.c SYSTEM_DEP= Makefile ${SYSTEM_OBJS} SYSTEM_OBJS= locore.o ${MDOBJS} ${OBJS} SYSTEM_OBJS+= ${SYSTEM_CFILES:.c=.o} SYSTEM_OBJS+= hack.So .if ${MFS_IMAGE:Uno} != "no" SYSTEM_OBJS+= embedfs_${MFS_IMAGE:T:R}.o .endif SYSTEM_LD= @${LD} -Bdynamic -T ${LDSCRIPT} ${_LDFLAGS} --no-warn-mismatch \ --warn-common --export-dynamic --dynamic-linker /red/herring \ -o ${.TARGET} -X ${SYSTEM_OBJS} vers.o SYSTEM_LD_TAIL= @${OBJCOPY} --strip-symbol gcc2_compiled. ${.TARGET} ; \ ${SIZE} ${.TARGET} ; chmod 755 ${.TARGET} SYSTEM_DEP+= ${LDSCRIPT} # Calculate path for .m files early, if needed. .if !defined(_MPATH) __MPATH!=find ${S:tA}/ -name \*_if.m _MPATH=${__MPATH:H:O:u} .endif # MKMODULESENV is set here so that port makefiles can augment # them. MKMODULESENV+= MAKEOBJDIRPREFIX=${.OBJDIR}/modules KMODDIR=${KODIR} MKMODULESENV+= MACHINE_CPUARCH=${MACHINE_CPUARCH} MKMODULESENV+= MACHINE=${MACHINE} MACHINE_ARCH=${MACHINE_ARCH} MKMODULESENV+= MODULES_EXTRA="${MODULES_EXTRA}" WITHOUT_MODULES="${WITHOUT_MODULES}" .if (${KERN_IDENT} == LINT) MKMODULESENV+= ALL_MODULES=LINT .endif .if defined(MODULES_OVERRIDE) MKMODULESENV+= MODULES_OVERRIDE="${MODULES_OVERRIDE}" .endif .if defined(DEBUG) MKMODULESENV+= DEBUG_FLAGS="${DEBUG}" .endif MKMODULESENV+= _MPATH="${_MPATH}" # Architecture and output format arguments for objdump to convert image to # object file .if ${MFS_IMAGE:Uno} != "no" .if !defined(EMBEDFS_FORMAT.${MACHINE_ARCH}) EMBEDFS_FORMAT.${MACHINE_ARCH}!= awk -F'"' '/OUTPUT_FORMAT/ {print $$2}' ${LDSCRIPT} .if empty(EMBEDFS_FORMAT.${MACHINE_ARCH}) .undef EMBEDFS_FORMAT.${MACHINE_ARCH} .endif .endif .if !defined(EMBEDFS_ARCH.${MACHINE_ARCH}) EMBEDFS_ARCH.${MACHINE_ARCH}!= sed -n '/OUTPUT_ARCH/s/.*(\(.*\)).*/\1/p' ${LDSCRIPT} .if empty(EMBEDFS_ARCH.${MACHINE_ARCH}) .undef EMBEDFS_ARCH.${MACHINE_ARCH} .endif .endif EMBEDFS_FORMAT.arm?= elf32-littlearm EMBEDFS_FORMAT.armv6?= elf32-littlearm EMBEDFS_FORMAT.mips?= elf32-tradbigmips EMBEDFS_FORMAT.mipsel?= elf32-tradlittlemips EMBEDFS_FORMAT.mips64?= elf64-tradbigmips EMBEDFS_FORMAT.mips64el?= elf64-tradlittlemips .endif # Detect kernel config options that force stack frames to be turned on. DDB_ENABLED!= grep DDB opt_ddb.h || true ; echo DTR_ENABLED!= grep KDTRACE_FRAME opt_kdtrace.h || true ; echo HWPMC_ENABLED!= grep HWPMC opt_hwpmc_hooks.h || true ; echo Index: projects/powernv/conf =================================================================== --- projects/powernv/conf (revision 290990) +++ projects/powernv/conf (revision 290991) Property changes on: projects/powernv/conf ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/conf:r290829-290990 Index: projects/powernv/dev/ata/chipsets/ata-intel.c =================================================================== --- projects/powernv/dev/ata/chipsets/ata-intel.c (revision 290990) +++ projects/powernv/dev/ata/chipsets/ata-intel.c (revision 290991) @@ -1,926 +1,926 @@ /*- * Copyright (c) 1998 - 2008 Søren Schmidt * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* local prototypes */ static int ata_intel_chipinit(device_t dev); static int ata_intel_chipdeinit(device_t dev); static int ata_intel_ch_attach(device_t dev); static void ata_intel_reset(device_t dev); static int ata_intel_old_setmode(device_t dev, int target, int mode); static int ata_intel_new_setmode(device_t dev, int target, int mode); static int ata_intel_sch_setmode(device_t dev, int target, int mode); static int ata_intel_sata_getrev(device_t dev, int target); static int ata_intel_sata_status(device_t dev); static int ata_intel_sata_ahci_read(device_t dev, int port, int reg, u_int32_t *result); static int ata_intel_sata_cscr_read(device_t dev, int port, int reg, u_int32_t *result); static int ata_intel_sata_sidpr_read(device_t dev, int port, int reg, u_int32_t *result); static int ata_intel_sata_ahci_write(device_t dev, int port, int reg, u_int32_t result); static int ata_intel_sata_cscr_write(device_t dev, int port, int reg, u_int32_t result); static int ata_intel_sata_sidpr_write(device_t dev, int port, int reg, u_int32_t result); static int ata_intel_sata_sidpr_test(device_t dev); static int ata_intel_31244_ch_attach(device_t dev); static int ata_intel_31244_ch_detach(device_t dev); static int ata_intel_31244_status(device_t dev); static void ata_intel_31244_tf_write(struct ata_request *request); static void ata_intel_31244_reset(device_t dev); /* misc defines */ #define INTEL_ICH5 2 #define INTEL_6CH 4 #define INTEL_6CH2 8 #define INTEL_ICH7 16 struct ata_intel_data { struct mtx lock; u_char smap[4]; }; #define ATA_INTEL_SMAP(ctlr, ch) \ &((struct ata_intel_data *)((ctlr)->chipset_data))->smap[(ch)->unit * 2] #define ATA_INTEL_LOCK(ctlr) \ mtx_lock(&((struct ata_intel_data *)((ctlr)->chipset_data))->lock) #define ATA_INTEL_UNLOCK(ctlr) \ mtx_unlock(&((struct ata_intel_data *)((ctlr)->chipset_data))->lock) /* * Intel chipset support functions */ static int ata_intel_probe(device_t dev) { struct ata_pci_controller *ctlr = device_get_softc(dev); static const struct ata_chip_id ids[] = {{ 
ATA_I82371FB, 0, 0, 2, ATA_WDMA2, "PIIX" }, { ATA_I82371SB, 0, 0, 2, ATA_WDMA2, "PIIX3" }, { ATA_I82371AB, 0, 0, 2, ATA_UDMA2, "PIIX4" }, { ATA_I82443MX, 0, 0, 2, ATA_UDMA2, "PIIX4" }, { ATA_I82451NX, 0, 0, 2, ATA_UDMA2, "PIIX4" }, { ATA_I82801AB, 0, 0, 2, ATA_UDMA2, "ICH0" }, { ATA_I82801AA, 0, 0, 2, ATA_UDMA4, "ICH" }, { ATA_I82372FB, 0, 0, 2, ATA_UDMA4, "ICH" }, { ATA_I82801BA, 0, 0, 2, ATA_UDMA5, "ICH2" }, { ATA_I82801BA_1, 0, 0, 2, ATA_UDMA5, "ICH2" }, { ATA_I82801CA, 0, 0, 2, ATA_UDMA5, "ICH3" }, { ATA_I82801CA_1, 0, 0, 2, ATA_UDMA5, "ICH3" }, { ATA_I82801DB, 0, 0, 2, ATA_UDMA5, "ICH4" }, { ATA_I82801DB_1, 0, 0, 2, ATA_UDMA5, "ICH4" }, { ATA_I82801EB, 0, 0, 2, ATA_UDMA5, "ICH5" }, { ATA_I82801EB_S1, 0, INTEL_ICH5, 2, ATA_SA150, "ICH5" }, { ATA_I82801EB_R1, 0, INTEL_ICH5, 2, ATA_SA150, "ICH5" }, { ATA_I6300ESB, 0, 0, 2, ATA_UDMA5, "6300ESB" }, { ATA_I6300ESB_S1, 0, INTEL_ICH5, 2, ATA_SA150, "6300ESB" }, { ATA_I6300ESB_R1, 0, INTEL_ICH5, 2, ATA_SA150, "6300ESB" }, { ATA_I82801FB, 0, 0, 2, ATA_UDMA5, "ICH6" }, { ATA_I82801FB_S1, 0, 0, 0, ATA_SA150, "ICH6" }, { ATA_I82801FB_R1, 0, 0, 0, ATA_SA150, "ICH6" }, { ATA_I82801FBM, 0, 0, 0, ATA_SA150, "ICH6M" }, { ATA_I82801GB, 0, 0, 1, ATA_UDMA5, "ICH7" }, { ATA_I82801GB_S1, 0, INTEL_ICH7, 0, ATA_SA300, "ICH7" }, { ATA_I82801GBM_S1, 0, INTEL_ICH7, 0, ATA_SA150, "ICH7M" }, { ATA_I63XXESB2, 0, 0, 1, ATA_UDMA5, "63XXESB2" }, { ATA_I63XXESB2_S1, 0, 0, 0, ATA_SA300, "63XXESB2" }, { ATA_I82801HB_S1, 0, INTEL_6CH, 0, ATA_SA300, "ICH8" }, { ATA_I82801HB_S2, 0, INTEL_6CH2, 0, ATA_SA300, "ICH8" }, { ATA_I82801HBM, 0, 0, 1, ATA_UDMA5, "ICH8M" }, { ATA_I82801HBM_S1, 0, INTEL_6CH, 0, ATA_SA300, "ICH8M" }, { ATA_I82801IB_S1, 0, INTEL_6CH, 0, ATA_SA300, "ICH9" }, { ATA_I82801IB_S2, 0, INTEL_6CH2, 0, ATA_SA300, "ICH9" }, { ATA_I82801IB_S3, 0, INTEL_6CH2, 0, ATA_SA300, "ICH9" }, { ATA_I82801IBM_S1, 0, INTEL_6CH2, 0, ATA_SA300, "ICH9M" }, { ATA_I82801IBM_S2, 0, INTEL_6CH2, 0, ATA_SA300, "ICH9M" }, { ATA_I82801JIB_S1, 0, INTEL_6CH, 0, ATA_SA300, "ICH10" }, { ATA_I82801JIB_S2, 0, INTEL_6CH2, 0, ATA_SA300, "ICH10" }, { ATA_I82801JD_S1, 0, INTEL_6CH, 0, ATA_SA300, "ICH10" }, { ATA_I82801JD_S2, 0, INTEL_6CH2, 0, ATA_SA300, "ICH10" }, { ATA_I82801JI_S1, 0, INTEL_6CH, 0, ATA_SA300, "ICH10" }, { ATA_I82801JI_S2, 0, INTEL_6CH2, 0, ATA_SA300, "ICH10" }, { ATA_5Series_S1, 0, INTEL_6CH, 0, ATA_SA300, "5 Series/3400 Series PCH" }, { ATA_5Series_S2, 0, INTEL_6CH2, 0, ATA_SA300, "5 Series/3400 Series PCH" }, { ATA_5Series_S3, 0, INTEL_6CH2, 0, ATA_SA300, "5 Series/3400 Series PCH" }, { ATA_5Series_S4, 0, INTEL_6CH, 0, ATA_SA300, "5 Series/3400 Series PCH" }, { ATA_5Series_S5, 0, INTEL_6CH2, 0, ATA_SA300, "5 Series/3400 Series PCH" }, { ATA_5Series_S6, 0, INTEL_6CH, 0, ATA_SA300, "5 Series/3400 Series PCH" }, { ATA_CPT_S1, 0, INTEL_6CH, 0, ATA_SA600, "Cougar Point" }, { ATA_CPT_S2, 0, INTEL_6CH, 0, ATA_SA600, "Cougar Point" }, { ATA_CPT_S3, 0, INTEL_6CH2, 0, ATA_SA300, "Cougar Point" }, { ATA_CPT_S4, 0, INTEL_6CH2, 0, ATA_SA300, "Cougar Point" }, { ATA_PBG_S1, 0, INTEL_6CH, 0, ATA_SA600, "Patsburg" }, { ATA_PBG_S2, 0, INTEL_6CH2, 0, ATA_SA300, "Patsburg" }, { ATA_PPT_S1, 0, INTEL_6CH, 0, ATA_SA600, "Panther Point" }, { ATA_PPT_S2, 0, INTEL_6CH, 0, ATA_SA600, "Panther Point" }, { ATA_PPT_S3, 0, INTEL_6CH2, 0, ATA_SA300, "Panther Point" }, { ATA_PPT_S4, 0, INTEL_6CH2, 0, ATA_SA300, "Panther Point" }, { ATA_AVOTON_S1, 0, INTEL_6CH, 0, ATA_SA600, "Avoton" }, { ATA_AVOTON_S2, 0, INTEL_6CH, 0, ATA_SA600, "Avoton" }, { ATA_AVOTON_S3, 0, INTEL_6CH2, 0, ATA_SA300, "Avoton" }, { 
ATA_AVOTON_S4, 0, INTEL_6CH2, 0, ATA_SA300, "Avoton" }, { ATA_LPT_S1, 0, INTEL_6CH, 0, ATA_SA600, "Lynx Point" }, { ATA_LPT_S2, 0, INTEL_6CH, 0, ATA_SA600, "Lynx Point" }, { ATA_LPT_S3, 0, INTEL_6CH2, 0, ATA_SA600, "Lynx Point" }, { ATA_LPT_S4, 0, INTEL_6CH2, 0, ATA_SA600, "Lynx Point" }, { ATA_WCPT_S1, 0, INTEL_6CH, 0, ATA_SA600, "Wildcat Point" }, { ATA_WCPT_S2, 0, INTEL_6CH, 0, ATA_SA600, "Wildcat Point" }, { ATA_WCPT_S3, 0, INTEL_6CH2, 0, ATA_SA600, "Wildcat Point" }, { ATA_WCPT_S4, 0, INTEL_6CH2, 0, ATA_SA600, "Wildcat Point" }, { ATA_WELLS_S1, 0, INTEL_6CH, 0, ATA_SA600, "Wellsburg" }, { ATA_WELLS_S2, 0, INTEL_6CH2, 0, ATA_SA600, "Wellsburg" }, { ATA_WELLS_S3, 0, INTEL_6CH, 0, ATA_SA600, "Wellsburg" }, { ATA_WELLS_S4, 0, INTEL_6CH2, 0, ATA_SA600, "Wellsburg" }, { ATA_LPTLP_S1, 0, INTEL_6CH, 0, ATA_SA600, "Lynx Point-LP" }, { ATA_LPTLP_S2, 0, INTEL_6CH, 0, ATA_SA600, "Lynx Point-LP" }, { ATA_LPTLP_S3, 0, INTEL_6CH2, 0, ATA_SA300, "Lynx Point-LP" }, { ATA_LPTLP_S4, 0, INTEL_6CH2, 0, ATA_SA300, "Lynx Point-LP" }, { ATA_I31244, 0, 0, 2, ATA_SA150, "31244" }, { ATA_ISCH, 0, 0, 1, ATA_UDMA5, "SCH" }, { ATA_COLETOCRK_S1, 0, INTEL_6CH2, 0, ATA_SA300, "COLETOCRK" }, { ATA_COLETOCRK_S2, 0, INTEL_6CH2, 0, ATA_SA300, "COLETOCRK" }, { 0, 0, 0, 0, 0, 0}}; if (pci_get_vendor(dev) != ATA_INTEL_ID) return ENXIO; if (!(ctlr->chip = ata_match_chip(dev, ids))) return ENXIO; ata_set_desc(dev); ctlr->chipinit = ata_intel_chipinit; ctlr->chipdeinit = ata_intel_chipdeinit; return (BUS_PROBE_LOW_PRIORITY); } static int ata_intel_chipinit(device_t dev) { struct ata_pci_controller *ctlr = device_get_softc(dev); struct ata_intel_data *data; if (ata_setup_interrupt(dev, ata_generic_intr)) return ENXIO; data = malloc(sizeof(struct ata_intel_data), M_ATAPCI, M_WAITOK | M_ZERO); mtx_init(&data->lock, "Intel SATA lock", NULL, MTX_DEF); ctlr->chipset_data = (void *)data; /* good old PIIX needs special treatment (not implemented) */ if (ctlr->chip->chipid == ATA_I82371FB) { ctlr->setmode = ata_intel_old_setmode; } /* the Intel 31244 needs special care if in DPA mode */ else if (ctlr->chip->chipid == ATA_I31244) { if (pci_get_subclass(dev) != PCIS_STORAGE_IDE) { ctlr->r_type2 = SYS_RES_MEMORY; ctlr->r_rid2 = PCIR_BAR(0); if (!(ctlr->r_res2 = bus_alloc_resource_any(dev, ctlr->r_type2, &ctlr->r_rid2, RF_ACTIVE))) return ENXIO; ctlr->channels = 4; ctlr->ch_attach = ata_intel_31244_ch_attach; ctlr->ch_detach = ata_intel_31244_ch_detach; ctlr->reset = ata_intel_31244_reset; } ctlr->setmode = ata_sata_setmode; ctlr->getrev = ata_sata_getrev; } /* SCH */ else if (ctlr->chip->chipid == ATA_ISCH) { ctlr->channels = 1; ctlr->ch_attach = ata_intel_ch_attach; ctlr->ch_detach = ata_pci_ch_detach; ctlr->setmode = ata_intel_sch_setmode; } /* non-SATA Intel chips go here */ else if (ctlr->chip->max_dma < ATA_SA150) { ctlr->channels = ctlr->chip->cfg2; ctlr->ch_attach = ata_intel_ch_attach; ctlr->ch_detach = ata_pci_ch_detach; ctlr->setmode = ata_intel_new_setmode; } /* SATA parts can be either compat or AHCI */ else { /* force all ports active "the legacy way" */ pci_write_config(dev, 0x92, pci_read_config(dev, 0x92, 2) | 0x0f, 2); ctlr->ch_attach = ata_intel_ch_attach; ctlr->ch_detach = ata_pci_ch_detach; ctlr->reset = ata_intel_reset; /* BAR(5) may point to SATA interface registers */ if ((ctlr->chip->cfg1 & INTEL_ICH7)) { ctlr->r_type2 = SYS_RES_MEMORY; ctlr->r_rid2 = PCIR_BAR(5); ctlr->r_res2 = bus_alloc_resource_any(dev, ctlr->r_type2, &ctlr->r_rid2, RF_ACTIVE); if (ctlr->r_res2 != NULL) { /* Set SCRAE bit to enable registers
access. */ pci_write_config(dev, 0x94, pci_read_config(dev, 0x94, 4) | (1 << 9), 4); /* Set Ports Implemented register bits. */ ATA_OUTL(ctlr->r_res2, 0x0C, ATA_INL(ctlr->r_res2, 0x0C) | 0xf); } /* Skip BAR(5) on ICH8M Apples, system locks up on access. */ } else if (ctlr->chip->chipid != ATA_I82801HBM_S1 || pci_get_subvendor(dev) != 0x106b) { ctlr->r_type2 = SYS_RES_IOPORT; ctlr->r_rid2 = PCIR_BAR(5); ctlr->r_res2 = bus_alloc_resource_any(dev, ctlr->r_type2, &ctlr->r_rid2, RF_ACTIVE); } if (ctlr->r_res2 != NULL || (ctlr->chip->cfg1 & INTEL_ICH5)) ctlr->getrev = ata_intel_sata_getrev; ctlr->setmode = ata_sata_setmode; } return 0; } static int ata_intel_chipdeinit(device_t dev) { struct ata_pci_controller *ctlr = device_get_softc(dev); struct ata_intel_data *data; data = ctlr->chipset_data; mtx_destroy(&data->lock); free(data, M_ATAPCI); ctlr->chipset_data = NULL; return (0); } static int ata_intel_ch_attach(device_t dev) { struct ata_pci_controller *ctlr; struct ata_channel *ch; u_char *smap; u_int map; /* setup the usual register normal pci style */ if (ata_pci_ch_attach(dev)) return (ENXIO); ctlr = device_get_softc(device_get_parent(dev)); ch = device_get_softc(dev); /* if r_res2 is valid it points to SATA interface registers */ if (ctlr->r_res2) { ch->r_io[ATA_IDX_ADDR].res = ctlr->r_res2; ch->r_io[ATA_IDX_ADDR].offset = 0x00; ch->r_io[ATA_IDX_DATA].res = ctlr->r_res2; ch->r_io[ATA_IDX_DATA].offset = 0x04; } ch->flags |= ATA_ALWAYS_DMASTAT; if (ctlr->chip->max_dma >= ATA_SA150) { smap = ATA_INTEL_SMAP(ctlr, ch); map = pci_read_config(device_get_parent(dev), 0x90, 1); if (ctlr->chip->cfg1 & INTEL_ICH5) { map &= 0x07; if ((map & 0x04) == 0) { ch->flags |= ATA_SATA; ch->flags |= ATA_NO_SLAVE; smap[0] = (map & 0x01) ^ ch->unit; smap[1] = 0; } else if ((map & 0x02) == 0 && ch->unit == 0) { ch->flags |= ATA_SATA; smap[0] = (map & 0x01) ? 1 : 0; smap[1] = (map & 0x01) ? 0 : 1; } else if ((map & 0x02) != 0 && ch->unit == 1) { ch->flags |= ATA_SATA; smap[0] = (map & 0x01) ? 1 : 0; smap[1] = (map & 0x01) ? 0 : 1; } } else if (ctlr->chip->cfg1 & INTEL_6CH2) { ch->flags |= ATA_SATA; ch->flags |= ATA_NO_SLAVE; smap[0] = (ch->unit == 0) ? 0 : 1; smap[1] = 0; } else { map &= 0x03; if (map == 0x00) { ch->flags |= ATA_SATA; smap[0] = (ch->unit == 0) ? 0 : 1; smap[1] = (ch->unit == 0) ? 
2 : 3; } else if (map == 0x02 && ch->unit == 0) { ch->flags |= ATA_SATA; smap[0] = 0; smap[1] = 2; } else if (map == 0x01 && ch->unit == 1) { ch->flags |= ATA_SATA; smap[0] = 1; smap[1] = 3; } } if (ch->flags & ATA_SATA) { if ((ctlr->chip->cfg1 & INTEL_ICH5)) { ch->hw.pm_read = ata_intel_sata_cscr_read; ch->hw.pm_write = ata_intel_sata_cscr_write; } else if (ctlr->r_res2) { if ((ctlr->chip->cfg1 & INTEL_ICH7)) { ch->hw.pm_read = ata_intel_sata_ahci_read; ch->hw.pm_write = ata_intel_sata_ahci_write; } else if (ata_intel_sata_sidpr_test(dev)) { ch->hw.pm_read = ata_intel_sata_sidpr_read; ch->hw.pm_write = ata_intel_sata_sidpr_write; }; } if (ch->hw.pm_write != NULL) { ch->flags |= ATA_PERIODIC_POLL; ch->hw.status = ata_intel_sata_status; ata_sata_scr_write(ch, 0, ATA_SERROR, 0xffffffff); if ((ch->flags & ATA_NO_SLAVE) == 0) { ata_sata_scr_write(ch, 1, ATA_SERROR, 0xffffffff); } } } else ctlr->setmode = ata_intel_new_setmode; if (ctlr->chip->max_dma >= ATA_SA600) ch->flags |= ATA_USE_16BIT; } else if (ctlr->chip->chipid != ATA_ISCH) ch->flags |= ATA_CHECKS_CABLE; return (0); } static void ata_intel_reset(device_t dev) { device_t parent = device_get_parent(dev); struct ata_pci_controller *ctlr = device_get_softc(parent); struct ata_channel *ch = device_get_softc(dev); int mask, pshift, timeout, devs; u_char *smap; uint16_t pcs; /* In combined mode, skip SATA stuff for PATA channel. */ if ((ch->flags & ATA_SATA) == 0) return (ata_generic_reset(dev)); /* Do hard-reset on respective SATA ports. */ smap = ATA_INTEL_SMAP(ctlr, ch); mask = 1 << smap[0]; if ((ch->flags & ATA_NO_SLAVE) == 0) mask |= (1 << smap[1]); pci_write_config(parent, 0x92, pci_read_config(parent, 0x92, 2) & ~mask, 2); - DELAY(10); + DELAY(100); pci_write_config(parent, 0x92, pci_read_config(parent, 0x92, 2) | mask, 2); /* Wait up to 1 sec for "connect well". */ if (ctlr->chip->cfg1 & (INTEL_6CH | INTEL_6CH2)) pshift = 8; else pshift = 4; for (timeout = 0; timeout < 100 ; timeout++) { pcs = (pci_read_config(parent, 0x92, 2) >> pshift) & mask; if ((pcs == mask) && (ATA_IDX_INB(ch, ATA_STATUS) != 0xff)) break; ata_udelay(10000); } if (bootverbose) device_printf(dev, "SATA reset: ports status=0x%02x\n", pcs); /* If any device found, do soft-reset. */ if (ch->hw.pm_read != NULL) { devs = ata_sata_phy_reset(dev, 0, 2) ? ATA_ATA_MASTER : 0; if ((ch->flags & ATA_NO_SLAVE) == 0) devs |= ata_sata_phy_reset(dev, 1, 2) ? ATA_ATA_SLAVE : 0; } else { devs = (pcs & (1 << smap[0])) ? ATA_ATA_MASTER : 0; if ((ch->flags & ATA_NO_SLAVE) == 0) devs |= (pcs & (1 << smap[1])) ? ATA_ATA_SLAVE : 0; } if (devs) { ata_generic_reset(dev); /* Reset may give fake slave when only ATAPI master present. 
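 * The masking that follows assumes the usual FreeBSD ata(4) encoding of
 * ch->devices, in which the ATAPI bits are the ATA bits shifted left by
 * two (ATA_ATAPI_MASTER == ATA_ATA_MASTER << 2). If that holds, 'devs'
 * marks the master/slave positions where the reset actually saw a
 * device, and (devs | devs * ATA_ATAPI_MASTER) keeps both the ATA bit
 * and its ATAPI counterpart for exactly those positions, discarding any
 * phantom slave reported by the generic reset.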
*/ ch->devices &= (devs | (devs * ATA_ATAPI_MASTER)); } else ch->devices = 0; } static int ata_intel_old_setmode(device_t dev, int target, int mode) { device_t parent = device_get_parent(dev); struct ata_pci_controller *ctlr = device_get_softc(parent); mode = min(mode, ctlr->chip->max_dma); return (mode); } static int ata_intel_new_setmode(device_t dev, int target, int mode) { device_t parent = device_get_parent(dev); struct ata_pci_controller *ctlr = device_get_softc(parent); struct ata_channel *ch = device_get_softc(dev); int devno = (ch->unit << 1) + target; int piomode; u_int32_t reg40 = pci_read_config(parent, 0x40, 4); u_int8_t reg44 = pci_read_config(parent, 0x44, 1); u_int8_t reg48 = pci_read_config(parent, 0x48, 1); u_int16_t reg4a = pci_read_config(parent, 0x4a, 2); u_int16_t reg54 = pci_read_config(parent, 0x54, 2); u_int32_t mask40 = 0, new40 = 0; u_int8_t mask44 = 0, new44 = 0; static const uint8_t timings[] = { 0x00, 0x00, 0x10, 0x21, 0x23, 0x00, 0x21, 0x23 }; static const uint8_t utimings[] = { 0x00, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02 }; /* In combined mode, skip PATA stuff for SATA channel. */ if (ch->flags & ATA_SATA) return (ata_sata_setmode(dev, target, mode)); mode = min(mode, ctlr->chip->max_dma); if (ata_dma_check_80pin && mode > ATA_UDMA2 && !(reg54 & (0x10 << devno))) { ata_print_cable(dev, "controller"); mode = ATA_UDMA2; } /* Enable/disable UDMA and set timings. */ if (mode >= ATA_UDMA0) { pci_write_config(parent, 0x48, reg48 | (0x0001 << devno), 2); pci_write_config(parent, 0x4a, (reg4a & ~(0x3 << (devno << 2))) | (utimings[mode & ATA_MODE_MASK] << (devno<<2)), 2); piomode = ATA_PIO4; } else { pci_write_config(parent, 0x48, reg48 & ~(0x0001 << devno), 2); pci_write_config(parent, 0x4a, (reg4a & ~(0x3 << (devno << 2))),2); piomode = mode; } reg54 |= 0x0400; /* Set UDMA reference clock (33/66/133MHz). */ reg54 &= ~(0x1001 << devno); if (mode >= ATA_UDMA5) reg54 |= (0x1000 << devno); else if (mode >= ATA_UDMA3) reg54 |= (0x1 << devno); pci_write_config(parent, 0x54, reg54, 2); /* Allow PIO/WDMA timing controls. */ reg40 &= ~0x00ff00ff; reg40 |= 0x40774077; /* Set PIO/WDMA timings. */ if (target == 0) { mask40 = 0x3300; new40 = timings[ata_mode2idx(piomode)] << 8; } else { mask44 = 0x0f; new44 = ((timings[ata_mode2idx(piomode)] & 0x30) >> 2) | (timings[ata_mode2idx(piomode)] & 0x03); } if (ch->unit) { mask40 <<= 16; new40 <<= 16; mask44 <<= 4; new44 <<= 4; } pci_write_config(parent, 0x40, (reg40 & ~mask40) | new40, 4); pci_write_config(parent, 0x44, (reg44 & ~mask44) | new44, 1); return (mode); } static int ata_intel_sch_setmode(device_t dev, int target, int mode) { device_t parent = device_get_parent(dev); struct ata_pci_controller *ctlr = device_get_softc(parent); u_int8_t dtim = 0x80 + (target << 2); u_int32_t tim = pci_read_config(parent, dtim, 4); int piomode; mode = min(mode, ctlr->chip->max_dma); if (mode >= ATA_UDMA0) { tim |= (0x1 << 31); tim &= ~(0x7 << 16); tim |= ((mode & ATA_MODE_MASK) << 16); piomode = ATA_PIO4; } else if (mode >= ATA_WDMA0) { tim &= ~(0x1 << 31); tim &= ~(0x3 << 8); tim |= ((mode & ATA_MODE_MASK) << 8); piomode = (mode == ATA_WDMA0) ? ATA_PIO0 : (mode == ATA_WDMA1) ? 
ATA_PIO3 : ATA_PIO4; } else piomode = mode; tim &= ~(0x7); tim |= (piomode & 0x7); pci_write_config(parent, dtim, tim, 4); return (mode); } static int ata_intel_sata_getrev(device_t dev, int target) { struct ata_channel *ch = device_get_softc(dev); uint32_t status; if (ata_sata_scr_read(ch, target, ATA_SSTATUS, &status) == 0) return ((status & 0x0f0) >> 4); return (0xff); } static int ata_intel_sata_status(device_t dev) { struct ata_channel *ch = device_get_softc(dev); ata_sata_phy_check_events(dev, 0); if ((ch->flags & ATA_NO_SLAVE) == 0) ata_sata_phy_check_events(dev, 1); return ata_pci_status(dev); } static int ata_intel_sata_ahci_read(device_t dev, int port, int reg, u_int32_t *result) { struct ata_pci_controller *ctlr; struct ata_channel *ch; device_t parent; u_char *smap; int offset; parent = device_get_parent(dev); ctlr = device_get_softc(parent); ch = device_get_softc(dev); port = (port == 1) ? 1 : 0; smap = ATA_INTEL_SMAP(ctlr, ch); offset = 0x100 + smap[port] * 0x80; switch (reg) { case ATA_SSTATUS: reg = 0x28; break; case ATA_SCONTROL: reg = 0x2c; break; case ATA_SERROR: reg = 0x30; break; default: return (EINVAL); } *result = ATA_INL(ctlr->r_res2, offset + reg); return (0); } static int ata_intel_sata_cscr_read(device_t dev, int port, int reg, u_int32_t *result) { struct ata_pci_controller *ctlr; struct ata_channel *ch; device_t parent; u_char *smap; parent = device_get_parent(dev); ctlr = device_get_softc(parent); ch = device_get_softc(dev); smap = ATA_INTEL_SMAP(ctlr, ch); port = (port == 1) ? 1 : 0; switch (reg) { case ATA_SSTATUS: reg = 0; break; case ATA_SERROR: reg = 1; break; case ATA_SCONTROL: reg = 2; break; default: return (EINVAL); } ATA_INTEL_LOCK(ctlr); pci_write_config(parent, 0xa0, 0x50 + smap[port] * 0x10 + reg * 4, 4); *result = pci_read_config(parent, 0xa4, 4); ATA_INTEL_UNLOCK(ctlr); return (0); } static int ata_intel_sata_sidpr_read(device_t dev, int port, int reg, u_int32_t *result) { struct ata_pci_controller *ctlr; struct ata_channel *ch; device_t parent; parent = device_get_parent(dev); ctlr = device_get_softc(parent); ch = device_get_softc(dev); port = (port == 1) ? 1 : 0; switch (reg) { case ATA_SSTATUS: reg = 0; break; case ATA_SCONTROL: reg = 1; break; case ATA_SERROR: reg = 2; break; default: return (EINVAL); } ATA_INTEL_LOCK(ctlr); ATA_IDX_OUTL(ch, ATA_IDX_ADDR, ((ch->unit * 2 + port) << 8) + reg); *result = ATA_IDX_INL(ch, ATA_IDX_DATA); ATA_INTEL_UNLOCK(ctlr); return (0); } static int ata_intel_sata_ahci_write(device_t dev, int port, int reg, u_int32_t value) { struct ata_pci_controller *ctlr; struct ata_channel *ch; device_t parent; u_char *smap; int offset; parent = device_get_parent(dev); ctlr = device_get_softc(parent); ch = device_get_softc(dev); port = (port == 1) ? 1 : 0; smap = ATA_INTEL_SMAP(ctlr, ch); offset = 0x100 + smap[port] * 0x80; switch (reg) { case ATA_SSTATUS: reg = 0x28; break; case ATA_SCONTROL: reg = 0x2c; break; case ATA_SERROR: reg = 0x30; break; default: return (EINVAL); } ATA_OUTL(ctlr->r_res2, offset + reg, value); return (0); } static int ata_intel_sata_cscr_write(device_t dev, int port, int reg, u_int32_t value) { struct ata_pci_controller *ctlr; struct ata_channel *ch; device_t parent; u_char *smap; parent = device_get_parent(dev); ctlr = device_get_softc(parent); ch = device_get_softc(dev); smap = ATA_INTEL_SMAP(ctlr, ch); port = (port == 1) ? 
1 : 0; switch (reg) { case ATA_SSTATUS: reg = 0; break; case ATA_SERROR: reg = 1; break; case ATA_SCONTROL: reg = 2; break; default: return (EINVAL); } ATA_INTEL_LOCK(ctlr); pci_write_config(parent, 0xa0, 0x50 + smap[port] * 0x10 + reg * 4, 4); pci_write_config(parent, 0xa4, value, 4); ATA_INTEL_UNLOCK(ctlr); return (0); } static int ata_intel_sata_sidpr_write(device_t dev, int port, int reg, u_int32_t value) { struct ata_pci_controller *ctlr; struct ata_channel *ch; device_t parent; parent = device_get_parent(dev); ctlr = device_get_softc(parent); ch = device_get_softc(dev); port = (port == 1) ? 1 : 0; switch (reg) { case ATA_SSTATUS: reg = 0; break; case ATA_SCONTROL: reg = 1; break; case ATA_SERROR: reg = 2; break; default: return (EINVAL); } ATA_INTEL_LOCK(ctlr); ATA_IDX_OUTL(ch, ATA_IDX_ADDR, ((ch->unit * 2 + port) << 8) + reg); ATA_IDX_OUTL(ch, ATA_IDX_DATA, value); ATA_INTEL_UNLOCK(ctlr); return (0); } static int ata_intel_sata_sidpr_test(device_t dev) { struct ata_channel *ch = device_get_softc(dev); int port; uint32_t val; port = (ch->flags & ATA_NO_SLAVE) ? 0 : 1; for (; port >= 0; port--) { ata_intel_sata_sidpr_read(dev, port, ATA_SCONTROL, &val); if ((val & ATA_SC_IPM_MASK) == (ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER)) return (1); val |= ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER; ata_intel_sata_sidpr_write(dev, port, ATA_SCONTROL, val); ata_intel_sata_sidpr_read(dev, port, ATA_SCONTROL, &val); if ((val & ATA_SC_IPM_MASK) == (ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER)) return (1); } if (bootverbose) device_printf(dev, "SControl registers are not functional: %08x\n", val); return (0); } static int ata_intel_31244_ch_attach(device_t dev) { struct ata_pci_controller *ctlr = device_get_softc(device_get_parent(dev)); struct ata_channel *ch = device_get_softc(dev); int i; int ch_offset; ata_pci_dmainit(dev); ch_offset = 0x200 + ch->unit * 0x200; for (i = ATA_DATA; i < ATA_MAX_RES; i++) ch->r_io[i].res = ctlr->r_res2; /* setup ATA registers */ ch->r_io[ATA_DATA].offset = ch_offset + 0x00; ch->r_io[ATA_FEATURE].offset = ch_offset + 0x06; ch->r_io[ATA_COUNT].offset = ch_offset + 0x08; ch->r_io[ATA_SECTOR].offset = ch_offset + 0x0c; ch->r_io[ATA_CYL_LSB].offset = ch_offset + 0x10; ch->r_io[ATA_CYL_MSB].offset = ch_offset + 0x14; ch->r_io[ATA_DRIVE].offset = ch_offset + 0x18; ch->r_io[ATA_COMMAND].offset = ch_offset + 0x1d; ch->r_io[ATA_ERROR].offset = ch_offset + 0x04; ch->r_io[ATA_STATUS].offset = ch_offset + 0x1c; ch->r_io[ATA_ALTSTAT].offset = ch_offset + 0x28; ch->r_io[ATA_CONTROL].offset = ch_offset + 0x29; /* setup SATA registers */ ch->r_io[ATA_SSTATUS].offset = ch_offset + 0x100; ch->r_io[ATA_SERROR].offset = ch_offset + 0x104; ch->r_io[ATA_SCONTROL].offset = ch_offset + 0x108; /* setup DMA registers */ ch->r_io[ATA_BMCMD_PORT].offset = ch_offset + 0x70; ch->r_io[ATA_BMSTAT_PORT].offset = ch_offset + 0x72; ch->r_io[ATA_BMDTP_PORT].offset = ch_offset + 0x74; ch->flags |= ATA_NO_SLAVE; ch->flags |= ATA_SATA; ata_pci_hw(dev); ch->hw.status = ata_intel_31244_status; ch->hw.tf_write = ata_intel_31244_tf_write; /* enable PHY state change interrupt */ ATA_OUTL(ctlr->r_res2, 0x4, ATA_INL(ctlr->r_res2, 0x04) | (0x01 << (ch->unit << 3))); return 0; } static int ata_intel_31244_ch_detach(device_t dev) { ata_pci_dmafini(dev); return (0); } static int ata_intel_31244_status(device_t dev) { /* do we have any PHY events ? */ ata_sata_phy_check_events(dev, -1); /* any drive action to take care of ?
*/ return ata_pci_status(dev); } static void ata_intel_31244_tf_write(struct ata_request *request) { struct ata_channel *ch = device_get_softc(request->parent); if (request->flags & ATA_R_48BIT) { ATA_IDX_OUTW(ch, ATA_FEATURE, request->u.ata.feature); ATA_IDX_OUTW(ch, ATA_COUNT, request->u.ata.count); ATA_IDX_OUTW(ch, ATA_SECTOR, ((request->u.ata.lba >> 16) & 0xff00) | (request->u.ata.lba & 0x00ff)); ATA_IDX_OUTW(ch, ATA_CYL_LSB, ((request->u.ata.lba >> 24) & 0xff00) | ((request->u.ata.lba >> 8) & 0x00ff)); ATA_IDX_OUTW(ch, ATA_CYL_MSB, ((request->u.ata.lba >> 32) & 0xff00) | ((request->u.ata.lba >> 16) & 0x00ff)); ATA_IDX_OUTW(ch, ATA_DRIVE, ATA_D_LBA | ATA_DEV(request->unit)); } else { ATA_IDX_OUTB(ch, ATA_FEATURE, request->u.ata.feature); ATA_IDX_OUTB(ch, ATA_COUNT, request->u.ata.count); ATA_IDX_OUTB(ch, ATA_SECTOR, request->u.ata.lba); ATA_IDX_OUTB(ch, ATA_CYL_LSB, request->u.ata.lba >> 8); ATA_IDX_OUTB(ch, ATA_CYL_MSB, request->u.ata.lba >> 16); ATA_IDX_OUTB(ch, ATA_DRIVE, ATA_D_IBM | ATA_D_LBA | ATA_DEV(request->unit) | ((request->u.ata.lba >> 24) & 0x0f)); } } static void ata_intel_31244_reset(device_t dev) { struct ata_channel *ch = device_get_softc(dev); if (ata_sata_phy_reset(dev, -1, 1)) ata_generic_reset(dev); else ch->devices = 0; } ATA_DECLARE_DRIVER(ata_intel); Index: projects/powernv/dev/hwpmc/hwpmc_mod.c =================================================================== --- projects/powernv/dev/hwpmc/hwpmc_mod.c (revision 290990) +++ projects/powernv/dev/hwpmc/hwpmc_mod.c (revision 290991) @@ -1,5191 +1,5211 @@ /*- * Copyright (c) 2003-2008 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* needs to be after */ #include #include #include #include #include #include #include #include "hwpmc_soft.h" /* * Types */ enum pmc_flags { PMC_FLAG_NONE = 0x00, /* do nothing */ PMC_FLAG_REMOVE = 0x01, /* atomically remove entry from hash */ PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */ }; /* * The offset in sysent where the syscall is allocated. */ static int pmc_syscall_num = NO_SYSCALL; struct pmc_cpu **pmc_pcpu; /* per-cpu state */ pmc_value_t *pmc_pcpu_saved; /* saved PMC values: CSW handling */ #define PMC_PCPU_SAVED(C,R) pmc_pcpu_saved[(R) + md->pmd_npmc*(C)] struct mtx_pool *pmc_mtxpool; static int *pmc_pmcdisp; /* PMC row dispositions */ #define PMC_ROW_DISP_IS_FREE(R) (pmc_pmcdisp[(R)] == 0) #define PMC_ROW_DISP_IS_THREAD(R) (pmc_pmcdisp[(R)] > 0) #define PMC_ROW_DISP_IS_STANDALONE(R) (pmc_pmcdisp[(R)] < 0) #define PMC_MARK_ROW_FREE(R) do { \ pmc_pmcdisp[(R)] = 0; \ } while (0) #define PMC_MARK_ROW_STANDALONE(R) do { \ KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ atomic_add_int(&pmc_pmcdisp[(R)], -1); \ KASSERT(pmc_pmcdisp[(R)] >= (-pmc_cpu_max_active()), \ ("[pmc,%d] row disposition error", __LINE__)); \ } while (0) #define PMC_UNMARK_ROW_STANDALONE(R) do { \ atomic_add_int(&pmc_pmcdisp[(R)], 1); \ KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ } while (0) #define PMC_MARK_ROW_THREAD(R) do { \ KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ atomic_add_int(&pmc_pmcdisp[(R)], 1); \ } while (0) #define PMC_UNMARK_ROW_THREAD(R) do { \ atomic_add_int(&pmc_pmcdisp[(R)], -1); \ KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ } while (0) /* various event handlers */ static eventhandler_tag pmc_exit_tag, pmc_fork_tag, pmc_kld_load_tag, pmc_kld_unload_tag; /* Module statistics */ struct pmc_op_getdriverstats pmc_stats; /* Machine/processor dependent operations */ static struct pmc_mdep *md; /* * Hash tables mapping owner processes and target threads to PMCs. */ struct mtx pmc_processhash_mtx; /* spin mutex */ static u_long pmc_processhashmask; static LIST_HEAD(pmc_processhash, pmc_process) *pmc_processhash; /* * Hash table of PMC owner descriptors. This table is protected by * the shared PMC "sx" lock. */ static u_long pmc_ownerhashmask; static LIST_HEAD(pmc_ownerhash, pmc_owner) *pmc_ownerhash; /* * List of PMC owners with system-wide sampling PMCs. */ static LIST_HEAD(, pmc_owner) pmc_ss_owners; /* * A map of row indices to classdep structures. 
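 * Each global row index is owned by exactly one PMC class; given a row
 * index, pmc_ri_to_classdep() (defined further below) consults this map
 * to return the owning class descriptor along with the row index
 * rebased to that class (adjri = ri - pcd->pcd_ri).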
*/ static struct pmc_classdep **pmc_rowindex_to_classdep; /* * Prototypes */ #ifdef HWPMC_DEBUG static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS); static int pmc_debugflags_parse(char *newstr, char *fence); #endif static int load(struct module *module, int cmd, void *arg); static int pmc_attach_process(struct proc *p, struct pmc *pm); static struct pmc *pmc_allocate_pmc_descriptor(void); static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p); static int pmc_attach_one_process(struct proc *p, struct pmc *pm); static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu); static int pmc_can_attach(struct pmc *pm, struct proc *p); static void pmc_capture_user_callchain(int cpu, int soft, struct trapframe *tf); static void pmc_cleanup(void); static int pmc_detach_process(struct proc *p, struct pmc *pm); static int pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags); static void pmc_destroy_owner_descriptor(struct pmc_owner *po); static void pmc_destroy_pmc_descriptor(struct pmc *pm); static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p); static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm); static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmc); static struct pmc_process *pmc_find_process_descriptor(struct proc *p, uint32_t mode); static void pmc_force_context_switch(void); static void pmc_link_target_process(struct pmc *pm, struct pmc_process *pp); static void pmc_log_all_process_mappings(struct pmc_owner *po); static void pmc_log_kernel_mappings(struct pmc *pm); static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p); static void pmc_maybe_remove_owner(struct pmc_owner *po); static void pmc_process_csw_in(struct thread *td); static void pmc_process_csw_out(struct thread *td); static void pmc_process_exit(void *arg, struct proc *p); static void pmc_process_fork(void *arg, struct proc *p1, struct proc *p2, int n); static void pmc_process_samples(int cpu, int soft); static void pmc_release_pmc_descriptor(struct pmc *pmc); static void pmc_remove_owner(struct pmc_owner *po); static void pmc_remove_process_descriptor(struct pmc_process *pp); static void pmc_restore_cpu_binding(struct pmc_binding *pb); static void pmc_save_cpu_binding(struct pmc_binding *pb); static void pmc_select_cpu(int cpu); static int pmc_start(struct pmc *pm); static int pmc_stop(struct pmc *pm); static int pmc_syscall_handler(struct thread *td, void *syscall_args); static void pmc_unlink_target_process(struct pmc *pmc, struct pmc_process *pp); static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp); static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp); static struct pmc_mdep *pmc_generic_cpu_initialize(void); static void pmc_generic_cpu_finalize(struct pmc_mdep *md); /* * Kernel tunables and sysctl(8) interface. 
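 * The RDTUN knobs below are boot-time loader tunables, while the RWTUN
 * ones may also be changed at runtime with sysctl(8). A hypothetical
 * loader.conf(5) fragment raising the sampling limits (the values here
 * are illustrative only) would look like:
 *
 *	kern.hwpmc.nsamples=2048
 *	kern.hwpmc.callchaindepth=32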
*/ SYSCTL_DECL(_kern_hwpmc); static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH; SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_RDTUN, &pmc_callchaindepth, 0, "depth of call chain records"); #ifdef HWPMC_DEBUG struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS; char pmc_debugstr[PMC_DEBUG_STRSIZE]; TUNABLE_STR(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr, sizeof(pmc_debugstr)); SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NOFETCH, 0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags"); #endif /* * kern.hwpmc.hashsize -- determines the number of rows in the * hash table used to look up threads */ static int pmc_hashsize = PMC_HASH_SIZE; SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_RDTUN, &pmc_hashsize, 0, "rows in hash tables"); /* * kern.hwpmc.nsamples -- number of PC samples/callchain stacks per CPU */ static int pmc_nsamples = PMC_NSAMPLES; SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_RDTUN, &pmc_nsamples, 0, "number of PC samples per CPU"); /* * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool. */ static int pmc_mtxpool_size = PMC_MTXPOOL_SIZE; SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG_RDTUN, &pmc_mtxpool_size, 0, "size of spin mutex pool"); /* * security.bsd.unprivileged_syspmcs -- allow non-root processes to * allocate system-wide PMCs. * * Allowing unprivileged processes to allocate system PMCs is convenient * if system-wide measurements need to be taken concurrently with other * per-process measurements. This feature is turned off by default. */ static int pmc_unprivileged_syspmcs = 0; SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RWTUN, &pmc_unprivileged_syspmcs, 0, "allow unprivileged processes to allocate system PMCs"); /* * Hash function. Discard the lower 2 bits of the pointer since * these are always zero for our uses. The hash multiplier is * round((2^LONG_BIT) * ((sqrt(5)-1)/2)).
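 * Worked out: for LONG_BIT == 32, round(2^32 * 0.6180339887...) ==
 * 2654435769, and for LONG_BIT == 64, round(2^64 * 0.6180339887...) ==
 * 11400714819323198486, matching the _PMC_HM constants defined below.
 * This is Fibonacci hashing: multiplying by a constant derived from the
 * golden ratio scatters consecutive pointer values across the table,
 * and PMC_HASH_PTR() masks the product down to the table size.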
*/ #if LONG_BIT == 64 #define _PMC_HM 11400714819323198486u #elif LONG_BIT == 32 #define _PMC_HM 2654435769u #else #error Must know the size of 'long' to compile #endif #define PMC_HASH_PTR(P,M) ((((unsigned long) (P) >> 2) * _PMC_HM) & (M)) /* * Syscall structures */ /* The `sysent' for the new syscall */ static struct sysent pmc_sysent = { 2, /* sy_narg */ pmc_syscall_handler /* sy_call */ }; static struct syscall_module_data pmc_syscall_mod = { load, NULL, &pmc_syscall_num, &pmc_sysent, #if (__FreeBSD_version >= 1100000) { 0, NULL }, SY_THR_STATIC_KLD, #else { 0, NULL } #endif }; static moduledata_t pmc_mod = { PMC_MODULE_NAME, syscall_module_handler, &pmc_syscall_mod }; DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY); MODULE_VERSION(pmc, PMC_VERSION); #ifdef HWPMC_DEBUG enum pmc_dbgparse_state { PMCDS_WS, /* in whitespace */ PMCDS_MAJOR, /* seen a major keyword */ PMCDS_MINOR }; static int pmc_debugflags_parse(char *newstr, char *fence) { char c, *p, *q; struct pmc_debugflags *tmpflags; int error, found, *newbits, tmp; size_t kwlen; tmpflags = malloc(sizeof(*tmpflags), M_PMC, M_WAITOK|M_ZERO); p = newstr; error = 0; for (; p < fence && (c = *p); p++) { /* skip white space */ if (c == ' ' || c == '\t') continue; /* look for a keyword followed by "=" */ for (q = p; p < fence && (c = *p) && c != '='; p++) ; if (c != '=') { error = EINVAL; goto done; } kwlen = p - q; newbits = NULL; /* lookup flag group name */ #define DBG_SET_FLAG_MAJ(S,F) \ if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \ newbits = &tmpflags->pdb_ ## F; DBG_SET_FLAG_MAJ("cpu", CPU); DBG_SET_FLAG_MAJ("csw", CSW); DBG_SET_FLAG_MAJ("logging", LOG); DBG_SET_FLAG_MAJ("module", MOD); DBG_SET_FLAG_MAJ("md", MDP); DBG_SET_FLAG_MAJ("owner", OWN); DBG_SET_FLAG_MAJ("pmc", PMC); DBG_SET_FLAG_MAJ("process", PRC); DBG_SET_FLAG_MAJ("sampling", SAM); if (newbits == NULL) { error = EINVAL; goto done; } p++; /* skip the '=' */ /* Now parse the individual flags */ tmp = 0; newflag: for (q = p; p < fence && (c = *p); p++) if (c == ' ' || c == '\t' || c == ',') break; /* p == fence or c == ws or c == "," or c == 0 */ if ((kwlen = p - q) == 0) { *newbits = tmp; continue; } found = 0; #define DBG_SET_FLAG_MIN(S,F) \ if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \ tmp |= found = (1 << PMC_DEBUG_MIN_ ## F) /* a '*' denotes all possible flags in the group */ if (kwlen == 1 && *q == '*') tmp = found = ~0; /* look for individual flag names */ DBG_SET_FLAG_MIN("allocaterow", ALR); DBG_SET_FLAG_MIN("allocate", ALL); DBG_SET_FLAG_MIN("attach", ATT); DBG_SET_FLAG_MIN("bind", BND); DBG_SET_FLAG_MIN("config", CFG); DBG_SET_FLAG_MIN("exec", EXC); DBG_SET_FLAG_MIN("exit", EXT); DBG_SET_FLAG_MIN("find", FND); DBG_SET_FLAG_MIN("flush", FLS); DBG_SET_FLAG_MIN("fork", FRK); DBG_SET_FLAG_MIN("getbuf", GTB); DBG_SET_FLAG_MIN("hook", PMH); DBG_SET_FLAG_MIN("init", INI); DBG_SET_FLAG_MIN("intr", INT); DBG_SET_FLAG_MIN("linktarget", TLK); DBG_SET_FLAG_MIN("mayberemove", OMR); DBG_SET_FLAG_MIN("ops", OPS); DBG_SET_FLAG_MIN("read", REA); DBG_SET_FLAG_MIN("register", REG); DBG_SET_FLAG_MIN("release", REL); DBG_SET_FLAG_MIN("remove", ORM); DBG_SET_FLAG_MIN("sample", SAM); DBG_SET_FLAG_MIN("scheduleio", SIO); DBG_SET_FLAG_MIN("select", SEL); DBG_SET_FLAG_MIN("signal", SIG); DBG_SET_FLAG_MIN("swi", SWI); DBG_SET_FLAG_MIN("swo", SWO); DBG_SET_FLAG_MIN("start", STA); DBG_SET_FLAG_MIN("stop", STO); DBG_SET_FLAG_MIN("syscall", PMS); DBG_SET_FLAG_MIN("unlinktarget", TUL); DBG_SET_FLAG_MIN("write", WRI); if (found == 0) { /* unrecognized flag name */ 
error = EINVAL; goto done; } if (c == 0 || c == ' ' || c == '\t') { /* end of flag group */ *newbits = tmp; continue; } p++; goto newflag; } /* save the new flag set */ bcopy(tmpflags, &pmc_debugflags, sizeof(pmc_debugflags)); done: free(tmpflags, M_PMC); return error; } static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS) { char *fence, *newstr; int error; unsigned int n; (void) arg1; (void) arg2; /* unused parameters */ n = sizeof(pmc_debugstr); newstr = malloc(n, M_PMC, M_WAITOK|M_ZERO); (void) strlcpy(newstr, pmc_debugstr, n); error = sysctl_handle_string(oidp, newstr, n, req); /* if there is a new string, parse and copy it */ if (error == 0 && req->newptr != NULL) { fence = newstr + (n < req->newlen ? n : req->newlen + 1); if ((error = pmc_debugflags_parse(newstr, fence)) == 0) (void) strlcpy(pmc_debugstr, newstr, sizeof(pmc_debugstr)); } free(newstr, M_PMC); return error; } #endif /* * Map a row index to a classdep structure and return the adjusted row * index for the PMC class index. */ static struct pmc_classdep * pmc_ri_to_classdep(struct pmc_mdep *md, int ri, int *adjri) { struct pmc_classdep *pcd; (void) md; KASSERT(ri >= 0 && ri < md->pmd_npmc, ("[pmc,%d] illegal row-index %d", __LINE__, ri)); pcd = pmc_rowindex_to_classdep[ri]; KASSERT(pcd != NULL, ("[pmc,%d] ri %d null pcd", __LINE__, ri)); *adjri = ri - pcd->pcd_ri; KASSERT(*adjri >= 0 && *adjri < pcd->pcd_num, ("[pmc,%d] adjusted row-index %d", __LINE__, *adjri)); return (pcd); } /* * Concurrency Control * * The driver manages the following data structures: * * - target process descriptors, one per target process * - owner process descriptors (and attached lists), one per owner process * - lookup hash tables for owner and target processes * - PMC descriptors (and attached lists) * - per-cpu hardware state * - the 'hook' variable through which the kernel calls into * this module * - the machine hardware state (managed by the MD layer) * * These data structures are accessed from: * * - thread context-switch code * - interrupt handlers (possibly on multiple cpus) * - kernel threads on multiple cpus running on behalf of user * processes doing system calls * - this driver's private kernel threads * * = Locks and Locking strategy = * * The driver uses four locking strategies for its operation: * * - The global SX lock "pmc_sx" is used to protect internal * data structures. * * Calls into the module by syscall() start with this lock being * held in exclusive mode. Depending on the requested operation, * the lock may be downgraded to 'shared' mode to allow more * concurrent readers into the module. Calls into the module from * other parts of the kernel acquire the lock in shared mode. * * This SX lock is held in exclusive mode for any operations that * modify the linkages between the driver's internal data structures. * * The 'pmc_hook' function pointer is also protected by this lock. * It is only examined with the sx lock held in exclusive mode. The * kernel module is allowed to be unloaded only with the sx lock held * in exclusive mode. In normal syscall handling, after acquiring the * pmc_sx lock we first check that 'pmc_hook' is non-null before * proceeding. This prevents races between the thread unloading the module * and other threads seeking to use the module. * * - Lookups of target process structures and owner process structures * cannot use the global "pmc_sx" SX lock because these lookups need * to happen during context switches and in other critical sections * where sleeping is not allowed. 
We protect these lookup tables * with their own private spin-mutexes, "pmc_processhash_mtx" and * "pmc_ownerhash_mtx". * * - Interrupt handlers work in a lock free manner. At interrupt * time, handlers look at the PMC pointer (phw->phw_pmc) configured * when the PMC was started. If this pointer is NULL, the interrupt * is ignored after updating driver statistics. We ensure that this * pointer is set (using an atomic operation if necessary) before the * PMC hardware is started. Conversely, this pointer is unset atomically * only after the PMC hardware is stopped. * * We ensure that everything needed for the operation of an * interrupt handler is available without it needing to acquire any * locks. We also ensure that a PMC's software state is destroyed only * after the PMC is taken off hardware (on all CPUs). * * - Context-switch handling with process-private PMCs needs more * care. * * A given process may be the target of multiple PMCs. For example, * PMCATTACH and PMCDETACH may be requested by a process on one CPU * while the target process is running on another. A PMC could also * be getting released because its owner is exiting. We tackle * these situations in the following manner: * * - each target process structure 'pmc_process' has an array * of 'struct pmc *' pointers, one for each hardware PMC. * * - At context switch IN time, each "target" PMC in RUNNING state * gets started on hardware and a pointer to each PMC is copied into * the per-cpu phw array. The 'runcount' for the PMC is * incremented. * * - At context switch OUT time, all process-virtual PMCs are stopped * on hardware. The saved value is added to the PMCs value field * only if the PMC is in a non-deleted state (the PMCs state could * have changed during the current time slice). * * Note that since in-between a switch IN on a processor and a switch * OUT, the PMC could have been released on another CPU. Therefore * context switch OUT always looks at the hardware state to turn * OFF PMCs and will update a PMC's saved value only if reachable * from the target process record. * * - OP PMCRELEASE could be called on a PMC at any time (the PMC could * be attached to many processes at the time of the call and could * be active on multiple CPUs). * * We prevent further scheduling of the PMC by marking it as in * state 'DELETED'. If the runcount of the PMC is non-zero then * this PMC is currently running on a CPU somewhere. The thread * doing the PMCRELEASE operation waits by repeatedly doing a * pause() till the runcount comes to zero. * * The contents of a PMC descriptor (struct pmc) are protected using * a spin-mutex. In order to save space, we use a mutex pool. * * In terms of lock types used by witness(4), we use: * - Type "pmc-sx", used by the global SX lock. * - Type "pmc-sleep", for sleep mutexes used by logger threads. * - Type "pmc-per-proc", for protecting PMC owner descriptors. * - Type "pmc-leaf", used for all other spin mutexes. 
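 * As a minimal sketch of the DELETED-then-drain protocol described
 * above (the wmesg and the atomic read are illustrative; the real
 * release path in pmc_release_pmc_descriptor() performs additional
 * teardown):
 *
 *	pm->pm_state = PMC_STATE_DELETED;		-- block new scheduling
 *	while (atomic_load_acq_int(&pm->pm_runcount) != 0)
 *		pause("pmcrls", 1);			-- wait out running CPUs
 *
 * Only once pm_runcount has drained to zero is it safe to free the
 * PMC's software state.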
*/ /* * save the cpu binding of the current kthread */ static void pmc_save_cpu_binding(struct pmc_binding *pb) { PMCDBG0(CPU,BND,2, "save-cpu"); thread_lock(curthread); pb->pb_bound = sched_is_bound(curthread); pb->pb_cpu = curthread->td_oncpu; thread_unlock(curthread); PMCDBG1(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu); } /* * restore the cpu binding of the current thread */ static void pmc_restore_cpu_binding(struct pmc_binding *pb) { PMCDBG2(CPU,BND,2, "restore-cpu curcpu=%d restore=%d", curthread->td_oncpu, pb->pb_cpu); thread_lock(curthread); if (pb->pb_bound) sched_bind(curthread, pb->pb_cpu); else sched_unbind(curthread); thread_unlock(curthread); PMCDBG0(CPU,BND,2, "restore-cpu done"); } /* * move execution to the specified cpu and bind it there. */ static void pmc_select_cpu(int cpu) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] bad cpu number %d", __LINE__, cpu)); /* Never move to an inactive CPU. */ KASSERT(pmc_cpu_is_active(cpu), ("[pmc,%d] selecting inactive " "CPU %d", __LINE__, cpu)); PMCDBG1(CPU,SEL,2, "select-cpu cpu=%d", cpu); thread_lock(curthread); sched_bind(curthread, cpu); thread_unlock(curthread); KASSERT(curthread->td_oncpu == cpu, ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__, cpu, curthread->td_oncpu)); PMCDBG1(CPU,SEL,2, "select-cpu cpu=%d ok", cpu); } /* * Force a context switch. * * We do this by pause'ing for 1 tick -- invoking mi_switch() is not * guaranteed to force a context switch. */ static void pmc_force_context_switch(void) { pause("pmcctx", 1); } /* * Get the file name for an executable. This is a simple wrapper * around vn_fullpath(9). */ static void pmc_getfilename(struct vnode *v, char **fullpath, char **freepath) { *fullpath = "unknown"; *freepath = NULL; vn_fullpath(curthread, v, fullpath, freepath); } /* * remove a process owning PMCs */ void pmc_remove_owner(struct pmc_owner *po) { struct pmc *pm, *tmp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG1(OWN,ORM,1, "remove-owner po=%p", po); /* Remove descriptor from the owner hash table */ LIST_REMOVE(po, po_next); /* release all owned PMC descriptors */ LIST_FOREACH_SAFE(pm, &po->po_pmcs, pm_next, tmp) { PMCDBG1(OWN,ORM,2, "pmc=%p", pm); KASSERT(pm->pm_owner == po, ("[pmc,%d] owner %p != po %p", __LINE__, pm->pm_owner, po)); pmc_release_pmc_descriptor(pm); /* will unlink from the list */ pmc_destroy_pmc_descriptor(pm); } KASSERT(po->po_sscount == 0, ("[pmc,%d] SS count not zero", __LINE__)); KASSERT(LIST_EMPTY(&po->po_pmcs), ("[pmc,%d] PMC list not empty", __LINE__)); /* de-configure the log file if present */ if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_deconfigure_log(po); } /* * remove an owner process record if all conditions are met. */ static void pmc_maybe_remove_owner(struct pmc_owner *po) { PMCDBG1(OWN,OMR,1, "maybe-remove-owner po=%p", po); /* * Remove owner record if * - this process does not own any PMCs * - this process has not allocated a system-wide sampling buffer */ if (LIST_EMPTY(&po->po_pmcs) && ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } } /* * Add an association between a target process and a PMC.
*/ static void pmc_link_target_process(struct pmc *pm, struct pmc_process *pp) { int ri; struct pmc_target *pt; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL && pp != NULL, ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] Attaching a non-process-virtual pmc=%p to pid=%d", __LINE__, pm, pp->pp_proc->p_pid)); KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= ((int) md->pmd_npmc - 1), ("[pmc,%d] Illegal reference count %d for process record %p", __LINE__, pp->pp_refcnt, (void *) pp)); ri = PMC_TO_ROWINDEX(pm); PMCDBG3(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p", pm, ri, pp); #ifdef HWPMC_DEBUG LIST_FOREACH(pt, &pm->pm_targets, pt_next) if (pt->pt_process == pp) KASSERT(0, ("[pmc,%d] pp %p already in pmc %p targets", __LINE__, pp, pm)); #endif pt = malloc(sizeof(struct pmc_target), M_PMC, M_WAITOK|M_ZERO); pt->pt_process = pp; LIST_INSERT_HEAD(&pm->pm_targets, pt, pt_next); atomic_store_rel_ptr((uintptr_t *)&pp->pp_pmcs[ri].pp_pmc, (uintptr_t)pm); if (pm->pm_owner->po_owner == pp->pp_proc) pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER; /* * Initialize the per-process values at this row index. */ pp->pp_pmcs[ri].pp_pmcval = PMC_TO_MODE(pm) == PMC_MODE_TS ? pm->pm_sc.pm_reloadcount : 0; pp->pp_refcnt++; } /* * Removes the association between a target process and a PMC. */ static void pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp) { int ri; struct proc *p; struct pmc_target *ptgt; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL && pp != NULL, ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); KASSERT(pp->pp_refcnt >= 1 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal ref count %d on process record %p", __LINE__, pp->pp_refcnt, (void *) pp)); ri = PMC_TO_ROWINDEX(pm); PMCDBG3(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p", pm, ri, pp); KASSERT(pp->pp_pmcs[ri].pp_pmc == pm, ("[pmc,%d] PMC ri %d mismatch pmc %p pp->[ri] %p", __LINE__, ri, pm, pp->pp_pmcs[ri].pp_pmc)); pp->pp_pmcs[ri].pp_pmc = NULL; pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0; /* Remove owner-specific flags */ if (pm->pm_owner->po_owner == pp->pp_proc) { pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS; pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER; } pp->pp_refcnt--; /* Remove the target process from the PMC structure */ LIST_FOREACH(ptgt, &pm->pm_targets, pt_next) if (ptgt->pt_process == pp) break; KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found " "in pmc %p", __LINE__, pp->pp_proc, pp, pm)); LIST_REMOVE(ptgt, pt_next); free(ptgt, M_PMC); /* if the PMC now lacks targets, send the owner a SIGIO */ if (LIST_EMPTY(&pm->pm_targets)) { p = pm->pm_owner->po_owner; PROC_LOCK(p); kern_psignal(p, SIGIO); PROC_UNLOCK(p); PMCDBG2(PRC,SIG,2, "signalling proc=%p signal=%d", p, SIGIO); } } /* * Check if PMC 'pm' may be attached to target process 't'. */ static int pmc_can_attach(struct pmc *pm, struct proc *t) { struct proc *o; /* pmc owner */ struct ucred *oc, *tc; /* owner, target credentials */ int decline_attach, i; /* * A PMC's owner can always attach that PMC to itself. */ if ((o = pm->pm_owner->po_owner) == t) return 0; PROC_LOCK(o); oc = o->p_ucred; crhold(oc); PROC_UNLOCK(o); PROC_LOCK(t); tc = t->p_ucred; crhold(tc); PROC_UNLOCK(t); /* * The effective uid of the PMC owner should match at least one * of the {effective,real,saved} uids of the target process. 
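 * For example (illustrative uids): an owner with euid 1001 passes this
 * check against a target whose euid/ruid/svuid are 0/1001/1001, since
 * the real uid matches, but fails against a target running as 0/0/0.
 * The group checks below must then also pass before the attach is
 * allowed.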
*/ decline_attach = oc->cr_uid != tc->cr_uid && oc->cr_uid != tc->cr_svuid && oc->cr_uid != tc->cr_ruid; /* * Every one of the target's group ids must be in the owner's * group list. */ for (i = 0; !decline_attach && i < tc->cr_ngroups; i++) decline_attach = !groupmember(tc->cr_groups[i], oc); /* check the real and saved gids too */ if (decline_attach == 0) decline_attach = !groupmember(tc->cr_rgid, oc) || !groupmember(tc->cr_svgid, oc); crfree(tc); crfree(oc); return !decline_attach; } /* * Attach a process to a PMC. */ static int pmc_attach_one_process(struct proc *p, struct pmc *pm) { int ri; char *fullpath, *freepath; struct pmc_process *pp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); /* * Locate the process descriptor corresponding to process 'p', * allocating space as needed. * * Verify that rowindex 'pm_rowindex' is free in the process * descriptor. * * If not, allocate space for a descriptor and link the * process descriptor and PMC. */ ri = PMC_TO_ROWINDEX(pm); if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL) return ENOMEM; if (pp->pp_pmcs[ri].pp_pmc == pm) /* already present at slot [ri] */ return EEXIST; if (pp->pp_pmcs[ri].pp_pmc != NULL) return EBUSY; pmc_link_target_process(pm, pp); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) && (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) == 0) pm->pm_flags |= PMC_F_NEEDS_LOGFILE; pm->pm_flags |= PMC_F_ATTACH_DONE; /* mark as attached */ /* issue an attach event to a configured log file */ if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) { pmc_getfilename(p->p_textvp, &fullpath, &freepath); if (p->p_flag & P_KTHREAD) { fullpath = kernelname; freepath = NULL; } else pmclog_process_pmcattach(pm, p->p_pid, fullpath); if (freepath) free(freepath, M_TEMP); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmc_log_process_mappings(pm->pm_owner, p); } /* mark process as using HWPMCs */ PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); return 0; } /* * Attach a process and optionally its children */ static int pmc_attach_process(struct proc *p, struct pmc *pm) { int error; struct proc *top; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); /* * If this PMC successfully allowed a GETMSR operation * in the past, disallow further ATTACHes. */ if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0) return EPERM; if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) return pmc_attach_one_process(p, pm); /* * Traverse all child processes, attaching them to * this PMC. */ sx_slock(&proctree_lock); top = p; for (;;) { if ((error = pmc_attach_one_process(p, pm)) != 0) break; if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } if (error) (void) pmc_detach_process(top, pm); done: sx_sunlock(&proctree_lock); return error; } /* * Detach a process from a PMC. If there are no other PMCs tracking * this process, remove the process structure from its hash table. If * 'flags' contains PMC_FLAG_REMOVE, then free the process structure.
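 * A typical invocation, as in pmc_detach_process() below, is
 *
 *	(void) pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE);
 *
 * which unlinks the target and, when this was the last PMC tracking
 * the process, also frees its descriptor.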
*/ static int pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags) { int ri; struct pmc_process *pp; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL, ("[pmc,%d] null pm pointer", __LINE__)); ri = PMC_TO_ROWINDEX(pm); PMCDBG6(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x", pm, ri, p, p->p_pid, p->p_comm, flags); if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) return ESRCH; if (pp->pp_pmcs[ri].pp_pmc != pm) return EINVAL; pmc_unlink_target_process(pm, pp); /* Issue a detach entry if a log file is configured */ if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_pmcdetach(pm, p->p_pid); /* * If there are no PMCs targeting this process, we remove its * descriptor from the target hash table and unset the P_HWPMC * flag in the struct proc. */ KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal refcnt %d for process struct %p", __LINE__, pp->pp_refcnt, pp)); if (pp->pp_refcnt != 0) /* still a target of some PMC */ return 0; pmc_remove_process_descriptor(pp); if (flags & PMC_FLAG_REMOVE) free(pp, M_PMC); PROC_LOCK(p); p->p_flag &= ~P_HWPMC; PROC_UNLOCK(p); return 0; } /* * Detach a process and optionally its descendants from a PMC. */ static int pmc_detach_process(struct proc *p, struct pmc *pm) { struct proc *top; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) return pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); /* * Traverse all children, detaching them from this PMC. We * ignore errors since we could be detaching a PMC from a * partially attached proc tree. */ sx_slock(&proctree_lock); top = p; for (;;) { (void) pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } done: sx_sunlock(&proctree_lock); if (LIST_EMPTY(&pm->pm_targets)) pm->pm_flags &= ~PMC_F_ATTACH_DONE; return 0; } /* * Thread context switch IN */ static void pmc_process_csw_in(struct thread *td) { int cpu; unsigned int adjri, ri; struct pmc *pm; struct proc *p; struct pmc_cpu *pc; struct pmc_hw *phw; pmc_value_t newvalue; struct pmc_process *pp; struct pmc_classdep *pcd; p = td->td_proc; if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL) return; KASSERT(pp->pp_proc == td->td_proc, ("[pmc,%d] not my thread state", __LINE__)); critical_enter(); /* no preemption from this point */ cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ PMCDBG5(CSW,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, p->p_pid, p->p_comm, pp); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] weird CPU id %d", __LINE__, cpu)); pc = pmc_pcpu[cpu]; for (ri = 0; ri < md->pmd_npmc; ri++) { if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL) continue; KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] Target PMC in non-virtual mode (%d)", __LINE__, PMC_TO_MODE(pm))); KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] Row index mismatch pmc %d != ri %d", __LINE__, PMC_TO_ROWINDEX(pm), ri)); /* * Only PMCs that are marked as 'RUNNING' need * be placed on hardware. */ if (pm->pm_state != PMC_STATE_RUNNING) continue; /* increment PMC runcount */ atomic_add_rel_int(&pm->pm_runcount, 1); /* configure the HWPMC we are going to use.
*/ pcd = pmc_ri_to_classdep(md, ri, &adjri); pcd->pcd_config_pmc(cpu, adjri, pm); phw = pc->pc_hwpmcs[ri]; KASSERT(phw != NULL, ("[pmc,%d] null hw pointer", __LINE__)); KASSERT(phw->phw_pmc == pm, ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__, phw->phw_pmc, pm)); /* * Write out saved value and start the PMC. * * Sampling PMCs use a per-process value, while * counting mode PMCs use a per-pmc value that is * inherited across descendants. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) { mtx_pool_lock_spin(pmc_mtxpool, pm); + + /* + * Use the saved value calculated after the most recent + * thread switch out to start this counter. Reset + * the saved count in case another thread from this + * process switches in before any threads switch out. + */ newvalue = PMC_PCPU_SAVED(cpu,ri) = pp->pp_pmcs[ri].pp_pmcval; + pp->pp_pmcs[ri].pp_pmcval = pm->pm_sc.pm_reloadcount; mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC, ("[pmc,%d] illegal mode=%d", __LINE__, PMC_TO_MODE(pm))); mtx_pool_lock_spin(pmc_mtxpool, pm); newvalue = PMC_PCPU_SAVED(cpu, ri) = pm->pm_gv.pm_savedvalue; mtx_pool_unlock_spin(pmc_mtxpool, pm); } PMCDBG3(CSW,SWI,1,"cpu=%d ri=%d new=%jd", cpu, ri, newvalue); pcd->pcd_write_pmc(cpu, adjri, newvalue); /* If a sampling mode PMC, reset stalled state. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) CPU_CLR_ATOMIC(cpu, &pm->pm_stalled); /* Indicate that we desire this to run. */ CPU_SET_ATOMIC(cpu, &pm->pm_cpustate); /* Start the PMC. */ pcd->pcd_start_pmc(cpu, adjri); } /* * perform any other architecture/cpu dependent thread * switch-in actions. */ (void) (*md->pmd_switch_in)(pc, pp); critical_exit(); } /* * Thread context switch OUT. */ static void pmc_process_csw_out(struct thread *td) { int cpu; int64_t tmp; struct pmc *pm; struct proc *p; enum pmc_mode mode; struct pmc_cpu *pc; pmc_value_t newvalue; unsigned int adjri, ri; struct pmc_process *pp; struct pmc_classdep *pcd; /* * Locate our process descriptor; this may be NULL if * this process is exiting and we have already removed * the process from the target process table. * * Note that due to kernel preemption, multiple * context switches may happen while the process is * exiting. * * Note also that if the target process cannot be * found we still need to deconfigure any PMCs that * are currently running on hardware. */ p = td->td_proc; pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE); /* * save PMCs */ critical_enter(); cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ PMCDBG5(CSW,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, p->p_pid, p->p_comm, pp); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] weird CPU id %d", __LINE__, cpu)); pc = pmc_pcpu[cpu]; /* * When a PMC gets unlinked from a target process, it will * be removed from the target's pp_pmcs[] array. * * However, on an MP system, the target could have been * executing on another CPU at the time of the unlink. * So, at context switch OUT time, we need to look at * the hardware to determine if a PMC is scheduled on * it. */ for (ri = 0; ri < md->pmd_npmc; ri++) { pcd = pmc_ri_to_classdep(md, ri, &adjri); pm = NULL; (void) (*pcd->pcd_get_config)(cpu, adjri, &pm); if (pm == NULL) /* nothing at this row index */ continue; mode = PMC_TO_MODE(pm); if (!PMC_IS_VIRTUAL_MODE(mode)) continue; /* not a process virtual PMC */ KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", __LINE__, PMC_TO_ROWINDEX(pm), ri)); /* * Change desired state, and then stop if not stalled.
* This two-step dance should avoid race conditions where * an interrupt re-enables the PMC after this code has * already checked the pm_stalled flag. */ CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate); if (!CPU_ISSET(cpu, &pm->pm_stalled)) pcd->pcd_stop_pmc(cpu, adjri); /* reduce this PMC's runcount */ atomic_subtract_rel_int(&pm->pm_runcount, 1); /* * If this PMC is associated with this process, * save the reading. */ if (pp != NULL && pp->pp_pmcs[ri].pp_pmc != NULL) { KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc)); KASSERT(pp->pp_refcnt > 0, ("[pmc,%d] pp refcnt = %d", __LINE__, pp->pp_refcnt)); pcd->pcd_read_pmc(cpu, adjri, &newvalue); - tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); - - PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd", cpu, ri, - tmp); - if (mode == PMC_MODE_TS) { + PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd (samp)", + cpu, ri, PMC_PCPU_SAVED(cpu,ri) - newvalue); /* * For sampling process-virtual PMCs, - * we expect the count to be - * decreasing as the 'value' - * programmed into the PMC is the - * number of events to be seen till - * the next sampling interrupt. + * newvalue is the number of events to be seen + * until the next sampling interrupt. + * We can just add the events left from this + * invocation to the counter, then adjust + * in case we overflow our range. + * + * (Recall that we reload the counter every + * time we use it.) */ - if (tmp < 0) - tmp += pm->pm_sc.pm_reloadcount; mtx_pool_lock_spin(pmc_mtxpool, pm); - pp->pp_pmcs[ri].pp_pmcval -= tmp; - if ((int64_t) pp->pp_pmcs[ri].pp_pmcval <= 0) - pp->pp_pmcs[ri].pp_pmcval += + + pp->pp_pmcs[ri].pp_pmcval += newvalue; + if (pp->pp_pmcs[ri].pp_pmcval > + pm->pm_sc.pm_reloadcount) + pp->pp_pmcs[ri].pp_pmcval -= pm->pm_sc.pm_reloadcount; + KASSERT(pp->pp_pmcs[ri].pp_pmcval > 0 && + pp->pp_pmcs[ri].pp_pmcval <= + pm->pm_sc.pm_reloadcount, + ("[pmc,%d] pp_pmcval outside of expected " + "range cpu=%d ri=%d pp_pmcval=%jx " + "pm_reloadcount=%jx", __LINE__, cpu, ri, + pp->pp_pmcs[ri].pp_pmcval, + pm->pm_sc.pm_reloadcount)); mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { + tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); + + PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd (count)", + cpu, ri, tmp); /* * For counting process-virtual PMCs, * we expect the count to be * increasing monotonically, modulo a 64 * bit wraparound. */ KASSERT((int64_t) tmp >= 0, ("[pmc,%d] negative increment cpu=%d " "ri=%d newvalue=%jx saved=%jx " "incr=%jx", __LINE__, cpu, ri, newvalue, PMC_PCPU_SAVED(cpu,ri), tmp)); mtx_pool_lock_spin(pmc_mtxpool, pm); pm->pm_gv.pm_savedvalue += tmp; pp->pp_pmcs[ri].pp_pmcval += tmp; mtx_pool_unlock_spin(pmc_mtxpool, pm); if (pm->pm_flags & PMC_F_LOG_PROCCSW) pmclog_process_proccsw(pm, pp, tmp); } } /* mark hardware as free */ pcd->pcd_config_pmc(cpu, adjri, NULL); } /* * perform any other architecture/cpu dependent thread * switch out functions. */ (void) (*md->pmd_switch_out)(pc, pp); critical_exit(); } /* * A mapping change for a process. */ static void pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm) { int ri; pid_t pid; char *fullpath, *freepath; const struct pmc *pm; struct pmc_owner *po; const struct pmc_process *pp; freepath = fullpath = NULL; pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath); pid = td->td_proc->p_pid; /* Inform owners of all system-wide sampling PMCs. 
*/ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, pid, pkm->pm_address, fullpath); if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) goto done; /* * Inform sampling PMC owners tracking this process. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmclog_process_map_in(pm->pm_owner, pid, pkm->pm_address, fullpath); done: if (freepath) free(freepath, M_TEMP); } /* * Log an munmap request. */ static void pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm) { int ri; pid_t pid; struct pmc_owner *po; const struct pmc *pm; const struct pmc_process *pp; pid = td->td_proc->p_pid; LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) return; for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmclog_process_map_out(pm->pm_owner, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); } /* * Log mapping information about the kernel. */ static void pmc_log_kernel_mappings(struct pmc *pm) { struct pmc_owner *po; struct pmckern_map_in *km, *kmbase; sx_assert(&pmc_sx, SX_LOCKED); KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] non-sampling PMC (%p) desires mapping information", __LINE__, (void *) pm)); po = pm->pm_owner; if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE) return; /* * Log the current set of kernel modules. */ kmbase = linker_hwpmc_list_objects(); for (km = kmbase; km->pm_file != NULL; km++) { PMCDBG2(LOG,REG,1,"%s %p", (char *) km->pm_file, (void *) km->pm_address); pmclog_process_map_in(po, (pid_t) -1, km->pm_address, km->pm_file); } free(kmbase, M_LINKER); po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE; } /* * Log the mappings for a single process. */ static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p) { vm_map_t map; struct vnode *vp; struct vmspace *vm; vm_map_entry_t entry; vm_offset_t last_end; u_int last_timestamp; struct vnode *last_vp; vm_offset_t start_addr; vm_object_t obj, lobj, tobj; char *fullpath, *freepath; last_vp = NULL; last_end = (vm_offset_t) 0; fullpath = freepath = NULL; if ((vm = vmspace_acquire_ref(p)) == NULL) return; map = &vm->vm_map; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (entry == NULL) { PMCDBG2(LOG,OPS,2, "hwpmc: vm_map entry unexpectedly " "NULL! pid=%d vm_map=%p\n", p->p_pid, map); break; } /* * We only care about executable map entries. */ if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) || !(entry->protection & VM_PROT_EXECUTE) || (entry->object.vm_object == NULL)) { continue; } obj = entry->object.vm_object; VM_OBJECT_RLOCK(obj); /* * Walk the backing_object list to find the base * (non-shadowed) vm_object. */ for (lobj = tobj = obj; tobj != NULL; tobj = tobj->backing_object) { if (tobj != obj) VM_OBJECT_RLOCK(tobj); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; } /* * At this point lobj is the base vm_object and it is locked. */ if (lobj == NULL) { PMCDBG3(LOG,OPS,2, "hwpmc: lobj unexpectedly NULL! 
pid=%d " "vm_map=%p vm_obj=%p\n", p->p_pid, map, obj); VM_OBJECT_RUNLOCK(obj); continue; } vp = vm_object_vnode(lobj); if (vp == NULL) { if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); continue; } /* * Skip contiguous regions that point to the same * vnode, so we don't emit redundant MAP-IN * directives. */ if (entry->start == last_end && vp == last_vp) { last_end = entry->end; if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); continue; } /* * We don't want to keep the proc's vm_map or this * vm_object locked while we walk the pathname, since * vn_fullpath() can sleep. However, if we drop the * lock, it's possible for concurrent activity to * modify the vm_map list. To protect against this, * we save the vm_map timestamp before we release the * lock, and check it after we reacquire the lock * below. */ start_addr = entry->start; last_end = entry->end; last_timestamp = map->timestamp; vm_map_unlock_read(map); vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); freepath = NULL; pmc_getfilename(vp, &fullpath, &freepath); last_vp = vp; vrele(vp); vp = NULL; pmclog_process_map_in(po, p->p_pid, start_addr, fullpath); if (freepath) free(freepath, M_TEMP); vm_map_lock_read(map); /* * If our saved timestamp doesn't match, this means * that the vm_map was modified out from under us and * we can't trust our current "entry" pointer. Do a * new lookup for this entry. If there is no entry * for this address range, vm_map_lookup_entry() will * return the previous one, so we always want to go to * entry->next on the next loop iteration. * * There is an edge condition here that can occur if * there is no entry at or before this address. In * this situation, vm_map_lookup_entry returns * &map->header, which would cause our loop to abort * without processing the rest of the map. However, * in practice this will never happen for process * vm_map. This is because the executable's text * segment is the first mapping in the proc's address * space, and this mapping is never removed until the * process exits, so there will always be a non-header * entry at or before the requested address for * vm_map_lookup_entry to return. */ if (map->timestamp != last_timestamp) vm_map_lookup_entry(map, last_end - 1, &entry); } vm_map_unlock_read(map); vmspace_free(vm); return; } /* * Log mappings for all processes in the system. 
*/ static void pmc_log_all_process_mappings(struct pmc_owner *po) { struct proc *p, *top; sx_assert(&pmc_sx, SX_XLOCKED); if ((p = pfind(1)) == NULL) panic("[pmc,%d] Cannot find init", __LINE__); PROC_UNLOCK(p); sx_slock(&proctree_lock); top = p; for (;;) { pmc_log_process_mappings(po, p); if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } done: sx_sunlock(&proctree_lock); } /* * The 'hook' invoked from the kernel proper */ #ifdef HWPMC_DEBUG const char *pmc_hooknames[] = { /* these strings correspond to PMC_FN_* in */ "", "EXEC", "CSW-IN", "CSW-OUT", "SAMPLE", "UNUSED1", "UNUSED2", "MMAP", "MUNMAP", "CALLCHAIN-NMI", "CALLCHAIN-SOFT", "SOFTSAMPLING" }; #endif static int pmc_hook_handler(struct thread *td, int function, void *arg) { PMCDBG4(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function, pmc_hooknames[function], arg); switch (function) { /* * Process exec() */ case PMC_FN_PROCESS_EXEC: { char *fullpath, *freepath; unsigned int ri; int is_using_hwpmcs; struct pmc *pm; struct proc *p; struct pmc_owner *po; struct pmc_process *pp; struct pmckern_procexec *pk; sx_assert(&pmc_sx, SX_XLOCKED); p = td->td_proc; pmc_getfilename(p->p_textvp, &fullpath, &freepath); pk = (struct pmckern_procexec *) arg; /* Inform owners of SS mode PMCs of the exec event. */ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procexec(po, PMC_ID_INVALID, p->p_pid, pk->pm_entryaddr, fullpath); PROC_LOCK(p); is_using_hwpmcs = p->p_flag & P_HWPMC; PROC_UNLOCK(p); if (!is_using_hwpmcs) { if (freepath) free(freepath, M_TEMP); break; } /* * PMCs are not inherited across an exec(): remove any * PMCs that this process is the owner of. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } /* * If the process being exec'ed is not the target of any * PMC, we are done. */ if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) { if (freepath) free(freepath, M_TEMP); break; } /* * Log the exec event to all monitoring owners. Skip * owners who have already received the event because * they had system sampling PMCs active. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { po = pm->pm_owner; if (po->po_sscount == 0 && po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procexec(po, pm->pm_id, p->p_pid, pk->pm_entryaddr, fullpath); } if (freepath) free(freepath, M_TEMP); PMCDBG4(PRC,EXC,1, "exec proc=%p (%d, %s) cred-changed=%d", p, p->p_pid, p->p_comm, pk->pm_credentialschanged); if (pk->pm_credentialschanged == 0) /* no change */ break; /* * If the newly exec()'ed process has a different credential * than before, allow it to be the target of a PMC only if * the PMC's owner has sufficient privilege. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) if (pmc_can_attach(pm, td->td_proc) != 0) pmc_detach_one_process(td->td_proc, pm, PMC_FLAG_NONE); KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal ref count %d on pp %p", __LINE__, pp->pp_refcnt, pp)); /* * If this process is no longer the target of any * PMCs, we can remove the process entry and free * up space.
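/*
 * The process-tree traversal in pmc_log_all_process_mappings() above,
 * restated over a generic first-child/next-sibling/parent node so the
 * control flow is easier to follow (illustration only; 'visit' is a
 * placeholder).
 */
struct xnode {
	struct xnode *parent, *child, *sibling;
};

static void
example_preorder(struct xnode *top, void (*visit)(struct xnode *))
{
	struct xnode *n;

	n = top;
	for (;;) {
		visit(n);
		if (n->child != NULL)		/* descend first */
			n = n->child;
		else
			for (;;) {		/* then across, then up */
				if (n == top)
					return;
				if (n->sibling != NULL) {
					n = n->sibling;
					break;
				}
				n = n->parent;
			}
	}
}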
*/ if (pp->pp_refcnt == 0) { pmc_remove_process_descriptor(pp); free(pp, M_PMC); break; } } break; case PMC_FN_CSW_IN: pmc_process_csw_in(td); break; case PMC_FN_CSW_OUT: pmc_process_csw_out(td); break; /* * Process accumulated PC samples. * * This function is expected to be called by hardclock() for * each CPU that has accumulated PC samples. * * This function is to be executed on the CPU whose samples * are being processed. */ case PMC_FN_DO_SAMPLES: /* * Clear the cpu specific bit in the CPU mask before * doing the rest of the processing. If the NMI handler * gets invoked after the "CPU_CLR_ATOMIC()" call * below but before "pmc_process_samples()" gets * around to processing the interrupt, then we will * come back here at the next hardclock() tick (and * may find nothing to do if "pmc_process_samples()" * had already processed the interrupt). We don't * lose the interrupt sample. */ CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmc_cpumask); pmc_process_samples(PCPU_GET(cpuid), PMC_HR); pmc_process_samples(PCPU_GET(cpuid), PMC_SR); break; case PMC_FN_MMAP: sx_assert(&pmc_sx, SX_LOCKED); pmc_process_mmap(td, (struct pmckern_map_in *) arg); break; case PMC_FN_MUNMAP: sx_assert(&pmc_sx, SX_LOCKED); pmc_process_munmap(td, (struct pmckern_map_out *) arg); break; case PMC_FN_USER_CALLCHAIN: /* * Record a call chain. */ KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_HR, (struct trapframe *) arg); td->td_pflags &= ~TDP_CALLCHAIN; break; case PMC_FN_USER_CALLCHAIN_SOFT: /* * Record a call chain. */ KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_SR, (struct trapframe *) arg); td->td_pflags &= ~TDP_CALLCHAIN; break; case PMC_FN_SOFT_SAMPLING: /* * Call soft PMC sampling intr. */ pmc_soft_intr((struct pmckern_soft *) arg); break; default: #ifdef HWPMC_DEBUG KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function)); #endif break; } return 0; } /* * allocate a 'struct pmc_owner' descriptor in the owner hash table. */ static struct pmc_owner * pmc_allocate_owner_descriptor(struct proc *p) { uint32_t hindex; struct pmc_owner *po; struct pmc_ownerhash *poh; hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); poh = &pmc_ownerhash[hindex]; /* allocate space for N pointers and one descriptor struct */ po = malloc(sizeof(struct pmc_owner), M_PMC, M_WAITOK|M_ZERO); po->po_owner = p; LIST_INSERT_HEAD(poh, po, po_next); /* insert into hash table */ TAILQ_INIT(&po->po_logbuffers); mtx_init(&po->po_mtx, "pmc-owner-mtx", "pmc-per-proc", MTX_SPIN); PMCDBG4(OWN,ALL,1, "allocate-owner proc=%p (%d, %s) pmc-owner=%p", p, p->p_pid, p->p_comm, po); return po; } static void pmc_destroy_owner_descriptor(struct pmc_owner *po) { PMCDBG4(OWN,REL,1, "destroy-owner po=%p proc=%p (%d, %s)", po, po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm); mtx_destroy(&po->po_mtx); free(po, M_PMC); } /* * find the descriptor corresponding to process 'p', adding or removing it * as specified by 'mode'. */ static struct pmc_process * pmc_find_process_descriptor(struct proc *p, uint32_t mode) { uint32_t hindex; struct pmc_process *pp, *ppnew; struct pmc_processhash *pph; hindex = PMC_HASH_PTR(p, pmc_processhashmask); pph = &pmc_processhash[hindex]; ppnew = NULL; /* * Pre-allocate memory in the PMC_FLAG_ALLOCATE case since we * cannot call malloc(9) once we hold a spin lock.
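/*
 * The pre-allocation pattern named in the comment above, in miniature
 * (a sketch with invented 'xitem' types): allocate with M_WAITOK while
 * sleeping is still legal, publish under the spin lock, and free the
 * spare allocation if the entry already existed.
 */
struct xitem {
	LIST_ENTRY(xitem) link;
	int key;
};
LIST_HEAD(xitemhead, xitem);

static struct xitem *
example_find_or_add(struct mtx *m, struct xitemhead *head, int key)
{
	struct xitem *it, *newit;

	newit = malloc(sizeof(*newit), M_TEMP, M_WAITOK | M_ZERO);

	mtx_lock_spin(m);
	LIST_FOREACH(it, head, link)
		if (it->key == key)
			break;
	if (it == NULL) {
		newit->key = key;
		LIST_INSERT_HEAD(head, newit, link);
		it = newit;
		newit = NULL;		/* consumed by the list */
	}
	mtx_unlock_spin(m);

	if (newit != NULL)		/* entry already existed; discard spare */
		free(newit, M_TEMP);
	return (it);
}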
*/ if (mode & PMC_FLAG_ALLOCATE) ppnew = malloc(sizeof(struct pmc_process) + md->pmd_npmc * sizeof(struct pmc_targetstate), M_PMC, M_WAITOK|M_ZERO); mtx_lock_spin(&pmc_processhash_mtx); LIST_FOREACH(pp, pph, pp_next) if (pp->pp_proc == p) break; if ((mode & PMC_FLAG_REMOVE) && pp != NULL) LIST_REMOVE(pp, pp_next); if ((mode & PMC_FLAG_ALLOCATE) && pp == NULL && ppnew != NULL) { ppnew->pp_proc = p; LIST_INSERT_HEAD(pph, ppnew, pp_next); pp = ppnew; ppnew = NULL; } mtx_unlock_spin(&pmc_processhash_mtx); if (pp != NULL && ppnew != NULL) free(ppnew, M_PMC); return pp; } /* * remove a process descriptor from the process hash table. */ static void pmc_remove_process_descriptor(struct pmc_process *pp) { KASSERT(pp->pp_refcnt == 0, ("[pmc,%d] Removing process descriptor %p with count %d", __LINE__, pp, pp->pp_refcnt)); mtx_lock_spin(&pmc_processhash_mtx); LIST_REMOVE(pp, pp_next); mtx_unlock_spin(&pmc_processhash_mtx); } /* * find an owner descriptor corresponding to proc 'p' */ static struct pmc_owner * pmc_find_owner_descriptor(struct proc *p) { uint32_t hindex; struct pmc_owner *po; struct pmc_ownerhash *poh; hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); poh = &pmc_ownerhash[hindex]; po = NULL; LIST_FOREACH(po, poh, po_next) if (po->po_owner == p) break; PMCDBG5(OWN,FND,1, "find-owner proc=%p (%d, %s) hindex=0x%x -> " "pmc-owner=%p", p, p->p_pid, p->p_comm, hindex, po); return po; } /* * pmc_allocate_pmc_descriptor * * Allocate a pmc descriptor and initialize its * fields. */ static struct pmc * pmc_allocate_pmc_descriptor(void) { struct pmc *pmc; pmc = malloc(sizeof(struct pmc), M_PMC, M_WAITOK|M_ZERO); PMCDBG1(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc); return pmc; } /* * Destroy a pmc descriptor. */ static void pmc_destroy_pmc_descriptor(struct pmc *pm) { KASSERT(pm->pm_state == PMC_STATE_DELETED || pm->pm_state == PMC_STATE_FREE, ("[pmc,%d] destroying non-deleted PMC", __LINE__)); KASSERT(LIST_EMPTY(&pm->pm_targets), ("[pmc,%d] destroying pmc with targets", __LINE__)); KASSERT(pm->pm_owner == NULL, ("[pmc,%d] destroying pmc attached to an owner", __LINE__)); KASSERT(pm->pm_runcount == 0, ("[pmc,%d] pmc has non-zero run count %d", __LINE__, pm->pm_runcount)); free(pm, M_PMC); } static void pmc_wait_for_pmc_idle(struct pmc *pm) { #ifdef HWPMC_DEBUG volatile int maxloop; maxloop = 100 * pmc_cpu_max(); #endif /* * Loop (with a forced context switch) till the PMC's runcount * comes down to zero. */ while (atomic_load_acq_32(&pm->pm_runcount) > 0) { #ifdef HWPMC_DEBUG maxloop--; KASSERT(maxloop > 0, ("[pmc,%d] (ri%d, rc%d) waiting too long for " "pmc to be free", __LINE__, PMC_TO_ROWINDEX(pm), pm->pm_runcount)); #endif pmc_force_context_switch(); } } /* * This function does the following things: * * - detaches the PMC from hardware * - unlinks all target threads that were attached to it * - removes the PMC from its owner's list * - destroys the PMC private mutex * * Once this function completes, the given pmc pointer can be freed by * calling pmc_destroy_pmc_descriptor(). */ static void pmc_release_pmc_descriptor(struct pmc *pm) { enum pmc_mode mode; struct pmc_hw *phw; u_int adjri, ri, cpu; struct pmc_owner *po; struct pmc_binding pb; struct pmc_process *pp; struct pmc_classdep *pcd; struct pmc_target *ptgt, *tmp; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm, ("[pmc,%d] null pmc", __LINE__)); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); mode = PMC_TO_MODE(pm); PMCDBG3(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri, mode); /* * First, we take the PMC off hardware. 
*/ cpu = 0; if (PMC_IS_SYSTEM_MODE(mode)) { /* * A system mode PMC runs on a specific CPU. Switch * to this CPU and turn hardware off. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); pmc_select_cpu(cpu); /* switch off non-stalled CPUs */ CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate); if (pm->pm_state == PMC_STATE_RUNNING && !CPU_ISSET(cpu, &pm->pm_stalled)) { phw = pmc_pcpu[cpu]->pc_hwpmcs[ri]; KASSERT(phw->phw_pmc == pm, ("[pmc, %d] pmc ptr ri(%d) hw(%p) pm(%p)", __LINE__, ri, phw->phw_pmc, pm)); PMCDBG2(PMC,REL,2, "stopping cpu=%d ri=%d", cpu, ri); critical_enter(); pcd->pcd_stop_pmc(cpu, adjri); critical_exit(); } PMCDBG2(PMC,REL,2, "decfg cpu=%d ri=%d", cpu, ri); critical_enter(); pcd->pcd_config_pmc(cpu, adjri, NULL); critical_exit(); /* adjust the global and process count of SS mode PMCs */ if (mode == PMC_MODE_SS && pm->pm_state == PMC_STATE_RUNNING) { po = pm->pm_owner; po->po_sscount--; if (po->po_sscount == 0) { atomic_subtract_rel_int(&pmc_ss_count, 1); LIST_REMOVE(po, po_ssnext); } } pm->pm_state = PMC_STATE_DELETED; pmc_restore_cpu_binding(&pb); /* * We could have references to this PMC structure in * the per-cpu sample queues. Wait for the queue to * drain. */ pmc_wait_for_pmc_idle(pm); } else if (PMC_IS_VIRTUAL_MODE(mode)) { /* * A virtual PMC could be running on multiple CPUs at * a given instant. * * By marking its state as DELETED, we ensure that * this PMC is never further scheduled on hardware. * * Then we wait till all CPUs are done with this PMC. */ pm->pm_state = PMC_STATE_DELETED; /* Wait for the PMCs runcount to come to zero. */ pmc_wait_for_pmc_idle(pm); /* * At this point the PMC is off all CPUs and cannot be * freshly scheduled onto a CPU. It is now safe to * unlink all targets from this PMC. If a * process-record's refcount falls to zero, we remove * it from the hash table. The module-wide SX lock * protects us from races. */ LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) { pp = ptgt->pt_process; pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */ PMCDBG1(PMC,REL,3, "pp->refcnt=%d", pp->pp_refcnt); /* * If the target process record shows that no * PMCs are attached to it, reclaim its space. */ if (pp->pp_refcnt == 0) { pmc_remove_process_descriptor(pp); free(pp, M_PMC); } } cpu = curthread->td_oncpu; /* setup cpu for pmd_release() */ } /* * Release any MD resources */ (void) pcd->pcd_release_pmc(cpu, adjri, pm); /* * Update row disposition */ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) PMC_UNMARK_ROW_STANDALONE(ri); else PMC_UNMARK_ROW_THREAD(ri); /* unlink from the owner's list */ if (pm->pm_owner) { LIST_REMOVE(pm, pm_next); pm->pm_owner = NULL; } } /* * Register an owner and a pmc. */ static int pmc_register_owner(struct proc *p, struct pmc *pmc) { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(p)) == NULL) if ((po = pmc_allocate_owner_descriptor(p)) == NULL) return ENOMEM; KASSERT(pmc->pm_owner == NULL, ("[pmc,%d] attempting to own an initialized PMC", __LINE__)); pmc->pm_owner = po; LIST_INSERT_HEAD(&po->po_pmcs, pmc, pm_next); PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_pmcallocate(pmc); PMCDBG2(PMC,REG,1, "register-owner pmc-owner=%p pmc=%p", po, pmc); return 0; } /* * Return the current row disposition: * == 0 => FREE * > 0 => PROCESS MODE * < 0 => SYSTEM MODE */ int pmc_getrowdisp(int ri) { return pmc_pmcdisp[ri]; } /* * Check if a PMC at row index 'ri' can be allocated to the current * process. 
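/*
 * How the row disposition encoding above works: pmc_pmcdisp[] is a
 * signed reference count whose sign doubles as the mode tag.  A sketch
 * of the mark helpers implied by PMC_MARK_ROW_THREAD and friends
 * (invented names; callers are assumed to hold the module's sx lock).
 */
#define	EXAMPLE_NPMC	8
static int example_disp[EXAMPLE_NPMC]; /* 0 free, >0 thread, <0 standalone */

static void
example_mark_row_thread(int ri)
{
	KASSERT(example_disp[ri] >= 0,
	    ("row %d already claimed for system mode", ri));
	example_disp[ri]++;
}

static void
example_mark_row_standalone(int ri)
{
	KASSERT(example_disp[ri] <= 0,
	    ("row %d already claimed for process mode", ri));
	example_disp[ri]--;
}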
* * Allocation can fail if: * - the current process is already being profiled by a PMC at index 'ri', * attached to it via OP_PMCATTACH. * - the current process has already allocated a PMC at index 'ri' * via OP_ALLOCATE. */ static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu) { enum pmc_mode mode; struct pmc *pm; struct pmc_owner *po; struct pmc_process *pp; PMCDBG5(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d " "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu); /* * We shouldn't have already allocated a process-mode PMC at * row index 'ri'. * * We shouldn't have allocated a system-wide PMC on the same * CPU and same RI. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) LIST_FOREACH(pm, &po->po_pmcs, pm_next) { if (PMC_TO_ROWINDEX(pm) == ri) { mode = PMC_TO_MODE(pm); if (PMC_IS_VIRTUAL_MODE(mode)) return EEXIST; if (PMC_IS_SYSTEM_MODE(mode) && (int) PMC_TO_CPU(pm) == cpu) return EEXIST; } } /* * We also shouldn't be the target of any PMC at this index * since otherwise a PMC_ATTACH to ourselves will fail. */ if ((pp = pmc_find_process_descriptor(p, 0)) != NULL) if (pp->pp_pmcs[ri].pp_pmc) return EEXIST; PMCDBG4(PMC,ALR,2, "can-allocate-rowindex proc=%p (%d, %s) ri=%d ok", p, p->p_pid, p->p_comm, ri); return 0; } /* * Check if a given PMC at row index 'ri' can be currently used in * mode 'mode'. */ static int pmc_can_allocate_row(int ri, enum pmc_mode mode) { enum pmc_disp disp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG2(PMC,ALR,1, "can-allocate-row ri=%d mode=%d", ri, mode); if (PMC_IS_SYSTEM_MODE(mode)) disp = PMC_DISP_STANDALONE; else disp = PMC_DISP_THREAD; /* * check disposition for PMC row 'ri': * * Expected disposition Row-disposition Result * * STANDALONE STANDALONE or FREE proceed * STANDALONE THREAD fail * THREAD THREAD or FREE proceed * THREAD STANDALONE fail */ if (!PMC_ROW_DISP_IS_FREE(ri) && !(disp == PMC_DISP_THREAD && PMC_ROW_DISP_IS_THREAD(ri)) && !(disp == PMC_DISP_STANDALONE && PMC_ROW_DISP_IS_STANDALONE(ri))) return EBUSY; /* * All OK */ PMCDBG2(PMC,ALR,2, "can-allocate-row ri=%d mode=%d ok", ri, mode); return 0; } /* * Find a PMC descriptor with user handle 'pmcid' for thread 'td'. */ static struct pmc * pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid) { struct pmc *pm; KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc, ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__, PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc)); LIST_FOREACH(pm, &po->po_pmcs, pm_next) if (pm->pm_id == pmcid) return pm; return NULL; } static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pmc) { struct pmc *pm, *opm; struct pmc_owner *po; struct pmc_process *pp; KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc, ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__, PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc)); PMCDBG1(PMC,FND,1, "find-pmc id=%d", pmcid); if ((po = pmc_find_owner_descriptor(curthread->td_proc)) == NULL) { /* * In case of PMC_F_DESCENDANTS child processes we will not find * the current process in the owners hash list. Find the owner * process first and from there lookup the po. 
*/ if ((pp = pmc_find_process_descriptor(curthread->td_proc, PMC_FLAG_NONE)) == NULL) { return ESRCH; } else { opm = pp->pp_pmcs[PMC_ID_TO_ROWINDEX(pmcid)].pp_pmc; if (opm == NULL) return ESRCH; if ((opm->pm_flags & (PMC_F_ATTACHED_TO_OWNER| PMC_F_DESCENDANTS)) != (PMC_F_ATTACHED_TO_OWNER| PMC_F_DESCENDANTS)) return ESRCH; po = opm->pm_owner; } } if ((pm = pmc_find_pmc_descriptor_in_process(po, pmcid)) == NULL) return EINVAL; PMCDBG2(PMC,FND,2, "find-pmc id=%d -> pmc=%p", pmcid, pm); *pmc = pm; return 0; } /* * Start a PMC. */ static int pmc_start(struct pmc *pm) { enum pmc_mode mode; struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; int adjri, error, cpu, ri; KASSERT(pm != NULL, ("[pmc,%d] null pm", __LINE__)); mode = PMC_TO_MODE(pm); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); error = 0; PMCDBG3(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri); po = pm->pm_owner; /* * Disallow PMCSTART if a logfile is required but has not been * configured yet. */ if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) && (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) return (EDOOFUS); /* programming error */ /* * If this is a sampling mode PMC, log mapping information for * the kernel modules that are currently loaded. */ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmc_log_kernel_mappings(pm); if (PMC_IS_VIRTUAL_MODE(mode)) { /* * If a PMCATTACH has never been done on this PMC, * attach it to its owner process. */ if (LIST_EMPTY(&pm->pm_targets)) error = (pm->pm_flags & PMC_F_ATTACH_DONE) ? ESRCH : pmc_attach_process(po->po_owner, pm); /* * If the PMC is attached to its owner, then force a context * switch to ensure that the MD state gets set correctly. */ if (error == 0) { pm->pm_state = PMC_STATE_RUNNING; if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) pmc_force_context_switch(); } return (error); } /* * A system-wide PMC. * * Add the owner to the global list if this is a system-wide * sampling PMC. */ if (mode == PMC_MODE_SS) { if (po->po_sscount == 0) { LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext); atomic_add_rel_int(&pmc_ss_count, 1); PMCDBG1(PMC,OPS,1, "po=%p in global list", po); } po->po_sscount++; /* * Log mapping information for all existing processes in the * system. Subsequent mappings are logged as they happen; * see pmc_process_mmap(). */ if (po->po_logprocmaps == 0) { pmc_log_all_process_mappings(po); po->po_logprocmaps = 1; } } /* * Move to the CPU associated with this * PMC, and start the hardware. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); if (!pmc_cpu_is_active(cpu)) return (ENXIO); pmc_select_cpu(cpu); /* * global PMCs are configured at allocation time * so write out the initial value and start the PMC. */ pm->pm_state = PMC_STATE_RUNNING; critical_enter(); if ((error = pcd->pcd_write_pmc(cpu, adjri, PMC_IS_SAMPLING_MODE(mode) ? pm->pm_sc.pm_reloadcount : pm->pm_sc.pm_initial)) == 0) { /* If a sampling mode PMC, reset stalled state. */ if (PMC_IS_SAMPLING_MODE(mode)) CPU_CLR_ATOMIC(cpu, &pm->pm_stalled); /* Indicate that we desire this to run. Start it. */ CPU_SET_ATOMIC(cpu, &pm->pm_cpustate); error = pcd->pcd_start_pmc(cpu, adjri); } critical_exit(); pmc_restore_cpu_binding(&pb); return (error); } /* * Stop a PMC. 
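/*
 * Shape of the bind/operate/unbind sequence that pmc_start() and
 * pmc_stop() rely on, assuming the pmc_save_cpu_binding() helpers wrap
 * sched_bind()/sched_unbind() (a sketch, not the driver's definitions).
 */
static void
example_run_on_cpu(int cpu, void (*op)(int))
{
	struct thread *td;

	td = curthread;
	thread_lock(td);
	sched_bind(td, cpu);		/* migrate to the target CPU */
	thread_unlock(td);

	KASSERT(curthread->td_oncpu == cpu, ("bound to the wrong CPU"));
	op(cpu);			/* touch the per-CPU hardware */

	thread_lock(td);
	sched_unbind(td);		/* allow migration again */
	thread_unlock(td);
}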
*/ static int pmc_stop(struct pmc *pm) { struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; int adjri, cpu, error, ri; KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__)); PMCDBG3(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm, PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm)); pm->pm_state = PMC_STATE_STOPPED; /* * If the PMC is a virtual mode one, changing the state to * non-RUNNING is enough to ensure that the PMC never gets * scheduled. * * If this PMC is currently running on a CPU, then it will * be handled correctly at the time its target process is context * switched out. */ if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) return 0; /* * A system-mode PMC. Move to the CPU associated with * this PMC, and stop the hardware. We update the * 'initial count' so that a subsequent PMCSTART will * resume counting from the current hardware count. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] illegal cpu=%d", __LINE__, cpu)); if (!pmc_cpu_is_active(cpu)) return ENXIO; pmc_select_cpu(cpu); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate); critical_enter(); if ((error = pcd->pcd_stop_pmc(cpu, adjri)) == 0) error = pcd->pcd_read_pmc(cpu, adjri, &pm->pm_sc.pm_initial); critical_exit(); pmc_restore_cpu_binding(&pb); po = pm->pm_owner; /* remove this owner from the global list of SS PMC owners */ if (PMC_TO_MODE(pm) == PMC_MODE_SS) { po->po_sscount--; if (po->po_sscount == 0) { atomic_subtract_rel_int(&pmc_ss_count, 1); LIST_REMOVE(po, po_ssnext); PMCDBG1(PMC,OPS,2,"po=%p removed from global list", po); } } return (error); } #ifdef HWPMC_DEBUG static const char *pmc_op_to_name[] = { #undef __PMC_OP #define __PMC_OP(N, D) #N , __PMC_OPS() NULL }; #endif /* * The syscall interface */ #define PMC_GET_SX_XLOCK(...) do { \ sx_xlock(&pmc_sx); \ if (pmc_hook == NULL) { \ sx_xunlock(&pmc_sx); \ return __VA_ARGS__; \ } \ } while (0) #define PMC_DOWNGRADE_SX() do { \ sx_downgrade(&pmc_sx); \ is_sx_downgraded = 1; \ } while (0) static int pmc_syscall_handler(struct thread *td, void *syscall_args) { int error, is_sx_downgraded, is_sx_locked, op; struct pmc_syscall_args *c; void *arg; PMC_GET_SX_XLOCK(ENOSYS); DROP_GIANT(); is_sx_downgraded = 0; is_sx_locked = 1; c = (struct pmc_syscall_args *) syscall_args; op = c->pmop_code; arg = c->pmop_data; PMCDBG3(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op, pmc_op_to_name[op], arg); error = 0; atomic_add_int(&pmc_stats.pm_syscalls, 1); switch(op) { /* * Configure a log file. * * XXX This OP will be reworked. */ case PMC_OP_CONFIGURELOG: { struct proc *p; struct pmc *pm; struct pmc_owner *po; struct pmc_op_configurelog cl; sx_assert(&pmc_sx, SX_XLOCKED); if ((error = copyin(arg, &cl, sizeof(cl))) != 0) break; /* mark this process as owning a log file */ p = td->td_proc; if ((po = pmc_find_owner_descriptor(p)) == NULL) if ((po = pmc_allocate_owner_descriptor(p)) == NULL) { error = ENOMEM; break; } /* * If a valid fd was passed in, try to configure that, * otherwise if 'fd' was less than zero and there was * a log file configured, flush its buffers and * de-configure it.
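/*
 * Every OP below follows the same copyin/validate/act/copyout shape.
 * A hypothetical handler showing just that skeleton -- PMC_OP_EXAMPLE,
 * struct pmc_op_example, EXAMPLE_MAX and example_compute() are all
 * invented for illustration:
 *
 *	case PMC_OP_EXAMPLE:
 *	{
 *		struct pmc_op_example ex;
 *
 *		PMC_DOWNGRADE_SX();	// a read-only OP can share the lock
 *
 *		if ((error = copyin(arg, &ex, sizeof(ex))) != 0)
 *			break;
 *		if (ex.pm_request >= EXAMPLE_MAX) {	// validate
 *			error = EINVAL;
 *			break;
 *		}
 *		ex.pm_reply = example_compute(ex.pm_request);
 *		error = copyout(&ex, arg, sizeof(ex));
 *	}
 *	break;
 */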
*/ if (cl.pm_logfd >= 0) { sx_xunlock(&pmc_sx); is_sx_locked = 0; error = pmclog_configure_log(md, po, cl.pm_logfd); } else if (po->po_flags & PMC_PO_OWNS_LOGFILE) { pmclog_process_closelog(po); error = pmclog_close(po); if (error == 0) { LIST_FOREACH(pm, &po->po_pmcs, pm_next) if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && pm->pm_state == PMC_STATE_RUNNING) pmc_stop(pm); error = pmclog_deconfigure_log(po); } } else error = EINVAL; if (error) break; } break; /* * Flush a log file. */ case PMC_OP_FLUSHLOG: { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } error = pmclog_flush(po); } break; /* * Close a log file. */ case PMC_OP_CLOSELOG: { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } error = pmclog_close(po); } break; /* * Retrieve hardware configuration. */ case PMC_OP_GETCPUINFO: /* CPU information */ { struct pmc_op_getcpuinfo gci; struct pmc_classinfo *pci; struct pmc_classdep *pcd; int cl; gci.pm_cputype = md->pmd_cputype; gci.pm_ncpu = pmc_cpu_max(); gci.pm_npmc = md->pmd_npmc; gci.pm_nclass = md->pmd_nclass; pci = gci.pm_classes; pcd = md->pmd_classdep; for (cl = 0; cl < md->pmd_nclass; cl++, pci++, pcd++) { pci->pm_caps = pcd->pcd_caps; pci->pm_class = pcd->pcd_class; pci->pm_width = pcd->pcd_width; pci->pm_num = pcd->pcd_num; } error = copyout(&gci, arg, sizeof(gci)); } break; /* * Retrieve soft events list. */ case PMC_OP_GETDYNEVENTINFO: { enum pmc_class cl; enum pmc_event ev; struct pmc_op_getdyneventinfo *gei; struct pmc_dyn_event_descr dev; struct pmc_soft *ps; uint32_t nevent; sx_assert(&pmc_sx, SX_LOCKED); gei = (struct pmc_op_getdyneventinfo *) arg; if ((error = copyin(&gei->pm_class, &cl, sizeof(cl))) != 0) break; /* Only SOFT class is dynamic. */ if (cl != PMC_CLASS_SOFT) { error = EINVAL; break; } nevent = 0; for (ev = PMC_EV_SOFT_FIRST; (int)ev <= PMC_EV_SOFT_LAST; ev++) { ps = pmc_soft_ev_acquire(ev); if (ps == NULL) continue; bcopy(&ps->ps_ev, &dev, sizeof(dev)); pmc_soft_ev_release(ps); error = copyout(&dev, &gei->pm_events[nevent], sizeof(struct pmc_dyn_event_descr)); if (error != 0) break; nevent++; } if (error != 0) break; error = copyout(&nevent, &gei->pm_nevent, sizeof(nevent)); } break; /* * Get module statistics */ case PMC_OP_GETDRIVERSTATS: { struct pmc_op_getdriverstats gms; bcopy(&pmc_stats, &gms, sizeof(gms)); error = copyout(&gms, arg, sizeof(gms)); } break; /* * Retrieve module version number */ case PMC_OP_GETMODULEVERSION: { uint32_t cv, modv; /* retrieve the client's idea of the ABI version */ if ((error = copyin(arg, &cv, sizeof(uint32_t))) != 0) break; /* don't service clients newer than our driver */ modv = PMC_VERSION; if ((cv & 0xFFFF0000) > (modv & 0xFFFF0000)) { error = EPROGMISMATCH; break; } error = copyout(&modv, arg, sizeof(int)); } break; /* * Retrieve the state of all the PMCs on a given * CPU. 
*/ case PMC_OP_GETPMCINFO: { int ari; struct pmc *pm; size_t pmcinfo_size; uint32_t cpu, n, npmc; struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_info *p, *pmcinfo; struct pmc_op_getpmcinfo *gpi; PMC_DOWNGRADE_SX(); gpi = (struct pmc_op_getpmcinfo *) arg; if ((error = copyin(&gpi->pm_cpu, &cpu, sizeof(cpu))) != 0) break; if (cpu >= pmc_cpu_max()) { error = EINVAL; break; } if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* switch to CPU 'cpu' */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); npmc = md->pmd_npmc; pmcinfo_size = npmc * sizeof(struct pmc_info); pmcinfo = malloc(pmcinfo_size, M_PMC, M_WAITOK); p = pmcinfo; for (n = 0; n < md->pmd_npmc; n++, p++) { pcd = pmc_ri_to_classdep(md, n, &ari); KASSERT(pcd != NULL, ("[pmc,%d] null pcd ri=%d", __LINE__, n)); if ((error = pcd->pcd_describe(cpu, ari, p, &pm)) != 0) break; if (PMC_ROW_DISP_IS_STANDALONE(n)) p->pm_rowdisp = PMC_DISP_STANDALONE; else if (PMC_ROW_DISP_IS_THREAD(n)) p->pm_rowdisp = PMC_DISP_THREAD; else p->pm_rowdisp = PMC_DISP_FREE; p->pm_ownerpid = -1; if (pm == NULL) /* no PMC associated */ continue; po = pm->pm_owner; KASSERT(po->po_owner != NULL, ("[pmc,%d] pmc_owner had a null proc pointer", __LINE__)); p->pm_ownerpid = po->po_owner->p_pid; p->pm_mode = PMC_TO_MODE(pm); p->pm_event = pm->pm_event; p->pm_flags = pm->pm_flags; if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) p->pm_reloadcount = pm->pm_sc.pm_reloadcount; } pmc_restore_cpu_binding(&pb); /* now copy out the PMC info collected */ if (error == 0) error = copyout(pmcinfo, &gpi->pm_pmcs, pmcinfo_size); free(pmcinfo, M_PMC); } break; /* * Set the administrative state of a PMC. I.e. whether * the PMC is to be used or not. */ case PMC_OP_PMCADMIN: { int cpu, ri; enum pmc_state request; struct pmc_cpu *pc; struct pmc_hw *phw; struct pmc_op_pmcadmin pma; struct pmc_binding pb; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); error = priv_check(td, PRIV_PMC_MANAGE); if (error) break; if ((error = copyin(arg, &pma, sizeof(pma))) != 0) break; cpu = pma.pm_cpu; if (cpu < 0 || cpu >= (int) pmc_cpu_max()) { error = EINVAL; break; } if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } request = pma.pm_state; if (request != PMC_STATE_DISABLED && request != PMC_STATE_FREE) { error = EINVAL; break; } ri = pma.pm_pmc; /* pmc id == row index */ if (ri < 0 || ri >= (int) md->pmd_npmc) { error = EINVAL; break; } /* * We can't disable a PMC with a row-index allocated * for process virtual PMCs. */ if (PMC_ROW_DISP_IS_THREAD(ri) && request == PMC_STATE_DISABLED) { error = EBUSY; break; } /* * otherwise, this PMC on this CPU is either free or * in system-wide mode. */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); pc = pmc_pcpu[cpu]; phw = pc->pc_hwpmcs[ri]; /* * XXX do we need some kind of 'forced' disable? */ if (phw->phw_pmc == NULL) { if (request == PMC_STATE_DISABLED && (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED)) { phw->phw_state &= ~PMC_PHW_FLAG_IS_ENABLED; PMC_MARK_ROW_STANDALONE(ri); } else if (request == PMC_STATE_FREE && (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0) { phw->phw_state |= PMC_PHW_FLAG_IS_ENABLED; PMC_UNMARK_ROW_STANDALONE(ri); } /* other cases are a no-op */ } else error = EBUSY; pmc_restore_cpu_binding(&pb); } break; /* * Allocate a PMC. 
*/ case PMC_OP_PMCALLOCATE: { int adjri, n; u_int cpu; uint32_t caps; struct pmc *pmc; enum pmc_mode mode; struct pmc_hw *phw; struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_op_pmcallocate pa; if ((error = copyin(arg, &pa, sizeof(pa))) != 0) break; caps = pa.pm_caps; mode = pa.pm_mode; cpu = pa.pm_cpu; if ((mode != PMC_MODE_SS && mode != PMC_MODE_SC && mode != PMC_MODE_TS && mode != PMC_MODE_TC) || (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) { error = EINVAL; break; } /* * Virtual PMCs should only ask for a default CPU. * System mode PMCs need to specify a non-default CPU. */ if ((PMC_IS_VIRTUAL_MODE(mode) && cpu != (u_int) PMC_CPU_ANY) || (PMC_IS_SYSTEM_MODE(mode) && cpu == (u_int) PMC_CPU_ANY)) { error = EINVAL; break; } /* * Check that an inactive CPU is not being asked for. */ if (PMC_IS_SYSTEM_MODE(mode) && !pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* * Refuse an allocation for a system-wide PMC if this * process has been jailed, or if this process lacks * super-user credentials and the sysctl tunable * 'security.bsd.unprivileged_syspmcs' is zero. */ if (PMC_IS_SYSTEM_MODE(mode)) { if (jailed(curthread->td_ucred)) { error = EPERM; break; } if (!pmc_unprivileged_syspmcs) { error = priv_check(curthread, PRIV_PMC_SYSTEM); if (error) break; } } /* * Look for valid values for 'pm_flags' */ if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) { error = EINVAL; break; } /* process logging options are not allowed for system PMCs */ if (PMC_IS_SYSTEM_MODE(mode) && (pa.pm_flags & (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT))) { error = EINVAL; break; } /* * All sampling mode PMCs need to be able to interrupt the * CPU. */ if (PMC_IS_SAMPLING_MODE(mode)) caps |= PMC_CAP_INTERRUPT; /* A valid class specifier should have been passed in. */ for (n = 0; n < md->pmd_nclass; n++) if (md->pmd_classdep[n].pcd_class == pa.pm_class) break; if (n == md->pmd_nclass) { error = EINVAL; break; } /* The requested PMC capabilities should be feasible. 
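/*
 * "Feasible" above is a bitmask subset test: every requested
 * capability bit must also be present in the class's capability mask.
 * Worked example with invented values:
 *
 *	class caps = 0x0313
 *	request 0x0110: (0x0313 & 0x0110) == 0x0110 -> allowed
 *	request 0x0410: (0x0313 & 0x0410) == 0x0010 -> rejected
 */
static int
example_caps_feasible(uint32_t class_caps, uint32_t requested)
{
	return ((class_caps & requested) == requested);
}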
*/ if ((md->pmd_classdep[n].pcd_caps & caps) != caps) { error = EOPNOTSUPP; break; } PMCDBG4(PMC,ALL,2, "event=%d caps=0x%x mode=%d cpu=%d", pa.pm_ev, caps, mode, cpu); pmc = pmc_allocate_pmc_descriptor(); pmc->pm_id = PMC_ID_MAKE_ID(cpu,pa.pm_mode,pa.pm_class, PMC_ID_INVALID); pmc->pm_event = pa.pm_ev; pmc->pm_state = PMC_STATE_FREE; pmc->pm_caps = caps; pmc->pm_flags = pa.pm_flags; /* switch thread to CPU 'cpu' */ pmc_save_cpu_binding(&pb); #define PMC_IS_SHAREABLE_PMC(cpu, n) \ (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_state & \ PMC_PHW_FLAG_IS_SHAREABLE) #define PMC_IS_UNALLOCATED(cpu, n) \ (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_pmc == NULL) if (PMC_IS_SYSTEM_MODE(mode)) { pmc_select_cpu(cpu); for (n = 0; n < (int) md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); if (pmc_can_allocate_row(n, mode) == 0 && pmc_can_allocate_rowindex( curthread->td_proc, n, cpu) == 0 && (PMC_IS_UNALLOCATED(cpu, n) || PMC_IS_SHAREABLE_PMC(cpu, n)) && pcd->pcd_allocate_pmc(cpu, adjri, pmc, &pa) == 0) break; } } else { /* Process virtual mode */ for (n = 0; n < (int) md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); if (pmc_can_allocate_row(n, mode) == 0 && pmc_can_allocate_rowindex( curthread->td_proc, n, PMC_CPU_ANY) == 0 && pcd->pcd_allocate_pmc(curthread->td_oncpu, adjri, pmc, &pa) == 0) break; } } #undef PMC_IS_UNALLOCATED #undef PMC_IS_SHAREABLE_PMC pmc_restore_cpu_binding(&pb); if (n == (int) md->pmd_npmc) { pmc_destroy_pmc_descriptor(pmc); pmc = NULL; error = EINVAL; break; } /* Fill in the correct value in the ID field */ pmc->pm_id = PMC_ID_MAKE_ID(cpu,mode,pa.pm_class,n); PMCDBG5(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x", pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id); /* Process mode PMCs with logging enabled need log files */ if (pmc->pm_flags & (PMC_F_LOG_PROCEXIT | PMC_F_LOG_PROCCSW)) pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; /* All system mode sampling PMCs require a log file */ if (PMC_IS_SAMPLING_MODE(mode) && PMC_IS_SYSTEM_MODE(mode)) pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; /* * Configure global pmc's immediately */ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) { pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); phw = pmc_pcpu[cpu]->pc_hwpmcs[n]; pcd = pmc_ri_to_classdep(md, n, &adjri); if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 || (error = pcd->pcd_config_pmc(cpu, adjri, pmc)) != 0) { (void) pcd->pcd_release_pmc(cpu, adjri, pmc); pmc_destroy_pmc_descriptor(pmc); pmc = NULL; pmc_restore_cpu_binding(&pb); error = EPERM; break; } pmc_restore_cpu_binding(&pb); } pmc->pm_state = PMC_STATE_ALLOCATED; /* * mark row disposition */ if (PMC_IS_SYSTEM_MODE(mode)) PMC_MARK_ROW_STANDALONE(n); else PMC_MARK_ROW_THREAD(n); /* * Register this PMC with the current thread as its owner. */ if ((error = pmc_register_owner(curthread->td_proc, pmc)) != 0) { pmc_release_pmc_descriptor(pmc); pmc_destroy_pmc_descriptor(pmc); pmc = NULL; break; } /* * Return the allocated index. */ pa.pm_pmcid = pmc->pm_id; error = copyout(&pa, arg, sizeof(pa)); } break; /* * Attach a PMC to a process. 
*/ case PMC_OP_PMCATTACH: { struct pmc *pm; struct proc *p; struct pmc_op_pmcattach a; sx_assert(&pmc_sx, SX_XLOCKED); if ((error = copyin(arg, &a, sizeof(a))) != 0) break; if (a.pm_pid < 0) { error = EINVAL; break; } else if (a.pm_pid == 0) a.pm_pid = td->td_proc->p_pid; if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) break; if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { error = EINVAL; break; } /* PMCs may be (re)attached only when allocated or stopped */ if (pm->pm_state == PMC_STATE_RUNNING) { error = EBUSY; break; } else if (pm->pm_state != PMC_STATE_ALLOCATED && pm->pm_state != PMC_STATE_STOPPED) { error = EINVAL; break; } /* lookup pid */ if ((p = pfind(a.pm_pid)) == NULL) { error = ESRCH; break; } /* * Ignore processes that are working on exiting. */ if (p->p_flag & P_WEXIT) { error = ESRCH; PROC_UNLOCK(p); /* pfind() returns a locked process */ break; } /* * we are allowed to attach a PMC to a process if * we can debug it. */ error = p_candebug(curthread, p); PROC_UNLOCK(p); if (error == 0) error = pmc_attach_process(p, pm); } break; /* * Detach an attached PMC from a process. */ case PMC_OP_PMCDETACH: { struct pmc *pm; struct proc *p; struct pmc_op_pmcattach a; if ((error = copyin(arg, &a, sizeof(a))) != 0) break; if (a.pm_pid < 0) { error = EINVAL; break; } else if (a.pm_pid == 0) a.pm_pid = td->td_proc->p_pid; if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) break; if ((p = pfind(a.pm_pid)) == NULL) { error = ESRCH; break; } /* * Treat processes that are in the process of exiting * as if they were not present. */ if (p->p_flag & P_WEXIT) error = ESRCH; PROC_UNLOCK(p); /* pfind() returns a locked process */ if (error == 0) error = pmc_detach_process(p, pm); } break; /* * Retrieve the MSR number associated with the counter * 'pmc_id'. This allows processes to directly use RDPMC * instructions to read their PMCs, without the overhead of a * system call. */ case PMC_OP_PMCGETMSR: { int adjri, ri; struct pmc *pm; struct pmc_target *pt; struct pmc_op_getmsr gm; struct pmc_classdep *pcd; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &gm, sizeof(gm))) != 0) break; if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0) break; /* * The allocated PMC has to be a process virtual PMC, * i.e., of type MODE_T[CS]. Global PMCs can only be * read using the PMCREAD operation since they may be * allocated on a different CPU than the one we could * be running on at the time of the RDPMC instruction. * * The GETMSR operation is not allowed for PMCs that * are inherited across processes. */ if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) || (pm->pm_flags & PMC_F_DESCENDANTS)) { error = EINVAL; break; } /* * It only makes sense to use a RDPMC (or its * equivalent instruction on non-x86 architectures) on * a process that has allocated and attached a PMC to * itself. Conversely the PMC is only allowed to have * one process attached to it -- its owner. */ if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL || LIST_NEXT(pt, pt_next) != NULL || pt->pt_process->pp_proc != pm->pm_owner->po_owner) { error = EINVAL; break; } ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); /* PMC class has no 'GETMSR' support */ if (pcd->pcd_get_msr == NULL) { error = ENOSYS; break; } if ((error = (*pcd->pcd_get_msr)(adjri, &gm.pm_msr)) < 0) break; if ((error = copyout(&gm, arg, sizeof(gm))) < 0) break; /* * Mark our process as using MSRs. Update machine * state using a forced context switch. 
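/*
 * What GETMSR buys the process: once it knows the counter's MSR
 * number it can read the PMC without a system call.  An x86-flavored
 * sketch of such a read ('index' would come from gm.pm_msr above; the
 * exact encoding is machine-dependent and this helper is illustrative
 * only):
 */
static __inline uint64_t
example_rdpmc(uint32_t index)
{
	uint32_t lo, hi;

	/* RDPMC: counter index in %ecx, result in %edx:%eax. */
	__asm __volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (index));
	return (((uint64_t)hi << 32) | lo);
}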
*/ pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS; pmc_force_context_switch(); } break; /* * Release an allocated PMC */ case PMC_OP_PMCRELEASE: { pmc_id_t pmcid; struct pmc *pm; struct pmc_owner *po; struct pmc_op_simple sp; /* * Find PMC pointer for the named PMC. * * Use pmc_release_pmc_descriptor() to switch off the * PMC, remove all its target threads, and remove the * PMC from its owner's list. * * Remove the owner record if this is the last PMC * owned. * * Free up space. */ if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; po = pm->pm_owner; pmc_release_pmc_descriptor(pm); pmc_maybe_remove_owner(po); pmc_destroy_pmc_descriptor(pm); } break; /* * Read and/or write a PMC. */ case PMC_OP_PMCRW: { int adjri; struct pmc *pm; uint32_t cpu, ri; pmc_value_t oldvalue; struct pmc_binding pb; struct pmc_op_pmcrw prw; struct pmc_classdep *pcd; struct pmc_op_pmcrw *pprw; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &prw, sizeof(prw))) != 0) break; ri = 0; PMCDBG2(PMC,OPS,1, "rw id=%d flags=0x%x", prw.pm_pmcid, prw.pm_flags); /* must have at least one flag set */ if ((prw.pm_flags & (PMC_F_OLDVALUE|PMC_F_NEWVALUE)) == 0) { error = EINVAL; break; } /* locate pmc descriptor */ if ((error = pmc_find_pmc(prw.pm_pmcid, &pm)) != 0) break; /* Can't read a PMC that hasn't been started. */ if (pm->pm_state != PMC_STATE_ALLOCATED && pm->pm_state != PMC_STATE_STOPPED && pm->pm_state != PMC_STATE_RUNNING) { error = EINVAL; break; } /* writing a new value is allowed only for 'STOPPED' pmcs */ if (pm->pm_state == PMC_STATE_RUNNING && (prw.pm_flags & PMC_F_NEWVALUE)) { error = EBUSY; break; } if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) { /* * If this PMC is attached to its owner (i.e., * the process requesting this operation) and * is running, then attempt to get an * up-to-date reading from hardware for a READ. * Writes are only allowed when the PMC is * stopped, so only update the saved value * field. * * If the PMC is not running, or is not * attached to its owner, read/write to the * savedvalue field.
*/ ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); mtx_pool_lock_spin(pmc_mtxpool, pm); cpu = curthread->td_oncpu; if (prw.pm_flags & PMC_F_OLDVALUE) { if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) && (pm->pm_state == PMC_STATE_RUNNING)) error = (*pcd->pcd_read_pmc)(cpu, adjri, &oldvalue); else oldvalue = pm->pm_gv.pm_savedvalue; } if (prw.pm_flags & PMC_F_NEWVALUE) pm->pm_gv.pm_savedvalue = prw.pm_value; mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { /* System mode PMCs */ cpu = PMC_TO_CPU(pm); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* move this thread to CPU 'cpu' */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); critical_enter(); /* save old value */ if (prw.pm_flags & PMC_F_OLDVALUE) if ((error = (*pcd->pcd_read_pmc)(cpu, adjri, &oldvalue))) goto error; /* write out new value */ if (prw.pm_flags & PMC_F_NEWVALUE) error = (*pcd->pcd_write_pmc)(cpu, adjri, prw.pm_value); error: critical_exit(); pmc_restore_cpu_binding(&pb); if (error) break; } pprw = (struct pmc_op_pmcrw *) arg; #ifdef HWPMC_DEBUG if (prw.pm_flags & PMC_F_NEWVALUE) PMCDBG3(PMC,OPS,2, "rw id=%d new %jx -> old %jx", ri, prw.pm_value, oldvalue); else if (prw.pm_flags & PMC_F_OLDVALUE) PMCDBG2(PMC,OPS,2, "rw id=%d -> old %jx", ri, oldvalue); #endif /* return old value if requested */ if (prw.pm_flags & PMC_F_OLDVALUE) if ((error = copyout(&oldvalue, &pprw->pm_value, sizeof(prw.pm_value)))) break; } break; /* * Set the sampling rate for a sampling mode PMC and the * initial count for a counting mode PMC. */ case PMC_OP_PMCSETCOUNT: { struct pmc *pm; struct pmc_op_pmcsetcount sc; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &sc, sizeof(sc))) != 0) break; if ((error = pmc_find_pmc(sc.pm_pmcid, &pm)) != 0) break; if (pm->pm_state == PMC_STATE_RUNNING) { error = EBUSY; break; } if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pm->pm_sc.pm_reloadcount = sc.pm_count; else pm->pm_sc.pm_initial = sc.pm_count; } break; /* * Start a PMC. */ case PMC_OP_PMCSTART: { pmc_id_t pmcid; struct pmc *pm; struct pmc_op_simple sp; sx_assert(&pmc_sx, SX_XLOCKED); if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; KASSERT(pmcid == pm->pm_id, ("[pmc,%d] pmcid %x != id %x", __LINE__, pm->pm_id, pmcid)); if (pm->pm_state == PMC_STATE_RUNNING) /* already running */ break; else if (pm->pm_state != PMC_STATE_STOPPED && pm->pm_state != PMC_STATE_ALLOCATED) { error = EINVAL; break; } error = pmc_start(pm); } break; /* * Stop a PMC. */ case PMC_OP_PMCSTOP: { pmc_id_t pmcid; struct pmc *pm; struct pmc_op_simple sp; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; /* * Mark the PMC as inactive and invoke the MD stop * routines if needed. */ if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; KASSERT(pmcid == pm->pm_id, ("[pmc,%d] pmc id %x != pmcid %x", __LINE__, pm->pm_id, pmcid)); if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */ break; else if (pm->pm_state != PMC_STATE_RUNNING) { error = EINVAL; break; } error = pmc_stop(pm); } break; /* * Write a user supplied value to the log file. 
*/ case PMC_OP_WRITELOG: { struct pmc_op_writelog wl; struct pmc_owner *po; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &wl, sizeof(wl))) != 0) break; if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) { error = EINVAL; break; } error = pmclog_process_userlog(po, &wl); } break; default: error = EINVAL; break; } if (is_sx_locked != 0) { if (is_sx_downgraded) sx_sunlock(&pmc_sx); else sx_xunlock(&pmc_sx); } if (error) atomic_add_int(&pmc_stats.pm_syscall_errors, 1); PICKUP_GIANT(); return error; } /* * Helper functions */ /* * Mark the thread as needing callchain capture and post an AST. The * actual callchain capture will be done in a context where it is safe * to take page faults. */ static void pmc_post_callchain_callback(void) { struct thread *td; td = curthread; /* * If there are multiple PMCs for the same interrupt, ignore the new post */ if (td->td_pflags & TDP_CALLCHAIN) return; /* * Mark this thread as needing callchain capture. * `td->td_pflags' will be safe to touch because this thread * was in user space when it was interrupted. */ td->td_pflags |= TDP_CALLCHAIN; /* * Don't let this thread migrate between CPUs until callchain * capture completes. */ sched_pin(); return; } /* * Interrupt processing. * * Find a free slot in the per-cpu array of samples and capture the * current callchain there. If a sample was successfully added, a bit * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook * needs to be invoked from the clock handler. * * This function is meant to be called from an NMI handler. It cannot * use any of the locking primitives supplied by the OS. */ int pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf, int inuserspace) { int error, callchaindepth; struct thread *td; struct pmc_sample *ps; struct pmc_samplebuffer *psb; error = 0; /* * Allocate space for a sample buffer. */ psb = pmc_pcpu[cpu]->pc_sb[ring]; ps = psb->ps_write; if (ps->ps_nsamples) { /* in use, reader hasn't caught up */ CPU_SET_ATOMIC(cpu, &pm->pm_stalled); atomic_add_int(&pmc_stats.pm_intr_bufferfull, 1); PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, (void *) tf, inuserspace, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); callchaindepth = 1; error = ENOMEM; goto done; } /* Fill in entry. */ PMCDBG6(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, (void *) tf, inuserspace, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); KASSERT(pm->pm_runcount >= 0, ("[pmc,%d] pm=%p runcount %d", __LINE__, (void *) pm, pm->pm_runcount)); atomic_add_rel_int(&pm->pm_runcount, 1); /* hold onto PMC */ ps->ps_pmc = pm; if ((td = curthread) && td->td_proc) ps->ps_pid = td->td_proc->p_pid; else ps->ps_pid = -1; ps->ps_cpu = cpu; ps->ps_td = td; ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0; callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ? pmc_callchaindepth : 1; if (callchaindepth == 1) ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf); else { /* * Kernel stack traversals can be done immediately, * while we defer to an AST for user space traversals.
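/*
 * The sample buffer used above is a single-producer (interrupt) /
 * single-consumer (hardclock sweep) ring in which ps_nsamples doubles
 * as the per-slot full/empty flag.  Reduced sketch with invented
 * 'xring'/'xslot' types mirroring struct pmc_samplebuffer:
 */
struct xslot {
	int nsamples;			/* 0 means free */
	int payload;
};
struct xring {
	struct xslot *write, *read, *base, *fence;
};

static int
example_ring_put(struct xring *r, int payload)
{
	struct xslot *s;

	s = r->write;
	if (s->nsamples != 0)		/* reader has not caught up */
		return (ENOMEM);	/* caller marks the PMC stalled */
	s->payload = payload;
	s->nsamples = 1;		/* publish: slot now in use */
	r->write = (s + 1 == r->fence) ? r->base : s + 1;
	return (0);
}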
*/ if (!inuserspace) { callchaindepth = pmc_save_kernel_callchain(ps->ps_pc, callchaindepth, tf); } else { pmc_post_callchain_callback(); callchaindepth = PMC_SAMPLE_INUSE; } } ps->ps_nsamples = callchaindepth; /* mark entry as in use */ /* increment write pointer, modulo ring buffer size */ ps++; if (ps == psb->ps_fence) psb->ps_write = psb->ps_samples; else psb->ps_write = ps; done: /* mark CPU as needing processing */ if (callchaindepth != PMC_SAMPLE_INUSE) CPU_SET_ATOMIC(cpu, &pmc_cpumask); return (error); } /* * Capture a user call chain. This function will be called from ast() * before control returns to userland and before the process gets * rescheduled. */ static void pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf) { struct pmc *pm; struct thread *td; struct pmc_sample *ps, *ps_end; struct pmc_samplebuffer *psb; #ifdef INVARIANTS int ncallchains; #endif psb = pmc_pcpu[cpu]->pc_sb[ring]; td = curthread; KASSERT(td->td_pflags & TDP_CALLCHAIN, ("[pmc,%d] Retrieving callchain for thread that doesn't want it", __LINE__)); #ifdef INVARIANTS ncallchains = 0; #endif /* * Iterate through all deferred callchain requests. * Walk from the current read pointer to the current * write pointer. */ ps = psb->ps_read; ps_end = psb->ps_write; do { if (ps->ps_nsamples != PMC_SAMPLE_INUSE) goto next; if (ps->ps_td != td) goto next; KASSERT(ps->ps_cpu == cpu, ("[pmc,%d] cpu mismatch ps_cpu=%d pcpu=%d", __LINE__, ps->ps_cpu, PCPU_GET(cpuid))); pm = ps->ps_pmc; KASSERT(pm->pm_flags & PMC_F_CALLCHAIN, ("[pmc,%d] Retrieving callchain for PMC that doesn't " "want it", __LINE__)); KASSERT(pm->pm_runcount > 0, ("[pmc,%d] runcount %d", __LINE__, pm->pm_runcount)); /* * Retrieve the callchain and mark the sample buffer * as 'processable' by the timer tick sweep code. */ ps->ps_nsamples = pmc_save_user_callchain(ps->ps_pc, pmc_callchaindepth, tf); #ifdef INVARIANTS ncallchains++; #endif next: /* increment the pointer, modulo sample ring size */ if (++ps == psb->ps_fence) ps = psb->ps_samples; } while (ps != ps_end); KASSERT(ncallchains > 0, ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__, cpu)); KASSERT(td->td_pinned == 1, ("[pmc,%d] invalid td_pinned value", __LINE__)); sched_unpin(); /* Can migrate safely now. */ /* mark CPU as needing processing */ CPU_SET_ATOMIC(cpu, &pmc_cpumask); return; } /* * Process saved PC samples. */ static void pmc_process_samples(int cpu, int ring) { struct pmc *pm; int adjri, n; struct thread *td; struct pmc_owner *po; struct pmc_sample *ps; struct pmc_classdep *pcd; struct pmc_samplebuffer *psb; KASSERT(PCPU_GET(cpuid) == cpu, ("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__, PCPU_GET(cpuid), cpu)); psb = pmc_pcpu[cpu]->pc_sb[ring]; for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */ ps = psb->ps_read; if (ps->ps_nsamples == PMC_SAMPLE_FREE) break; pm = ps->ps_pmc; KASSERT(pm->pm_runcount > 0, ("[pmc,%d] pm=%p runcount %d", __LINE__, (void *) pm, pm->pm_runcount)); po = pm->pm_owner; KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__, pm, PMC_TO_MODE(pm))); /* Ignore PMCs that have been switched off */ if (pm->pm_state != PMC_STATE_RUNNING) goto entrydone; /* If there is a pending AST wait for completion */ if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { /* Need a rescan at a later time. 
*/ CPU_SET_ATOMIC(cpu, &pmc_cpumask); break; } PMCDBG6(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu, pm, ps->ps_nsamples, ps->ps_flags, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); /* * If this is a process-mode PMC that is attached to * its owner, and if the PC is in user mode, update * profiling statistics like timer-based profiling * would have done. */ if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) { if (ps->ps_flags & PMC_CC_F_USERSPACE) { td = FIRST_THREAD_IN_PROC(po->po_owner); addupc_intr(td, ps->ps_pc[0], 1); } goto entrydone; } /* * Otherwise, this is either a sampling mode PMC that * is attached to a different process than its owner, * or a system-wide sampling PMC. Dispatch a log * entry to the PMC's owner process. */ pmclog_process_callchain(pm, ps); entrydone: ps->ps_nsamples = 0; /* mark entry as free */ atomic_subtract_rel_int(&pm->pm_runcount, 1); /* increment read pointer, modulo sample ring size */ if (++ps == psb->ps_fence) psb->ps_read = psb->ps_samples; else psb->ps_read = ps; } atomic_add_int(&pmc_stats.pm_log_sweeps, 1); /* Do not re-enable stalled PMCs if we failed to process any samples */ if (n == 0) return; /* * Restart any stalled sampling PMCs on this CPU. * * If the NMI handler sets the pm_stalled field of a PMC after * the check below, we'll end up processing the stalled PMC at * the next hardclock tick. */ for (n = 0; n < md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); KASSERT(pcd != NULL, ("[pmc,%d] null pcd ri=%d", __LINE__, n)); (void) (*pcd->pcd_get_config)(cpu,adjri,&pm); if (pm == NULL || /* !cfg'ed */ pm->pm_state != PMC_STATE_RUNNING || /* !active */ !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */ !CPU_ISSET(cpu, &pm->pm_cpustate) || /* !desired */ !CPU_ISSET(cpu, &pm->pm_stalled)) /* !stalled */ continue; CPU_CLR_ATOMIC(cpu, &pm->pm_stalled); (*pcd->pcd_start_pmc)(cpu, adjri); } } /* * Event handlers. */ /* * Handle a process exit. * * Remove this process from all hash tables. If this process * owned any PMCs, turn off those PMCs and deallocate them, * removing any associations with target processes. * * This function will be called by the last 'thread' of a * process. * * XXX This eventhandler gets called early in the exit process. * Consider using a 'hook' invocation from thread_exit() or equivalent * spot. Another negative is that kse_exit doesn't seem to call * exit1() [??]. * */ static void pmc_process_exit(void *arg __unused, struct proc *p) { struct pmc *pm; int adjri, cpu; unsigned int ri; int is_using_hwpmcs; struct pmc_owner *po; struct pmc_process *pp; struct pmc_classdep *pcd; pmc_value_t newvalue, tmp; PROC_LOCK(p); is_using_hwpmcs = p->p_flag & P_HWPMC; PROC_UNLOCK(p); /* * Log a sysexit event to all SS PMC owners. */ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_sysexit(po, p->p_pid); if (!is_using_hwpmcs) return; PMC_GET_SX_XLOCK(); PMCDBG3(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid, p->p_comm); /* * Since this code is invoked by the last thread in an exiting * process, we would have context switched IN at some prior * point. However, with PREEMPTION, kernel mode context * switches may happen any time, so we want to disable a * context switch OUT till we get any PMCs targeting this * process off the hardware. * * We also need to atomically remove this process' * entry from our target process hash table, using * PMC_FLAG_REMOVE.
*/ PMCDBG3(PRC,EXT,1, "process-exit proc=%p (%d, %s)", p, p->p_pid, p->p_comm); critical_enter(); /* no preemption */ cpu = curthread->td_oncpu; if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_REMOVE)) != NULL) { PMCDBG2(PRC,EXT,2, "process-exit proc=%p pmc-process=%p", p, pp); /* * The exiting process could be the target of * some PMCs which will be running on the * currently executing CPU. * * We need to turn these PMCs off like we * would do at context switch OUT time. */ for (ri = 0; ri < md->pmd_npmc; ri++) { /* * Pick up the pmc pointer from hardware * state similar to the CSW_OUT code. */ pm = NULL; pcd = pmc_ri_to_classdep(md, ri, &adjri); (void) (*pcd->pcd_get_config)(cpu, adjri, &pm); PMCDBG2(PRC,EXT,2, "ri=%d pm=%p", ri, pm); if (pm == NULL || !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) continue; PMCDBG4(PRC,EXT,2, "ppmcs[%d]=%p pm=%p " "state=%d", ri, pp->pp_pmcs[ri].pp_pmc, pm, pm->pm_state); KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", __LINE__, PMC_TO_ROWINDEX(pm), ri)); KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc)); KASSERT(pm->pm_runcount > 0, ("[pmc,%d] bad runcount ri %d rc %d", __LINE__, ri, pm->pm_runcount)); /* * Change desired state, and then stop if not * stalled. This two-step dance should avoid * race conditions where an interrupt re-enables * the PMC after this code has already checked * the pm_stalled flag. */ if (CPU_ISSET(cpu, &pm->pm_cpustate)) { CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate); if (!CPU_ISSET(cpu, &pm->pm_stalled)) { (void) pcd->pcd_stop_pmc(cpu, adjri); pcd->pcd_read_pmc(cpu, adjri, &newvalue); tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); mtx_pool_lock_spin(pmc_mtxpool, pm); pm->pm_gv.pm_savedvalue += tmp; pp->pp_pmcs[ri].pp_pmcval += tmp; mtx_pool_unlock_spin(pmc_mtxpool, pm); } } atomic_subtract_rel_int(&pm->pm_runcount,1); KASSERT((int) pm->pm_runcount >= 0, ("[pmc,%d] runcount is %d", __LINE__, ri)); (void) pcd->pcd_config_pmc(cpu, adjri, NULL); } /* * Inform the MD layer of this pseudo "context switch * out" */ (void) md->pmd_switch_out(pmc_pcpu[cpu], pp); critical_exit(); /* ok to be pre-empted now */ /* * Unlink this process from the PMCs that are * targeting it. This will send a signal to * all PMC owners whose PMCs are orphaned. * * Log PMC value at exit time if requested. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm))) pmclog_process_procexit(pm, pp); pmc_unlink_target_process(pm, pp); } free(pp, M_PMC); } else critical_exit(); /* pp == NULL */ /* * If the process owned PMCs, free them up and free up * memory. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } sx_xunlock(&pmc_sx); } /* * Handle a process fork. * * If the parent process 'p1' is under HWPMC monitoring, then copy * over any attached PMCs that have 'do_descendants' semantics. */ static void pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *newproc, int flags) { int is_using_hwpmcs; unsigned int ri; uint32_t do_descendants; struct pmc *pm; struct pmc_owner *po; struct pmc_process *ppnew, *ppold; (void) flags; /* unused parameter */ PROC_LOCK(p1); is_using_hwpmcs = p1->p_flag & P_HWPMC; PROC_UNLOCK(p1); /* * If there are system-wide sampling PMCs active, we need to * log all fork events to their owner's logs.
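pmc_process_fork() decides whether anything must be inherited by OR-ing the PMC_F_DESCENDANTS flag across every row attached to the parent and bailing out early when the result is zero (the do_descendants scan that follows). A reduced sketch of that scan, with the driver's structures collapsed to bare flag words:

#include <stdint.h>
#include <stdio.h>

#define PMC_F_DESC	0x01	/* stands in for PMC_F_DESCENDANTS */

static uint32_t
wants_descendants(const uint32_t *row_flags, int npmc)
{
	uint32_t do_descendants = 0;
	int ri;

	for (ri = 0; ri < npmc; ri++)
		do_descendants |= row_flags[ri] & PMC_F_DESC;
	return (do_descendants);	/* zero means nothing to inherit */
}

int
main(void)
{
	uint32_t rows[4] = { 0, PMC_F_DESC, 0, 0 };

	printf("inherit: %s\n", wants_descendants(rows, 4) ? "yes" : "no");
	return (0);
}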
*/ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); if (!is_using_hwpmcs) return; PMC_GET_SX_XLOCK(); PMCDBG4(PMC,FRK,1, "process-fork proc=%p (%d, %s) -> %p", p1, p1->p_pid, p1->p_comm, newproc); /* * If the parent process (curthread->td_proc) is a * target of any PMCs, look for PMCs that are to be * inherited, and link these into the new process * descriptor. */ if ((ppold = pmc_find_process_descriptor(curthread->td_proc, PMC_FLAG_NONE)) == NULL) goto done; /* nothing to do */ do_descendants = 0; for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL) do_descendants |= pm->pm_flags & PMC_F_DESCENDANTS; if (do_descendants == 0) /* nothing to do */ goto done; /* allocate a descriptor for the new process */ if ((ppnew = pmc_find_process_descriptor(newproc, PMC_FLAG_ALLOCATE)) == NULL) goto done; /* * Run through all PMCs that were targeting the old process * and which specified F_DESCENDANTS and attach them to the * new process. * * Log the fork event to all owners of PMCs attached to this * process, if not already logged. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL && (pm->pm_flags & PMC_F_DESCENDANTS)) { pmc_link_target_process(pm, ppnew); po = pm->pm_owner; if (po->po_sscount == 0 && po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); } /* * Now mark the new process as being tracked by this driver. */ PROC_LOCK(newproc); newproc->p_flag |= P_HWPMC; PROC_UNLOCK(newproc); done: sx_xunlock(&pmc_sx); } static void pmc_kld_load(void *arg __unused, linker_file_t lf) { struct pmc_owner *po; sx_slock(&pmc_sx); /* * Notify owners of system sampling PMCs about KLD operations. */ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, (pid_t) -1, (uintfptr_t) lf->address, lf->filename); /* * TODO: Notify owners of (all) process-sampling PMCs too. */ sx_sunlock(&pmc_sx); } static void pmc_kld_unload(void *arg __unused, const char *filename __unused, caddr_t address, size_t size) { struct pmc_owner *po; sx_slock(&pmc_sx); LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, (pid_t) -1, (uintfptr_t) address, (uintfptr_t) address + size); /* * TODO: Notify owners of process-sampling PMCs. */ sx_sunlock(&pmc_sx); } /* * initialization */ static const char * pmc_name_of_pmcclass(enum pmc_class class) { switch (class) { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) \ case PMC_CLASS_##S: \ return #S; __PMC_CLASSES(); default: return (""); } } /* * Base class initializer: allocate structure and set default classes. */ struct pmc_mdep * pmc_mdep_alloc(int nclasses) { struct pmc_mdep *md; int n; /* SOFT + md classes */ n = 1 + nclasses; md = malloc(sizeof(struct pmc_mdep) + n * sizeof(struct pmc_classdep), M_PMC, M_WAITOK|M_ZERO); md->pmd_nclass = n; /* Add base class. 
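pmc_mdep_alloc() sizes one allocation to hold the descriptor header plus 1 + nclasses class rows (the extra row is the SOFT base class). A userland sketch of the same header-plus-trailing-array layout, written with a C99 flexible array member; the names below are local to the sketch:

#include <assert.h>
#include <stdlib.h>

struct classdep {
	int	num;		/* rows contributed by this class */
};

struct mdep {
	int		nclass;
	struct classdep	classes[];	/* flexible array member */
};

static struct mdep *
mdep_alloc(int nclasses)
{
	int n = 1 + nclasses;	/* one extra row for the base class */
	struct mdep *md;

	/* One calloc covers the header and all class descriptors. */
	md = calloc(1, sizeof(*md) + n * sizeof(struct classdep));
	if (md != NULL)
		md->nclass = n;
	return (md);
}

int
main(void)
{
	struct mdep *md = mdep_alloc(2);

	assert(md != NULL && md->nclass == 3);
	free(md);
	return (0);
}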
*/ pmc_soft_initialize(md); return md; } void pmc_mdep_free(struct pmc_mdep *md) { pmc_soft_finalize(md); free(md, M_PMC); } static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; (void) pp; return (0); } static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; (void) pp; return (0); } static struct pmc_mdep * pmc_generic_cpu_initialize(void) { struct pmc_mdep *md; md = pmc_mdep_alloc(0); md->pmd_cputype = PMC_CPU_GENERIC; md->pmd_pcpu_init = NULL; md->pmd_pcpu_fini = NULL; md->pmd_switch_in = generic_switch_in; md->pmd_switch_out = generic_switch_out; return (md); } static void pmc_generic_cpu_finalize(struct pmc_mdep *md) { (void) md; } static int pmc_initialize(void) { int c, cpu, error, n, ri; unsigned int maxcpu; struct pmc_binding pb; struct pmc_sample *ps; struct pmc_classdep *pcd; struct pmc_samplebuffer *sb; md = NULL; error = 0; #ifdef HWPMC_DEBUG /* parse debug flags first */ if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr, sizeof(pmc_debugstr))) pmc_debugflags_parse(pmc_debugstr, pmc_debugstr+strlen(pmc_debugstr)); #endif PMCDBG1(MOD,INI,0, "PMC Initialize (version %x)", PMC_VERSION); /* check kernel version */ if (pmc_kernel_version != PMC_VERSION) { if (pmc_kernel_version == 0) printf("hwpmc: this kernel has not been compiled with " "'options HWPMC_HOOKS'.\n"); else printf("hwpmc: kernel version (0x%x) does not match " "module version (0x%x).\n", pmc_kernel_version, PMC_VERSION); return EPROGMISMATCH; } /* * check sysctl parameters */ if (pmc_hashsize <= 0) { (void) printf("hwpmc: tunable \"hashsize\"=%d must be " "greater than zero.\n", pmc_hashsize); pmc_hashsize = PMC_HASH_SIZE; } if (pmc_nsamples <= 0 || pmc_nsamples > 65535) { (void) printf("hwpmc: tunable \"nsamples\"=%d out of " "range.\n", pmc_nsamples); pmc_nsamples = PMC_NSAMPLES; } if (pmc_callchaindepth <= 0 || pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) { (void) printf("hwpmc: tunable \"callchaindepth\"=%d out of " "range - using %d.\n", pmc_callchaindepth, PMC_CALLCHAIN_DEPTH_MAX); pmc_callchaindepth = PMC_CALLCHAIN_DEPTH_MAX; } md = pmc_md_initialize(); if (md == NULL) { /* Default to generic CPU. */ md = pmc_generic_cpu_initialize(); if (md == NULL) return (ENOSYS); } KASSERT(md->pmd_nclass >= 1 && md->pmd_npmc >= 1, ("[pmc,%d] no classes or pmcs", __LINE__)); /* Compute the map from row-indices to classdep pointers. */ pmc_rowindex_to_classdep = malloc(sizeof(struct pmc_classdep *) * md->pmd_npmc, M_PMC, M_WAITOK|M_ZERO); for (n = 0; n < md->pmd_npmc; n++) pmc_rowindex_to_classdep[n] = NULL; for (ri = c = 0; c < md->pmd_nclass; c++) { pcd = &md->pmd_classdep[c]; for (n = 0; n < pcd->pcd_num; n++, ri++) pmc_rowindex_to_classdep[ri] = pcd; } KASSERT(ri == md->pmd_npmc, ("[pmc,%d] npmc miscomputed: ri=%d, md->npmc=%d", __LINE__, ri, md->pmd_npmc)); maxcpu = pmc_cpu_max(); /* allocate space for the per-cpu array */ pmc_pcpu = malloc(maxcpu * sizeof(struct pmc_cpu *), M_PMC, M_WAITOK|M_ZERO); /* per-cpu 'saved values' for managing process-mode PMCs */ pmc_pcpu_saved = malloc(sizeof(pmc_value_t) * maxcpu * md->pmd_npmc, M_PMC, M_WAITOK); /* Perform CPU-dependent initialization. 
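The row-index map built in pmc_initialize() hands out consecutive row numbers class by class, pcd_num rows per class, and then asserts that the total matches pmd_npmc. A sketch of that fill, storing class indices where the driver stores classdep pointers:

#include <assert.h>

#define NCLASS	2
#define NPMC	6

static int
build_row_map(const int *per_class, int nclass, int *row_to_class)
{
	int c, n, ri = 0;

	for (c = 0; c < nclass; c++)
		for (n = 0; n < per_class[c]; n++)
			row_to_class[ri++] = c;
	return (ri);		/* must equal the total PMC count */
}

int
main(void)
{
	int counts[NCLASS] = { 2, 4 };	/* e.g. SOFT rows, then MD rows */
	int map[NPMC];

	assert(build_row_map(counts, NCLASS, map) == NPMC);
	assert(map[0] == 0 && map[2] == 1 && map[5] == 1);
	return (0);
}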
*/ pmc_save_cpu_binding(&pb); error = 0; for (cpu = 0; error == 0 && cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; pmc_select_cpu(cpu); pmc_pcpu[cpu] = malloc(sizeof(struct pmc_cpu) + md->pmd_npmc * sizeof(struct pmc_hw *), M_PMC, M_WAITOK|M_ZERO); if (md->pmd_pcpu_init) error = md->pmd_pcpu_init(md, cpu); for (n = 0; error == 0 && n < md->pmd_nclass; n++) error = md->pmd_classdep[n].pcd_pcpu_init(md, cpu); } pmc_restore_cpu_binding(&pb); if (error) return (error); /* allocate space for the sample array */ for (cpu = 0; cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; sb = malloc(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, M_WAITOK|M_ZERO); sb->ps_read = sb->ps_write = sb->ps_samples; sb->ps_fence = sb->ps_samples + pmc_nsamples; KASSERT(pmc_pcpu[cpu] != NULL, ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); sb->ps_callchains = malloc(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, M_WAITOK|M_ZERO); for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_HR] = sb; sb = malloc(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, M_WAITOK|M_ZERO); sb->ps_read = sb->ps_write = sb->ps_samples; sb->ps_fence = sb->ps_samples + pmc_nsamples; KASSERT(pmc_pcpu[cpu] != NULL, ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); sb->ps_callchains = malloc(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, M_WAITOK|M_ZERO); for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_SR] = sb; } /* allocate space for the row disposition array */ pmc_pmcdisp = malloc(sizeof(enum pmc_mode) * md->pmd_npmc, M_PMC, M_WAITOK|M_ZERO); /* mark all PMCs as available */ for (n = 0; n < (int) md->pmd_npmc; n++) PMC_MARK_ROW_FREE(n); /* allocate thread hash tables */ pmc_ownerhash = hashinit(pmc_hashsize, M_PMC, &pmc_ownerhashmask); pmc_processhash = hashinit(pmc_hashsize, M_PMC, &pmc_processhashmask); mtx_init(&pmc_processhash_mtx, "pmc-process-hash", "pmc-leaf", MTX_SPIN); LIST_INIT(&pmc_ss_owners); pmc_ss_count = 0; /* allocate a pool of spin mutexes */ pmc_mtxpool = mtx_pool_create("pmc-leaf", pmc_mtxpool_size, MTX_SPIN); PMCDBG4(MOD,INI,1, "pmc_ownerhash=%p, mask=0x%lx " "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask, pmc_processhash, pmc_processhashmask); /* register process {exit,fork,exec} handlers */ pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit, pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY); pmc_fork_tag = EVENTHANDLER_REGISTER(process_fork, pmc_process_fork, NULL, EVENTHANDLER_PRI_ANY); /* register kld event handlers */ pmc_kld_load_tag = EVENTHANDLER_REGISTER(kld_load, pmc_kld_load, NULL, EVENTHANDLER_PRI_ANY); pmc_kld_unload_tag = EVENTHANDLER_REGISTER(kld_unload, pmc_kld_unload, NULL, EVENTHANDLER_PRI_ANY); /* initialize logging */ pmclog_initialize(); /* set hook functions */ pmc_intr = md->pmd_intr; pmc_hook = pmc_hook_handler; if (error == 0) { printf(PMC_MODULE_NAME ":"); for (n = 0; n < (int) md->pmd_nclass; n++) { pcd = &md->pmd_classdep[n]; printf(" %s/%d/%d/0x%b", pmc_name_of_pmcclass(pcd->pcd_class), pcd->pcd_num, pcd->pcd_width, pcd->pcd_caps, "\20" "\1INT\2USR\3SYS\4EDG\5THR" "\6REA\7WRI\10INV\11QUA\12PRC" "\13TAG\14CSC"); } printf("\n"); } return (error); } /* prepare to be unloaded */ static void pmc_cleanup(void) { int c, cpu; unsigned int maxcpu; struct pmc_ownerhash 
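Each per-CPU buffer above gets one contiguous callchain arena of pmc_callchaindepth * pmc_nsamples slots, carved into per-sample slices, so the sampling path itself never allocates. A sketch of the carving, with invented names:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

struct samp {
	uintptr_t	*pc;	/* stands in for ps_pc */
};

/* One allocation, one free; each sample points at its own slice. */
static uintptr_t *
attach_callchains(struct samp *samples, int nsamples, int depth)
{
	uintptr_t *arena;
	int n;

	arena = calloc((size_t)nsamples * depth, sizeof(uintptr_t));
	if (arena == NULL)
		return (NULL);
	for (n = 0; n < nsamples; n++)
		samples[n].pc = arena + (size_t)n * depth;
	return (arena);		/* owner frees the arena once */
}

int
main(void)
{
	struct samp s[8];
	uintptr_t *arena = attach_callchains(s, 8, 16);

	assert(arena != NULL && s[3].pc == arena + 3 * 16);
	free(arena);
	return (0);
}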
*ph; struct pmc_owner *po, *tmp; struct pmc_binding pb; #ifdef HWPMC_DEBUG struct pmc_processhash *prh; #endif PMCDBG0(MOD,INI,0, "cleanup"); /* switch off sampling */ CPU_ZERO(&pmc_cpumask); pmc_intr = NULL; sx_xlock(&pmc_sx); if (pmc_hook == NULL) { /* being unloaded already */ sx_xunlock(&pmc_sx); return; } pmc_hook = NULL; /* prevent new threads from entering module */ /* deregister event handlers */ EVENTHANDLER_DEREGISTER(process_fork, pmc_fork_tag); EVENTHANDLER_DEREGISTER(process_exit, pmc_exit_tag); EVENTHANDLER_DEREGISTER(kld_load, pmc_kld_load_tag); EVENTHANDLER_DEREGISTER(kld_unload, pmc_kld_unload_tag); /* send SIGBUS to all owner threads, free up allocations */ if (pmc_ownerhash) for (ph = pmc_ownerhash; ph <= &pmc_ownerhash[pmc_ownerhashmask]; ph++) { LIST_FOREACH_SAFE(po, ph, po_next, tmp) { pmc_remove_owner(po); /* send SIGBUS to owner processes */ PMCDBG3(MOD,INI,2, "cleanup signal proc=%p " "(%d, %s)", po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm); PROC_LOCK(po->po_owner); kern_psignal(po->po_owner, SIGBUS); PROC_UNLOCK(po->po_owner); pmc_destroy_owner_descriptor(po); } } /* reclaim allocated data structures */ if (pmc_mtxpool) mtx_pool_destroy(&pmc_mtxpool); mtx_destroy(&pmc_processhash_mtx); if (pmc_processhash) { #ifdef HWPMC_DEBUG struct pmc_process *pp; PMCDBG0(MOD,INI,3, "destroy process hash"); for (prh = pmc_processhash; prh <= &pmc_processhash[pmc_processhashmask]; prh++) LIST_FOREACH(pp, prh, pp_next) PMCDBG1(MOD,INI,3, "pid=%d", pp->pp_proc->p_pid); #endif hashdestroy(pmc_processhash, M_PMC, pmc_processhashmask); pmc_processhash = NULL; } if (pmc_ownerhash) { PMCDBG0(MOD,INI,3, "destroy owner hash"); hashdestroy(pmc_ownerhash, M_PMC, pmc_ownerhashmask); pmc_ownerhash = NULL; } KASSERT(LIST_EMPTY(&pmc_ss_owners), ("[pmc,%d] Global SS owner list not empty", __LINE__)); KASSERT(pmc_ss_count == 0, ("[pmc,%d] Global SS count not empty", __LINE__)); /* do processor and pmc-class dependent cleanup */ maxcpu = pmc_cpu_max(); PMCDBG0(MOD,INI,3, "md cleanup"); if (md) { pmc_save_cpu_binding(&pb); for (cpu = 0; cpu < maxcpu; cpu++) { PMCDBG2(MOD,INI,1,"pmc-cleanup cpu=%d pcs=%p", cpu, pmc_pcpu[cpu]); if (!pmc_cpu_is_active(cpu) || pmc_pcpu[cpu] == NULL) continue; pmc_select_cpu(cpu); for (c = 0; c < md->pmd_nclass; c++) md->pmd_classdep[c].pcd_pcpu_fini(md, cpu); if (md->pmd_pcpu_fini) md->pmd_pcpu_fini(md, cpu); } if (md->pmd_cputype == PMC_CPU_GENERIC) pmc_generic_cpu_finalize(md); else pmc_md_finalize(md); pmc_mdep_free(md); md = NULL; pmc_restore_cpu_binding(&pb); } /* Free per-cpu descriptors. */ for (cpu = 0; cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_HR] != NULL, ("[pmc,%d] Null hw cpu sample buffer cpu=%d", __LINE__, cpu)); KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_SR] != NULL, ("[pmc,%d] Null sw cpu sample buffer cpu=%d", __LINE__, cpu)); free(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC); free(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC); free(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC); free(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC); free(pmc_pcpu[cpu], M_PMC); } free(pmc_pcpu, M_PMC); pmc_pcpu = NULL; free(pmc_pcpu_saved, M_PMC); pmc_pcpu_saved = NULL; if (pmc_pmcdisp) { free(pmc_pmcdisp, M_PMC); pmc_pmcdisp = NULL; } if (pmc_rowindex_to_classdep) { free(pmc_rowindex_to_classdep, M_PMC); pmc_rowindex_to_classdep = NULL; } pmclog_shutdown(); sx_xunlock(&pmc_sx); /* we are done */ } /* * The function called at load/unload. 
*/ static int load (struct module *module __unused, int cmd, void *arg __unused) { int error; error = 0; switch (cmd) { case MOD_LOAD : /* initialize the subsystem */ error = pmc_initialize(); if (error != 0) break; PMCDBG2(MOD,INI,1, "syscall=%d maxcpu=%d", pmc_syscall_num, pmc_cpu_max()); break; case MOD_UNLOAD : case MOD_SHUTDOWN: pmc_cleanup(); PMCDBG0(MOD,INI,1, "unloaded"); break; default : error = EINVAL; /* XXX should panic(9) */ break; } return error; } /* memory pool */ MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module"); Index: projects/powernv/dev/isp/isp.c =================================================================== --- projects/powernv/dev/isp/isp.c (revision 290990) +++ projects/powernv/dev/isp/isp.c (revision 290991) @@ -1,8550 +1,8553 @@ /*- * Copyright (c) 1997-2009 by Matthew Jacob * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Machine and OS Independent (well, as best as possible) * code for the Qlogic ISP SCSI and FC-SCSI adapters. */ /* * Inspiration and ideas about this driver are from Erik Moe's Linux driver * (qlogicisp.c) and Dave Miller's SBus version of same (qlogicisp.c). Some * ideas dredged from the Solaris driver. */ /* * Include header file appropriate for platform we're building on. 
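The load() handler above is a standard kld module event handler. Its registration glue is not part of this hunk; for reference, the generic FreeBSD pattern looks like the sketch below. The module name and ordering are placeholders, not hwpmc's actual registration.

#include <sys/param.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/systm.h>

static int
example_modevent(module_t mod __unused, int cmd, void *arg __unused)
{
	switch (cmd) {
	case MOD_LOAD:
		return (0);		/* subsystem init would go here */
	case MOD_UNLOAD:
	case MOD_SHUTDOWN:
		return (0);		/* teardown would go here */
	default:
		return (EOPNOTSUPP);	/* other events not handled */
	}
}

static moduledata_t example_mod = {
	"example",		/* module name */
	example_modevent,	/* event handler */
	NULL			/* extra data */
};
DECLARE_MODULE(example, example_mod, SI_SUB_DRIVERS, SI_ORDER_ANY);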
*/ #ifdef __NetBSD__ #include __KERNEL_RCSID(0, "$NetBSD$"); #include #endif #ifdef __FreeBSD__ #include __FBSDID("$FreeBSD$"); #include #endif #ifdef __OpenBSD__ #include #endif #ifdef __linux__ #include "isp_linux.h" #endif #ifdef __svr4__ #include "isp_solaris.h" #endif /* * General defines */ #define MBOX_DELAY_COUNT 1000000 / 100 #define ISP_MARK_PORTDB(a, b, c) \ do { \ isp_prt(isp, ISP_LOG_SANCFG, \ "Chan %d ISP_MARK_PORTDB@LINE %d", (b), __LINE__); \ isp_mark_portdb((a), (b), (c)); \ } while (0) /* * Local static data */ static const char fconf[] = "Chan %d PortDB[%d] changed:\n current =(0x%x@0x%06x 0x%08x%08x 0x%08x%08x)\n database=(0x%x@0x%06x 0x%08x%08x 0x%08x%08x)"; static const char notresp[] = "Not RESPONSE in RESPONSE Queue (type 0x%x) @ idx %d (next %d) nlooked %d"; static const char topology[] = "Chan %d WWPN 0x%08x%08x PortID 0x%06x handle 0x%x, Connection '%s'"; static const char bun[] = "bad underrun (count %d, resid %d, status %s)"; static const char lipd[] = "Chan %d LIP destroyed %d active commands"; static const char sacq[] = "unable to acquire scratch area"; static const uint8_t alpa_map[] = { 0xef, 0xe8, 0xe4, 0xe2, 0xe1, 0xe0, 0xdc, 0xda, 0xd9, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xce, 0xcd, 0xcc, 0xcb, 0xca, 0xc9, 0xc7, 0xc6, 0xc5, 0xc3, 0xbc, 0xba, 0xb9, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xae, 0xad, 0xac, 0xab, 0xaa, 0xa9, 0xa7, 0xa6, 0xa5, 0xa3, 0x9f, 0x9e, 0x9d, 0x9b, 0x98, 0x97, 0x90, 0x8f, 0x88, 0x84, 0x82, 0x81, 0x80, 0x7c, 0x7a, 0x79, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x67, 0x66, 0x65, 0x63, 0x5c, 0x5a, 0x59, 0x56, 0x55, 0x54, 0x53, 0x52, 0x51, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x47, 0x46, 0x45, 0x43, 0x3c, 0x3a, 0x39, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x27, 0x26, 0x25, 0x23, 0x1f, 0x1e, 0x1d, 0x1b, 0x18, 0x17, 0x10, 0x0f, 0x08, 0x04, 0x02, 0x01, 0x00 }; /* * Local function prototypes. 
*/ static int isp_parse_async(ispsoftc_t *, uint16_t); static int isp_parse_async_fc(ispsoftc_t *, uint16_t); static int isp_handle_other_response(ispsoftc_t *, int, isphdr_t *, uint32_t *); static void isp_parse_status(ispsoftc_t *, ispstatusreq_t *, XS_T *, long *); static void isp_parse_status_24xx(ispsoftc_t *, isp24xx_statusreq_t *, XS_T *, long *); static void isp_fastpost_complete(ispsoftc_t *, uint32_t); static int isp_mbox_continue(ispsoftc_t *); static void isp_scsi_init(ispsoftc_t *); static void isp_scsi_channel_init(ispsoftc_t *, int); static void isp_fibre_init(ispsoftc_t *); static void isp_fibre_init_2400(ispsoftc_t *); static void isp_mark_portdb(ispsoftc_t *, int, int); static int isp_plogx(ispsoftc_t *, int, uint16_t, uint32_t, int, int); static int isp_port_login(ispsoftc_t *, uint16_t, uint32_t); static int isp_port_logout(ispsoftc_t *, uint16_t, uint32_t); static int isp_getpdb(ispsoftc_t *, int, uint16_t, isp_pdb_t *, int); static int isp_gethandles(ispsoftc_t *, int, uint16_t *, int *, int, int); static void isp_dump_chip_portdb(ispsoftc_t *, int, int); static uint64_t isp_get_wwn(ispsoftc_t *, int, int, int); static int isp_fclink_test(ispsoftc_t *, int, int); static int isp_pdb_sync(ispsoftc_t *, int); static int isp_scan_loop(ispsoftc_t *, int); static int isp_gid_ft_sns(ispsoftc_t *, int); static int isp_gid_ft_ct_passthru(ispsoftc_t *, int); static int isp_scan_fabric(ispsoftc_t *, int); static int isp_login_device(ispsoftc_t *, int, uint32_t, isp_pdb_t *, uint16_t *); static int isp_register_fc4_type(ispsoftc_t *, int); static int isp_register_fc4_type_24xx(ispsoftc_t *, int); static uint16_t isp_next_handle(ispsoftc_t *, uint16_t *); static void isp_fw_state(ispsoftc_t *, int); static void isp_mboxcmd_qnw(ispsoftc_t *, mbreg_t *, int); static void isp_mboxcmd(ispsoftc_t *, mbreg_t *); static void isp_spi_update(ispsoftc_t *, int); static void isp_setdfltsdparm(ispsoftc_t *); static void isp_setdfltfcparm(ispsoftc_t *, int); static int isp_read_nvram(ispsoftc_t *, int); static int isp_read_nvram_2400(ispsoftc_t *, uint8_t *); static void isp_rdnvram_word(ispsoftc_t *, int, uint16_t *); static void isp_rd_2400_nvram(ispsoftc_t *, uint32_t, uint32_t *); static void isp_parse_nvram_1020(ispsoftc_t *, uint8_t *); static void isp_parse_nvram_1080(ispsoftc_t *, int, uint8_t *); static void isp_parse_nvram_12160(ispsoftc_t *, int, uint8_t *); static void isp_parse_nvram_2100(ispsoftc_t *, uint8_t *); static void isp_parse_nvram_2400(ispsoftc_t *, uint8_t *); /* * Reset Hardware. * * Hit the chip over the head, download new f/w if available and set it running. * * Locking done elsewhere. */ void isp_reset(ispsoftc_t *isp, int do_load_defaults) { mbreg_t mbs; char *buf; uint64_t fwt; uint32_t code_org, val; int loops, i, dodnld = 1; const char *btype = "????"; static const char dcrc[] = "Downloaded RISC Code Checksum Failure"; isp->isp_state = ISP_NILSTATE; if (isp->isp_dead) { isp_shutdown(isp); ISP_DISABLE_INTS(isp); return; } /* * Basic types (SCSI, FibreChannel and PCI or SBus) * have been set in the MD code. We figure out more * here. Possibly more refined types based upon PCI * identification. Chip revision has been gathered. * * After we've fired this chip up, zero out the conf1 register * for SCSI adapters and do other settings for the 2100. */ ISP_DISABLE_INTS(isp); /* * Pick an initial maxcmds value which will be used * to allocate xflist pointer space. It may be changed * later by the firmware. 
*/ if (IS_24XX(isp)) { isp->isp_maxcmds = 4096; } else if (IS_2322(isp)) { isp->isp_maxcmds = 2048; } else if (IS_23XX(isp) || IS_2200(isp)) { isp->isp_maxcmds = 1024; } else { isp->isp_maxcmds = 512; } /* * Set up DMA for the request and response queues. * * We do this now so we can use the request queue * for dma to load firmware from. */ if (ISP_MBOXDMASETUP(isp) != 0) { isp_prt(isp, ISP_LOGERR, "Cannot setup DMA"); return; } /* * Set up default request/response queue in-pointer/out-pointer * register indices. */ if (IS_24XX(isp)) { isp->isp_rqstinrp = BIU2400_REQINP; isp->isp_rqstoutrp = BIU2400_REQOUTP; isp->isp_respinrp = BIU2400_RSPINP; isp->isp_respoutrp = BIU2400_RSPOUTP; } else if (IS_23XX(isp)) { isp->isp_rqstinrp = BIU_REQINP; isp->isp_rqstoutrp = BIU_REQOUTP; isp->isp_respinrp = BIU_RSPINP; isp->isp_respoutrp = BIU_RSPOUTP; } else { isp->isp_rqstinrp = INMAILBOX4; isp->isp_rqstoutrp = OUTMAILBOX4; isp->isp_respinrp = OUTMAILBOX5; isp->isp_respoutrp = INMAILBOX5; } /* * Put the board into PAUSE mode (so we can read the SXP registers * or write FPM/FBM registers). */ if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_CLEAR_HOST_INT); ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_CLEAR_RISC_INT); ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_PAUSE); } else { ISP_WRITE(isp, HCCR, HCCR_CMD_PAUSE); } if (IS_FC(isp)) { switch (isp->isp_type) { case ISP_HA_FC_2100: btype = "2100"; break; case ISP_HA_FC_2200: btype = "2200"; break; case ISP_HA_FC_2300: btype = "2300"; break; case ISP_HA_FC_2312: btype = "2312"; break; case ISP_HA_FC_2322: btype = "2322"; break; case ISP_HA_FC_2400: btype = "2422"; break; case ISP_HA_FC_2500: btype = "2532"; break; default: break; } if (!IS_24XX(isp)) { /* * While we're paused, reset the FPM module and FBM * fifos. */ ISP_WRITE(isp, BIU2100_CSR, BIU2100_FPM0_REGS); ISP_WRITE(isp, FPM_DIAG_CONFIG, FPM_SOFT_RESET); ISP_WRITE(isp, BIU2100_CSR, BIU2100_FB_REGS); ISP_WRITE(isp, FBM_CMD, FBMCMD_FIFO_RESET_ALL); ISP_WRITE(isp, BIU2100_CSR, BIU2100_RISC_REGS); } } else if (IS_1240(isp)) { sdparam *sdp; btype = "1240"; isp->isp_clock = 60; sdp = SDPARAM(isp, 0); sdp->isp_ultramode = 1; sdp = SDPARAM(isp, 1); sdp->isp_ultramode = 1; /* * XXX: Should probably do some bus sensing. 
*/ } else if (IS_ULTRA3(isp)) { sdparam *sdp = isp->isp_param; isp->isp_clock = 100; if (IS_10160(isp)) btype = "10160"; else if (IS_12160(isp)) btype = "12160"; else btype = ""; sdp->isp_lvdmode = 1; if (IS_DUALBUS(isp)) { sdp++; sdp->isp_lvdmode = 1; } } else if (IS_ULTRA2(isp)) { static const char m[] = "bus %d is in %s Mode"; uint16_t l; sdparam *sdp = SDPARAM(isp, 0); isp->isp_clock = 100; if (IS_1280(isp)) btype = "1280"; else if (IS_1080(isp)) btype = "1080"; else btype = ""; l = ISP_READ(isp, SXP_PINS_DIFF) & ISP1080_MODE_MASK; switch (l) { case ISP1080_LVD_MODE: sdp->isp_lvdmode = 1; isp_prt(isp, ISP_LOGCONFIG, m, 0, "LVD"); break; case ISP1080_HVD_MODE: sdp->isp_diffmode = 1; isp_prt(isp, ISP_LOGCONFIG, m, 0, "Differential"); break; case ISP1080_SE_MODE: sdp->isp_ultramode = 1; isp_prt(isp, ISP_LOGCONFIG, m, 0, "Single-Ended"); break; default: isp_prt(isp, ISP_LOGERR, "unknown mode on bus %d (0x%x)", 0, l); break; } if (IS_DUALBUS(isp)) { sdp = SDPARAM(isp, 1); l = ISP_READ(isp, SXP_PINS_DIFF|SXP_BANK1_SELECT); l &= ISP1080_MODE_MASK; switch (l) { case ISP1080_LVD_MODE: sdp->isp_lvdmode = 1; isp_prt(isp, ISP_LOGCONFIG, m, 1, "LVD"); break; case ISP1080_HVD_MODE: sdp->isp_diffmode = 1; isp_prt(isp, ISP_LOGCONFIG, m, 1, "Differential"); break; case ISP1080_SE_MODE: sdp->isp_ultramode = 1; isp_prt(isp, ISP_LOGCONFIG, m, 1, "Single-Ended"); break; default: isp_prt(isp, ISP_LOGERR, "unknown mode on bus %d (0x%x)", 1, l); break; } } } else { sdparam *sdp = SDPARAM(isp, 0); i = ISP_READ(isp, BIU_CONF0) & BIU_CONF0_HW_MASK; switch (i) { default: isp_prt(isp, ISP_LOGALL, "Unknown Chip Type 0x%x", i); /* FALLTHROUGH */ case 1: btype = "1020"; isp->isp_type = ISP_HA_SCSI_1020; isp->isp_clock = 40; break; case 2: /* * Some 1020A chips are Ultra Capable, but don't * run the clock rate up for that unless told to * do so by the Ultra Capable bits being set. */ btype = "1020A"; isp->isp_type = ISP_HA_SCSI_1020A; isp->isp_clock = 40; break; case 3: btype = "1040"; isp->isp_type = ISP_HA_SCSI_1040; isp->isp_clock = 60; break; case 4: btype = "1040A"; isp->isp_type = ISP_HA_SCSI_1040A; isp->isp_clock = 60; break; case 5: btype = "1040B"; isp->isp_type = ISP_HA_SCSI_1040B; isp->isp_clock = 60; break; case 6: btype = "1040C"; isp->isp_type = ISP_HA_SCSI_1040C; isp->isp_clock = 60; break; } /* * Now, while we're at it, gather info about ultra * and/or differential mode. */ if (ISP_READ(isp, SXP_PINS_DIFF) & SXP_PINS_DIFF_MODE) { isp_prt(isp, ISP_LOGCONFIG, "Differential Mode"); sdp->isp_diffmode = 1; } else { sdp->isp_diffmode = 0; } i = ISP_READ(isp, RISC_PSR); if (isp->isp_bustype == ISP_BT_SBUS) { i &= RISC_PSR_SBUS_ULTRA; } else { i &= RISC_PSR_PCI_ULTRA; } if (i != 0) { isp_prt(isp, ISP_LOGCONFIG, "Ultra Mode Capable"); sdp->isp_ultramode = 1; /* * If we're in Ultra Mode, we have to be 60MHz clock- * even for the SBus version. */ isp->isp_clock = 60; } else { sdp->isp_ultramode = 0; /* * Clock is known. Gronk. */ } /* * Machine dependent clock (if set) overrides * our generic determinations. */ if (isp->isp_mdvec->dv_clock) { if (isp->isp_mdvec->dv_clock < isp->isp_clock) { isp->isp_clock = isp->isp_mdvec->dv_clock; } } } /* * Clear instrumentation */ isp->isp_intcnt = isp->isp_intbogus = 0; /* * Do MD specific pre initialization */ ISP_RESET0(isp); /* * Hit the chip over the head with hammer, * and give it a chance to recover. */ if (IS_SCSI(isp)) { ISP_WRITE(isp, BIU_ICR, BIU_ICR_SOFT_RESET); /* * A slight delay... */ ISP_DELAY(100); /* * Clear data && control DMA engines. 
*/ ISP_WRITE(isp, CDMA_CONTROL, DMA_CNTRL_CLEAR_CHAN | DMA_CNTRL_RESET_INT); ISP_WRITE(isp, DDMA_CONTROL, DMA_CNTRL_CLEAR_CHAN | DMA_CNTRL_RESET_INT); } else if (IS_24XX(isp)) { /* * Stop DMA and wait for it to stop. */ ISP_WRITE(isp, BIU2400_CSR, BIU2400_DMA_STOP|(3 << 4)); for (val = loops = 0; loops < 30000; loops++) { ISP_DELAY(10); val = ISP_READ(isp, BIU2400_CSR); if ((val & BIU2400_DMA_ACTIVE) == 0) { break; } } if (val & BIU2400_DMA_ACTIVE) { ISP_RESET0(isp); isp_prt(isp, ISP_LOGERR, "DMA Failed to Stop on Reset"); return; } /* * Hold it in SOFT_RESET and STOP state for 100us. */ ISP_WRITE(isp, BIU2400_CSR, BIU2400_SOFT_RESET|BIU2400_DMA_STOP|(3 << 4)); ISP_DELAY(100); for (loops = 0; loops < 10000; loops++) { ISP_DELAY(5); val = ISP_READ(isp, OUTMAILBOX0); } for (val = loops = 0; loops < 500000; loops ++) { val = ISP_READ(isp, BIU2400_CSR); if ((val & BIU2400_SOFT_RESET) == 0) { break; } } if (val & BIU2400_SOFT_RESET) { ISP_RESET0(isp); isp_prt(isp, ISP_LOGERR, "Failed to come out of reset"); return; } } else { ISP_WRITE(isp, BIU2100_CSR, BIU2100_SOFT_RESET); /* * A slight delay... */ ISP_DELAY(100); /* * Clear data && control DMA engines. */ ISP_WRITE(isp, CDMA2100_CONTROL, DMA_CNTRL2100_CLEAR_CHAN | DMA_CNTRL2100_RESET_INT); ISP_WRITE(isp, TDMA2100_CONTROL, DMA_CNTRL2100_CLEAR_CHAN | DMA_CNTRL2100_RESET_INT); ISP_WRITE(isp, RDMA2100_CONTROL, DMA_CNTRL2100_CLEAR_CHAN | DMA_CNTRL2100_RESET_INT); } /* * Wait for ISP to be ready to go... */ loops = MBOX_DELAY_COUNT; for (;;) { if (IS_SCSI(isp)) { if (!(ISP_READ(isp, BIU_ICR) & BIU_ICR_SOFT_RESET)) { break; } } else if (IS_24XX(isp)) { if (ISP_READ(isp, OUTMAILBOX0) == 0) { break; } } else { if (!(ISP_READ(isp, BIU2100_CSR) & BIU2100_SOFT_RESET)) break; } ISP_DELAY(100); if (--loops < 0) { ISP_DUMPREGS(isp, "chip reset timed out"); ISP_RESET0(isp); return; } } /* * After we've fired this chip up, zero out the conf1 register * for SCSI adapters and other settings for the 2100. */ if (IS_SCSI(isp)) { ISP_WRITE(isp, BIU_CONF1, 0); } else if (!IS_24XX(isp)) { ISP_WRITE(isp, BIU2100_CSR, 0); } /* * Reset RISC Processor */ if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_RESET); ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_RELEASE); ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_CLEAR_RESET); } else { ISP_WRITE(isp, HCCR, HCCR_CMD_RESET); ISP_DELAY(100); ISP_WRITE(isp, BIU_SEMA, 0); } /* * Post-RISC Reset stuff. */ if (IS_24XX(isp)) { for (val = loops = 0; loops < 5000000; loops++) { ISP_DELAY(5); val = ISP_READ(isp, OUTMAILBOX0); if (val == 0) { break; } } if (val != 0) { ISP_RESET0(isp); isp_prt(isp, ISP_LOGERR, "reset didn't clear"); return; } } else if (IS_SCSI(isp)) { uint16_t tmp = isp->isp_mdvec->dv_conf1; /* * Busted FIFO. Turn off all but burst enables. 
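The DMA-stop and reset waits above all share one shape: re-read a register a bounded number of times, delaying between reads, and give up if the bits never clear. A compact sketch of that bounded poll, with fake register and delay callbacks standing in for ISP_READ/ISP_DELAY:

#include <stdint.h>
#include <stdio.h>

static int
poll_clear(uint32_t (*read_reg)(void), void (*delay_us)(int),
    uint32_t mask, int loops, int us)
{
	while (loops-- > 0) {
		if ((read_reg() & mask) == 0)
			return (0);	/* bits cleared in time */
		delay_us(us);
	}
	return (-1);			/* timed out; caller bails out */
}

/* Fake hardware for the sketch: the busy bit clears on the 3rd read. */
static int reads;
static uint32_t fake_read(void) { return (++reads < 3 ? 0x1 : 0x0); }
static void fake_delay(int us) { (void)us; }

int
main(void)
{
	printf("poll %s\n",
	    poll_clear(fake_read, fake_delay, 0x1, 30000, 10) == 0 ?
	    "ok" : "timed out");
	return (0);
}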
*/ if (isp->isp_type == ISP_HA_SCSI_1040A) { tmp &= BIU_BURST_ENABLE; } ISP_SETBITS(isp, BIU_CONF1, tmp); if (tmp & BIU_BURST_ENABLE) { ISP_SETBITS(isp, CDMA_CONF, DMA_ENABLE_BURST); ISP_SETBITS(isp, DDMA_CONF, DMA_ENABLE_BURST); } if (SDPARAM(isp, 0)->isp_ptisp) { if (SDPARAM(isp, 0)->isp_ultramode) { while (ISP_READ(isp, RISC_MTR) != 0x1313) { ISP_WRITE(isp, RISC_MTR, 0x1313); ISP_WRITE(isp, HCCR, HCCR_CMD_STEP); } } else { ISP_WRITE(isp, RISC_MTR, 0x1212); } /* * PTI specific register */ ISP_WRITE(isp, RISC_EMB, DUAL_BANK); } else { ISP_WRITE(isp, RISC_MTR, 0x1212); } ISP_WRITE(isp, HCCR, HCCR_CMD_RELEASE); } else { ISP_WRITE(isp, RISC_MTR2100, 0x1212); if (IS_2200(isp) || IS_23XX(isp)) { ISP_WRITE(isp, HCCR, HCCR_2X00_DISABLE_PARITY_PAUSE); } ISP_WRITE(isp, HCCR, HCCR_CMD_RELEASE); } ISP_WRITE(isp, isp->isp_rqstinrp, 0); ISP_WRITE(isp, isp->isp_rqstoutrp, 0); ISP_WRITE(isp, isp->isp_respinrp, 0); ISP_WRITE(isp, isp->isp_respoutrp, 0); if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_PRI_REQINP, 0); ISP_WRITE(isp, BIU2400_PRI_REQOUTP, 0); ISP_WRITE(isp, BIU2400_ATIO_RSPINP, 0); ISP_WRITE(isp, BIU2400_ATIO_RSPOUTP, 0); } /* * Do MD specific post initialization */ ISP_RESET1(isp); /* * Wait for everything to finish firing up. * * Avoid doing this on early 2312s because you can generate a PCI * parity error (chip breakage). */ if (IS_2312(isp) && isp->isp_revision < 2) { ISP_DELAY(100); } else { loops = MBOX_DELAY_COUNT; while (ISP_READ(isp, OUTMAILBOX0) == MBOX_BUSY) { ISP_DELAY(100); if (--loops < 0) { ISP_RESET0(isp); isp_prt(isp, ISP_LOGERR, "MBOX_BUSY never cleared on reset"); return; } } } /* * Up until this point we've done everything by just reading or * setting registers. From this point on we rely on at least *some* * kind of firmware running in the card. */ /* * Do some sanity checking by running a NOP command. * If it succeeds, the ROM firmware is now running. */ MBSINIT(&mbs, MBOX_NO_OP, MBLOGALL, 0); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, "NOP command failed (%x)", mbs.param[0]); ISP_RESET0(isp); return; } /* * Do some operational tests */ if (IS_SCSI(isp) || IS_24XX(isp)) { static const uint16_t patterns[MAX_MAILBOX] = { 0x0000, 0xdead, 0xbeef, 0xffff, 0xa5a5, 0x5a5a, 0x7f7f, 0x7ff7, 0x3421, 0xabcd, 0xdcba, 0xfeef, 0xbead, 0xdebe, 0x2222, 0x3333, 0x5555, 0x6666, 0x7777, 0xaaaa, 0xffff, 0xdddd, 0x9999, 0x1fbc, 0x6666, 0x6677, 0x1122, 0x33ff, 0x0000, 0x0001, 0x1000, 0x1010, }; int nmbox = ISP_NMBOX(isp); if (IS_SCSI(isp)) nmbox = 6; MBSINIT(&mbs, MBOX_MAILBOX_REG_TEST, MBLOGALL, 0); for (i = 1; i < nmbox; i++) { mbs.param[i] = patterns[i]; } isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { ISP_RESET0(isp); return; } for (i = 1; i < nmbox; i++) { if (mbs.param[i] != patterns[i]) { ISP_RESET0(isp); isp_prt(isp, ISP_LOGERR, "Register Test Failed at Register %d: should have 0x%04x but got 0x%04x", i, patterns[i], mbs.param[i]); return; } } } /* * Download new Firmware, unless requested not to do so. * This is made slightly trickier in some cases where the * firmware of the ROM revision is newer than the revision * compiled into the driver. So, where we used to compare * versions of our f/w and the ROM f/w, now we just see * whether we have f/w at all and whether a config flag * has disabled our download. 
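The mailbox register test above writes a known pattern into each incoming mailbox, lets the ROM firmware echo it back, and fails on the first mismatch; mailbox 0 carries the command/status word, so the scan starts at 1. A miniature of that echo-and-verify, with a callback simulating the firmware round trip:

#include <stdint.h>
#include <stdio.h>

#define NMBOX	6

static int
mbox_pattern_test(uint16_t *regs, const uint16_t *patterns, int nmbox,
    void (*fw_echo)(uint16_t *, int))
{
	int i;

	for (i = 1; i < nmbox; i++)
		regs[i] = patterns[i];	/* "mailbox in" */
	fw_echo(regs, nmbox);		/* firmware round trip */
	for (i = 1; i < nmbox; i++)
		if (regs[i] != patterns[i])
			return (i);	/* first failing register */
	return (0);
}

static void
good_echo(uint16_t *regs, int nmbox)
{
	(void)regs; (void)nmbox;	/* healthy hardware echoes unchanged */
}

int
main(void)
{
	static const uint16_t patterns[NMBOX] =
	    { 0x0000, 0xdead, 0xbeef, 0xffff, 0xa5a5, 0x5a5a };
	uint16_t regs[NMBOX] = { 0 };
	int bad = mbox_pattern_test(regs, patterns, NMBOX, good_echo);

	if (bad)
		printf("failed at register %d\n", bad);
	else
		printf("register test ok\n");
	return (0);
}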
*/ if ((isp->isp_mdvec->dv_ispfw == NULL) || (isp->isp_confopts & ISP_CFG_NORELOAD)) { dodnld = 0; } if (IS_24XX(isp)) { code_org = ISP_CODE_ORG_2400; } else if (IS_23XX(isp)) { code_org = ISP_CODE_ORG_2300; } else { code_org = ISP_CODE_ORG; } if (dodnld && IS_24XX(isp)) { const uint32_t *ptr = isp->isp_mdvec->dv_ispfw; int wordload; /* * Keep loading until we run out of f/w. */ code_org = ptr[2]; /* 1st load address is our start addr */ wordload = 0; for (;;) { uint32_t la, wi, wl; isp_prt(isp, ISP_LOGDEBUG0, "load 0x%x words of code at load address 0x%x", ptr[3], ptr[2]); wi = 0; la = ptr[2]; wl = ptr[3]; while (wi < ptr[3]) { uint32_t *cp; uint32_t nw; nw = ISP_QUEUE_SIZE(RQUEST_QUEUE_LEN(isp)) >> 2; if (nw > wl) { nw = wl; } cp = isp->isp_rquest; for (i = 0; i < nw; i++) { ISP_IOXPUT_32(isp, ptr[wi++], &cp[i]); wl--; } MEMORYBARRIER(isp, SYNC_REQUEST, 0, ISP_QUEUE_SIZE(RQUEST_QUEUE_LEN(isp)), -1); again: MBSINIT(&mbs, 0, MBLOGALL, 0); if (la < 0x10000 && nw < 0x10000) { mbs.param[0] = MBOX_LOAD_RISC_RAM_2100; mbs.param[1] = la; mbs.param[2] = DMA_WD1(isp->isp_rquest_dma); mbs.param[3] = DMA_WD0(isp->isp_rquest_dma); mbs.param[4] = nw; mbs.param[6] = DMA_WD3(isp->isp_rquest_dma); mbs.param[7] = DMA_WD2(isp->isp_rquest_dma); isp_prt(isp, ISP_LOGDEBUG0, "LOAD RISC RAM 2100 %u words at load address 0x%x", nw, la); } else if (wordload) { union { const uint32_t *cp; uint32_t *np; } ucd; ucd.cp = (const uint32_t *)cp; mbs.param[0] = MBOX_WRITE_RAM_WORD_EXTENDED; mbs.param[1] = la; mbs.param[2] = (*ucd.np); mbs.param[3] = (*ucd.np) >> 16; mbs.param[8] = la >> 16; isp->isp_mbxwrk0 = nw - 1; isp->isp_mbxworkp = ucd.np+1; isp->isp_mbxwrk1 = (la + 1); isp->isp_mbxwrk8 = (la + 1) >> 16; isp_prt(isp, ISP_LOGDEBUG0, "WRITE RAM WORD EXTENDED %u words at load address 0x%x", nw, la); } else { mbs.param[0] = MBOX_LOAD_RISC_RAM; mbs.param[1] = la; mbs.param[2] = DMA_WD1(isp->isp_rquest_dma); mbs.param[3] = DMA_WD0(isp->isp_rquest_dma); mbs.param[4] = nw >> 16; mbs.param[5] = nw; mbs.param[6] = DMA_WD3(isp->isp_rquest_dma); mbs.param[7] = DMA_WD2(isp->isp_rquest_dma); mbs.param[8] = la >> 16; isp_prt(isp, ISP_LOGDEBUG0, "LOAD RISC RAM %u words at load address 0x%x", nw, la); } isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { if (mbs.param[0] == MBOX_HOST_INTERFACE_ERROR) { isp_prt(isp, ISP_LOGERR, "switching to word load"); wordload = 1; goto again; } isp_prt(isp, ISP_LOGERR, "F/W Risc Ram Load Failed"); ISP_RESET0(isp); return; } la += nw; } if (ptr[1] == 0) { break; } ptr += ptr[3]; } isp->isp_loaded_fw = 1; } else if (dodnld && IS_23XX(isp)) { const uint16_t *ptr = isp->isp_mdvec->dv_ispfw; uint16_t wi, wl, segno; uint32_t la; la = code_org; segno = 0; for (;;) { uint32_t nxtaddr; isp_prt(isp, ISP_LOGDEBUG0, "load 0x%x words of code at load address 0x%x", ptr[3], la); wi = 0; wl = ptr[3]; while (wi < ptr[3]) { uint16_t *cp; uint16_t nw; nw = ISP_QUEUE_SIZE(RQUEST_QUEUE_LEN(isp)) >> 1; if (nw > wl) { nw = wl; } if (nw > (1 << 15)) { nw = 1 << 15; } cp = isp->isp_rquest; for (i = 0; i < nw; i++) { ISP_IOXPUT_16(isp, ptr[wi++], &cp[i]); wl--; } MEMORYBARRIER(isp, SYNC_REQUEST, 0, ISP_QUEUE_SIZE(RQUEST_QUEUE_LEN(isp)), -1); MBSINIT(&mbs, 0, MBLOGALL, 0); if (la < 0x10000) { mbs.param[0] = MBOX_LOAD_RISC_RAM_2100; mbs.param[1] = la; mbs.param[2] = DMA_WD1(isp->isp_rquest_dma); mbs.param[3] = DMA_WD0(isp->isp_rquest_dma); mbs.param[4] = nw; mbs.param[6] = DMA_WD3(isp->isp_rquest_dma); mbs.param[7] = DMA_WD2(isp->isp_rquest_dma); isp_prt(isp, ISP_LOGDEBUG1, "LOAD RISC RAM 2100 %u words at 
load address 0x%x\n", nw, la); } else { mbs.param[0] = MBOX_LOAD_RISC_RAM; mbs.param[1] = la; mbs.param[2] = DMA_WD1(isp->isp_rquest_dma); mbs.param[3] = DMA_WD0(isp->isp_rquest_dma); mbs.param[4] = nw; mbs.param[6] = DMA_WD3(isp->isp_rquest_dma); mbs.param[7] = DMA_WD2(isp->isp_rquest_dma); mbs.param[8] = la >> 16; isp_prt(isp, ISP_LOGDEBUG1, "LOAD RISC RAM %u words at load address 0x%x\n", nw, la); } isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, "F/W Risc Ram Load Failed"); ISP_RESET0(isp); return; } la += nw; } if (!IS_2322(isp)) { break; } if (++segno == 3) { break; } /* * If we're a 2322, the firmware actually comes in * three chunks. We loaded the first at the code_org * address. The other two chunks, which follow right * after each other in memory here, get loaded at * addresses specfied at offset 0x9..0xB. */ nxtaddr = ptr[3]; ptr = &ptr[nxtaddr]; la = ptr[5] | ((ptr[4] & 0x3f) << 16); } isp->isp_loaded_fw = 1; } else if (dodnld) { union { const uint16_t *cp; uint16_t *np; } ucd; ucd.cp = isp->isp_mdvec->dv_ispfw; isp->isp_mbxworkp = &ucd.np[1]; isp->isp_mbxwrk0 = ucd.np[3] - 1; isp->isp_mbxwrk1 = code_org + 1; MBSINIT(&mbs, MBOX_WRITE_RAM_WORD, MBLOGNONE, 0); mbs.param[1] = code_org; mbs.param[2] = ucd.np[0]; isp_prt(isp, ISP_LOGDEBUG1, "WRITE RAM %u words at load address 0x%x", ucd.np[3], code_org); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, "F/W download failed at word %d", isp->isp_mbxwrk1 - code_org); ISP_RESET0(isp); return; } } else { isp->isp_loaded_fw = 0; isp_prt(isp, ISP_LOGDEBUG2, "skipping f/w download"); } /* * If we loaded firmware, verify its checksum */ if (isp->isp_loaded_fw) { MBSINIT(&mbs, MBOX_VERIFY_CHECKSUM, MBLOGNONE, 0); mbs.param[0] = MBOX_VERIFY_CHECKSUM; if (IS_24XX(isp)) { mbs.param[1] = code_org >> 16; mbs.param[2] = code_org; } else { mbs.param[1] = code_org; } isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, dcrc); ISP_RESET0(isp); return; } } /* * Now start it rolling. * * If we didn't actually download f/w, * we still need to (re)start it. */ MBSINIT(&mbs, MBOX_EXEC_FIRMWARE, MBLOGALL, 5000000); if (IS_24XX(isp)) { mbs.param[1] = code_org >> 16; mbs.param[2] = code_org; if (isp->isp_loaded_fw) { mbs.param[3] = 0; } else { mbs.param[3] = 1; } if (IS_25XX(isp)) { mbs.ibits |= 0x10; } } else if (IS_2322(isp)) { mbs.param[1] = code_org; if (isp->isp_loaded_fw) { mbs.param[2] = 0; } else { mbs.param[2] = 1; } } else { mbs.param[1] = code_org; } isp_mboxcmd(isp, &mbs); if (IS_2322(isp) || IS_24XX(isp)) { if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { ISP_RESET0(isp); return; } } if (IS_SCSI(isp)) { /* * Set CLOCK RATE, but only if asked to. */ if (isp->isp_clock) { MBSINIT(&mbs, MBOX_SET_CLOCK_RATE, MBLOGALL, 0); mbs.param[1] = isp->isp_clock; isp_mboxcmd(isp, &mbs); /* we will try not to care if this fails */ } } /* * Ask the chip for the current firmware version. * This should prove that the new firmware is working. */ MBSINIT(&mbs, MBOX_ABOUT_FIRMWARE, MBLOGALL, 0); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { ISP_RESET0(isp); return; } /* * The SBus firmware that we are using apparently does not return * major, minor, micro revisions in the mailbox registers, which * is really, really, annoying. 
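The RISC RAM loops above stream the firmware image through the request queue in chunks: copy up to one queue's worth of words into the bounce buffer, issue one load command per chunk, advance the load address, and abort on the first failure. A sketch of that loop, with the mailbox command reduced to a callback:

#include <stdint.h>
#include <stdio.h>

static int
download(const uint32_t *image, uint32_t nwords, uint32_t la,
    uint32_t chunk, int (*issue_load)(const uint32_t *, uint32_t, uint32_t))
{
	uint32_t wi = 0, nw;

	while (wi < nwords) {
		nw = (nwords - wi < chunk) ? nwords - wi : chunk;
		if (issue_load(image + wi, nw, la) != 0)
			return (-1);	/* one failed chunk kills the load */
		wi += nw;
		la += nw;
	}
	return (0);
}

/* Stand-in for the LOAD RISC RAM mailbox command. */
static int
print_load(const uint32_t *words, uint32_t nw, uint32_t la)
{
	(void)words;
	printf("load %u words at 0x%x\n", nw, la);
	return (0);
}

int
main(void)
{
	uint32_t image[10] = { 0 };

	/* Emits three chunks: 4 + 4 + 2 words. */
	return (download(image, 10, 0x100000, 4, print_load));
}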
*/ if (ISP_SBUS_SUPPORTED && isp->isp_bustype == ISP_BT_SBUS) { if (dodnld) { #ifdef ISP_TARGET_MODE isp->isp_fwrev[0] = 7; isp->isp_fwrev[1] = 55; #else isp->isp_fwrev[0] = 1; isp->isp_fwrev[1] = 37; #endif isp->isp_fwrev[2] = 0; } } else { isp->isp_fwrev[0] = mbs.param[1]; isp->isp_fwrev[1] = mbs.param[2]; isp->isp_fwrev[2] = mbs.param[3]; } if (IS_FC(isp)) { /* * We do not believe firmware attributes for 2100 code less * than 1.17.0, unless it's the firmware we specifically * are loading. * * Note that all 22XX and later f/w is greater than 1.X.0. */ if ((ISP_FW_OLDER_THAN(isp, 1, 17, 1))) { #ifdef USE_SMALLER_2100_FIRMWARE isp->isp_fwattr = ISP_FW_ATTR_SCCLUN; #else isp->isp_fwattr = 0; #endif } else { isp->isp_fwattr = mbs.param[6]; } if (IS_24XX(isp)) { isp->isp_fwattr |= ((uint64_t) mbs.param[15]) << 16; if (isp->isp_fwattr & ISP2400_FW_ATTR_EXTNDED) { isp->isp_fwattr |= (((uint64_t) mbs.param[16]) << 32) | (((uint64_t) mbs.param[17]) << 48); } } } else if (IS_SCSI(isp)) { #ifndef ISP_TARGET_MODE isp->isp_fwattr = ISP_FW_ATTR_TMODE; #else isp->isp_fwattr = 0; #endif } isp_prt(isp, ISP_LOGCONFIG, "Board Type %s, Chip Revision 0x%x, %s F/W Revision %d.%d.%d", btype, isp->isp_revision, dodnld? "loaded" : "resident", isp->isp_fwrev[0], isp->isp_fwrev[1], isp->isp_fwrev[2]); fwt = isp->isp_fwattr; if (IS_24XX(isp)) { buf = FCPARAM(isp, 0)->isp_scratch; ISP_SNPRINTF(buf, ISP_FC_SCRLEN, "Attributes:"); if (fwt & ISP2400_FW_ATTR_CLASS2) { fwt ^=ISP2400_FW_ATTR_CLASS2; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s Class2", buf); } if (fwt & ISP2400_FW_ATTR_IP) { fwt ^=ISP2400_FW_ATTR_IP; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s IP", buf); } if (fwt & ISP2400_FW_ATTR_MULTIID) { fwt ^=ISP2400_FW_ATTR_MULTIID; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s MultiID", buf); } if (fwt & ISP2400_FW_ATTR_SB2) { fwt ^=ISP2400_FW_ATTR_SB2; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s SB2", buf); } if (fwt & ISP2400_FW_ATTR_T10CRC) { fwt ^=ISP2400_FW_ATTR_T10CRC; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s T10CRC", buf); } if (fwt & ISP2400_FW_ATTR_VI) { fwt ^=ISP2400_FW_ATTR_VI; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s VI", buf); } if (fwt & ISP2400_FW_ATTR_MQ) { fwt ^=ISP2400_FW_ATTR_MQ; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s MQ", buf); } if (fwt & ISP2400_FW_ATTR_MSIX) { fwt ^=ISP2400_FW_ATTR_MSIX; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s MSIX", buf); } if (fwt & ISP2400_FW_ATTR_FCOE) { fwt ^=ISP2400_FW_ATTR_FCOE; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s FCOE", buf); } if (fwt & ISP2400_FW_ATTR_VP0) { fwt ^= ISP2400_FW_ATTR_VP0; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s VP0_Decoupling", buf); } if (fwt & ISP2400_FW_ATTR_EXPFW) { fwt ^= ISP2400_FW_ATTR_EXPFW; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s (Experimental)", buf); } if (fwt & ISP2400_FW_ATTR_HOTFW) { fwt ^= ISP2400_FW_ATTR_HOTFW; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s HotFW", buf); } fwt &= ~ISP2400_FW_ATTR_EXTNDED; if (fwt & ISP2400_FW_ATTR_EXTVP) { fwt ^= ISP2400_FW_ATTR_EXTVP; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s ExtVP", buf); } if (fwt & ISP2400_FW_ATTR_VN2VN) { fwt ^= ISP2400_FW_ATTR_VN2VN; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s VN2VN", buf); } if (fwt & ISP2400_FW_ATTR_EXMOFF) { fwt ^= ISP2400_FW_ATTR_EXMOFF; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s EXMOFF", buf); } if (fwt & ISP2400_FW_ATTR_NPMOFF) { fwt ^= ISP2400_FW_ATTR_NPMOFF; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s NPMOFF", buf); } 
if (fwt & ISP2400_FW_ATTR_DIFCHOP) { fwt ^= ISP2400_FW_ATTR_DIFCHOP; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s DIFCHOP", buf); } if (fwt & ISP2400_FW_ATTR_SRIOV) { fwt ^= ISP2400_FW_ATTR_SRIOV; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s SRIOV", buf); } if (fwt & ISP2400_FW_ATTR_ASICTMP) { fwt ^= ISP2400_FW_ATTR_ASICTMP; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s ASICTMP", buf); } if (fwt & ISP2400_FW_ATTR_ATIOMQ) { fwt ^= ISP2400_FW_ATTR_ATIOMQ; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s ATIOMQ", buf); } if (fwt) { ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s (unknown 0x%08x%08x)", buf, (uint32_t) (fwt >> 32), (uint32_t) fwt); } isp_prt(isp, ISP_LOGCONFIG, "%s", buf); } else if (IS_FC(isp)) { buf = FCPARAM(isp, 0)->isp_scratch; ISP_SNPRINTF(buf, ISP_FC_SCRLEN, "Attributes:"); if (fwt & ISP_FW_ATTR_TMODE) { fwt ^=ISP_FW_ATTR_TMODE; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s TargetMode", buf); } if (fwt & ISP_FW_ATTR_SCCLUN) { fwt ^=ISP_FW_ATTR_SCCLUN; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s SCC-Lun", buf); } if (fwt & ISP_FW_ATTR_FABRIC) { fwt ^=ISP_FW_ATTR_FABRIC; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s Fabric", buf); } if (fwt & ISP_FW_ATTR_CLASS2) { fwt ^=ISP_FW_ATTR_CLASS2; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s Class2", buf); } if (fwt & ISP_FW_ATTR_FCTAPE) { fwt ^=ISP_FW_ATTR_FCTAPE; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s FC-Tape", buf); } if (fwt & ISP_FW_ATTR_IP) { fwt ^=ISP_FW_ATTR_IP; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s IP", buf); } if (fwt & ISP_FW_ATTR_VI) { fwt ^=ISP_FW_ATTR_VI; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s VI", buf); } if (fwt & ISP_FW_ATTR_VI_SOLARIS) { fwt ^=ISP_FW_ATTR_VI_SOLARIS; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s VI_SOLARIS", buf); } if (fwt & ISP_FW_ATTR_2KLOGINS) { fwt ^=ISP_FW_ATTR_2KLOGINS; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s 2K-Login", buf); } if (fwt != 0) { ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s (unknown 0x%08x%08x)", buf, (uint32_t) (fwt >> 32), (uint32_t) fwt); } isp_prt(isp, ISP_LOGCONFIG, "%s", buf); } if (IS_24XX(isp)) { MBSINIT(&mbs, MBOX_GET_RESOURCE_COUNT, MBLOGALL, 0); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { ISP_RESET0(isp); return; } if (isp->isp_maxcmds >= mbs.param[3]) { isp->isp_maxcmds = mbs.param[3]; } } else { MBSINIT(&mbs, MBOX_GET_FIRMWARE_STATUS, MBLOGALL, 0); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { ISP_RESET0(isp); return; } if (isp->isp_maxcmds >= mbs.param[2]) { isp->isp_maxcmds = mbs.param[2]; } } isp_prt(isp, ISP_LOGCONFIG, "%d max I/O command limit set", isp->isp_maxcmds); /* * If we don't have Multi-ID f/w loaded, we need to restrict channels to one. * Only make this check for non-SCSI cards (I'm not sure firmware attributes * work for them). */ if (IS_FC(isp) && isp->isp_nchan > 1) { if (!ISP_CAP_MULTI_ID(isp)) { isp_prt(isp, ISP_LOGWARN, "non-MULTIID f/w loaded, " "only can enable 1 of %d channels", isp->isp_nchan); isp->isp_nchan = 1; } else if (!ISP_CAP_VP0(isp)) { isp_prt(isp, ISP_LOGWARN, "We can not use MULTIID " "feature properly without VP0_Decoupling"); isp->isp_nchan = 1; } } for (i = 0; i < isp->isp_nchan; i++) { isp_fw_state(isp, i); } if (isp->isp_dead) { isp_shutdown(isp); ISP_DISABLE_INTS(isp); return; } isp->isp_state = ISP_RESETSTATE; /* * Okay- now that we have new firmware running, we now (re)set our * notion of how many luns we support. 
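The attribute printout above clears each recognized bit as it appends that bit's name, so whatever remains at the end can be reported as unknown. The long if-chains could equally be driven from a table; a sketch of that alternative, with flag values invented for the example:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct flagname {
	uint64_t	 bit;
	const char	*name;
};

static void
decode_attrs(uint64_t fwt, const struct flagname *tab, int ntab,
    char *buf, size_t len)
{
	int i;

	for (i = 0; i < ntab; i++) {
		if ((fwt & tab[i].bit) == 0)
			continue;
		fwt ^= tab[i].bit;	/* clear-as-you-print */
		snprintf(buf + strlen(buf), len - strlen(buf), " %s",
		    tab[i].name);
	}
	if (fwt != 0)
		snprintf(buf + strlen(buf), len - strlen(buf),
		    " (unknown 0x%jx)", (uintmax_t)fwt);
}

int
main(void)
{
	static const struct flagname tab[] = {
		{ 0x1, "Class2" }, { 0x2, "IP" }, { 0x4, "MultiID" },
	};
	char buf[128] = "Attributes:";

	decode_attrs(0x1 | 0x4 | 0x100, tab, 3, buf, sizeof(buf));
	printf("%s\n", buf);	/* Attributes: Class2 MultiID (unknown 0x100) */
	return (0);
}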
This is somewhat tricky because * if we haven't loaded firmware, we sometimes do not have an easy way * of knowing how many luns we support. * * Expanded lun firmware gives you 32 luns for SCSI cards and * 16384 luns for Fibre Channel cards. * * It turns out that even for QLogic 2100s with ROM 1.10 and above * we do get a firmware attributes word returned in mailbox register 6. * * Because the lun is in a different position in the Request Queue * Entry structure for Fibre Channel with expanded lun firmware, we * can only support one lun (lun zero) when we don't know what kind * of firmware we're running. */ if (IS_SCSI(isp)) { if (dodnld) { if (IS_ULTRA2(isp) || IS_ULTRA3(isp)) { isp->isp_maxluns = 32; } else { isp->isp_maxluns = 8; } } else { isp->isp_maxluns = 8; } } else { if (ISP_CAP_SCCFW(isp)) { isp->isp_maxluns = 0; /* No limit -- 2/8 bytes */ } else { isp->isp_maxluns = 16; } } /* * We get some default values established. As a side * effect, NVRAM is read here (unless overriden by * a configuration flag). */ if (do_load_defaults) { if (IS_SCSI(isp)) { isp_setdfltsdparm(isp); } else { for (i = 0; i < isp->isp_nchan; i++) { isp_setdfltfcparm(isp, i); } } } } /* * Initialize Parameters of Hardware to a known state. * * Locks are held before coming here. */ void isp_init(ispsoftc_t *isp) { if (IS_FC(isp)) { if (IS_24XX(isp)) { isp_fibre_init_2400(isp); } else { isp_fibre_init(isp); } } else { isp_scsi_init(isp); } GET_NANOTIME(&isp->isp_init_time); } static void isp_scsi_init(ispsoftc_t *isp) { sdparam *sdp_chan0, *sdp_chan1; mbreg_t mbs; isp->isp_state = ISP_INITSTATE; sdp_chan0 = SDPARAM(isp, 0); sdp_chan1 = sdp_chan0; if (IS_DUALBUS(isp)) { sdp_chan1 = SDPARAM(isp, 1); } /* First do overall per-card settings. */ /* * If we have fast memory timing enabled, turn it on. */ if (sdp_chan0->isp_fast_mttr) { ISP_WRITE(isp, RISC_MTR, 0x1313); } /* * Set Retry Delay and Count. * You set both channels at the same time. */ MBSINIT(&mbs, MBOX_SET_RETRY_COUNT, MBLOGALL, 0); mbs.param[1] = sdp_chan0->isp_retry_count; mbs.param[2] = sdp_chan0->isp_retry_delay; mbs.param[6] = sdp_chan1->isp_retry_count; mbs.param[7] = sdp_chan1->isp_retry_delay; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } /* * Set ASYNC DATA SETUP time. This is very important. */ MBSINIT(&mbs, MBOX_SET_ASYNC_DATA_SETUP_TIME, MBLOGALL, 0); mbs.param[1] = sdp_chan0->isp_async_data_setup; mbs.param[2] = sdp_chan1->isp_async_data_setup; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } /* * Set ACTIVE Negation State. */ MBSINIT(&mbs, MBOX_SET_ACT_NEG_STATE, MBLOGNONE, 0); mbs.param[1] = (sdp_chan0->isp_req_ack_active_neg << 4) | (sdp_chan0->isp_data_line_active_neg << 5); mbs.param[2] = (sdp_chan1->isp_req_ack_active_neg << 4) | (sdp_chan1->isp_data_line_active_neg << 5); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, "failed to set active negation state (%d,%d), (%d,%d)", sdp_chan0->isp_req_ack_active_neg, sdp_chan0->isp_data_line_active_neg, sdp_chan1->isp_req_ack_active_neg, sdp_chan1->isp_data_line_active_neg); /* * But don't return. */ } /* * Set the Tag Aging limit */ MBSINIT(&mbs, MBOX_SET_TAG_AGE_LIMIT, MBLOGALL, 0); mbs.param[1] = sdp_chan0->isp_tag_aging; mbs.param[2] = sdp_chan1->isp_tag_aging; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, "failed to set tag age limit (%d,%d)", sdp_chan0->isp_tag_aging, sdp_chan1->isp_tag_aging); return; } /* * Set selection timeout. 
*/ MBSINIT(&mbs, MBOX_SET_SELECT_TIMEOUT, MBLOGALL, 0); mbs.param[1] = sdp_chan0->isp_selection_timeout; mbs.param[2] = sdp_chan1->isp_selection_timeout; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } /* now do per-channel settings */ isp_scsi_channel_init(isp, 0); if (IS_DUALBUS(isp)) isp_scsi_channel_init(isp, 1); /* * Now enable request/response queues */ if (IS_ULTRA2(isp) || IS_1240(isp)) { MBSINIT(&mbs, MBOX_INIT_RES_QUEUE_A64, MBLOGALL, 0); mbs.param[1] = RESULT_QUEUE_LEN(isp); mbs.param[2] = DMA_WD1(isp->isp_result_dma); mbs.param[3] = DMA_WD0(isp->isp_result_dma); mbs.param[4] = 0; mbs.param[6] = DMA_WD3(isp->isp_result_dma); mbs.param[7] = DMA_WD2(isp->isp_result_dma); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } isp->isp_residx = isp->isp_resodx = mbs.param[5]; MBSINIT(&mbs, MBOX_INIT_REQ_QUEUE_A64, MBLOGALL, 0); mbs.param[1] = RQUEST_QUEUE_LEN(isp); mbs.param[2] = DMA_WD1(isp->isp_rquest_dma); mbs.param[3] = DMA_WD0(isp->isp_rquest_dma); mbs.param[5] = 0; mbs.param[6] = DMA_WD3(isp->isp_result_dma); mbs.param[7] = DMA_WD2(isp->isp_result_dma); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } isp->isp_reqidx = isp->isp_reqodx = mbs.param[4]; } else { MBSINIT(&mbs, MBOX_INIT_RES_QUEUE, MBLOGALL, 0); mbs.param[1] = RESULT_QUEUE_LEN(isp); mbs.param[2] = DMA_WD1(isp->isp_result_dma); mbs.param[3] = DMA_WD0(isp->isp_result_dma); mbs.param[4] = 0; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } isp->isp_residx = isp->isp_resodx = mbs.param[5]; MBSINIT(&mbs, MBOX_INIT_REQ_QUEUE, MBLOGALL, 0); mbs.param[1] = RQUEST_QUEUE_LEN(isp); mbs.param[2] = DMA_WD1(isp->isp_rquest_dma); mbs.param[3] = DMA_WD0(isp->isp_rquest_dma); mbs.param[5] = 0; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } isp->isp_reqidx = isp->isp_reqodx = mbs.param[4]; } /* * Turn on LVD transitions for ULTRA2 or better and other features * * Now that we have 32 bit handles, don't do any fast posting * any more. For Ultra2/Ultra3 cards, we can turn on 32 bit RIO * operation or use fast posting. To be conservative, we'll only * do this for Ultra3 cards now because the other cards are so * rare for this author to find and test with. */ MBSINIT(&mbs, MBOX_SET_FW_FEATURES, MBLOGALL, 0); if (IS_ULTRA2(isp)) mbs.param[1] |= FW_FEATURE_LVD_NOTIFY; #ifdef ISP_NO_RIO if (IS_ULTRA3(isp)) mbs.param[1] |= FW_FEATURE_FAST_POST; #else if (IS_ULTRA3(isp)) mbs.param[1] |= FW_FEATURE_RIO_32BIT; #endif if (mbs.param[1] != 0) { uint16_t sfeat = mbs.param[1]; isp_mboxcmd(isp, &mbs); if (mbs.param[0] == MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGINFO, "Enabled FW features (0x%x)", sfeat); } } isp->isp_state = ISP_RUNSTATE; } static void isp_scsi_channel_init(ispsoftc_t *isp, int chan) { sdparam *sdp; mbreg_t mbs; int tgt; sdp = SDPARAM(isp, chan); /* * Set (possibly new) Initiator ID. */ MBSINIT(&mbs, MBOX_SET_INIT_SCSI_ID, MBLOGALL, 0); mbs.param[1] = (chan << 7) | sdp->isp_initiator_id; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } isp_prt(isp, ISP_LOGINFO, "Chan %d Initiator ID is %d", chan, sdp->isp_initiator_id); /* * Set current per-target parameters to an initial safe minimum. 
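Several mailbox parameters above multiplex fields into a single 16-bit word: (chan << 7) | id for SET INIT SCSI ID, and (chan << 15) | (tgt << 8) | lun for the per-target and per-device commands that follow. A sketch of the bus/target/lun encoding:

#include <assert.h>
#include <stdint.h>

static uint16_t
pack_btl(int chan, int tgt, int lun)
{
	/* bus in the top bit, target in bits 8..14, lun in the low byte */
	return ((uint16_t)((chan << 15) | (tgt << 8) | lun));
}

int
main(void)
{
	assert(pack_btl(1, 3, 2) == (uint16_t)(0x8000 | 0x0300 | 0x02));
	return (0);
}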
*/ for (tgt = 0; tgt < MAX_TARGETS; tgt++) { int lun; uint16_t sdf; if (sdp->isp_devparam[tgt].dev_enable == 0) { continue; } #ifndef ISP_TARGET_MODE sdf = sdp->isp_devparam[tgt].goal_flags; sdf &= DPARM_SAFE_DFLT; /* * It is not quite clear when this changed over so that * we could force narrow and async for 1000/1020 cards, * but assume that this is only the case for loaded * firmware. */ if (isp->isp_loaded_fw) { sdf |= DPARM_NARROW | DPARM_ASYNC; } #else /* * The !$*!)$!$)* f/w uses the same index into some * internal table to decide how to respond to negotiations, * so if we've said "let's be safe" for ID X, and ID X * selects *us*, the negotiations will fall back to 'safe' * (as in narrow/async). What the f/w *should* do is * use the initiator id settings to decide how to respond. */ sdp->isp_devparam[tgt].goal_flags = sdf = DPARM_DEFAULT; #endif MBSINIT(&mbs, MBOX_SET_TARGET_PARAMS, MBLOGNONE, 0); mbs.param[1] = (chan << 15) | (tgt << 8); mbs.param[2] = sdf; if ((sdf & DPARM_SYNC) == 0) { mbs.param[3] = 0; } else { mbs.param[3] = (sdp->isp_devparam[tgt].goal_offset << 8) | (sdp->isp_devparam[tgt].goal_period); } isp_prt(isp, ISP_LOGDEBUG0, "Initial Settings bus%d tgt%d flags 0x%x off 0x%x per 0x%x", chan, tgt, mbs.param[2], mbs.param[3] >> 8, mbs.param[3] & 0xff); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { sdf = DPARM_SAFE_DFLT; MBSINIT(&mbs, MBOX_SET_TARGET_PARAMS, MBLOGALL, 0); mbs.param[1] = (tgt << 8) | (chan << 15); mbs.param[2] = sdf; mbs.param[3] = 0; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { continue; } } /* * We don't update any information directly from the f/w * because we need to run at least one command to cause a * new state to be latched up. So, we just assume that we * converge to the values we just had set. * * Ensure that we don't believe tagged queuing is enabled yet. * It turns out that sometimes the ISP just ignores our * attempts to set parameters for devices that it hasn't * seen yet. */ sdp->isp_devparam[tgt].actv_flags = sdf & ~DPARM_TQING; for (lun = 0; lun < (int) isp->isp_maxluns; lun++) { MBSINIT(&mbs, MBOX_SET_DEV_QUEUE_PARAMS, MBLOGALL, 0); mbs.param[1] = (chan << 15) | (tgt << 8) | lun; mbs.param[2] = sdp->isp_max_queue_depth; mbs.param[3] = sdp->isp_devparam[tgt].exc_throttle; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { break; } } } for (tgt = 0; tgt < MAX_TARGETS; tgt++) { if (sdp->isp_devparam[tgt].dev_refresh) { sdp->sendmarker = 1; sdp->update = 1; break; } } } /* * Fibre Channel specific initialization. */ static void isp_fibre_init(ispsoftc_t *isp) { fcparam *fcp; isp_icb_t local, *icbp = &local; mbreg_t mbs; /* * We only support one channel on non-24XX cards */ fcp = FCPARAM(isp, 0); if (fcp->role == ISP_ROLE_NONE) return; isp->isp_state = ISP_INITSTATE; ISP_MEMZERO(icbp, sizeof (*icbp)); icbp->icb_version = ICB_VERSION1; icbp->icb_fwoptions = fcp->isp_fwoptions; /* * Firmware Options are either retrieved from NVRAM or * are patched elsewhere. We check them for sanity here * and make changes based on board revision, but otherwise * let others decide policy. */ /* * If this is a 2100 < revision 5, we have to turn off FAIRNESS. */ if (IS_2100(isp) && isp->isp_revision < 5) { icbp->icb_fwoptions &= ~ICBOPT_FAIRNESS; } /* * We have to use FULL LOGIN even though it resets the loop too much * because otherwise port database entries don't get updated after * a LIP- this is a known f/w bug for 2100 f/w less than 1.17.0.
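 *
 * (ISP_FW_NEWER_THAN(isp, maj, min, mic) compares the running
 * firmware's revision triple against maj.min.mic, so the check
 * below reads as "f/w not newer than 1.17.0" and keeps FULL LOGIN
 * set for the buggy pre-1.17.0 firmware described above.)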
*/ if (!ISP_FW_NEWER_THAN(isp, 1, 17, 0)) { icbp->icb_fwoptions |= ICBOPT_FULL_LOGIN; } /* * Insist on Port Database Update Async notifications */ icbp->icb_fwoptions |= ICBOPT_PDBCHANGE_AE; /* * Make sure that target role reflects into fwoptions. */ if (fcp->role & ISP_ROLE_TARGET) { icbp->icb_fwoptions |= ICBOPT_TGT_ENABLE; } else { icbp->icb_fwoptions &= ~ICBOPT_TGT_ENABLE; } if (fcp->role & ISP_ROLE_INITIATOR) { icbp->icb_fwoptions &= ~ICBOPT_INI_DISABLE; } else { icbp->icb_fwoptions |= ICBOPT_INI_DISABLE; } icbp->icb_maxfrmlen = DEFAULT_FRAMESIZE(isp); if (icbp->icb_maxfrmlen < ICB_MIN_FRMLEN || icbp->icb_maxfrmlen > ICB_MAX_FRMLEN) { isp_prt(isp, ISP_LOGERR, "bad frame length (%d) from NVRAM- using %d", DEFAULT_FRAMESIZE(isp), ICB_DFLT_FRMLEN); icbp->icb_maxfrmlen = ICB_DFLT_FRMLEN; } icbp->icb_maxalloc = fcp->isp_maxalloc; if (icbp->icb_maxalloc < 1) { isp_prt(isp, ISP_LOGERR, "bad maximum allocation (%d)- using 16", fcp->isp_maxalloc); icbp->icb_maxalloc = 16; } icbp->icb_execthrottle = DEFAULT_EXEC_THROTTLE(isp); if (icbp->icb_execthrottle < 1) { isp_prt(isp, ISP_LOGERR, "bad execution throttle of %d- using %d", DEFAULT_EXEC_THROTTLE(isp), ICB_DFLT_THROTTLE); icbp->icb_execthrottle = ICB_DFLT_THROTTLE; } icbp->icb_retry_delay = fcp->isp_retry_delay; icbp->icb_retry_count = fcp->isp_retry_count; if (fcp->isp_loopid < LOCAL_LOOP_LIM) { icbp->icb_hardaddr = fcp->isp_loopid; if (isp->isp_confopts & ISP_CFG_OWNLOOPID) icbp->icb_fwoptions |= ICBOPT_HARD_ADDRESS; else icbp->icb_fwoptions |= ICBOPT_PREV_ADDRESS; } /* * Right now we just set extended options to prefer point-to-point * over loop based upon some soft config options. * * NB: for the 2300, ICBOPT_EXTENDED is required. */ if (IS_2100(isp)) { /* * We can't have Fast Posting any more- we now * have 32 bit handles. */ icbp->icb_fwoptions &= ~ICBOPT_FAST_POST; } else if (IS_2200(isp) || IS_23XX(isp)) { icbp->icb_fwoptions |= ICBOPT_EXTENDED; icbp->icb_xfwoptions = fcp->isp_xfwoptions; if (ISP_CAP_FCTAPE(isp)) { if (isp->isp_confopts & ISP_CFG_NOFCTAPE) icbp->icb_xfwoptions &= ~ICBXOPT_FCTAPE; if (isp->isp_confopts & ISP_CFG_FCTAPE) icbp->icb_xfwoptions |= ICBXOPT_FCTAPE; if (icbp->icb_xfwoptions & ICBXOPT_FCTAPE) { icbp->icb_fwoptions &= ~ICBOPT_FULL_LOGIN; /* per documents */ icbp->icb_xfwoptions |= ICBXOPT_FCTAPE_CCQ|ICBXOPT_FCTAPE_CONFIRM; FCPARAM(isp, 0)->fctape_enabled = 1; } else { FCPARAM(isp, 0)->fctape_enabled = 0; } } else { icbp->icb_xfwoptions &= ~ICBXOPT_FCTAPE; FCPARAM(isp, 0)->fctape_enabled = 0; } /* * Prefer or force Point-To-Point instead of Loop? */ switch (isp->isp_confopts & ISP_CFG_PORT_PREF) { case ISP_CFG_NPORT: icbp->icb_xfwoptions &= ~ICBXOPT_TOPO_MASK; icbp->icb_xfwoptions |= ICBXOPT_PTP_2_LOOP; break; case ISP_CFG_NPORT_ONLY: icbp->icb_xfwoptions &= ~ICBXOPT_TOPO_MASK; icbp->icb_xfwoptions |= ICBXOPT_PTP_ONLY; break; case ISP_CFG_LPORT_ONLY: icbp->icb_xfwoptions &= ~ICBXOPT_TOPO_MASK; icbp->icb_xfwoptions |= ICBXOPT_LOOP_ONLY; break; default: /* * Let NVRAM settings define it if they are sane */ switch (icbp->icb_xfwoptions & ICBXOPT_TOPO_MASK) { case ICBXOPT_PTP_2_LOOP: case ICBXOPT_PTP_ONLY: case ICBXOPT_LOOP_ONLY: case ICBXOPT_LOOP_2_PTP: break; default: icbp->icb_xfwoptions &= ~ICBXOPT_TOPO_MASK; icbp->icb_xfwoptions |= ICBXOPT_LOOP_2_PTP; } break; } if (IS_2200(isp)) { /* * We can't have Fast Posting any more- we now * have 32 bit handles. * * RIO seemed to have too much breakage. * * Just opt for safety.
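 *
 * (The topology selection above uses the usual clear-then-set
 * idiom for a multi-bit field, e.g. to force N-port only:
 *
 *	icbp->icb_xfwoptions &= ~ICBXOPT_TOPO_MASK;
 *	icbp->icb_xfwoptions |= ICBXOPT_PTP_ONLY;
 *
 * clearing the whole field first so that stale NVRAM topology
 * bits cannot combine with the new setting.)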
*/ icbp->icb_xfwoptions &= ~ICBXOPT_RIO_16BIT; icbp->icb_fwoptions &= ~ICBOPT_FAST_POST; } else { /* * QLogic recommends that FAST Posting be turned * off for 23XX cards and instead allow the HBA * to write response queue entries and interrupt * after a delay (ZIO). */ icbp->icb_fwoptions &= ~ICBOPT_FAST_POST; if ((fcp->isp_xfwoptions & ICBXOPT_TIMER_MASK) == ICBXOPT_ZIO) { icbp->icb_xfwoptions |= ICBXOPT_ZIO; icbp->icb_idelaytimer = 10; } icbp->icb_zfwoptions = fcp->isp_zfwoptions; if (isp->isp_confopts & ISP_CFG_ONEGB) { icbp->icb_zfwoptions &= ~ICBZOPT_RATE_MASK; icbp->icb_zfwoptions |= ICBZOPT_RATE_ONEGB; } else if (isp->isp_confopts & ISP_CFG_TWOGB) { icbp->icb_zfwoptions &= ~ICBZOPT_RATE_MASK; icbp->icb_zfwoptions |= ICBZOPT_RATE_TWOGB; } else { switch (icbp->icb_zfwoptions & ICBZOPT_RATE_MASK) { case ICBZOPT_RATE_ONEGB: case ICBZOPT_RATE_TWOGB: case ICBZOPT_RATE_AUTO: break; default: icbp->icb_zfwoptions &= ~ICBZOPT_RATE_MASK; icbp->icb_zfwoptions |= ICBZOPT_RATE_AUTO; break; } } } } /* * For 22XX > 2.1.26 && 23XX, set some options. */ if (ISP_FW_NEWER_THAN(isp, 2, 26, 0)) { MBSINIT(&mbs, MBOX_SET_FIRMWARE_OPTIONS, MBLOGALL, 0); mbs.param[1] = IFCOPT1_DISF7SWTCH|IFCOPT1_LIPASYNC|IFCOPT1_LIPF8; mbs.param[2] = 0; mbs.param[3] = 0; if (ISP_FW_NEWER_THAN(isp, 3, 16, 0)) { mbs.param[1] |= IFCOPT1_EQFQASYNC|IFCOPT1_CTIO_RETRY; if (fcp->role & ISP_ROLE_TARGET) { if (ISP_FW_NEWER_THAN(isp, 3, 25, 0)) { mbs.param[1] |= IFCOPT1_ENAPURE; } mbs.param[3] = IFCOPT3_NOPRLI; } } isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } } icbp->icb_logintime = ICB_LOGIN_TOV; #ifdef ISP_TARGET_MODE if (IS_23XX(isp) && (icbp->icb_fwoptions & ICBOPT_TGT_ENABLE)) { icbp->icb_lunenables = 0xffff; icbp->icb_ccnt = DFLT_CMND_CNT; icbp->icb_icnt = DFLT_INOT_CNT; icbp->icb_lunetimeout = ICB_LUN_ENABLE_TOV; } #endif if (fcp->isp_wwnn && fcp->isp_wwpn) { icbp->icb_fwoptions |= ICBOPT_BOTH_WWNS; MAKE_NODE_NAME_FROM_WWN(icbp->icb_nodename, fcp->isp_wwnn); MAKE_NODE_NAME_FROM_WWN(icbp->icb_portname, fcp->isp_wwpn); isp_prt(isp, ISP_LOGDEBUG1, "Setting ICB Node 0x%08x%08x Port 0x%08x%08x", ((uint32_t) (fcp->isp_wwnn >> 32)), ((uint32_t) (fcp->isp_wwnn)), ((uint32_t) (fcp->isp_wwpn >> 32)), ((uint32_t) (fcp->isp_wwpn))); } else if (fcp->isp_wwpn) { icbp->icb_fwoptions &= ~ICBOPT_BOTH_WWNS; MAKE_NODE_NAME_FROM_WWN(icbp->icb_portname, fcp->isp_wwpn); isp_prt(isp, ISP_LOGDEBUG1, "Setting ICB Port 0x%08x%08x", ((uint32_t) (fcp->isp_wwpn >> 32)), ((uint32_t) (fcp->isp_wwpn))); } else { isp_prt(isp, ISP_LOGERR, "No valid WWNs to use"); return; } icbp->icb_rqstqlen = RQUEST_QUEUE_LEN(isp); if (icbp->icb_rqstqlen < 1) { isp_prt(isp, ISP_LOGERR, "bad request queue length"); } icbp->icb_rsltqlen = RESULT_QUEUE_LEN(isp); if (icbp->icb_rsltqlen < 1) { isp_prt(isp, ISP_LOGERR, "bad result queue length"); } icbp->icb_rqstaddr[RQRSP_ADDR0015] = DMA_WD0(isp->isp_rquest_dma); icbp->icb_rqstaddr[RQRSP_ADDR1631] = DMA_WD1(isp->isp_rquest_dma); icbp->icb_rqstaddr[RQRSP_ADDR3247] = DMA_WD2(isp->isp_rquest_dma); icbp->icb_rqstaddr[RQRSP_ADDR4863] = DMA_WD3(isp->isp_rquest_dma); icbp->icb_respaddr[RQRSP_ADDR0015] = DMA_WD0(isp->isp_result_dma); icbp->icb_respaddr[RQRSP_ADDR1631] = DMA_WD1(isp->isp_result_dma); icbp->icb_respaddr[RQRSP_ADDR3247] = DMA_WD2(isp->isp_result_dma); icbp->icb_respaddr[RQRSP_ADDR4863] = DMA_WD3(isp->isp_result_dma); if (FC_SCRATCH_ACQUIRE(isp, 0)) { isp_prt(isp, ISP_LOGERR, sacq); return; } isp_prt(isp, ISP_LOGDEBUG0, "isp_fibre_init: fwopt 0x%x xfwopt 0x%x zfwopt 0x%x", icbp->icb_fwoptions, 
icbp->icb_xfwoptions, icbp->icb_zfwoptions); isp_put_icb(isp, icbp, (isp_icb_t *)fcp->isp_scratch); /* * Init the firmware */ MBSINIT(&mbs, MBOX_INIT_FIRMWARE, MBLOGALL, 30000000); mbs.param[1] = 0; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); mbs.logval = MBLOGALL; isp_prt(isp, ISP_LOGDEBUG0, "INIT F/W from %p (%08x%08x)", fcp->isp_scratch, (uint32_t) ((uint64_t)fcp->isp_scdma >> 32), (uint32_t) fcp->isp_scdma); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, sizeof (*icbp), 0); isp_mboxcmd(isp, &mbs); FC_SCRATCH_RELEASE(isp, 0); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_print_bytes(isp, "isp_fibre_init", sizeof (*icbp), icbp); return; } isp->isp_reqidx = 0; isp->isp_reqodx = 0; isp->isp_residx = 0; isp->isp_resodx = 0; /* * Whatever happens, we're now committed to being here. */ isp->isp_state = ISP_RUNSTATE; } static void isp_fibre_init_2400(ispsoftc_t *isp) { fcparam *fcp; isp_icb_2400_t local, *icbp = &local; mbreg_t mbs; int chan; /* * Check to see whether all channels have *some* kind of role */ for (chan = 0; chan < isp->isp_nchan; chan++) { fcp = FCPARAM(isp, chan); if (fcp->role != ISP_ROLE_NONE) { break; } } if (chan == isp->isp_nchan) { isp_prt(isp, ISP_LOG_WARN1, "all %d channels with role 'none'", chan); return; } isp->isp_state = ISP_INITSTATE; /* * Start with channel 0. */ fcp = FCPARAM(isp, 0); /* * Turn on LIP F8 async event (1) */ MBSINIT(&mbs, MBOX_SET_FIRMWARE_OPTIONS, MBLOGALL, 0); mbs.param[1] = 1; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } ISP_MEMZERO(icbp, sizeof (*icbp)); icbp->icb_fwoptions1 = fcp->isp_fwoptions; icbp->icb_fwoptions2 = fcp->isp_xfwoptions; icbp->icb_fwoptions3 = fcp->isp_zfwoptions; if (isp->isp_nchan > 1 && ISP_CAP_VP0(isp)) { icbp->icb_fwoptions1 &= ~ICB2400_OPT1_INI_DISABLE; icbp->icb_fwoptions1 |= ICB2400_OPT1_TGT_ENABLE; } else { if (fcp->role & ISP_ROLE_TARGET) icbp->icb_fwoptions1 |= ICB2400_OPT1_TGT_ENABLE; else icbp->icb_fwoptions1 &= ~ICB2400_OPT1_TGT_ENABLE; if (fcp->role & ISP_ROLE_INITIATOR) icbp->icb_fwoptions1 &= ~ICB2400_OPT1_INI_DISABLE; else icbp->icb_fwoptions1 |= ICB2400_OPT1_INI_DISABLE; } icbp->icb_version = ICB_VERSION1; icbp->icb_maxfrmlen = DEFAULT_FRAMESIZE(isp); if (icbp->icb_maxfrmlen < ICB_MIN_FRMLEN || icbp->icb_maxfrmlen > ICB_MAX_FRMLEN) { isp_prt(isp, ISP_LOGERR, "bad frame length (%d) from NVRAM- using %d", DEFAULT_FRAMESIZE(isp), ICB_DFLT_FRMLEN); icbp->icb_maxfrmlen = ICB_DFLT_FRMLEN; } icbp->icb_execthrottle = DEFAULT_EXEC_THROTTLE(isp); if (icbp->icb_execthrottle < 1) { isp_prt(isp, ISP_LOGERR, "bad execution throttle of %d- using %d", DEFAULT_EXEC_THROTTLE(isp), ICB_DFLT_THROTTLE); icbp->icb_execthrottle = ICB_DFLT_THROTTLE; } /* * Set target exchange count. Take half if we are supporting both roles. 
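 *
 * For example, with isp_maxcmds at a (hypothetical) 1024 and both
 * roles enabled, the target side is given 512 exchanges and the
 * remainder is left for initiator traffic.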
*/ if (icbp->icb_fwoptions1 & ICB2400_OPT1_TGT_ENABLE) { icbp->icb_xchgcnt = isp->isp_maxcmds; if ((icbp->icb_fwoptions1 & ICB2400_OPT1_INI_DISABLE) == 0) icbp->icb_xchgcnt >>= 1; } if (fcp->isp_loopid < LOCAL_LOOP_LIM) { icbp->icb_hardaddr = fcp->isp_loopid; if (isp->isp_confopts & ISP_CFG_OWNLOOPID) icbp->icb_fwoptions1 |= ICB2400_OPT1_HARD_ADDRESS; else icbp->icb_fwoptions1 |= ICB2400_OPT1_PREV_ADDRESS; } if (isp->isp_confopts & ISP_CFG_NOFCTAPE) { icbp->icb_fwoptions2 &= ~ICB2400_OPT2_FCTAPE; } if (isp->isp_confopts & ISP_CFG_FCTAPE) { icbp->icb_fwoptions2 |= ICB2400_OPT2_FCTAPE; } for (chan = 0; chan < isp->isp_nchan; chan++) { if (icbp->icb_fwoptions2 & ICB2400_OPT2_FCTAPE) FCPARAM(isp, chan)->fctape_enabled = 1; else FCPARAM(isp, chan)->fctape_enabled = 0; } switch (isp->isp_confopts & ISP_CFG_PORT_PREF) { case ISP_CFG_NPORT_ONLY: icbp->icb_fwoptions2 &= ~ICB2400_OPT2_TOPO_MASK; icbp->icb_fwoptions2 |= ICB2400_OPT2_PTP_ONLY; break; case ISP_CFG_LPORT_ONLY: icbp->icb_fwoptions2 &= ~ICB2400_OPT2_TOPO_MASK; icbp->icb_fwoptions2 |= ICB2400_OPT2_LOOP_ONLY; break; default: /* ISP_CFG_PTP_2_LOOP not available in 24XX/25XX */ icbp->icb_fwoptions2 &= ~ICB2400_OPT2_TOPO_MASK; icbp->icb_fwoptions2 |= ICB2400_OPT2_LOOP_2_PTP; break; } switch (icbp->icb_fwoptions2 & ICB2400_OPT2_TIMER_MASK) { case ICB2400_OPT2_ZIO: case ICB2400_OPT2_ZIO1: icbp->icb_idelaytimer = 0; break; case 0: break; default: isp_prt(isp, ISP_LOGWARN, "bad value %x in fwopt2 timer field", icbp->icb_fwoptions2 & ICB2400_OPT2_TIMER_MASK); icbp->icb_fwoptions2 &= ~ICB2400_OPT2_TIMER_MASK; break; } if ((icbp->icb_fwoptions3 & ICB2400_OPT3_RSPSZ_MASK) == 0) { icbp->icb_fwoptions3 |= ICB2400_OPT3_RSPSZ_24; } icbp->icb_fwoptions3 &= ~ICB2400_OPT3_RATE_AUTO; if (isp->isp_confopts & ISP_CFG_ONEGB) { icbp->icb_fwoptions3 |= ICB2400_OPT3_RATE_ONEGB; } else if (isp->isp_confopts & ISP_CFG_TWOGB) { icbp->icb_fwoptions3 |= ICB2400_OPT3_RATE_TWOGB; } else if (isp->isp_confopts & ISP_CFG_FOURGB) { icbp->icb_fwoptions3 |= ICB2400_OPT3_RATE_FOURGB; } else if (IS_25XX(isp) && (isp->isp_confopts & ISP_CFG_EIGHTGB)) { icbp->icb_fwoptions3 |= ICB2400_OPT3_RATE_EIGHTGB; } else { icbp->icb_fwoptions3 |= ICB2400_OPT3_RATE_AUTO; } icbp->icb_logintime = ICB_LOGIN_TOV; if (fcp->isp_wwnn && fcp->isp_wwpn) { icbp->icb_fwoptions1 |= ICB2400_OPT1_BOTH_WWNS; MAKE_NODE_NAME_FROM_WWN(icbp->icb_portname, fcp->isp_wwpn); MAKE_NODE_NAME_FROM_WWN(icbp->icb_nodename, fcp->isp_wwnn); isp_prt(isp, ISP_LOGDEBUG1, "Setting ICB Node 0x%08x%08x Port 0x%08x%08x", ((uint32_t) (fcp->isp_wwnn >> 32)), ((uint32_t) (fcp->isp_wwnn)), ((uint32_t) (fcp->isp_wwpn >> 32)), ((uint32_t) (fcp->isp_wwpn))); } else if (fcp->isp_wwpn) { icbp->icb_fwoptions1 &= ~ICB2400_OPT1_BOTH_WWNS; MAKE_NODE_NAME_FROM_WWN(icbp->icb_portname, fcp->isp_wwpn); isp_prt(isp, ISP_LOGDEBUG1, "Setting ICB Node to be same as Port 0x%08x%08x", ((uint32_t) (fcp->isp_wwpn >> 32)), ((uint32_t) (fcp->isp_wwpn))); } else { isp_prt(isp, ISP_LOGERR, "No valid WWNs to use"); return; } icbp->icb_retry_count = fcp->isp_retry_count; icbp->icb_rqstqlen = RQUEST_QUEUE_LEN(isp); if (icbp->icb_rqstqlen < 8) { isp_prt(isp, ISP_LOGERR, "bad request queue length %d", icbp->icb_rqstqlen); return; } icbp->icb_rsltqlen = RESULT_QUEUE_LEN(isp); if (icbp->icb_rsltqlen < 8) { isp_prt(isp, ISP_LOGERR, "bad result queue length %d", icbp->icb_rsltqlen); return; } icbp->icb_rqstaddr[RQRSP_ADDR0015] = DMA_WD0(isp->isp_rquest_dma); icbp->icb_rqstaddr[RQRSP_ADDR1631] = DMA_WD1(isp->isp_rquest_dma); icbp->icb_rqstaddr[RQRSP_ADDR3247] = 
DMA_WD2(isp->isp_rquest_dma); icbp->icb_rqstaddr[RQRSP_ADDR4863] = DMA_WD3(isp->isp_rquest_dma); icbp->icb_respaddr[RQRSP_ADDR0015] = DMA_WD0(isp->isp_result_dma); icbp->icb_respaddr[RQRSP_ADDR1631] = DMA_WD1(isp->isp_result_dma); icbp->icb_respaddr[RQRSP_ADDR3247] = DMA_WD2(isp->isp_result_dma); icbp->icb_respaddr[RQRSP_ADDR4863] = DMA_WD3(isp->isp_result_dma); #ifdef ISP_TARGET_MODE /* unconditionally set up the ATIO queue if we support target mode */ icbp->icb_atioqlen = RESULT_QUEUE_LEN(isp); if (icbp->icb_atioqlen < 8) { isp_prt(isp, ISP_LOGERR, "bad ATIO queue length %d", icbp->icb_atioqlen); return; } icbp->icb_atioqaddr[RQRSP_ADDR0015] = DMA_WD0(isp->isp_atioq_dma); icbp->icb_atioqaddr[RQRSP_ADDR1631] = DMA_WD1(isp->isp_atioq_dma); icbp->icb_atioqaddr[RQRSP_ADDR3247] = DMA_WD2(isp->isp_atioq_dma); icbp->icb_atioqaddr[RQRSP_ADDR4863] = DMA_WD3(isp->isp_atioq_dma); isp_prt(isp, ISP_LOGDEBUG0, "isp_fibre_init_2400: atioq %04x%04x%04x%04x", DMA_WD3(isp->isp_atioq_dma), DMA_WD2(isp->isp_atioq_dma), DMA_WD1(isp->isp_atioq_dma), DMA_WD0(isp->isp_atioq_dma)); #endif isp_prt(isp, ISP_LOGDEBUG0, "isp_fibre_init_2400: fwopt1 0x%x fwopt2 0x%x fwopt3 0x%x", icbp->icb_fwoptions1, icbp->icb_fwoptions2, icbp->icb_fwoptions3); isp_prt(isp, ISP_LOGDEBUG0, "isp_fibre_init_2400: rqst %04x%04x%04x%04x rsp %04x%04x%04x%04x", DMA_WD3(isp->isp_rquest_dma), DMA_WD2(isp->isp_rquest_dma), DMA_WD1(isp->isp_rquest_dma), DMA_WD0(isp->isp_rquest_dma), DMA_WD3(isp->isp_result_dma), DMA_WD2(isp->isp_result_dma), DMA_WD1(isp->isp_result_dma), DMA_WD0(isp->isp_result_dma)); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "isp_fibre_init_2400", sizeof (*icbp), icbp); } if (FC_SCRATCH_ACQUIRE(isp, 0)) { isp_prt(isp, ISP_LOGERR, sacq); return; } ISP_MEMZERO(fcp->isp_scratch, ISP_FC_SCRLEN); isp_put_icb_2400(isp, icbp, fcp->isp_scratch); /* * Now fill in information about any additional channels */ if (isp->isp_nchan > 1) { isp_icb_2400_vpinfo_t vpinfo, *vdst; vp_port_info_t pi, *pdst; size_t amt = 0; uint8_t *off; - vpinfo.vp_global_options = 0; + vpinfo.vp_global_options = ICB2400_VPGOPT_GEN_RIDA; if (ISP_CAP_VP0(isp)) { vpinfo.vp_global_options |= ICB2400_VPGOPT_VP0_DECOUPLE; vpinfo.vp_count = isp->isp_nchan; chan = 0; } else { vpinfo.vp_count = isp->isp_nchan - 1; chan = 1; } off = fcp->isp_scratch; off += ICB2400_VPINFO_OFF; vdst = (isp_icb_2400_vpinfo_t *) off; isp_put_icb_2400_vpinfo(isp, &vpinfo, vdst); amt = ICB2400_VPINFO_OFF + sizeof (isp_icb_2400_vpinfo_t); for (; chan < isp->isp_nchan; chan++) { fcparam *fcp2; ISP_MEMZERO(&pi, sizeof (pi)); fcp2 = FCPARAM(isp, chan); if (fcp2->role != ISP_ROLE_NONE) { - pi.vp_port_options = ICB2400_VPOPT_ENABLED; + pi.vp_port_options = ICB2400_VPOPT_ENABLED | + ICB2400_VPOPT_ENA_SNSLOGIN; if (fcp2->role & ISP_ROLE_INITIATOR) pi.vp_port_options |= ICB2400_VPOPT_INI_ENABLE; if ((fcp2->role & ISP_ROLE_TARGET) == 0) pi.vp_port_options |= ICB2400_VPOPT_TGT_DISABLE; } if (fcp2->isp_loopid < LOCAL_LOOP_LIM) { pi.vp_port_loopid = fcp2->isp_loopid; if (isp->isp_confopts & ISP_CFG_OWNLOOPID) pi.vp_port_options |= ICB2400_VPOPT_HARD_ADDRESS; else pi.vp_port_options |= ICB2400_VPOPT_PREV_ADDRESS; } MAKE_NODE_NAME_FROM_WWN(pi.vp_port_portname, fcp2->isp_wwpn); MAKE_NODE_NAME_FROM_WWN(pi.vp_port_nodename, fcp2->isp_wwnn); off = fcp->isp_scratch; if (ISP_CAP_VP0(isp)) off += ICB2400_VPINFO_PORT_OFF(chan); else off += ICB2400_VPINFO_PORT_OFF(chan - 1); pdst = (vp_port_info_t *) off; isp_put_vp_port_info(isp, &pi, pdst); amt += ICB2400_VPOPT_WRITE_SIZE; } if (isp->isp_dblev & 
ISP_LOGDEBUG1) { isp_print_bytes(isp, "isp_fibre_init_2400", amt - ICB2400_VPINFO_OFF, (char *)fcp->isp_scratch + ICB2400_VPINFO_OFF); } } /* * Init the firmware */ MBSINIT(&mbs, 0, MBLOGALL, 30000000); if (isp->isp_nchan > 1) { mbs.param[0] = MBOX_INIT_FIRMWARE_MULTI_ID; } else { mbs.param[0] = MBOX_INIT_FIRMWARE; } mbs.param[1] = 0; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); isp_prt(isp, ISP_LOGDEBUG0, "INIT F/W from %04x%04x%04x%04x", DMA_WD3(fcp->isp_scdma), DMA_WD2(fcp->isp_scdma), DMA_WD1(fcp->isp_scdma), DMA_WD0(fcp->isp_scdma)); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, sizeof (*icbp), 0); isp_mboxcmd(isp, &mbs); FC_SCRATCH_RELEASE(isp, 0); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } isp->isp_reqidx = 0; isp->isp_reqodx = 0; isp->isp_residx = 0; isp->isp_resodx = 0; isp->isp_atioodx = 0; /* * Whatever happens, we're now committed to being here. */ isp->isp_state = ISP_RUNSTATE; } static void isp_mark_portdb(ispsoftc_t *isp, int chan, int disposition) { fcparam *fcp = FCPARAM(isp, chan); fcportdb_t *lp; int i; for (i = 0; i < MAX_FC_TARG; i++) { lp = &fcp->portdb[i]; switch (lp->state) { case FC_PORTDB_STATE_PROBATIONAL: case FC_PORTDB_STATE_DEAD: case FC_PORTDB_STATE_CHANGED: case FC_PORTDB_STATE_PENDING_VALID: case FC_PORTDB_STATE_VALID: if (disposition > 0) lp->state = FC_PORTDB_STATE_PROBATIONAL; else { lp->state = FC_PORTDB_STATE_NIL; isp_async(isp, ISPASYNC_DEV_GONE, chan, lp); } break; case FC_PORTDB_STATE_ZOMBIE: break; case FC_PORTDB_STATE_NIL: case FC_PORTDB_STATE_NEW: default: ISP_MEMZERO(lp, sizeof(*lp)); lp->state = FC_PORTDB_STATE_NIL; break; } } } /* * Perform an IOCB PLOGI or LOGO via EXECUTE IOCB A64 for 24XX cards * or via FABRIC LOGIN/FABRIC LOGOUT for other cards. 
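 *
 * A typical call through this path is the implicit logout issued
 * from isp_pdb_sync() below (a sketch; the return value is 0 or an
 * MBOX_*-style error code):
 *
 *	(void) isp_plogx(isp, chan, lp->handle, lp->portid,
 *	    PLOGX_FLG_CMD_LOGO | PLOGX_FLG_IMPLICIT |
 *	    PLOGX_FLG_FREE_NPHDL, 0);
 *
 * The last argument says whether the caller already holds the FC
 * scratch area (non-zero) or wants it acquired and released here
 * (zero).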
*/ static int isp_plogx(ispsoftc_t *isp, int chan, uint16_t handle, uint32_t portid, int flags, int gs) { mbreg_t mbs; uint8_t q[QENTRY_LEN]; isp_plogx_t *plp; fcparam *fcp; uint8_t *scp; uint32_t sst, parm1; int rval, lev; const char *msg; char buf[64]; if (!IS_24XX(isp)) { int action = flags & PLOGX_FLG_CMD_MASK; if (action == PLOGX_FLG_CMD_PLOGI) { return (isp_port_login(isp, handle, portid)); } else if (action == PLOGX_FLG_CMD_LOGO) { return (isp_port_logout(isp, handle, portid)); } else { return (MBOX_INVALID_COMMAND); } } ISP_MEMZERO(q, QENTRY_LEN); plp = (isp_plogx_t *) q; plp->plogx_header.rqs_entry_count = 1; plp->plogx_header.rqs_entry_type = RQSTYPE_LOGIN; plp->plogx_handle = 0xffffffff; plp->plogx_nphdl = handle; plp->plogx_vphdl = chan; plp->plogx_portlo = portid; plp->plogx_rspsz_porthi = (portid >> 16) & 0xff; plp->plogx_flags = flags; if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "IOCB LOGX", QENTRY_LEN, plp); } if (gs == 0) { if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); return (-1); } } fcp = FCPARAM(isp, chan); scp = fcp->isp_scratch; isp_put_plogx(isp, plp, (isp_plogx_t *) scp); MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 500000); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, QENTRY_LEN, chan); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { rval = mbs.param[0]; goto out; } MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); scp += QENTRY_LEN; isp_get_plogx(isp, (isp_plogx_t *) scp, plp); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "IOCB LOGX response", QENTRY_LEN, plp); } if (plp->plogx_status == PLOGX_STATUS_OK) { rval = 0; goto out; } else if (plp->plogx_status != PLOGX_STATUS_IOCBERR) { isp_prt(isp, ISP_LOGWARN, "status 0x%x on port login IOCB channel %d", plp->plogx_status, chan); rval = -1; goto out; } sst = plp->plogx_ioparm[0].lo16 | (plp->plogx_ioparm[0].hi16 << 16); parm1 = plp->plogx_ioparm[1].lo16 | (plp->plogx_ioparm[1].hi16 << 16); rval = -1; lev = ISP_LOGERR; msg = NULL; switch (sst) { case PLOGX_IOCBERR_NOLINK: msg = "no link"; break; case PLOGX_IOCBERR_NOIOCB: msg = "no IOCB buffer"; break; case PLOGX_IOCBERR_NOXGHG: msg = "no Exchange Control Block"; break; case PLOGX_IOCBERR_FAILED: ISP_SNPRINTF(buf, sizeof (buf), "reason 0x%x (last LOGIN state 0x%x)", parm1 & 0xff, (parm1 >> 8) & 0xff); msg = buf; break; case PLOGX_IOCBERR_NOFABRIC: msg = "no fabric"; break; case PLOGX_IOCBERR_NOTREADY: msg = "firmware not ready"; break; case PLOGX_IOCBERR_NOLOGIN: ISP_SNPRINTF(buf, sizeof (buf), "not logged in (last state 0x%x)", parm1); msg = buf; rval = MBOX_NOT_LOGGED_IN; break; case PLOGX_IOCBERR_REJECT: ISP_SNPRINTF(buf, sizeof (buf), "LS_RJT = 0x%x", parm1); msg = buf; break; case PLOGX_IOCBERR_NOPCB: msg = "no PCB allocated"; break; case PLOGX_IOCBERR_EINVAL: ISP_SNPRINTF(buf, sizeof (buf), "invalid parameter at offset 0x%x", parm1); msg = buf; break; case PLOGX_IOCBERR_PORTUSED: lev = ISP_LOG_SANCFG|ISP_LOG_WARN1; ISP_SNPRINTF(buf, sizeof (buf), "already logged in with N-Port handle 0x%x", parm1); msg = buf; rval = MBOX_PORT_ID_USED | (parm1 << 16); break; case PLOGX_IOCBERR_HNDLUSED: lev = ISP_LOG_SANCFG|ISP_LOG_WARN1; ISP_SNPRINTF(buf, sizeof (buf), "handle already used for PortID 0x%06x", parm1); msg = buf; rval = MBOX_LOOP_ID_USED; break; case PLOGX_IOCBERR_NOHANDLE: msg = "no handle 
allocated"; break; case PLOGX_IOCBERR_NOFLOGI: msg = "no FLOGI_ACC"; break; default: ISP_SNPRINTF(buf, sizeof (buf), "status %x from %x", plp->plogx_status, flags); msg = buf; break; } if (msg) { isp_prt(isp, ISP_LOGERR, "Chan %d PLOGX PortID 0x%06x to N-Port handle 0x%x: %s", chan, portid, handle, msg); } out: if (gs == 0) { FC_SCRATCH_RELEASE(isp, chan); } return (rval); } static int isp_port_login(ispsoftc_t *isp, uint16_t handle, uint32_t portid) { mbreg_t mbs; MBSINIT(&mbs, MBOX_FABRIC_LOGIN, MBLOGNONE, 500000); if (ISP_CAP_2KLOGIN(isp)) { mbs.param[1] = handle; mbs.ibits = (1 << 10); } else { mbs.param[1] = handle << 8; } mbs.param[2] = portid >> 16; mbs.param[3] = portid; mbs.logval = MBLOGNONE; mbs.timeout = 500000; isp_mboxcmd(isp, &mbs); switch (mbs.param[0]) { case MBOX_PORT_ID_USED: isp_prt(isp, ISP_LOG_SANCFG|ISP_LOG_WARN1, "isp_port_login: portid 0x%06x already logged in as %u", portid, mbs.param[1]); return (MBOX_PORT_ID_USED | (mbs.param[1] << 16)); case MBOX_LOOP_ID_USED: isp_prt(isp, ISP_LOG_SANCFG|ISP_LOG_WARN1, "isp_port_login: handle 0x%x in use for port id 0x%02xXXXX", handle, mbs.param[1] & 0xff); return (MBOX_LOOP_ID_USED); case MBOX_COMMAND_COMPLETE: return (0); case MBOX_COMMAND_ERROR: isp_prt(isp, ISP_LOG_SANCFG|ISP_LOG_WARN1, "isp_port_login: error 0x%x in PLOGI to port 0x%06x", mbs.param[1], portid); return (MBOX_COMMAND_ERROR); case MBOX_ALL_IDS_USED: isp_prt(isp, ISP_LOG_SANCFG|ISP_LOG_WARN1, "isp_port_login: all IDs used for fabric login"); return (MBOX_ALL_IDS_USED); default: isp_prt(isp, ISP_LOG_SANCFG, "isp_port_login: error 0x%x on port login of 0x%06x@0x%0x", mbs.param[0], portid, handle); return (mbs.param[0]); } } /* * Pre-24XX fabric port logout * * Note that portid is not used */ static int isp_port_logout(ispsoftc_t *isp, uint16_t handle, uint32_t portid) { mbreg_t mbs; MBSINIT(&mbs, MBOX_FABRIC_LOGOUT, MBLOGNONE, 500000); if (ISP_CAP_2KLOGIN(isp)) { mbs.param[1] = handle; mbs.ibits = (1 << 10); } else { mbs.param[1] = handle << 8; } isp_mboxcmd(isp, &mbs); return (mbs.param[0] == MBOX_COMMAND_COMPLETE? 
0 : mbs.param[0]); } static int isp_getpdb(ispsoftc_t *isp, int chan, uint16_t id, isp_pdb_t *pdb, int dolock) { fcparam *fcp = FCPARAM(isp, chan); mbreg_t mbs; union { isp_pdb_21xx_t fred; isp_pdb_24xx_t bill; } un; MBSINIT(&mbs, MBOX_GET_PORT_DB, MBLOGALL & ~MBLOGMASK(MBOX_COMMAND_PARAM_ERROR), 250000); if (IS_24XX(isp)) { mbs.ibits = (1 << 9)|(1 << 10); mbs.param[1] = id; mbs.param[9] = chan; } else if (ISP_CAP_2KLOGIN(isp)) { mbs.param[1] = id; } else { mbs.param[1] = id << 8; } mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); if (dolock) { if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); return (-1); } } MEMORYBARRIER(isp, SYNC_SFORDEV, 0, sizeof (un), chan); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { if (dolock) { FC_SCRATCH_RELEASE(isp, chan); } return (mbs.param[0] | (mbs.param[1] << 16)); } if (IS_24XX(isp)) { isp_get_pdb_24xx(isp, fcp->isp_scratch, &un.bill); pdb->handle = un.bill.pdb_handle; pdb->prli_word3 = un.bill.pdb_prli_svc3; pdb->portid = BITS2WORD_24XX(un.bill.pdb_portid_bits); ISP_MEMCPY(pdb->portname, un.bill.pdb_portname, 8); ISP_MEMCPY(pdb->nodename, un.bill.pdb_nodename, 8); isp_prt(isp, ISP_LOG_SANCFG, "Chan %d handle 0x%x Port 0x%06x flags 0x%x curstate %x", chan, id, pdb->portid, un.bill.pdb_flags, un.bill.pdb_curstate); if (un.bill.pdb_curstate < PDB2400_STATE_PLOGI_DONE || un.bill.pdb_curstate > PDB2400_STATE_LOGGED_IN) { mbs.param[0] = MBOX_NOT_LOGGED_IN; if (dolock) { FC_SCRATCH_RELEASE(isp, chan); } return (mbs.param[0]); } } else { isp_get_pdb_21xx(isp, fcp->isp_scratch, &un.fred); pdb->handle = un.fred.pdb_loopid; pdb->prli_word3 = un.fred.pdb_prli_svc3; pdb->portid = BITS2WORD(un.fred.pdb_portid_bits); ISP_MEMCPY(pdb->portname, un.fred.pdb_portname, 8); ISP_MEMCPY(pdb->nodename, un.fred.pdb_nodename, 8); } if (dolock) { FC_SCRATCH_RELEASE(isp, chan); } return (0); } static int isp_gethandles(ispsoftc_t *isp, int chan, uint16_t *handles, int *num, int dolock, int loop) { fcparam *fcp = FCPARAM(isp, chan); mbreg_t mbs; isp_pnhle_21xx_t el1, *elp1; isp_pnhle_23xx_t el3, *elp3; isp_pnhle_24xx_t el4, *elp4; int i, j; uint32_t p; uint16_t h; MBSINIT(&mbs, MBOX_GET_ID_LIST, MBLOGALL, 250000); if (IS_24XX(isp)) { mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); mbs.param[8] = ISP_FC_SCRLEN; mbs.param[9] = chan; } else { mbs.ibits = (1 << 1)|(1 << 2)|(1 << 3)|(1 << 6); mbs.param[1] = DMA_WD1(fcp->isp_scdma); mbs.param[2] = DMA_WD0(fcp->isp_scdma); mbs.param[3] = DMA_WD3(fcp->isp_scdma); mbs.param[6] = DMA_WD2(fcp->isp_scdma); } if (dolock) { if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); return (-1); } } MEMORYBARRIER(isp, SYNC_SFORDEV, 0, ISP_FC_SCRLEN, chan); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { if (dolock) { FC_SCRATCH_RELEASE(isp, chan); } return (mbs.param[0] | (mbs.param[1] << 16)); } elp1 = fcp->isp_scratch; elp3 = fcp->isp_scratch; elp4 = fcp->isp_scratch; for (i = 0, j = 0; i < mbs.param[1] && j < *num; i++) { if (IS_24XX(isp)) { isp_get_pnhle_24xx(isp, &elp4[i], &el4); p = el4.pnhle_port_id_lo | (el4.pnhle_port_id_hi << 16); h = el4.pnhle_handle; } else if (IS_23XX(isp)) { isp_get_pnhle_23xx(isp, &elp3[i], &el3); p = el3.pnhle_port_id_lo | (el3.pnhle_port_id_hi << 16); h = el3.pnhle_handle; } else { /* 21xx */ isp_get_pnhle_21xx(isp, 
&elp1[i], &el1); p = el1.pnhle_port_id_lo | ((el1.pnhle_port_id_hi_handle & 0xff) << 16); h = el1.pnhle_port_id_hi_handle >> 8; } if (loop && (p >> 8) != (fcp->isp_portid >> 8)) continue; handles[j++] = h; } *num = j; if (dolock) FC_SCRATCH_RELEASE(isp, chan); return (0); } static void isp_dump_chip_portdb(ispsoftc_t *isp, int chan, int dolock) { isp_pdb_t pdb; int lim, loopid; isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGINFO, "Chan %d chip port dump", chan); if (ISP_CAP_2KLOGIN(isp)) { lim = NPH_MAX_2K; } else { lim = NPH_MAX; } for (loopid = 0; loopid != lim; loopid++) { if (isp_getpdb(isp, chan, loopid, &pdb, dolock)) { continue; } isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGINFO, "Chan %d Loopid 0x%04x " "PortID 0x%06x WWPN 0x%02x%02x%02x%02x%02x%02x%02x%02x", chan, loopid, pdb.portid, pdb.portname[0], pdb.portname[1], pdb.portname[2], pdb.portname[3], pdb.portname[4], pdb.portname[5], pdb.portname[6], pdb.portname[7]); } } static uint64_t isp_get_wwn(ispsoftc_t *isp, int chan, int loopid, int nodename) { uint64_t wwn = INI_NONE; fcparam *fcp = FCPARAM(isp, chan); mbreg_t mbs; if (fcp->isp_fwstate < FW_READY || fcp->isp_loopstate < LOOP_PDB_RCVD) { return (wwn); } MBSINIT(&mbs, MBOX_GET_PORT_NAME, MBLOGALL & ~MBLOGMASK(MBOX_COMMAND_PARAM_ERROR), 500000); if (ISP_CAP_2KLOGIN(isp)) { mbs.param[1] = loopid; if (nodename) { mbs.param[10] = 1; } mbs.param[9] = chan; } else { mbs.ibitm = 3; mbs.param[1] = loopid << 8; if (nodename) { mbs.param[1] |= 1; } } isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return (wwn); } if (IS_24XX(isp)) { wwn = (((uint64_t)(mbs.param[2] >> 8)) << 56) | (((uint64_t)(mbs.param[2] & 0xff)) << 48) | (((uint64_t)(mbs.param[3] >> 8)) << 40) | (((uint64_t)(mbs.param[3] & 0xff)) << 32) | (((uint64_t)(mbs.param[6] >> 8)) << 24) | (((uint64_t)(mbs.param[6] & 0xff)) << 16) | (((uint64_t)(mbs.param[7] >> 8)) << 8) | (((uint64_t)(mbs.param[7] & 0xff))); } else { wwn = (((uint64_t)(mbs.param[2] & 0xff)) << 56) | (((uint64_t)(mbs.param[2] >> 8)) << 48) | (((uint64_t)(mbs.param[3] & 0xff)) << 40) | (((uint64_t)(mbs.param[3] >> 8)) << 32) | (((uint64_t)(mbs.param[6] & 0xff)) << 24) | (((uint64_t)(mbs.param[6] >> 8)) << 16) | (((uint64_t)(mbs.param[7] & 0xff)) << 8) | (((uint64_t)(mbs.param[7] >> 8))); } return (wwn); } /* * Make sure we have good FC link. */ static int isp_fclink_test(ispsoftc_t *isp, int chan, int usdelay) { mbreg_t mbs; int check_for_fabric, r; uint8_t lwfs; int loopid; fcparam *fcp; fcportdb_t *lp; isp_pdb_t pdb; NANOTIME_T hra, hrb; fcp = FCPARAM(isp, chan); isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC Link Test Entry", chan); ISP_MARK_PORTDB(isp, chan, 1); /* * Wait up to N microseconds for F/W to go to a ready state. */ lwfs = FW_CONFIG_WAIT; GET_NANOTIME(&hra); while (1) { isp_fw_state(isp, chan); if (lwfs != fcp->isp_fwstate) { isp_prt(isp, ISP_LOGCONFIG|ISP_LOG_SANCFG, "Chan %d Firmware State <%s->%s>", chan, isp_fc_fw_statename((int)lwfs), isp_fc_fw_statename((int)fcp->isp_fwstate)); lwfs = fcp->isp_fwstate; } if (fcp->isp_fwstate == FW_READY) { break; } GET_NANOTIME(&hrb); if ((NANOTIME_SUB(&hrb, &hra) / 1000 + 1000 >= usdelay)) break; ISP_SLEEP(isp, 1000); } /* * If we haven't gone to 'ready' state, return. */ if (fcp->isp_fwstate != FW_READY) { isp_prt(isp, ISP_LOG_SANCFG, "%s: chan %d not at FW_READY state", __func__, chan); return (-1); } /* * Get our Loop ID and Port ID. 
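 *
 * MBOX_GET_LOOP_ID returns the pieces in separate mailbox words:
 * the loop ID comes back in param[1], and the 24-bit Port ID is
 * reassembled below as
 *
 *	portid = param[2] | (param[3] << 16)
 *
 * so param[2] carries the area and AL_PA bytes and param[3]
 * supplies the domain byte.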
*/ MBSINIT(&mbs, MBOX_GET_LOOP_ID, MBLOGALL, 0); mbs.param[9] = chan; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return (-1); } if (ISP_CAP_2KLOGIN(isp)) { fcp->isp_loopid = mbs.param[1]; } else { fcp->isp_loopid = mbs.param[1] & 0xff; } if (IS_2100(isp)) { fcp->isp_topo = TOPO_NL_PORT; } else { int topo = (int) mbs.param[6]; if (topo < TOPO_NL_PORT || topo > TOPO_PTP_STUB) { topo = TOPO_PTP_STUB; } fcp->isp_topo = topo; } fcp->isp_portid = mbs.param[2] | (mbs.param[3] << 16); if (IS_2100(isp)) { /* * Don't bother with fabric if we are using really old * 2100 firmware. It's just not worth it. */ if (ISP_FW_NEWER_THAN(isp, 1, 15, 37)) { check_for_fabric = 1; } else { check_for_fabric = 0; } } else if (fcp->isp_topo == TOPO_FL_PORT || fcp->isp_topo == TOPO_F_PORT) { check_for_fabric = 1; } else { check_for_fabric = 0; } /* * Check to make sure we got a valid loopid * The 24XX seems to mess this up for multiple channels. */ if (fcp->isp_topo == TOPO_FL_PORT || fcp->isp_topo == TOPO_NL_PORT) { uint8_t alpa = fcp->isp_portid; if (alpa == 0) { /* "Cannot Happen" */ isp_prt(isp, ISP_LOGWARN, "Zero AL_PA for Loop Topology?"); } else { int i; for (i = 0; alpa_map[i]; i++) { if (alpa_map[i] == alpa) { break; } } if (alpa_map[i] && fcp->isp_loopid != i) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d deriving loopid %d from AL_PA map (AL_PA 0x%x) and ignoring returned value %d (AL_PA 0x%x)", chan, i, alpa_map[i], fcp->isp_loopid, alpa); fcp->isp_loopid = i; } } } if (IS_24XX(isp)) { /* XXX SHOULDN'T THIS BE FOR 2K F/W? XXX */ loopid = NPH_FL_ID; } else { loopid = FL_ID; } if (check_for_fabric) { r = isp_getpdb(isp, chan, loopid, &pdb, 1); if (r && (fcp->isp_topo == TOPO_F_PORT || fcp->isp_topo == TOPO_FL_PORT)) { isp_prt(isp, ISP_LOGWARN, "fabric topology but cannot get info about fabric controller (0x%x)", r); fcp->isp_topo = TOPO_PTP_STUB; } } else { r = -1; } if (r == 0) { if (IS_2100(isp)) { fcp->isp_topo = TOPO_FL_PORT; } if (pdb.portid == 0) { /* * Crock. */ fcp->isp_topo = TOPO_NL_PORT; goto not_on_fabric; } /* * Save the Fabric controller's port database entry. */ lp = &fcp->portdb[FL_ID]; lp->state = FC_PORTDB_STATE_PENDING_VALID; MAKE_WWN_FROM_NODE_NAME(lp->node_wwn, pdb.nodename); MAKE_WWN_FROM_NODE_NAME(lp->port_wwn, pdb.portname); lp->prli_word3 = pdb.prli_word3; lp->portid = pdb.portid; lp->handle = pdb.handle; lp->new_portid = lp->portid; lp->new_prli_word3 = lp->prli_word3; if (IS_24XX(isp)) { if (check_for_fabric) { /* * The mbs is still hanging out from the MBOX_GET_LOOP_ID above. 
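 *
 * (That is, mbs.param[7] read below was filled in by the
 * MBOX_GET_LOOP_ID command and no other mailbox command has been
 * issued since, so its contents are still valid here.)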
*/ fcp->isp_fabric_params = mbs.param[7]; } else { fcp->isp_fabric_params = 0; } - if (chan) { - fcp->isp_sns_hdl = NPH_RESERVED - chan; - r = isp_plogx(isp, chan, fcp->isp_sns_hdl, SNS_PORT_ID, PLOGX_FLG_CMD_PLOGI | PLOGX_FLG_COND_PLOGI | PLOGX_FLG_SKIP_PRLI, 0); - if (r) { - isp_prt(isp, ISP_LOGWARN, "%s: Chan %d cannot log into SNS", __func__, chan); - return (-1); - } - } else { - fcp->isp_sns_hdl = NPH_SNS_ID; - } + fcp->isp_sns_hdl = NPH_SNS_ID; r = isp_register_fc4_type_24xx(isp, chan); } else { fcp->isp_sns_hdl = SNS_ID; r = isp_register_fc4_type(isp, chan); } if (r) { isp_prt(isp, ISP_LOGWARN|ISP_LOG_SANCFG, "%s: register fc4 type failed", __func__); return (-1); } } else { not_on_fabric: fcp->portdb[FL_ID].state = FC_PORTDB_STATE_NIL; } fcp->isp_gbspeed = 1; if (IS_23XX(isp) || IS_24XX(isp)) { MBSINIT(&mbs, MBOX_GET_SET_DATA_RATE, MBLOGALL, 3000000); mbs.param[1] = MBGSD_GET_RATE; /* mbs.param[2] undefined if we're just getting rate */ isp_mboxcmd(isp, &mbs); if (mbs.param[0] == MBOX_COMMAND_COMPLETE) { if (mbs.param[1] == MBGSD_EIGHTGB) { isp_prt(isp, ISP_LOGINFO, "Chan %d 8Gb link speed", chan); fcp->isp_gbspeed = 8; } else if (mbs.param[1] == MBGSD_FOURGB) { isp_prt(isp, ISP_LOGINFO, "Chan %d 4Gb link speed", chan); fcp->isp_gbspeed = 4; } else if (mbs.param[1] == MBGSD_TWOGB) { isp_prt(isp, ISP_LOGINFO, "Chan %d 2Gb link speed", chan); fcp->isp_gbspeed = 2; } else if (mbs.param[1] == MBGSD_ONEGB) { isp_prt(isp, ISP_LOGINFO, "Chan %d 1Gb link speed", chan); fcp->isp_gbspeed = 1; } } } /* * Announce ourselves, too. */ isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGCONFIG, topology, chan, (uint32_t) (fcp->isp_wwpn >> 32), (uint32_t) fcp->isp_wwpn, fcp->isp_portid, fcp->isp_loopid, isp_fc_toponame(fcp)); isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC Link Test Complete", chan); return (0); } /* * Complete the synchronization of our Port Database. * * At this point, we've scanned the local loop (if any) and the fabric * and performed fabric logins on all new devices. * * Our task here is to go through our port database and remove any entities * that are still marked probational (issuing PLOGO for ones which we had * PLOGI'd into) or are dead. * * Our task here is also to check policy to decide whether devices which * have *changed* in some way should still be kept active. For example, * if a device has just changed PortID, we can either elect to treat it * as an old device or as a newly arrived device (and notify the outer * layer appropriately). * * We also do initiator map target id assignment here for new initiator * devices and refresh old ones to make sure that they point to the correct * entities. */ static int isp_pdb_sync(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); fcportdb_t *lp; uint16_t dbidx; if (fcp->isp_loopstate == LOOP_READY) { return (0); } /* * Make sure we're okay for doing this right now.
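 *
 * A summary of the dispositions applied in the loop below:
 *
 *	PROBATIONAL, DEAD  ->  NIL    (DEV_GONE announced; implicit
 *	                               LOGO unless f/w auto-login)
 *	NEW                ->  VALID  (DEV_ARRIVED announced)
 *	CHANGED            ->  VALID  (DEV_CHANGED announced)
 *	PENDING_VALID      ->  VALID  (DEV_STAYED announced)
 *	ZOMBIE                 left untouched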
*/ if (fcp->isp_loopstate != LOOP_PDB_RCVD && fcp->isp_loopstate != LOOP_FSCAN_DONE && fcp->isp_loopstate != LOOP_LSCAN_DONE) { isp_prt(isp, ISP_LOGWARN, "isp_pdb_sync: bad loopstate %d", fcp->isp_loopstate); return (-1); } if (fcp->isp_topo == TOPO_FL_PORT || fcp->isp_topo == TOPO_NL_PORT || fcp->isp_topo == TOPO_N_PORT) { if (fcp->isp_loopstate < LOOP_LSCAN_DONE) { if (isp_scan_loop(isp, chan) != 0) { isp_prt(isp, ISP_LOGWARN, "isp_pdb_sync: isp_scan_loop failed"); return (-1); } } } if (fcp->isp_topo == TOPO_F_PORT || fcp->isp_topo == TOPO_FL_PORT) { if (fcp->isp_loopstate < LOOP_FSCAN_DONE) { if (isp_scan_fabric(isp, chan) != 0) { isp_prt(isp, ISP_LOGWARN, "isp_pdb_sync: isp_scan_fabric failed"); return (-1); } } } isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Synchronizing PDBs", chan); fcp->isp_loopstate = LOOP_SYNCING_PDB; for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) { lp = &fcp->portdb[dbidx]; if (lp->state == FC_PORTDB_STATE_NIL || lp->state == FC_PORTDB_STATE_VALID) { continue; } switch (lp->state) { case FC_PORTDB_STATE_PROBATIONAL: case FC_PORTDB_STATE_DEAD: lp->state = FC_PORTDB_STATE_NIL; isp_async(isp, ISPASYNC_DEV_GONE, chan, lp); if (lp->autologin == 0) { (void) isp_plogx(isp, chan, lp->handle, lp->portid, PLOGX_FLG_CMD_LOGO | PLOGX_FLG_IMPLICIT | PLOGX_FLG_FREE_NPHDL, 0); } else { lp->autologin = 0; } lp->new_prli_word3 = 0; lp->new_portid = 0; /* * Note that we might come out of this with our state * set to FC_PORTDB_STATE_ZOMBIE. */ break; case FC_PORTDB_STATE_NEW: lp->portid = lp->new_portid; lp->prli_word3 = lp->new_prli_word3; lp->state = FC_PORTDB_STATE_VALID; isp_async(isp, ISPASYNC_DEV_ARRIVED, chan, lp); lp->new_prli_word3 = 0; lp->new_portid = 0; break; case FC_PORTDB_STATE_CHANGED: lp->state = FC_PORTDB_STATE_VALID; isp_async(isp, ISPASYNC_DEV_CHANGED, chan, lp); lp->portid = lp->new_portid; lp->prli_word3 = lp->new_prli_word3; lp->new_prli_word3 = 0; lp->new_portid = 0; break; case FC_PORTDB_STATE_PENDING_VALID: lp->portid = lp->new_portid; lp->prli_word3 = lp->new_prli_word3; lp->state = FC_PORTDB_STATE_VALID; isp_async(isp, ISPASYNC_DEV_STAYED, chan, lp); if (dbidx != FL_ID) { lp->new_prli_word3 = 0; lp->new_portid = 0; } break; case FC_PORTDB_STATE_ZOMBIE: break; default: isp_prt(isp, ISP_LOGWARN, "isp_pdb_sync: state %d for idx %d", lp->state, dbidx); isp_dump_portdb(isp, chan); } } /* * If we get here, we've for sure seen not only a valid loop * but know what is or isn't on it, so mark this for usage * in isp_start. */ fcp->loop_seen_once = 1; fcp->isp_loopstate = LOOP_READY; return (0); } /* * Scan local loop for devices. 
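 *
 * Note that "special" N-port handles are never treated as loop
 * devices: with 2K-login f/w anything at or above NPH_RESERVED is
 * skipped, and with older f/w the FL_ID..SNS_ID range is skipped
 * (see the filter at the top of the scan loop below).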
*/ static int isp_scan_loop(ispsoftc_t *isp, int chan) { fcportdb_t *lp, tmp; fcparam *fcp = FCPARAM(isp, chan); int i, idx, lim, r; isp_pdb_t pdb; uint16_t handles[LOCAL_LOOP_LIM]; uint16_t handle; if (fcp->isp_fwstate < FW_READY || fcp->isp_loopstate < LOOP_PDB_RCVD) { return (-1); } if (fcp->isp_loopstate > LOOP_SCANNING_LOOP) { return (0); } if (fcp->isp_topo != TOPO_NL_PORT && fcp->isp_topo != TOPO_FL_PORT && fcp->isp_topo != TOPO_N_PORT) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d no loop topology to scan", chan); fcp->isp_loopstate = LOOP_LSCAN_DONE; return (0); } fcp->isp_loopstate = LOOP_SCANNING_LOOP; lim = LOCAL_LOOP_LIM; r = isp_gethandles(isp, chan, handles, &lim, 1, 1); if (r != 0) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d getting list of handles failed with %x", chan, r); fail: ISP_MARK_PORTDB(isp, chan, 1); isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC scan loop DONE (bad)", chan); return (-1); } isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC scan loop -- %d ports", chan, lim); /* * Run through the list and get the port database info for each one. */ for (idx = 0; idx < lim; idx++) { handle = handles[idx]; /* * Don't scan "special" ids. */ if (ISP_CAP_2KLOGIN(isp)) { - if (handle >= NPH_RESERVED - isp->isp_nchan) + if (handle >= NPH_RESERVED) continue; } else { if (handle >= FL_ID && handle <= SNS_ID) continue; } /* * In older cards with older f/w GET_PORT_DATABASE has been * known to hang. This trick gets around that problem. */ if (IS_2100(isp) || IS_2200(isp)) { uint64_t node_wwn = isp_get_wwn(isp, chan, handle, 1); if (fcp->isp_loopstate < LOOP_SCANNING_LOOP) goto fail; if (node_wwn == INI_NONE) { continue; } } /* * Get the port database entity for this index. */ r = isp_getpdb(isp, chan, handle, &pdb, 1); if (r != 0) { isp_prt(isp, ISP_LOGDEBUG1, "Chan %d FC scan loop handle %d returned %x", chan, handle, r); if (fcp->isp_loopstate < LOOP_SCANNING_LOOP) goto fail; continue; } if (fcp->isp_loopstate < LOOP_SCANNING_LOOP) goto fail; /* * On *very* old 2100 firmware we would end up sometimes * with the firmware returning the port database entry * for something else. We used to restart this, but * now we just punt. */ if (IS_2100(isp) && pdb.handle != handle) { isp_prt(isp, ISP_LOGWARN, "Chan %d getpdb() returned wrong handle %x != %x", chan, pdb.handle, handle); goto fail; } /* * Save the pertinent info locally. */ MAKE_WWN_FROM_NODE_NAME(tmp.node_wwn, pdb.nodename); MAKE_WWN_FROM_NODE_NAME(tmp.port_wwn, pdb.portname); tmp.prli_word3 = pdb.prli_word3; tmp.portid = pdb.portid; tmp.handle = pdb.handle; /* * Check to make sure it's still a valid entry. The 24XX seems * to return a portid but not a WWPN/WWNN or role for devices * which shift on a loop. */ if (tmp.node_wwn == 0 || tmp.port_wwn == 0 || tmp.portid == 0) { int a, b, c; isp_prt(isp, ISP_LOGWARN, "Chan %d bad pdb (WWNN %016jx, WWPN %016jx, PortID %06x, W3 0x%x, H 0x%x) @ handle 0x%x", chan, tmp.node_wwn, tmp.port_wwn, tmp.portid, tmp.prli_word3, tmp.handle, handle); a = (tmp.node_wwn == 0); b = (tmp.port_wwn == 0); c = (tmp.portid == 0); if (a == 0 && b == 0) { tmp.node_wwn = isp_get_wwn(isp, chan, handle, 1); tmp.port_wwn = isp_get_wwn(isp, chan, handle, 0); if (tmp.node_wwn && tmp.port_wwn) { isp_prt(isp, ISP_LOGWARN, "DODGED!"); goto cont; } } isp_dump_portdb(isp, chan); continue; } cont: /* * Now search the entire port database * for the same Port WWN. */ if (isp_find_pdb_by_wwn(isp, chan, tmp.port_wwn, &lp)) { /* * Okay- we've found a non-nil entry that matches. * Check to make sure it's probational or a zombie. 
*/ if (lp->state != FC_PORTDB_STATE_PROBATIONAL && lp->state != FC_PORTDB_STATE_ZOMBIE && lp->state != FC_PORTDB_STATE_VALID) { isp_prt(isp, ISP_LOGERR, "Chan %d [%d] not probational/zombie (0x%x)", chan, FC_PORTDB_TGT(isp, chan, lp), lp->state); isp_dump_portdb(isp, chan); goto fail; } /* * Mark the device as something the f/w logs into * automatically. */ lp->autologin = 1; lp->node_wwn = tmp.node_wwn; /* * Check to see if it is really still the same * device. If it is, we mark it pending valid. */ if (lp->portid == tmp.portid && lp->handle == tmp.handle && lp->prli_word3 == tmp.prli_word3) { lp->new_portid = tmp.portid; lp->new_prli_word3 = tmp.prli_word3; lp->state = FC_PORTDB_STATE_PENDING_VALID; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Loop Port 0x%06x@0x%04x Pending Valid", chan, tmp.portid, tmp.handle); continue; } /* * We can wipe out the old handle value * here because it's no longer valid. */ lp->handle = tmp.handle; /* * Claim that this has changed and let somebody else * decide what to do. */ isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Loop Port 0x%06x@0x%04x changed", chan, tmp.portid, tmp.handle); lp->state = FC_PORTDB_STATE_CHANGED; lp->new_portid = tmp.portid; lp->new_prli_word3 = tmp.prli_word3; continue; } /* * Ah. A new device entry. Find an empty slot * for it and save info for later disposition. */ for (i = 0; i < MAX_FC_TARG; i++) { if (fcp->portdb[i].state == FC_PORTDB_STATE_NIL) { break; } } if (i == MAX_FC_TARG) { isp_prt(isp, ISP_LOGERR, "Chan %d out of portdb entries", chan); continue; } lp = &fcp->portdb[i]; ISP_MEMZERO(lp, sizeof (fcportdb_t)); lp->autologin = 1; lp->state = FC_PORTDB_STATE_NEW; lp->new_portid = tmp.portid; lp->new_prli_word3 = tmp.prli_word3; lp->handle = tmp.handle; lp->port_wwn = tmp.port_wwn; lp->node_wwn = tmp.node_wwn; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Loop Port 0x%06x@0x%04x is New Entry", chan, tmp.portid, tmp.handle); } fcp->isp_loopstate = LOOP_LSCAN_DONE; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC scan loop DONE", chan); return (0); } /* * Scan the fabric for devices and add them to our port database. * * Use the GID_FT command to get all Port IDs for FC4 SCSI devices the * name server knows about. * * For 2100-23XX cards, we can use the SNS mailbox command to pass simple * name server commands to the switch management server via the QLogic f/w. * * For the 24XX card, we have to use CT Passthrough run via the Execute IOCB * mailbox command. * * The net result is to leave the list of Port IDs sitting untranslated at * offset IGPOFF of the FC scratch area, whereupon we'll canonicalize it to * host order at OGPOFF.
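 *
 * The offsets defined below carve up the FC scratch area roughly
 * as follows (low addresses first; not to scale):
 *
 *	0      .. IGPOFF    two queue entries of workspace
 *	IGPOFF .. +GIDLEN   raw GID_FT response from the f/w
 *	OGPOFF .. +GIDLEN   host-order copy of that response
 *	XTXOFF              CT header and payload staged for the f/w
 *	CTXOFF              CT passthrough request IOCB
 *	ZTXOFF              response IOCB written back by the f/w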
*/ /* * Take less than half of our scratch area to store Port IDs */ #define GIDLEN ((ISP_FC_SCRLEN >> 1) - 16 - SNS_GID_FT_REQ_SIZE) #define NGENT ((GIDLEN - 16) >> 2) #define IGPOFF (2 * QENTRY_LEN) #define OGPOFF (ISP_FC_SCRLEN >> 1) #define ZTXOFF (ISP_FC_SCRLEN - (1 * QENTRY_LEN)) #define CTXOFF (ISP_FC_SCRLEN - (2 * QENTRY_LEN)) #define XTXOFF (ISP_FC_SCRLEN - (3 * QENTRY_LEN)) static int isp_gid_ft_sns(ispsoftc_t *isp, int chan) { union { sns_gid_ft_req_t _x; uint8_t _y[SNS_GID_FT_REQ_SIZE]; } un; fcparam *fcp = FCPARAM(isp, chan); sns_gid_ft_req_t *rq = &un._x; mbreg_t mbs; isp_prt(isp, ISP_LOGDEBUG0, "Chan %d scanning fabric (GID_FT) via SNS", chan); ISP_MEMZERO(rq, SNS_GID_FT_REQ_SIZE); rq->snscb_rblen = GIDLEN >> 1; rq->snscb_addr[RQRSP_ADDR0015] = DMA_WD0(fcp->isp_scdma + IGPOFF); rq->snscb_addr[RQRSP_ADDR1631] = DMA_WD1(fcp->isp_scdma + IGPOFF); rq->snscb_addr[RQRSP_ADDR3247] = DMA_WD2(fcp->isp_scdma + IGPOFF); rq->snscb_addr[RQRSP_ADDR4863] = DMA_WD3(fcp->isp_scdma + IGPOFF); rq->snscb_sblen = 6; rq->snscb_cmd = SNS_GID_FT; rq->snscb_mword_div_2 = NGENT; rq->snscb_fc4_type = FC4_SCSI; isp_put_gid_ft_request(isp, rq, fcp->isp_scratch); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, SNS_GID_FT_REQ_SIZE, chan); MBSINIT(&mbs, MBOX_SEND_SNS, MBLOGALL, 10000000); mbs.param[0] = MBOX_SEND_SNS; mbs.param[1] = SNS_GID_FT_REQ_SIZE >> 1; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { if (mbs.param[0] == MBOX_INVALID_COMMAND) { return (1); } else { return (-1); } } return (0); } static int isp_gid_ft_ct_passthru(ispsoftc_t *isp, int chan) { mbreg_t mbs; fcparam *fcp = FCPARAM(isp, chan); union { isp_ct_pt_t plocal; ct_hdr_t clocal; uint8_t q[QENTRY_LEN]; } un; isp_ct_pt_t *pt; ct_hdr_t *ct; uint32_t *rp; uint8_t *scp = fcp->isp_scratch; isp_prt(isp, ISP_LOGDEBUG0, "Chan %d scanning fabric (GID_FT) via CT", chan); if (!IS_24XX(isp)) { return (1); } /* * Build a Passthrough IOCB in memory. */ pt = &un.plocal; ISP_MEMZERO(un.q, QENTRY_LEN); pt->ctp_header.rqs_entry_count = 1; pt->ctp_header.rqs_entry_type = RQSTYPE_CT_PASSTHRU; pt->ctp_handle = 0xffffffff; pt->ctp_nphdl = fcp->isp_sns_hdl; pt->ctp_cmd_cnt = 1; pt->ctp_vpidx = ISP_GET_VPIDX(isp, chan); pt->ctp_time = 30; pt->ctp_rsp_cnt = 1; pt->ctp_rsp_bcnt = GIDLEN; pt->ctp_cmd_bcnt = sizeof (*ct) + sizeof (uint32_t); pt->ctp_dataseg[0].ds_base = DMA_LO32(fcp->isp_scdma+XTXOFF); pt->ctp_dataseg[0].ds_basehi = DMA_HI32(fcp->isp_scdma+XTXOFF); pt->ctp_dataseg[0].ds_count = sizeof (*ct) + sizeof (uint32_t); pt->ctp_dataseg[1].ds_base = DMA_LO32(fcp->isp_scdma+IGPOFF); pt->ctp_dataseg[1].ds_basehi = DMA_HI32(fcp->isp_scdma+IGPOFF); pt->ctp_dataseg[1].ds_count = GIDLEN; if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "ct IOCB", QENTRY_LEN, pt); } isp_put_ct_pt(isp, pt, (isp_ct_pt_t *) &scp[CTXOFF]); /* * Build the CT header and command in memory. * * Note that the CT header has to end up as Big Endian format in memory. 
*/ ct = &un.clocal; ISP_MEMZERO(ct, sizeof (*ct)); ct->ct_revision = CT_REVISION; ct->ct_fcs_type = CT_FC_TYPE_FC; ct->ct_fcs_subtype = CT_FC_SUBTYPE_NS; ct->ct_cmd_resp = SNS_GID_FT; ct->ct_bcnt_resid = (GIDLEN - 16) >> 2; isp_put_ct_hdr(isp, ct, (ct_hdr_t *) &scp[XTXOFF]); rp = (uint32_t *) &scp[XTXOFF+sizeof (*ct)]; ISP_IOZPUT_32(isp, FC4_SCSI, rp); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "CT HDR + payload after put", sizeof (*ct) + sizeof (uint32_t), &scp[XTXOFF]); } ISP_MEMZERO(&scp[ZTXOFF], QENTRY_LEN); MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 500000); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma + CTXOFF); mbs.param[3] = DMA_WD0(fcp->isp_scdma + CTXOFF); mbs.param[6] = DMA_WD3(fcp->isp_scdma + CTXOFF); mbs.param[7] = DMA_WD2(fcp->isp_scdma + CTXOFF); MEMORYBARRIER(isp, SYNC_SFORDEV, XTXOFF, 2 * QENTRY_LEN, chan); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return (-1); } MEMORYBARRIER(isp, SYNC_SFORCPU, ZTXOFF, QENTRY_LEN, chan); pt = &un.plocal; isp_get_ct_pt(isp, (isp_ct_pt_t *) &scp[ZTXOFF], pt); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "IOCB response", QENTRY_LEN, pt); } if (pt->ctp_status && pt->ctp_status != RQCS_DATA_UNDERRUN) { isp_prt(isp, ISP_LOGWARN, "Chan %d ISP GID FT CT Passthrough returned 0x%x", chan, pt->ctp_status); return (-1); } MEMORYBARRIER(isp, SYNC_SFORCPU, IGPOFF, GIDLEN + 16, chan); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "CT response", GIDLEN+16, &scp[IGPOFF]); } return (0); } static int isp_scan_fabric(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); uint32_t portid; uint16_t handle, loopid; isp_pdb_t pdb; int portidx, portlim, r; sns_gid_ft_rsp_t *rs0, *rs1; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC Scan Fabric", chan); if (fcp->isp_fwstate != FW_READY || fcp->isp_loopstate < LOOP_LSCAN_DONE) { return (-1); } if (fcp->isp_loopstate > LOOP_SCANNING_FABRIC) { return (0); } if (fcp->isp_topo != TOPO_FL_PORT && fcp->isp_topo != TOPO_F_PORT) { fcp->isp_loopstate = LOOP_FSCAN_DONE; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC Scan Fabric Done (no fabric)", chan); return (0); } fcp->isp_loopstate = LOOP_SCANNING_FABRIC; if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } if (fcp->isp_loopstate < LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } /* * Make sure we still are logged into the fabric controller. */ if (IS_24XX(isp)) { /* XXX SHOULDN'T THIS BE TRUE FOR 2K F/W? 
XXX */ loopid = NPH_FL_ID; } else { loopid = FL_ID; } r = isp_getpdb(isp, chan, loopid, &pdb, 0); if ((r & 0xffff) == MBOX_NOT_LOGGED_IN) { isp_dump_chip_portdb(isp, chan, 0); } if (r) { fcp->isp_loopstate = LOOP_PDB_RCVD; FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } if (IS_24XX(isp)) { r = isp_gid_ft_ct_passthru(isp, chan); } else { r = isp_gid_ft_sns(isp, chan); } if (fcp->isp_loopstate < LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } if (r > 0) { fcp->isp_loopstate = LOOP_FSCAN_DONE; FC_SCRATCH_RELEASE(isp, chan); return (0); } else if (r < 0) { fcp->isp_loopstate = LOOP_PDB_RCVD; /* try again */ FC_SCRATCH_RELEASE(isp, chan); return (0); } MEMORYBARRIER(isp, SYNC_SFORCPU, IGPOFF, GIDLEN, chan); rs0 = (sns_gid_ft_rsp_t *) ((uint8_t *)fcp->isp_scratch+IGPOFF); rs1 = (sns_gid_ft_rsp_t *) ((uint8_t *)fcp->isp_scratch+OGPOFF); isp_get_gid_ft_response(isp, rs0, rs1, NGENT); if (fcp->isp_loopstate < LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } if (rs1->snscb_cthdr.ct_cmd_resp != LS_ACC) { int level; if (rs1->snscb_cthdr.ct_reason == 9 && rs1->snscb_cthdr.ct_explanation == 7) { level = ISP_LOG_SANCFG; } else { level = ISP_LOGWARN; } isp_prt(isp, level, "Chan %d Fabric Nameserver rejected GID_FT" " (Reason=0x%x Expl=0x%x)", chan, rs1->snscb_cthdr.ct_reason, rs1->snscb_cthdr.ct_explanation); FC_SCRATCH_RELEASE(isp, chan); fcp->isp_loopstate = LOOP_FSCAN_DONE; return (0); } /* * If we get this far, we certainly still have the fabric controller. */ fcp->portdb[FL_ID].state = FC_PORTDB_STATE_PENDING_VALID; /* * Go through the list and remove duplicate port ids. */ portlim = 0; portidx = 0; for (portidx = 0; portidx < NGENT-1; portidx++) { if (rs1->snscb_ports[portidx].control & 0x80) { break; } } /* * If we're not at the last entry, our list wasn't big enough. */ if ((rs1->snscb_ports[portidx].control & 0x80) == 0) { isp_prt(isp, ISP_LOGWARN, "fabric too big for scratch area: increase ISP_FC_SCRLEN"); } portlim = portidx + 1; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d got %d ports back from name server", chan, portlim); for (portidx = 0; portidx < portlim; portidx++) { int npidx; portid = ((rs1->snscb_ports[portidx].portid[0]) << 16) | ((rs1->snscb_ports[portidx].portid[1]) << 8) | ((rs1->snscb_ports[portidx].portid[2])); for (npidx = portidx + 1; npidx < portlim; npidx++) { uint32_t new_portid = ((rs1->snscb_ports[npidx].portid[0]) << 16) | ((rs1->snscb_ports[npidx].portid[1]) << 8) | ((rs1->snscb_ports[npidx].portid[2])); if (new_portid == portid) { break; } } if (npidx < portlim) { rs1->snscb_ports[npidx].portid[0] = 0; rs1->snscb_ports[npidx].portid[1] = 0; rs1->snscb_ports[npidx].portid[2] = 0; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d removing duplicate PortID 0x%06x entry from list", chan, portid); } } /* * We now have a list of Port IDs for all FC4 SCSI devices * that the Fabric Name server knows about. * * For each entry on this list go through our port database looking * for probational entries- if we find one, then an old entry is * maybe still this one. We get some information to find out. * * Otherwise, it's a new fabric device, and we log into it * (unconditionally). After searching the entire database * again to make sure that we never ever ever ever have more * than one entry that has the same PortID or the same * WWNN/WWPN duple, we enter the device into our database. 
*/ for (portidx = 0; portidx < portlim; portidx++) { fcportdb_t *lp; uint64_t wwnn, wwpn; int dbidx, nr; portid = ((rs1->snscb_ports[portidx].portid[0]) << 16) | ((rs1->snscb_ports[portidx].portid[1]) << 8) | ((rs1->snscb_ports[portidx].portid[2])); if (portid == 0) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d skipping null PortID at idx %d", chan, portidx); continue; } /* * Skip ourselves here and on other channels. If we're * multi-id, we can't check the portids in other FCPARAM * arenas because the resolutions here aren't synchronized. * The best way to do this is to exclude looking at portids * that have the same domain and area code as our own * portid. */ if (ISP_CAP_MULTI_ID(isp) && isp->isp_nchan > 1) { if ((portid >> 8) == (fcp->isp_portid >> 8)) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d skip PortID 0x%06x", chan, portid); continue; } } else if (portid == fcp->isp_portid) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d skip ourselves on @ PortID 0x%06x", chan, portid); continue; } isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Checking Fabric Port 0x%06x", chan, portid); /* * We now search our Port Database for any * probational entries with this PortID. We don't * look for zombies here- only probational * entries (we've already logged out of zombies). */ for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) { lp = &fcp->portdb[dbidx]; if (lp->state != FC_PORTDB_STATE_PROBATIONAL) { continue; } if (lp->portid == portid) { break; } } /* * We found a probational entry with this Port ID. */ if (dbidx < MAX_FC_TARG) { int handle_changed = 0; lp = &fcp->portdb[dbidx]; /* * See if we're still logged into it. * * If we aren't, mark it as a dead device and * leave the new portid in the database entry * for somebody further along to decide what to * do (policy choice). * * If we are, check to see if it's the same * device still (it should be). If for some * reason it isn't, mark it as a changed device * and leave the new portid and role in the * database entry for somebody further along to * decide what to do (policy choice). * */ r = isp_getpdb(isp, chan, lp->handle, &pdb, 0); if (fcp->isp_loopstate != LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } if (r != 0) { lp->new_portid = portid; lp->state = FC_PORTDB_STATE_DEAD; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Fabric PortID 0x%06x handle 0x%x is dead (%d)", chan, portid, lp->handle, r); continue; } /* * Check to make sure that handle, portid, WWPN and * WWNN agree. If they don't, then the association * between this PortID and the stated handle has been * broken by the firmware. */ MAKE_WWN_FROM_NODE_NAME(wwnn, pdb.nodename); MAKE_WWN_FROM_NODE_NAME(wwpn, pdb.portname); if (pdb.handle != lp->handle || pdb.portid != portid || wwpn != lp->port_wwn || (lp->node_wwn != 0 && wwnn != lp->node_wwn)) { isp_prt(isp, ISP_LOG_SANCFG, fconf, chan, dbidx, pdb.handle, pdb.portid, (uint32_t) (wwnn >> 32), (uint32_t) wwnn, (uint32_t) (wwpn >> 32), (uint32_t) wwpn, lp->handle, portid, (uint32_t) (lp->node_wwn >> 32), (uint32_t) lp->node_wwn, (uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn); /* * Try to re-login to this device using a * new handle. If that fails, mark it dead. * * isp_login_device will check for handle and * portid consistency after re-login. 
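 * (A failed re-login leaves the entry in FC_PORTDB_STATE_DEAD with
 * new_portid recorded, so the policy code further along can decide
 * whether to retire the device or try it again.)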
* */ if ((fcp->role & ISP_ROLE_INITIATOR) == 0 || isp_login_device(isp, chan, portid, &pdb, &FCPARAM(isp, 0)->isp_lasthdl)) { lp->new_portid = portid; lp->state = FC_PORTDB_STATE_DEAD; if (fcp->isp_loopstate != LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } continue; } if (fcp->isp_loopstate != LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } MAKE_WWN_FROM_NODE_NAME(wwnn, pdb.nodename); MAKE_WWN_FROM_NODE_NAME(wwpn, pdb.portname); if (wwpn != lp->port_wwn || (lp->node_wwn != 0 && wwnn != lp->node_wwn)) { isp_prt(isp, ISP_LOGWARN, "changed WWN" " after relogin"); lp->new_portid = portid; lp->state = FC_PORTDB_STATE_DEAD; continue; } lp->handle = pdb.handle; handle_changed++; } nr = pdb.prli_word3; /* * Check to see whether the portid and roles have * stayed the same. If they have stayed the same, * we believe that this is the same device and it * hasn't become disconnected and reconnected, so * mark it as pending valid. * * If they aren't the same, mark the device as a * changed device and save the new port id and role * and let somebody else decide. */ lp->new_portid = portid; lp->new_prli_word3 = nr; if (pdb.portid != lp->portid || nr != lp->prli_word3 || handle_changed) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Fabric Port 0x%06x changed", chan, portid); lp->state = FC_PORTDB_STATE_CHANGED; } else { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Fabric Port 0x%06x Now Pending Valid", chan, portid); lp->state = FC_PORTDB_STATE_PENDING_VALID; } continue; } if ((fcp->role & ISP_ROLE_INITIATOR) == 0) continue; /* * Ah- a new entry. Search the database again for all non-NIL * entries to make sure we never ever make a new database entry * with the same port id. While we're at it, mark where the * last free entry was. */ dbidx = MAX_FC_TARG; for (lp = fcp->portdb; lp < &fcp->portdb[MAX_FC_TARG]; lp++) { if (lp >= &fcp->portdb[FL_ID] && lp <= &fcp->portdb[SNS_ID]) { continue; } if (lp->state == FC_PORTDB_STATE_NIL) { if (dbidx == MAX_FC_TARG) { dbidx = lp - fcp->portdb; } continue; } if (lp->state == FC_PORTDB_STATE_ZOMBIE) { continue; } if (lp->portid == portid) { break; } } if (lp < &fcp->portdb[MAX_FC_TARG]) { isp_prt(isp, ISP_LOGWARN, "Chan %d PortID 0x%06x " "already at %d handle %d state %d", chan, portid, dbidx, lp->handle, lp->state); continue; } /* * We should have the index of the first free entry seen. */ if (dbidx == MAX_FC_TARG) { isp_prt(isp, ISP_LOGERR, "port database too small to login PortID 0x%06x" "- increase MAX_FC_TARG", portid); continue; } /* * Otherwise, point to our new home. */ lp = &fcp->portdb[dbidx]; /* * Try to see if we are logged into this device, * and maybe log into it. * * isp_login_device will check for handle and * portid consistency after login. 
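 * (The shared last-handle cursor lives in channel 0's fcparam --
 * &FCPARAM(isp, 0)->isp_lasthdl -- so handle allocation stays global
 * across channels.)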
*/ if (isp_login_device(isp, chan, portid, &pdb, &FCPARAM(isp, 0)->isp_lasthdl)) { if (fcp->isp_loopstate != LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } continue; } if (fcp->isp_loopstate != LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } handle = pdb.handle; MAKE_WWN_FROM_NODE_NAME(wwnn, pdb.nodename); MAKE_WWN_FROM_NODE_NAME(wwpn, pdb.portname); nr = pdb.prli_word3; /* * And go through the database *one* more time to make sure * that we do not make more than one entry that has the same * WWNN/WWPN duple */ for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) { if (dbidx >= FL_ID && dbidx <= SNS_ID) { continue; } if ((fcp->portdb[dbidx].node_wwn == wwnn || fcp->portdb[dbidx].node_wwn == 0) && fcp->portdb[dbidx].port_wwn == wwpn) { break; } } if (dbidx == MAX_FC_TARG) { ISP_MEMZERO(lp, sizeof (fcportdb_t)); lp->handle = handle; lp->node_wwn = wwnn; lp->port_wwn = wwpn; lp->new_portid = portid; lp->new_prli_word3 = nr; lp->state = FC_PORTDB_STATE_NEW; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Fabric Port 0x%06x is a New Entry", chan, portid); continue; } if (fcp->portdb[dbidx].state != FC_PORTDB_STATE_ZOMBIE) { isp_prt(isp, ISP_LOGWARN, "Chan %d PortID 0x%x 0x%08x%08x/0x%08x%08x %ld " "already at idx %d, state 0x%x", chan, portid, (uint32_t) (wwnn >> 32), (uint32_t) wwnn, (uint32_t) (wwpn >> 32), (uint32_t) wwpn, (long) (lp - fcp->portdb), dbidx, fcp->portdb[dbidx].state); continue; } /* * We found a zombie entry that matches us. * Revive it. We know that WWN and WWPN * are the same. For fabric devices, we * don't care that handle is different * as we assign that. If role or portid * are different, it may be a changed device. */ lp = &fcp->portdb[dbidx]; lp->handle = handle; lp->node_wwn = wwnn; lp->new_portid = portid; lp->new_prli_word3 = nr; if (lp->portid != portid || lp->prli_word3 != nr) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Zombie Fabric Port 0x%06x Now Changed", chan, portid); lp->state = FC_PORTDB_STATE_CHANGED; } else { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Zombie Fabric Port 0x%06x Now Pending Valid", chan, portid); lp->state = FC_PORTDB_STATE_PENDING_VALID; } } FC_SCRATCH_RELEASE(isp, chan); if (fcp->isp_loopstate != LOOP_SCANNING_FABRIC) { ISP_MARK_PORTDB(isp, chan, 1); return (-1); } fcp->isp_loopstate = LOOP_FSCAN_DONE; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC Scan Fabric Done", chan); return (0); } /* * Find an unused handle and try to use it to log in to a port. */ static int isp_login_device(ispsoftc_t *isp, int chan, uint32_t portid, isp_pdb_t *p, uint16_t *ohp) { int lim, i, r; uint16_t handle; if (ISP_CAP_2KLOGIN(isp)) { lim = NPH_MAX_2K; } else { lim = NPH_MAX; } handle = isp_next_handle(isp, ohp); for (i = 0; i < lim; i++) { /* * See if we're still logged into something with * this handle and that something agrees with this * port id.
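 * (Three outcomes below: the handle already maps to our PortID, so we
 * reuse the login; it maps to some other PortID, so we implicitly LOGO
 * and free it first; or it is unused and we PLOGI fresh.)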
*/ r = isp_getpdb(isp, chan, handle, p, 0); if (r == 0 && p->portid != portid) { (void) isp_plogx(isp, chan, handle, portid, PLOGX_FLG_CMD_LOGO | PLOGX_FLG_IMPLICIT | PLOGX_FLG_FREE_NPHDL, 1); } else if (r == 0) { break; } if (FCPARAM(isp, chan)->isp_loopstate != LOOP_SCANNING_FABRIC) { return (-1); } /* * Now try and log into the device */ r = isp_plogx(isp, chan, handle, portid, PLOGX_FLG_CMD_PLOGI, 1); if (FCPARAM(isp, chan)->isp_loopstate != LOOP_SCANNING_FABRIC) { return (-1); } if (r == 0) { break; } else if ((r & 0xffff) == MBOX_PORT_ID_USED) { /* * If we get here, then the firmware still thinks we're logged into this device, but with a different * handle. We need to break that association. We used to try and just substitute the handle, but then * failed to get any data via isp_getpdb (below). */ if (isp_plogx(isp, chan, r >> 16, portid, PLOGX_FLG_CMD_LOGO | PLOGX_FLG_IMPLICIT | PLOGX_FLG_FREE_NPHDL, 1)) { isp_prt(isp, ISP_LOGERR, "baw... logout of %x failed", r >> 16); } if (FCPARAM(isp, chan)->isp_loopstate != LOOP_SCANNING_FABRIC) { return (-1); } r = isp_plogx(isp, chan, handle, portid, PLOGX_FLG_CMD_PLOGI, 1); if (FCPARAM(isp, chan)->isp_loopstate != LOOP_SCANNING_FABRIC) { return (-1); } if (r != 0) i = lim; break; } else if ((r & 0xffff) == MBOX_LOOP_ID_USED) { /* Try the next loop id. */ handle = isp_next_handle(isp, ohp); } else { /* Give up. */ i = lim; break; } } if (i == lim) { isp_prt(isp, ISP_LOGWARN, "Chan %d PLOGI 0x%06x failed", chan, portid); return (-1); } /* * If we successfully logged into it, get the PDB for it * so we can crosscheck that it is still what we think it * is and that we also have the role it plays */ r = isp_getpdb(isp, chan, handle, p, 0); if (FCPARAM(isp, chan)->isp_loopstate != LOOP_SCANNING_FABRIC) { return (-1); } if (r != 0) { isp_prt(isp, ISP_LOGERR, "Chan %d new device 0x%06x@0x%x disappeared", chan, portid, handle); return (-1); } if (p->handle != handle || p->portid != portid) { isp_prt(isp, ISP_LOGERR, "Chan %d new device 0x%06x@0x%x changed (0x%06x@0x%0x)", chan, portid, handle, p->portid, p->handle); return (-1); } return (0); } static int isp_register_fc4_type(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); uint8_t local[SNS_RFT_ID_REQ_SIZE]; sns_screq_t *reqp = (sns_screq_t *) local; mbreg_t mbs; ISP_MEMZERO((void *) reqp, SNS_RFT_ID_REQ_SIZE); reqp->snscb_rblen = SNS_RFT_ID_RESP_SIZE >> 1; reqp->snscb_addr[RQRSP_ADDR0015] = DMA_WD0(fcp->isp_scdma + 0x100); reqp->snscb_addr[RQRSP_ADDR1631] = DMA_WD1(fcp->isp_scdma + 0x100); reqp->snscb_addr[RQRSP_ADDR3247] = DMA_WD2(fcp->isp_scdma + 0x100); reqp->snscb_addr[RQRSP_ADDR4863] = DMA_WD3(fcp->isp_scdma + 0x100); reqp->snscb_sblen = 22; reqp->snscb_data[0] = SNS_RFT_ID; reqp->snscb_data[4] = fcp->isp_portid & 0xffff; reqp->snscb_data[5] = (fcp->isp_portid >> 16) & 0xff; reqp->snscb_data[6] = (1 << FC4_SCSI); if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); return (-1); } isp_put_sns_request(isp, reqp, (sns_screq_t *) fcp->isp_scratch); MBSINIT(&mbs, MBOX_SEND_SNS, MBLOGALL, 1000000); mbs.param[1] = SNS_RFT_ID_REQ_SIZE >> 1; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, SNS_RFT_ID_REQ_SIZE, chan); isp_mboxcmd(isp, &mbs); FC_SCRATCH_RELEASE(isp, chan); if (mbs.param[0] == MBOX_COMMAND_COMPLETE) { return (0); } else { return (-1); } } static int isp_register_fc4_type_24xx(ispsoftc_t *isp, int chan) { mbreg_t
mbs; fcparam *fcp = FCPARAM(isp, chan); union { isp_ct_pt_t plocal; rft_id_t clocal; uint8_t q[QENTRY_LEN]; } un; isp_ct_pt_t *pt; ct_hdr_t *ct; rft_id_t *rp; uint8_t *scp = fcp->isp_scratch; if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); return (-1); } /* * Build a Passthrough IOCB in memory. */ ISP_MEMZERO(un.q, QENTRY_LEN); pt = &un.plocal; pt->ctp_header.rqs_entry_count = 1; pt->ctp_header.rqs_entry_type = RQSTYPE_CT_PASSTHRU; pt->ctp_handle = 0xffffffff; pt->ctp_nphdl = fcp->isp_sns_hdl; pt->ctp_cmd_cnt = 1; pt->ctp_vpidx = ISP_GET_VPIDX(isp, chan); pt->ctp_time = 1; pt->ctp_rsp_cnt = 1; pt->ctp_rsp_bcnt = sizeof (ct_hdr_t); pt->ctp_cmd_bcnt = sizeof (rft_id_t); pt->ctp_dataseg[0].ds_base = DMA_LO32(fcp->isp_scdma+XTXOFF); pt->ctp_dataseg[0].ds_basehi = DMA_HI32(fcp->isp_scdma+XTXOFF); pt->ctp_dataseg[0].ds_count = sizeof (rft_id_t); pt->ctp_dataseg[1].ds_base = DMA_LO32(fcp->isp_scdma+IGPOFF); pt->ctp_dataseg[1].ds_basehi = DMA_HI32(fcp->isp_scdma+IGPOFF); pt->ctp_dataseg[1].ds_count = sizeof (ct_hdr_t); isp_put_ct_pt(isp, pt, (isp_ct_pt_t *) &scp[CTXOFF]); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "IOCB CT Request", QENTRY_LEN, pt); } /* * Build the CT header and command in memory. * * Note that the CT header has to end up as Big Endian format in memory. */ ISP_MEMZERO(&un.clocal, sizeof (un.clocal)); ct = &un.clocal.rftid_hdr; ct->ct_revision = CT_REVISION; ct->ct_fcs_type = CT_FC_TYPE_FC; ct->ct_fcs_subtype = CT_FC_SUBTYPE_NS; ct->ct_cmd_resp = SNS_RFT_ID; ct->ct_bcnt_resid = (sizeof (rft_id_t) - sizeof (ct_hdr_t)) >> 2; rp = &un.clocal; rp->rftid_portid[0] = fcp->isp_portid >> 16; rp->rftid_portid[1] = fcp->isp_portid >> 8; rp->rftid_portid[2] = fcp->isp_portid; rp->rftid_fc4types[FC4_SCSI >> 5] = 1 << (FC4_SCSI & 0x1f); isp_put_rft_id(isp, rp, (rft_id_t *) &scp[XTXOFF]); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "CT Header", QENTRY_LEN, &scp[XTXOFF]); } ISP_MEMZERO(&scp[ZTXOFF], sizeof (ct_hdr_t)); MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 1000000); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma + CTXOFF); mbs.param[3] = DMA_WD0(fcp->isp_scdma + CTXOFF); mbs.param[6] = DMA_WD3(fcp->isp_scdma + CTXOFF); mbs.param[7] = DMA_WD2(fcp->isp_scdma + CTXOFF); MEMORYBARRIER(isp, SYNC_SFORDEV, XTXOFF, 2 * QENTRY_LEN, chan); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { FC_SCRATCH_RELEASE(isp, chan); return (-1); } MEMORYBARRIER(isp, SYNC_SFORCPU, ZTXOFF, QENTRY_LEN, chan); pt = &un.plocal; isp_get_ct_pt(isp, (isp_ct_pt_t *) &scp[ZTXOFF], pt); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "IOCB response", QENTRY_LEN, pt); } if (pt->ctp_status) { FC_SCRATCH_RELEASE(isp, chan); isp_prt(isp, ISP_LOGWARN, "Chan %d Register FC4 Type CT Passthrough returned 0x%x", chan, pt->ctp_status); return (1); } isp_get_ct_hdr(isp, (ct_hdr_t *) &scp[IGPOFF], ct); FC_SCRATCH_RELEASE(isp, chan); if (ct->ct_cmd_resp == LS_RJT) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOG_WARN1, "Chan %d Register FC4 Type rejected", chan); return (-1); } else if (ct->ct_cmd_resp == LS_ACC) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Register FC4 Type accepted", chan); return (0); } else { isp_prt(isp, ISP_LOGWARN, "Chan %d Register FC4 Type: 0x%x", chan, ct->ct_cmd_resp); return (-1); } } static uint16_t isp_next_handle(ispsoftc_t *isp, uint16_t *ohp) { fcparam *fcp; int i, chan, wrap; uint16_t handle, minh, maxh; handle = *ohp; if (ISP_CAP_2KLOGIN(isp)) { minh = 0; - maxh = NPH_RESERVED - isp->isp_nchan; /* Reserve for 
SNS */ + maxh = NPH_RESERVED - 1; } else { minh = SNS_ID + 1; maxh = NPH_MAX - 1; } wrap = 0; next: if (handle == NIL_HANDLE) { handle = minh; } else { handle++; if (handle > maxh) { if (++wrap >= 2) { isp_prt(isp, ISP_LOGERR, "Out of port handles!"); return (NIL_HANDLE); } handle = minh; } } for (chan = 0; chan < isp->isp_nchan; chan++) { fcp = FCPARAM(isp, chan); if (fcp->role == ISP_ROLE_NONE) continue; for (i = 0; i < MAX_FC_TARG; i++) { if (fcp->portdb[i].state != FC_PORTDB_STATE_NIL && fcp->portdb[i].handle == handle) goto next; } } *ohp = handle; return (handle); } /* * Start a command. Locking is assumed done in the caller. */ int isp_start(XS_T *xs) { ispsoftc_t *isp; uint32_t handle, cdblen; uint8_t local[QENTRY_LEN]; ispreq_t *reqp; void *cdbp, *qep; uint16_t *tptr; fcportdb_t *lp; int target, dmaresult; XS_INITERR(xs); isp = XS_ISP(xs); /* * Check command CDB length, etc.. We really are limited to 16 bytes * for Fibre Channel, but can do up to 44 bytes in parallel SCSI, * but probably only if we're running fairly new firmware (we'll * let the old f/w choke on an extended command queue entry). */ if (XS_CDBLEN(xs) > (IS_FC(isp)? 16 : 44) || XS_CDBLEN(xs) == 0) { isp_prt(isp, ISP_LOGERR, "unsupported cdb length (%d, CDB[0]=0x%x)", XS_CDBLEN(xs), XS_CDBP(xs)[0] & 0xff); XS_SETERR(xs, HBA_BOTCH); return (CMD_COMPLETE); } /* * Translate the target to device handle as appropriate, checking * for correct device state as well. */ target = XS_TGT(xs); if (IS_FC(isp)) { fcparam *fcp = FCPARAM(isp, XS_CHANNEL(xs)); if ((fcp->role & ISP_ROLE_INITIATOR) == 0) { isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%jx I am not an initiator", XS_CHANNEL(xs), target, (uintmax_t)XS_LUN(xs)); XS_SETERR(xs, HBA_SELTIMEOUT); return (CMD_COMPLETE); } if (isp->isp_state != ISP_RUNSTATE) { isp_prt(isp, ISP_LOGERR, "Adapter not at RUNSTATE"); XS_SETERR(xs, HBA_BOTCH); return (CMD_COMPLETE); } /* * Try again later. */ if (fcp->isp_fwstate != FW_READY || fcp->isp_loopstate != LOOP_READY) { return (CMD_RQLATER); } isp_prt(isp, ISP_LOGDEBUG2, "XS_TGT(xs)=%d", target); lp = &fcp->portdb[target]; if (target < 0 || target >= MAX_FC_TARG || lp->is_target == 0) { XS_SETERR(xs, HBA_SELTIMEOUT); return (CMD_COMPLETE); } if (lp->state == FC_PORTDB_STATE_ZOMBIE) { isp_prt(isp, ISP_LOGDEBUG1, "%d.%d.%jx target zombie", XS_CHANNEL(xs), target, (uintmax_t)XS_LUN(xs)); return (CMD_RQLATER); } if (lp->state != FC_PORTDB_STATE_VALID) { isp_prt(isp, ISP_LOGDEBUG1, "%d.%d.%jx bad db port state 0x%x", XS_CHANNEL(xs), target, (uintmax_t)XS_LUN(xs), lp->state); XS_SETERR(xs, HBA_SELTIMEOUT); return (CMD_COMPLETE); } } else { sdparam *sdp = SDPARAM(isp, XS_CHANNEL(xs)); if ((sdp->role & ISP_ROLE_INITIATOR) == 0) { isp_prt(isp, ISP_LOGDEBUG1, "%d.%d.%jx I am not an initiator", XS_CHANNEL(xs), target, (uintmax_t)XS_LUN(xs)); XS_SETERR(xs, HBA_SELTIMEOUT); return (CMD_COMPLETE); } if (isp->isp_state != ISP_RUNSTATE) { isp_prt(isp, ISP_LOGERR, "Adapter not at RUNSTATE"); XS_SETERR(xs, HBA_BOTCH); return (CMD_COMPLETE); } if (sdp->update) { isp_spi_update(isp, XS_CHANNEL(xs)); } lp = NULL; } start_again: qep = isp_getrqentry(isp); if (qep == NULL) { isp_prt(isp, ISP_LOG_WARN1, "Request Queue Overflow"); XS_SETERR(xs, HBA_BOTCH); return (CMD_EAGAIN); } XS_SETERR(xs, HBA_NOERROR); /* * Now see if we need to synchronize the ISP with respect to anything. * We do dual duty here (cough) for synchronizing for busses other * than which we got here to send a command to. 
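 * (If a bus reset or loop event set the per-channel sendmarker flag,
 * a MARKER IOCB with SYNC_ALL is queued first, then we jump back to
 * start_again to build the actual command in a fresh queue entry.)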
*/ reqp = (ispreq_t *) local; ISP_MEMZERO(local, QENTRY_LEN); if (ISP_TST_SENDMARKER(isp, XS_CHANNEL(xs))) { if (IS_24XX(isp)) { isp_marker_24xx_t *m = (isp_marker_24xx_t *) reqp; m->mrk_header.rqs_entry_count = 1; m->mrk_header.rqs_entry_type = RQSTYPE_MARKER; m->mrk_modifier = SYNC_ALL; m->mrk_vphdl = XS_CHANNEL(xs); isp_put_marker_24xx(isp, m, qep); } else { isp_marker_t *m = (isp_marker_t *) reqp; m->mrk_header.rqs_entry_count = 1; m->mrk_header.rqs_entry_type = RQSTYPE_MARKER; m->mrk_target = (XS_CHANNEL(xs) << 7); /* bus # */ m->mrk_modifier = SYNC_ALL; isp_put_marker(isp, m, qep); } ISP_SYNC_REQUEST(isp); ISP_SET_SENDMARKER(isp, XS_CHANNEL(xs), 0); goto start_again; } reqp->req_header.rqs_entry_count = 1; /* * Select and install Header Code. * Note that it might be overridden before going out * if we're on a 64 bit platform. The lower level * code (isp_send_cmd) will select the appropriate * 64 bit variant if it needs to. */ if (IS_24XX(isp)) { reqp->req_header.rqs_entry_type = RQSTYPE_T7RQS; } else if (IS_FC(isp)) { reqp->req_header.rqs_entry_type = RQSTYPE_T2RQS; } else { if (XS_CDBLEN(xs) > 12) { reqp->req_header.rqs_entry_type = RQSTYPE_CMDONLY; } else { reqp->req_header.rqs_entry_type = RQSTYPE_REQUEST; } } /* * Set task attributes */ if (IS_24XX(isp)) { int ttype; if (XS_TAG_P(xs)) { ttype = XS_TAG_TYPE(xs); } else { if (XS_CDBP(xs)[0] == 0x3) { ttype = REQFLAG_HTAG; } else { ttype = REQFLAG_STAG; } } if (ttype == REQFLAG_OTAG) { ttype = FCP_CMND_TASK_ATTR_ORDERED; } else if (ttype == REQFLAG_HTAG) { ttype = FCP_CMND_TASK_ATTR_HEAD; } else { ttype = FCP_CMND_TASK_ATTR_SIMPLE; } ((ispreqt7_t *)reqp)->req_task_attribute = ttype; } else if (IS_FC(isp)) { /* * See comment in isp_intr */ /* XS_SET_RESID(xs, 0); */ /* * Fibre Channel always requires some kind of tag. * The Qlogic drivers seem to be happy not to use a tag, * but this breaks for some devices (IBM drives). */ if (XS_TAG_P(xs)) { ((ispreqt2_t *)reqp)->req_flags = XS_TAG_TYPE(xs); } else { /* * If we don't know what tag to use, use HEAD OF QUEUE * for Request Sense or Simple.
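 * (0x3 is the REQUEST SENSE opcode; head-of-queue keeps sense
 * retrieval from waiting behind ordinary queued I/O.)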
*/ if (XS_CDBP(xs)[0] == 0x3) /* REQUEST SENSE */ ((ispreqt2_t *)reqp)->req_flags = REQFLAG_HTAG; else ((ispreqt2_t *)reqp)->req_flags = REQFLAG_STAG; } } else { sdparam *sdp = SDPARAM(isp, XS_CHANNEL(xs)); if ((sdp->isp_devparam[target].actv_flags & DPARM_TQING) && XS_TAG_P(xs)) { reqp->req_flags = XS_TAG_TYPE(xs); } } tptr = &reqp->req_time; /* * NB: we do not support long CDBs (yet) */ cdblen = XS_CDBLEN(xs); if (IS_SCSI(isp)) { if (cdblen > sizeof (reqp->req_cdb)) { isp_prt(isp, ISP_LOGERR, "Command Length %u too long for this chip", cdblen); XS_SETERR(xs, HBA_BOTCH); return (CMD_COMPLETE); } reqp->req_target = target | (XS_CHANNEL(xs) << 7); reqp->req_lun_trn = XS_LUN(xs); cdbp = reqp->req_cdb; reqp->req_cdblen = cdblen; } else if (IS_24XX(isp)) { ispreqt7_t *t7 = (ispreqt7_t *)local; if (cdblen > sizeof (t7->req_cdb)) { isp_prt(isp, ISP_LOGERR, "Command Length %u too long for this chip", cdblen); XS_SETERR(xs, HBA_BOTCH); return (CMD_COMPLETE); } t7->req_nphdl = lp->handle; t7->req_tidlo = lp->portid; t7->req_tidhi = lp->portid >> 16; t7->req_vpidx = ISP_GET_VPIDX(isp, XS_CHANNEL(xs)); #if __FreeBSD_version >= 1000700 be64enc(t7->req_lun, CAM_EXTLUN_BYTE_SWIZZLE(XS_LUN(xs))); #else if (XS_LUN(xs) >= 256) { t7->req_lun[0] = XS_LUN(xs) >> 8; t7->req_lun[0] |= 0x40; } t7->req_lun[1] = XS_LUN(xs); #endif if (FCPARAM(isp, XS_CHANNEL(xs))->fctape_enabled && (lp->prli_word3 & PRLI_WD3_RETRY)) { if (FCP_NEXT_CRN(isp, &t7->req_crn, xs)) { isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%jx cannot generate next CRN", XS_CHANNEL(xs), target, (uintmax_t)XS_LUN(xs)); XS_SETERR(xs, HBA_BOTCH); return (CMD_EAGAIN); } } tptr = &t7->req_time; cdbp = t7->req_cdb; } else { ispreqt2_t *t2 = (ispreqt2_t *)local; if (cdblen > sizeof t2->req_cdb) { isp_prt(isp, ISP_LOGERR, "Command Length %u too long for this chip", cdblen); XS_SETERR(xs, HBA_BOTCH); return (CMD_COMPLETE); } if (FCPARAM(isp, XS_CHANNEL(xs))->fctape_enabled && (lp->prli_word3 & PRLI_WD3_RETRY)) { if (FCP_NEXT_CRN(isp, &t2->req_crn, xs)) { isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%jx cannot generate next CRN", XS_CHANNEL(xs), target, (uintmax_t)XS_LUN(xs)); XS_SETERR(xs, HBA_BOTCH); return (CMD_EAGAIN); } } if (ISP_CAP_2KLOGIN(isp)) { ispreqt2e_t *t2e = (ispreqt2e_t *)local; t2e->req_target = lp->handle; t2e->req_scclun = XS_LUN(xs); #if __FreeBSD_version < 1000700 if (XS_LUN(xs) >= 256) t2e->req_scclun |= 0x4000; #endif cdbp = t2e->req_cdb; } else if (ISP_CAP_SCCFW(isp)) { ispreqt2_t *t2 = (ispreqt2_t *)local; t2->req_target = lp->handle; t2->req_scclun = XS_LUN(xs); #if __FreeBSD_version < 1000700 if (XS_LUN(xs) >= 256) t2->req_scclun |= 0x4000; #endif cdbp = t2->req_cdb; } else { t2->req_target = lp->handle; t2->req_lun_trn = XS_LUN(xs); cdbp = t2->req_cdb; } } ISP_MEMCPY(cdbp, XS_CDBP(xs), cdblen); *tptr = XS_TIME(xs) / 1000; if (*tptr == 0 && XS_TIME(xs)) { *tptr = 1; } if (IS_24XX(isp) && *tptr > 0x1999) { *tptr = 0x1999; } if (isp_allocate_xs(isp, xs, &handle)) { isp_prt(isp, ISP_LOG_WARN1, "out of xflist pointers"); XS_SETERR(xs, HBA_BOTCH); return (CMD_EAGAIN); } /* Whew. Thankfully the same for type 7 requests */ reqp->req_handle = handle; /* * Set up DMA and/or do any platform dependent swizzling of the request entry * so that the Qlogic F/W understands what is being asked of it. * * The callee is responsible for adding all requests at this point. */ dmaresult = ISP_DMASETUP(isp, xs, reqp); if (dmaresult != CMD_QUEUED) { isp_destroy_handle(isp, handle); /* * dmasetup sets actual error in packet, and * return what we were given to return. 
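 * (Anything other than CMD_QUEUED from ISP_DMASETUP means the request
 * was not added; we free the handle above and hand the disposition
 * straight back to the caller.)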
*/ return (dmaresult); } isp_xs_prt(isp, xs, ISP_LOGDEBUG0, "START cmd cdb[0]=0x%x datalen %ld", XS_CDBP(xs)[0], (long) XS_XFRLEN(xs)); isp->isp_nactive++; return (CMD_QUEUED); } /* * isp control * Locks (ints blocked) assumed held. */ int isp_control(ispsoftc_t *isp, ispctl_t ctl, ...) { XS_T *xs; mbreg_t *mbr, mbs; int chan, tgt; uint32_t handle; va_list ap; switch (ctl) { case ISPCTL_RESET_BUS: /* * Issue a bus reset. */ if (IS_24XX(isp)) { isp_prt(isp, ISP_LOGERR, "BUS RESET NOT IMPLEMENTED"); break; } else if (IS_FC(isp)) { mbs.param[1] = 10; chan = 0; } else { va_start(ap, ctl); chan = va_arg(ap, int); va_end(ap); mbs.param[1] = SDPARAM(isp, chan)->isp_bus_reset_delay; if (mbs.param[1] < 2) { mbs.param[1] = 2; } mbs.param[2] = chan; } MBSINIT(&mbs, MBOX_BUS_RESET, MBLOGALL, 0); ISP_SET_SENDMARKER(isp, chan, 1); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { break; } isp_prt(isp, ISP_LOGINFO, "driver initiated bus reset of bus %d", chan); return (0); case ISPCTL_RESET_DEV: va_start(ap, ctl); chan = va_arg(ap, int); tgt = va_arg(ap, int); va_end(ap); if (IS_24XX(isp)) { uint8_t local[QENTRY_LEN]; isp24xx_tmf_t *tmf; isp24xx_statusreq_t *sp; fcparam *fcp = FCPARAM(isp, chan); fcportdb_t *lp; if (tgt < 0 || tgt >= MAX_FC_TARG) { isp_prt(isp, ISP_LOGWARN, "Chan %d trying to reset bad target %d", chan, tgt); break; } lp = &fcp->portdb[tgt]; if (lp->is_target == 0 || lp->state != FC_PORTDB_STATE_VALID) { isp_prt(isp, ISP_LOGWARN, "Chan %d abort of no longer valid target %d", chan, tgt); break; } tmf = (isp24xx_tmf_t *) local; ISP_MEMZERO(tmf, QENTRY_LEN); tmf->tmf_header.rqs_entry_type = RQSTYPE_TSK_MGMT; tmf->tmf_header.rqs_entry_count = 1; tmf->tmf_nphdl = lp->handle; tmf->tmf_delay = 2; tmf->tmf_timeout = 2; tmf->tmf_flags = ISP24XX_TMF_TARGET_RESET; tmf->tmf_tidlo = lp->portid; tmf->tmf_tidhi = lp->portid >> 16; tmf->tmf_vpidx = ISP_GET_VPIDX(isp, chan); isp_prt(isp, ISP_LOGALL, "Chan %d Reset N-Port Handle 0x%04x @ Port 0x%06x", chan, lp->handle, lp->portid); MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 5000000); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); break; } isp_put_24xx_tmf(isp, tmf, fcp->isp_scratch); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, QENTRY_LEN, chan); fcp->sendmarker = 1; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { FC_SCRATCH_RELEASE(isp, chan); break; } MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); sp = (isp24xx_statusreq_t *) local; isp_get_24xx_response(isp, &((isp24xx_statusreq_t *)fcp->isp_scratch)[1], sp); FC_SCRATCH_RELEASE(isp, chan); if (sp->req_completion_status == 0) { return (0); } isp_prt(isp, ISP_LOGWARN, "Chan %d reset of target %d returned 0x%x", chan, tgt, sp->req_completion_status); break; } else if (IS_FC(isp)) { if (ISP_CAP_2KLOGIN(isp)) { mbs.param[1] = tgt; mbs.ibits = (1 << 10); } else { mbs.param[1] = (tgt << 8); } } else { mbs.param[1] = (chan << 15) | (tgt << 8); } MBSINIT(&mbs, MBOX_ABORT_TARGET, MBLOGALL, 0); mbs.param[2] = 3; /* 'delay', in seconds */ isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { break; } isp_prt(isp, ISP_LOGINFO, "Target %d on Bus %d Reset Succeeded", tgt, chan); ISP_SET_SENDMARKER(isp, chan, 1); return (0); case ISPCTL_ABORT_CMD: va_start(ap, ctl); xs = va_arg(ap, XS_T *); va_end(ap); tgt = XS_TGT(xs); chan = XS_CHANNEL(xs); 
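/*
 * Recover the request-queue handle allotted in isp_start(); it is the
 * only name the firmware has for the exchange we want aborted.
 *
 * Illustrative only (not compiled): a platform layer aborts a command
 * roughly like this, passing the XS_T pointer through the varargs:
 */
#if 0
		if (isp_control(isp, ISPCTL_ABORT_CMD, xs) != 0)
			isp_prt(isp, ISP_LOGWARN, "abort request failed");
#endif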
handle = isp_find_handle(isp, xs); if (handle == 0) { isp_prt(isp, ISP_LOGWARN, "cannot find handle for command to abort"); break; } if (IS_24XX(isp)) { isp24xx_abrt_t local, *ab = &local, *ab2; fcparam *fcp; fcportdb_t *lp; fcp = FCPARAM(isp, chan); if (tgt < 0 || tgt >= MAX_FC_TARG) { isp_prt(isp, ISP_LOGWARN, "Chan %d trying to abort bad target %d", chan, tgt); break; } lp = &fcp->portdb[tgt]; if (lp->is_target == 0 || lp->state != FC_PORTDB_STATE_VALID) { isp_prt(isp, ISP_LOGWARN, "Chan %d abort of no longer valid target %d", chan, tgt); break; } isp_prt(isp, ISP_LOGALL, "Chan %d Abort Cmd for N-Port 0x%04x @ Port 0x%06x", chan, lp->handle, lp->portid); ISP_MEMZERO(ab, QENTRY_LEN); ab->abrt_header.rqs_entry_type = RQSTYPE_ABORT_IO; ab->abrt_header.rqs_entry_count = 1; ab->abrt_handle = lp->handle; ab->abrt_cmd_handle = handle; ab->abrt_tidlo = lp->portid; ab->abrt_tidhi = lp->portid >> 16; ab->abrt_vpidx = ISP_GET_VPIDX(isp, chan); ISP_MEMZERO(&mbs, sizeof (mbs)); MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 5000000); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); break; } isp_put_24xx_abrt(isp, ab, fcp->isp_scratch); ab2 = (isp24xx_abrt_t *) &((uint8_t *)fcp->isp_scratch)[QENTRY_LEN]; ab2->abrt_nphdl = 0xdeaf; MEMORYBARRIER(isp, SYNC_SFORDEV, 0, 2 * QENTRY_LEN, chan); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { FC_SCRATCH_RELEASE(isp, chan); break; } MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); isp_get_24xx_abrt(isp, ab2, ab); FC_SCRATCH_RELEASE(isp, chan); if (ab->abrt_nphdl == ISP24XX_ABRT_OKAY) { return (0); } isp_prt(isp, ISP_LOGWARN, "Chan %d handle %d abort returned 0x%x", chan, tgt, ab->abrt_nphdl); break; } else if (IS_FC(isp)) { if (ISP_CAP_SCCFW(isp)) { if (ISP_CAP_2KLOGIN(isp)) { mbs.param[1] = tgt; } else { mbs.param[1] = tgt << 8; } mbs.param[6] = XS_LUN(xs); } else { mbs.param[1] = tgt << 8 | XS_LUN(xs); } } else { mbs.param[1] = (chan << 15) | (tgt << 8) | XS_LUN(xs); } MBSINIT(&mbs, MBOX_ABORT, MBLOGALL & ~MBLOGMASK(MBOX_COMMAND_ERROR), 0); mbs.param[2] = handle; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { break; } return (0); case ISPCTL_UPDATE_PARAMS: va_start(ap, ctl); chan = va_arg(ap, int); va_end(ap); isp_spi_update(isp, chan); return (0); case ISPCTL_FCLINK_TEST: if (IS_FC(isp)) { int usdelay; va_start(ap, ctl); chan = va_arg(ap, int); usdelay = va_arg(ap, int); va_end(ap); if (usdelay == 0) { usdelay = 250000; } return (isp_fclink_test(isp, chan, usdelay)); } break; case ISPCTL_SCAN_FABRIC: if (IS_FC(isp)) { va_start(ap, ctl); chan = va_arg(ap, int); va_end(ap); return (isp_scan_fabric(isp, chan)); } break; case ISPCTL_SCAN_LOOP: if (IS_FC(isp)) { va_start(ap, ctl); chan = va_arg(ap, int); va_end(ap); return (isp_scan_loop(isp, chan)); } break; case ISPCTL_PDB_SYNC: if (IS_FC(isp)) { va_start(ap, ctl); chan = va_arg(ap, int); va_end(ap); return (isp_pdb_sync(isp, chan)); } break; case ISPCTL_SEND_LIP: if (IS_FC(isp) && !IS_24XX(isp)) { MBSINIT(&mbs, MBOX_INIT_LIP, MBLOGALL, 0); if (ISP_CAP_2KLOGIN(isp)) { mbs.ibits = (1 << 10); } isp_mboxcmd(isp, &mbs); if (mbs.param[0] == MBOX_COMMAND_COMPLETE) { return (0); } } break; case ISPCTL_GET_PDB: if (IS_FC(isp)) { isp_pdb_t *pdb; va_start(ap, ctl); chan = va_arg(ap, int); tgt = va_arg(ap, int); pdb = va_arg(ap, isp_pdb_t *); va_end(ap); 
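/*
 * Illustrative only (sketch in a comment): callers fetch a port
 * database entry with the (chan, tgt, pdb) varargs decoded above,
 * e.g.
 *
 *	isp_pdb_t pdb;
 *	if (isp_control(isp, ISPCTL_GET_PDB, chan, tgt, &pdb) == 0)
 *		... pdb.handle and pdb.portid describe the device ...
 */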
return (isp_getpdb(isp, chan, tgt, pdb, 1)); } break; case ISPCTL_GET_NAMES: { uint64_t *wwnn, *wwnp; va_start(ap, ctl); chan = va_arg(ap, int); tgt = va_arg(ap, int); wwnn = va_arg(ap, uint64_t *); wwnp = va_arg(ap, uint64_t *); va_end(ap); if (wwnn == NULL && wwnp == NULL) { break; } if (wwnn) { *wwnn = isp_get_wwn(isp, chan, tgt, 1); if (*wwnn == INI_NONE) { break; } } if (wwnp) { *wwnp = isp_get_wwn(isp, chan, tgt, 0); if (*wwnp == INI_NONE) { break; } } return (0); } case ISPCTL_RUN_MBOXCMD: { va_start(ap, ctl); mbr = va_arg(ap, mbreg_t *); va_end(ap); isp_mboxcmd(isp, mbr); return (0); } case ISPCTL_PLOGX: { isp_plcmd_t *p; int r; va_start(ap, ctl); p = va_arg(ap, isp_plcmd_t *); va_end(ap); if ((p->flags & PLOGX_FLG_CMD_MASK) != PLOGX_FLG_CMD_PLOGI || (p->handle != NIL_HANDLE)) { return (isp_plogx(isp, p->channel, p->handle, p->portid, p->flags, 0)); } do { isp_next_handle(isp, &p->handle); r = isp_plogx(isp, p->channel, p->handle, p->portid, p->flags, 0); if ((r & 0xffff) == MBOX_PORT_ID_USED) { p->handle = r >> 16; r = 0; break; } } while ((r & 0xffff) == MBOX_LOOP_ID_USED); return (r); } case ISPCTL_CHANGE_ROLE: { int role, r; va_start(ap, ctl); chan = va_arg(ap, int); role = va_arg(ap, int); va_end(ap); if (IS_FC(isp)) { r = isp_fc_change_role(isp, chan, role); } else { SDPARAM(isp, chan)->role = role; r = 0; } return (r); } default: isp_prt(isp, ISP_LOGERR, "Unknown Control Opcode 0x%x", ctl); break; } return (-1); } /* * Interrupt Service Routine(s). * * External (OS) framework has done the appropriate locking, * and the locking will be held throughout this function. */ /* * Limit our stack depth by sticking with the max likely number * of completions on a request queue at any one time. */ #ifndef MAX_REQUESTQ_COMPLETIONS #define MAX_REQUESTQ_COMPLETIONS 32 #endif void isp_intr(ispsoftc_t *isp, uint16_t isr, uint16_t sema, uint16_t info) { XS_T *complist[MAX_REQUESTQ_COMPLETIONS], *xs; uint32_t iptr, optr, junk; int i, nlooked = 0, ndone = 0, continuations_expected = 0; int etype, last_etype = 0; again: /* * Is this a mailbox related interrupt? * The mailbox semaphore will be nonzero if so. */ if (sema) { fmbox: if (info & MBOX_COMMAND_COMPLETE) { isp->isp_intmboxc++; if (isp->isp_mboxbsy) { int obits = isp->isp_obits; isp->isp_mboxtmp[0] = info; for (i = 1; i < ISP_NMBOX(isp); i++) { if ((obits & (1 << i)) == 0) { continue; } isp->isp_mboxtmp[i] = ISP_READ(isp, MBOX_OFF(i)); } if (isp->isp_mbxwrk0) { if (isp_mbox_continue(isp) == 0) { return; } } MBOX_NOTIFY_COMPLETE(isp); } else { isp_prt(isp, ISP_LOGWARN, "mailbox cmd (0x%x) with no waiters", info); } } else { i = IS_FC(isp)? isp_parse_async_fc(isp, info) : isp_parse_async(isp, info); if (i < 0) { return; } } if ((IS_FC(isp) && info != ASYNC_RIOZIO_STALL) || isp->isp_state != ISP_RUNSTATE) { goto out; } } /* * We can't be getting this now. */ if (isp->isp_state != ISP_RUNSTATE) { /* * This seems to happen to 23XX and 24XX cards- don't know why. */ if (isp->isp_mboxbsy && isp->isp_lastmbxcmd == MBOX_ABOUT_FIRMWARE) { goto fmbox; } isp_prt(isp, ISP_LOGINFO, "interrupt (ISR=%x SEMA=%x INFO=%x) " "when not ready", isr, sema, info); /* * Thank you very much! *Burrrp*! */ isp->isp_residx = ISP_READ(isp, isp->isp_respinrp); isp->isp_resodx = isp->isp_residx; ISP_WRITE(isp, isp->isp_respoutrp, isp->isp_resodx); if (IS_24XX(isp)) { ISP_DISABLE_INTS(isp); } goto out; } #ifdef ISP_TARGET_MODE /* * Check for ATIO Queue entries. 
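 * (Target mode only: the 24XX delivers ATIOs on a separate queue with
 * its own in/out pointers, drained here before the normal response
 * queue is looked at.)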
*/ if (IS_24XX(isp) && (isr == ISPR2HST_ATIO_UPDATE || isr == ISPR2HST_ATIO_RSPQ_UPDATE || isr == ISPR2HST_ATIO_UPDATE2)) { iptr = ISP_READ(isp, BIU2400_ATIO_RSPINP); optr = isp->isp_atioodx; while (optr != iptr) { uint8_t qe[QENTRY_LEN]; isphdr_t *hp; uint32_t oop; void *addr; oop = optr; MEMORYBARRIER(isp, SYNC_ATIOQ, oop, QENTRY_LEN, -1); addr = ISP_QUEUE_ENTRY(isp->isp_atioq, oop); isp_get_hdr(isp, addr, (isphdr_t *)qe); hp = (isphdr_t *)qe; switch (hp->rqs_entry_type) { case RQSTYPE_NOTIFY: case RQSTYPE_ATIO: (void) isp_target_notify(isp, addr, &oop); break; default: isp_print_qentry(isp, "?ATIOQ entry?", oop, addr); break; } optr = ISP_NXT_QENTRY(oop, RESULT_QUEUE_LEN(isp)); } if (isp->isp_atioodx != optr) { ISP_WRITE(isp, BIU2400_ATIO_RSPOUTP, optr); isp->isp_atioodx = optr; } } #endif /* * You *must* read the Response Queue In Pointer * prior to clearing the RISC interrupt. * * Debounce the 2300 if revision less than 2. */ if (IS_2100(isp) || (IS_2300(isp) && isp->isp_revision < 2)) { i = 0; do { iptr = ISP_READ(isp, isp->isp_respinrp); junk = ISP_READ(isp, isp->isp_respinrp); } while (junk != iptr && ++i < 1000); if (iptr != junk) { isp_prt(isp, ISP_LOGWARN, "Response Queue Out Pointer Unstable (%x, %x)", iptr, junk); goto out; } } else { iptr = ISP_READ(isp, isp->isp_respinrp); } optr = isp->isp_resodx; if (optr == iptr && sema == 0) { /* * There are a lot of these- reasons unknown- mostly on * faster Alpha machines. * * I tried delaying after writing HCCR_CMD_CLEAR_RISC_INT to * make sure the old interrupt went away (to avoid 'ringing' * effects), but that didn't stop this from occurring. */ if (IS_24XX(isp)) { junk = 0; } else if (IS_23XX(isp)) { ISP_DELAY(100); iptr = ISP_READ(isp, isp->isp_respinrp); junk = ISP_READ(isp, BIU_R2HSTSLO); } else { junk = ISP_READ(isp, BIU_ISR); } if (optr == iptr) { if (IS_23XX(isp) || IS_24XX(isp)) { ; } else { sema = ISP_READ(isp, BIU_SEMA); info = ISP_READ(isp, OUTMAILBOX0); if ((sema & 0x3) && (info & 0x8000)) { goto again; } } isp->isp_intbogus++; isp_prt(isp, ISP_LOGDEBUG1, "bogus intr- isr %x (%x) iptr %x optr %x", isr, junk, iptr, optr); } } isp->isp_residx = iptr; while (optr != iptr) { uint8_t qe[QENTRY_LEN]; ispstatusreq_t *sp = (ispstatusreq_t *) qe; isphdr_t *hp; int buddaboom, scsi_status, completion_status; int req_status_flags, req_state_flags; uint8_t *snsp, *resp; uint32_t rlen, slen, totslen; long resid; uint16_t oop; hp = (isphdr_t *) ISP_QUEUE_ENTRY(isp->isp_result, optr); oop = optr; optr = ISP_NXT_QENTRY(optr, RESULT_QUEUE_LEN(isp)); nlooked++; read_again: buddaboom = req_status_flags = req_state_flags = 0; resid = 0L; /* * Synchronize our view of this response queue entry. 
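 * (MEMORYBARRIER with SYNC_RESULT makes the DMA'd entry visible to
 * the CPU before isp_get_hdr parses its header.)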
*/ MEMORYBARRIER(isp, SYNC_RESULT, oop, QENTRY_LEN, -1); isp_get_hdr(isp, hp, &sp->req_header); etype = sp->req_header.rqs_entry_type; if (IS_24XX(isp) && etype == RQSTYPE_RESPONSE) { isp24xx_statusreq_t *sp2 = (isp24xx_statusreq_t *)qe; isp_get_24xx_response(isp, (isp24xx_statusreq_t *)hp, sp2); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "Response Queue Entry", QENTRY_LEN, sp2); } scsi_status = sp2->req_scsi_status; completion_status = sp2->req_completion_status; if ((scsi_status & 0xff) != 0) req_state_flags = RQSF_GOT_STATUS; else req_state_flags = 0; resid = sp2->req_resid; } else if (etype == RQSTYPE_RESPONSE) { isp_get_response(isp, (ispstatusreq_t *) hp, sp); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "Response Queue Entry", QENTRY_LEN, sp); } scsi_status = sp->req_scsi_status; completion_status = sp->req_completion_status; req_status_flags = sp->req_status_flags; req_state_flags = sp->req_state_flags; resid = sp->req_resid; } else if (etype == RQSTYPE_RIO1) { isp_rio1_t *rio = (isp_rio1_t *) qe; isp_get_rio1(isp, (isp_rio1_t *) hp, rio); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "Response Queue Entry", QENTRY_LEN, rio); } for (i = 0; i < rio->req_header.rqs_seqno; i++) { isp_fastpost_complete(isp, rio->req_handles[i]); } if (isp->isp_fpcchiwater < rio->req_header.rqs_seqno) { isp->isp_fpcchiwater = rio->req_header.rqs_seqno; } ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ last_etype = etype; continue; } else if (etype == RQSTYPE_RIO2) { isp_prt(isp, ISP_LOGERR, "dropping RIO2 response"); ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ last_etype = etype; continue; } else if (etype == RQSTYPE_STATUS_CONT) { isp_get_cont_response(isp, (ispstatus_cont_t *) hp, (ispstatus_cont_t *) sp); if (last_etype == RQSTYPE_RESPONSE && continuations_expected && ndone > 0 && (xs = complist[ndone-1]) != NULL) { ispstatus_cont_t *scp = (ispstatus_cont_t *) sp; XS_SENSE_APPEND(xs, scp->req_sense_data, sizeof (scp->req_sense_data)); isp_prt(isp, ISP_LOGDEBUG0|ISP_LOG_CWARN, "%d more Status Continuations expected", --continuations_expected); } else { isp_prt(isp, ISP_LOG_WARN1, "Ignored Continuation Response"); } ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ continue; } else { /* * Somebody reachable via isp_handle_other_response * may have updated the response queue pointers for * us, so we reload our goal index. */ int r; uint32_t tsto = oop; r = isp_handle_other_response(isp, etype, hp, &tsto); if (r < 0) { goto read_again; } /* * If somebody updated the output pointer, then reset * optr to be one more than the updated amount. */ while (tsto != oop) { optr = ISP_NXT_QENTRY(tsto, RESULT_QUEUE_LEN(isp)); } if (r > 0) { ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ last_etype = etype; continue; } /* * After this point, we'll just look at the header as * we don't know how to deal with the rest of the * response. */ /* * It really has to be a bounced request just copied * from the request queue to the response queue. If * not, something bad has happened. 
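 * (That is, the only non-response type we tolerate here is a request
 * entry the firmware bounced back; it is flagged with buddaboom and
 * picked apart below, and anything else is logged and skipped.)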
*/ if (etype != RQSTYPE_REQUEST) { isp_prt(isp, ISP_LOGERR, notresp, etype, oop, optr, nlooked); isp_print_bytes(isp, "Request Queue Entry", QENTRY_LEN, sp); ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ last_etype = etype; continue; } buddaboom = 1; scsi_status = sp->req_scsi_status; completion_status = sp->req_completion_status; req_status_flags = sp->req_status_flags; req_state_flags = sp->req_state_flags; resid = sp->req_resid; } if (sp->req_header.rqs_flags & RQSFLAG_MASK) { if (sp->req_header.rqs_flags & RQSFLAG_CONTINUATION) { isp_print_bytes(isp, "unexpected continuation segment", QENTRY_LEN, sp); last_etype = etype; continue; } if (sp->req_header.rqs_flags & RQSFLAG_FULL) { isp_prt(isp, ISP_LOG_WARN1, "internal queues full"); /* * We'll synthesize a QUEUE FULL message below. */ } if (sp->req_header.rqs_flags & RQSFLAG_BADHEADER) { isp_print_bytes(isp, "bad header flag", QENTRY_LEN, sp); buddaboom++; } if (sp->req_header.rqs_flags & RQSFLAG_BADPACKET) { isp_print_bytes(isp, "bad request packet", QENTRY_LEN, sp); buddaboom++; } if (sp->req_header.rqs_flags & RQSFLAG_BADCOUNT) { isp_print_bytes(isp, "invalid entry count", QENTRY_LEN, sp); buddaboom++; } if (sp->req_header.rqs_flags & RQSFLAG_BADORDER) { isp_print_bytes(isp, "invalid IOCB ordering", QENTRY_LEN, sp); last_etype = etype; continue; } } if (!ISP_VALID_HANDLE(isp, sp->req_handle)) { isp_prt(isp, ISP_LOGERR, "bad request handle 0x%x (iocb type 0x%x)", sp->req_handle, etype); ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ last_etype = etype; continue; } xs = isp_find_xs(isp, sp->req_handle); if (xs == NULL) { uint8_t ts = completion_status & 0xff; /* * Only whine if this isn't the expected fallout of * aborting the command or resetting the target. */ if (etype != RQSTYPE_RESPONSE) { isp_prt(isp, ISP_LOGERR, "cannot find handle 0x%x (type 0x%x)", sp->req_handle, etype); } else if (ts != RQCS_ABORTED && ts != RQCS_RESET_OCCURRED) { isp_prt(isp, ISP_LOGERR, "cannot find handle 0x%x (status 0x%x)", sp->req_handle, ts); } ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ last_etype = etype; continue; } if (req_status_flags & RQSTF_BUS_RESET) { isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%jx bus was reset", XS_CHANNEL(xs), XS_TGT(xs), (uintmax_t)XS_LUN(xs)); XS_SETERR(xs, HBA_BUSRESET); ISP_SET_SENDMARKER(isp, XS_CHANNEL(xs), 1); } if (buddaboom) { isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%jx buddaboom", XS_CHANNEL(xs), XS_TGT(xs), (uintmax_t)XS_LUN(xs)); XS_SETERR(xs, HBA_BOTCH); } resp = NULL; rlen = 0; snsp = NULL; totslen = slen = 0; if (IS_24XX(isp) && (scsi_status & (RQCS_RV|RQCS_SV)) != 0) { resp = ((isp24xx_statusreq_t *)sp)->req_rsp_sense; rlen = ((isp24xx_statusreq_t *)sp)->req_response_len; } else if (IS_FC(isp) && (scsi_status & RQCS_RV) != 0) { resp = sp->req_response; rlen = sp->req_response_len; } if (IS_FC(isp) && (scsi_status & RQCS_SV) != 0) { /* * Fibre Channel F/W doesn't say we got status * if there's Sense Data instead. I guess they * think it goes w/o saying. 
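 * (So we synthesize RQSF_GOT_STATUS|RQSF_GOT_SENSE ourselves before
 * locating and copying the sense bytes below.)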
*/ req_state_flags |= RQSF_GOT_STATUS|RQSF_GOT_SENSE; if (IS_24XX(isp)) { snsp = ((isp24xx_statusreq_t *)sp)->req_rsp_sense; snsp += rlen; totslen = ((isp24xx_statusreq_t *)sp)->req_sense_len; slen = (sizeof (((isp24xx_statusreq_t *)sp)->req_rsp_sense)) - rlen; if (totslen < slen) slen = totslen; } else { snsp = sp->req_sense_data; totslen = sp->req_sense_len; slen = sizeof (sp->req_sense_data); if (totslen < slen) slen = totslen; } } else if (IS_SCSI(isp) && (req_state_flags & RQSF_GOT_SENSE)) { snsp = sp->req_sense_data; totslen = sp->req_sense_len; slen = sizeof (sp->req_sense_data); if (totslen < slen) slen = totslen; } if (req_state_flags & RQSF_GOT_STATUS) { *XS_STSP(xs) = scsi_status & 0xff; } switch (etype) { case RQSTYPE_RESPONSE: if (resp && rlen >= 4 && resp[FCP_RSPNS_CODE_OFFSET] != 0) { const char *ptr; char lb[64]; const char *rnames[10] = { "Task Management function complete", "FCP_DATA length different than FCP_BURST_LEN", "FCP_CMND fields invalid", "FCP_DATA parameter mismatch with FCP_DATA_RO", "Task Management function rejected", "Task Management function failed", NULL, NULL, "Task Management function succeeded", "Task Management function incorrect logical unit number", }; uint8_t code = resp[FCP_RSPNS_CODE_OFFSET]; if (code >= 10 || rnames[code] == NULL) { ISP_SNPRINTF(lb, sizeof(lb), "Unknown FCP Response Code 0x%x", code); ptr = lb; } else { ptr = rnames[code]; } isp_xs_prt(isp, xs, ISP_LOGWARN, "FCP RESPONSE, LENGTH %u: %s CDB0=0x%02x", rlen, ptr, XS_CDBP(xs)[0] & 0xff); if (code != 0 && code != 8) XS_SETERR(xs, HBA_BOTCH); } if (IS_24XX(isp)) { isp_parse_status_24xx(isp, (isp24xx_statusreq_t *)sp, xs, &resid); } else { isp_parse_status(isp, (void *)sp, xs, &resid); } if ((XS_NOERR(xs) || XS_ERR(xs) == HBA_NOERROR) && (*XS_STSP(xs) == SCSI_BUSY)) { XS_SETERR(xs, HBA_TGTBSY); } if (IS_SCSI(isp)) { XS_SET_RESID(xs, resid); /* * A new synchronous rate was negotiated for * this target. Mark state such that we'll go * look up that which has changed later. */ if (req_status_flags & RQSTF_NEGOTIATION) { int t = XS_TGT(xs); sdparam *sdp = SDPARAM(isp, XS_CHANNEL(xs)); sdp->isp_devparam[t].dev_refresh = 1; sdp->update = 1; } } else { if (req_status_flags & RQSF_XFER_COMPLETE) { XS_SET_RESID(xs, 0); } else if (scsi_status & RQCS_RESID) { XS_SET_RESID(xs, resid); } else { XS_SET_RESID(xs, 0); } } if (snsp && slen) { if (totslen > slen) { continuations_expected += ((totslen - slen + QENTRY_LEN - 5) / (QENTRY_LEN - 4)); if (ndone > (MAX_REQUESTQ_COMPLETIONS - continuations_expected - 1)) { /* we'll lose some stats, but that's a small price to pay */ for (i = 0; i < ndone; i++) { if (complist[i]) { isp->isp_rsltccmplt++; isp_done(complist[i]); } } ndone = 0; } isp_prt(isp, ISP_LOGDEBUG0|ISP_LOG_CWARN, "Expecting %d more Status Continuations for total sense length of %u", continuations_expected, totslen); } XS_SAVE_SENSE(xs, snsp, totslen, slen); } else if ((req_status_flags & RQSF_GOT_STATUS) && (scsi_status & 0xff) == SCSI_CHECK && IS_FC(isp)) { isp_prt(isp, ISP_LOGWARN, "CHECK CONDITION w/o sense data for CDB=0x%x", XS_CDBP(xs)[0] & 0xff); isp_print_bytes(isp, "CC with no Sense", QENTRY_LEN, qe); } isp_prt(isp, ISP_LOGDEBUG2, "asked for %ld got raw resid %ld settled for %ld", (long) XS_XFRLEN(xs), resid, (long) XS_GET_RESID(xs)); break; case RQSTYPE_REQUEST: case RQSTYPE_A64: case RQSTYPE_T2RQS: case RQSTYPE_T3RQS: case RQSTYPE_T7RQS: if (!IS_24XX(isp) && (sp->req_header.rqs_flags & RQSFLAG_FULL)) { /* * Force Queue Full status. 
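 * (RQSFLAG_FULL means the firmware's internal queues overflowed; a
 * synthetic SCSI_QFULL status lets the upper layers back off and
 * requeue rather than fail the command.)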
*/ *XS_STSP(xs) = SCSI_QFULL; XS_SETERR(xs, HBA_NOERROR); } else if (XS_NOERR(xs)) { isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%jx badness at %s:%u", XS_CHANNEL(xs), XS_TGT(xs), (uintmax_t)XS_LUN(xs), __func__, __LINE__); XS_SETERR(xs, HBA_BOTCH); } XS_SET_RESID(xs, XS_XFRLEN(xs)); break; default: isp_print_bytes(isp, "Unhandled Response Type", QENTRY_LEN, qe); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_BOTCH); } break; } /* * Free any DMA resources. As a side effect, this may * also do any cache flushing necessary for data coherence. */ if (XS_XFRLEN(xs)) { ISP_DMAFREE(isp, xs, sp->req_handle); } isp_destroy_handle(isp, sp->req_handle); if (isp->isp_nactive > 0) { isp->isp_nactive--; } complist[ndone++] = xs; /* defer completion call until later */ ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ last_etype = etype; if (ndone == MAX_REQUESTQ_COMPLETIONS) { break; } } /* * If we looked at any commands, then it's valid to find out * what the outpointer is. It also is a trigger to update the * ISP's notion of what we've seen so far. */ if (nlooked) { ISP_WRITE(isp, isp->isp_respoutrp, optr); isp->isp_resodx = optr; if (isp->isp_rscchiwater < ndone) isp->isp_rscchiwater = ndone; } out: if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_CLEAR_RISC_INT); } else { ISP_WRITE(isp, HCCR, HCCR_CMD_CLEAR_RISC_INT); ISP_WRITE(isp, BIU_SEMA, 0); } for (i = 0; i < ndone; i++) { xs = complist[i]; if (xs) { if (((isp->isp_dblev & (ISP_LOGDEBUG1|ISP_LOGDEBUG2|ISP_LOGDEBUG3))) || ((isp->isp_dblev & (ISP_LOGDEBUG0|ISP_LOG_CWARN) && ((!XS_NOERR(xs)) || (*XS_STSP(xs) != SCSI_GOOD))))) { isp_prt_endcmd(isp, xs); } isp->isp_rsltccmplt++; isp_done(xs); } } } /* * Support routines. */ void isp_prt_endcmd(ispsoftc_t *isp, XS_T *xs) { char cdbstr[16 * 5 + 1]; int i, lim; lim = XS_CDBLEN(xs) > 16? 16 : XS_CDBLEN(xs); ISP_SNPRINTF(cdbstr, sizeof (cdbstr), "0x%02x ", XS_CDBP(xs)[0]); for (i = 1; i < lim; i++) { ISP_SNPRINTF(cdbstr, sizeof (cdbstr), "%s0x%02x ", cdbstr, XS_CDBP(xs)[i]); } if (XS_SENSE_VALID(xs)) { isp_xs_prt(isp, xs, ISP_LOGALL, "FIN dl%d resid %ld CDB=%s SenseLength=%u/%u KEY/ASC/ASCQ=0x%02x/0x%02x/0x%02x", XS_XFRLEN(xs), (long) XS_GET_RESID(xs), cdbstr, XS_CUR_SNSLEN(xs), XS_TOT_SNSLEN(xs), XS_SNSKEY(xs), XS_SNSASC(xs), XS_SNSASCQ(xs)); } else { isp_xs_prt(isp, xs, ISP_LOGALL, "FIN dl%d resid %ld CDB=%s STS 0x%x XS_ERR=0x%x", XS_XFRLEN(xs), (long) XS_GET_RESID(xs), cdbstr, *XS_STSP(xs), XS_ERR(xs)); } } /* * Parse an ASYNC mailbox complete * * Return non-zero if the event has been acknowledged. */ static int isp_parse_async(ispsoftc_t *isp, uint16_t mbox) { int acked = 0; uint32_t h1 = 0, h2 = 0; uint16_t chan = 0; /* * Pick up the channel, but not if this is a ASYNC_RIO32_2, * where Mailboxes 6/7 have the second handle. */ if (mbox != ASYNC_RIO32_2) { if (IS_DUALBUS(isp)) { chan = ISP_READ(isp, OUTMAILBOX6); } } isp_prt(isp, ISP_LOGDEBUG2, "Async Mbox 0x%x", mbox); switch (mbox) { case ASYNC_BUS_RESET: ISP_SET_SENDMARKER(isp, chan, 1); #ifdef ISP_TARGET_MODE if (isp_target_async(isp, chan, mbox)) { acked = 1; } #endif isp_async(isp, ISPASYNC_BUS_RESET, chan); break; case ASYNC_SYSTEM_ERROR: isp->isp_dead = 1; isp->isp_state = ISP_CRASHED; /* * Were we waiting for a mailbox command to complete? * If so, it's dead, so wake up the waiter. 
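 * (We fake a MBOX_HOST_INTERFACE_ERROR completion so the sleeper sees
 * a failed mailbox command instead of hanging forever.)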
*/ if (isp->isp_mboxbsy) { isp->isp_obits = 1; isp->isp_mboxtmp[0] = MBOX_HOST_INTERFACE_ERROR; MBOX_NOTIFY_COMPLETE(isp); } /* * It's up to the handler for isp_async to reinit stuff and * restart the firmware */ isp_async(isp, ISPASYNC_FW_CRASH); acked = 1; break; case ASYNC_RQS_XFER_ERR: isp_prt(isp, ISP_LOGERR, "Request Queue Transfer Error"); break; case ASYNC_RSP_XFER_ERR: isp_prt(isp, ISP_LOGERR, "Response Queue Transfer Error"); break; case ASYNC_QWAKEUP: /* * We've just been notified that the Queue has woken up. * We don't need to be chatty about this- just unlatch things * and move on. */ mbox = ISP_READ(isp, isp->isp_rqstoutrp); break; case ASYNC_TIMEOUT_RESET: isp_prt(isp, ISP_LOGWARN, "timeout initiated SCSI bus reset of chan %d", chan); ISP_SET_SENDMARKER(isp, chan, 1); #ifdef ISP_TARGET_MODE if (isp_target_async(isp, chan, mbox)) { acked = 1; } #endif break; case ASYNC_DEVICE_RESET: isp_prt(isp, ISP_LOGINFO, "device reset on chan %d", chan); ISP_SET_SENDMARKER(isp, chan, 1); #ifdef ISP_TARGET_MODE if (isp_target_async(isp, chan, mbox)) { acked = 1; } #endif break; case ASYNC_EXTMSG_UNDERRUN: isp_prt(isp, ISP_LOGWARN, "extended message underrun"); break; case ASYNC_SCAM_INT: isp_prt(isp, ISP_LOGINFO, "SCAM interrupt"); break; case ASYNC_HUNG_SCSI: isp_prt(isp, ISP_LOGERR, "stalled SCSI Bus after DATA Overrun"); /* XXX: Need to issue SCSI reset at this point */ break; case ASYNC_KILLED_BUS: isp_prt(isp, ISP_LOGERR, "SCSI Bus reset after DATA Overrun"); break; case ASYNC_BUS_TRANSIT: mbox = ISP_READ(isp, OUTMAILBOX2); switch (mbox & SXP_PINS_MODE_MASK) { case SXP_PINS_LVD_MODE: isp_prt(isp, ISP_LOGINFO, "Transition to LVD mode"); SDPARAM(isp, chan)->isp_diffmode = 0; SDPARAM(isp, chan)->isp_ultramode = 0; SDPARAM(isp, chan)->isp_lvdmode = 1; break; case SXP_PINS_HVD_MODE: isp_prt(isp, ISP_LOGINFO, "Transition to Differential mode"); SDPARAM(isp, chan)->isp_diffmode = 1; SDPARAM(isp, chan)->isp_ultramode = 0; SDPARAM(isp, chan)->isp_lvdmode = 0; break; case SXP_PINS_SE_MODE: isp_prt(isp, ISP_LOGINFO, "Transition to Single Ended mode"); SDPARAM(isp, chan)->isp_diffmode = 0; SDPARAM(isp, chan)->isp_ultramode = 1; SDPARAM(isp, chan)->isp_lvdmode = 0; break; default: isp_prt(isp, ISP_LOGWARN, "Transition to Unknown Mode 0x%x", mbox); break; } /* * XXX: Set up to renegotiate again! */ /* Can only be for a 1080... 
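 * (The SE/LVD/HVD transition was decoded from OUTMAILBOX2 above; the
 * marker queued here forces renegotiation on the next command.)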
*/ ISP_SET_SENDMARKER(isp, chan, 1); break; case ASYNC_CMD_CMPLT: case ASYNC_RIO32_1: if (!IS_ULTRA3(isp)) { isp_prt(isp, ISP_LOGERR, "unexpected fast posting completion"); break; } /* FALLTHROUGH */ h1 = (ISP_READ(isp, OUTMAILBOX2) << 16) | ISP_READ(isp, OUTMAILBOX1); break; case ASYNC_RIO32_2: h1 = (ISP_READ(isp, OUTMAILBOX2) << 16) | ISP_READ(isp, OUTMAILBOX1); h2 = (ISP_READ(isp, OUTMAILBOX7) << 16) | ISP_READ(isp, OUTMAILBOX6); break; case ASYNC_RIO16_5: case ASYNC_RIO16_4: case ASYNC_RIO16_3: case ASYNC_RIO16_2: case ASYNC_RIO16_1: isp_prt(isp, ISP_LOGERR, "unexpected 16 bit RIO handle"); break; default: isp_prt(isp, ISP_LOGWARN, "%s: unhandled async code 0x%x", __func__, mbox); break; } if (h1 || h2) { isp_prt(isp, ISP_LOGDEBUG3, "fast post/rio completion of 0x%08x", h1); isp_fastpost_complete(isp, h1); if (h2) { isp_prt(isp, ISP_LOGDEBUG3, "fast post/rio completion of 0x%08x", h2); isp_fastpost_complete(isp, h2); if (isp->isp_fpcchiwater < 2) { isp->isp_fpcchiwater = 2; } } else { if (isp->isp_fpcchiwater < 1) { isp->isp_fpcchiwater = 1; } } } else { isp->isp_intoasync++; } return (acked); } #define GET_24XX_BUS(isp, chan, msg) \ if (IS_24XX(isp)) { \ chan = ISP_READ(isp, OUTMAILBOX3) & 0xff; \ if (chan >= isp->isp_nchan) { \ isp_prt(isp, ISP_LOGERR, "bogus channel %u for %s at line %d", chan, msg, __LINE__); \ break; \ } \ } static int isp_parse_async_fc(ispsoftc_t *isp, uint16_t mbox) { int acked = 0; uint16_t chan; if (IS_DUALBUS(isp)) { chan = ISP_READ(isp, OUTMAILBOX6); } else { chan = 0; } isp_prt(isp, ISP_LOGDEBUG2, "Async Mbox 0x%x", mbox); switch (mbox) { case ASYNC_SYSTEM_ERROR: isp->isp_dead = 1; isp->isp_state = ISP_CRASHED; FCPARAM(isp, chan)->isp_loopstate = LOOP_NIL; FCPARAM(isp, chan)->isp_fwstate = FW_CONFIG_WAIT; /* * Were we waiting for a mailbox command to complete? * If so, it's dead, so wake up the waiter. */ if (isp->isp_mboxbsy) { isp->isp_obits = 1; isp->isp_mboxtmp[0] = MBOX_HOST_INTERFACE_ERROR; MBOX_NOTIFY_COMPLETE(isp); } /* * It's up to the handler for isp_async to reinit stuff and * restart the firmware */ isp_async(isp, ISPASYNC_FW_CRASH); acked = 1; break; case ASYNC_RQS_XFER_ERR: isp_prt(isp, ISP_LOGERR, "Request Queue Transfer Error"); break; case ASYNC_RSP_XFER_ERR: isp_prt(isp, ISP_LOGERR, "Response Queue Transfer Error"); break; case ASYNC_QWAKEUP: #ifdef ISP_TARGET_MODE if (IS_24XX(isp)) { isp_prt(isp, ISP_LOGERR, "ATIO Queue Transfer Error"); break; } #endif isp_prt(isp, ISP_LOGERR, "%s: unexpected ASYNC_QWAKEUP code", __func__); break; case ASYNC_CMD_CMPLT: isp_fastpost_complete(isp, (ISP_READ(isp, OUTMAILBOX2) << 16) | ISP_READ(isp, OUTMAILBOX1)); if (isp->isp_fpcchiwater < 1) { isp->isp_fpcchiwater = 1; } break; case ASYNC_RIOZIO_STALL: break; case ASYNC_CTIO_DONE: #ifdef ISP_TARGET_MODE if (isp_target_async(isp, (ISP_READ(isp, OUTMAILBOX2) << 16) | ISP_READ(isp, OUTMAILBOX1), mbox)) { acked = 1; } else { isp->isp_fphccmplt++; } #else isp_prt(isp, ISP_LOGWARN, "unexpected ASYNC CTIO done"); #endif break; case ASYNC_LIP_ERROR: case ASYNC_LIP_F8: case ASYNC_LIP_OCCURRED: case ASYNC_PTPMODE: /* * These are broadcast events that have to be sent across * all active channels. 
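 * (Per-fabric events, not per-VP: every channel with an active role
 * gets its firmware state reset, its port database marked for rescan,
 * and a marker queued.)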
*/ for (chan = 0; chan < isp->isp_nchan; chan++) { fcparam *fcp = FCPARAM(isp, chan); int topo = fcp->isp_topo; if (fcp->role == ISP_ROLE_NONE) { continue; } fcp->isp_fwstate = FW_CONFIG_WAIT; fcp->isp_loopstate = LOOP_LIP_RCVD; ISP_SET_SENDMARKER(isp, chan, 1); ISP_MARK_PORTDB(isp, chan, 1); isp_async(isp, ISPASYNC_LIP, chan); #ifdef ISP_TARGET_MODE if (isp_target_async(isp, chan, mbox)) { acked = 1; } #endif /* * We've had problems with data corruption occurring on * commands that complete (with no apparent error) after * we receive a LIP. This has been observed mostly on * Local Loop topologies. To be safe, let's just mark * all active initiator commands as dead. */ if (topo == TOPO_NL_PORT || topo == TOPO_FL_PORT) { int i, j; for (i = j = 0; i < isp->isp_maxcmds; i++) { XS_T *xs; isp_hdl_t *hdp; hdp = &isp->isp_xflist[i]; if (ISP_H2HT(hdp->handle) != ISP_HANDLE_INITIATOR) { continue; } xs = hdp->cmd; if (XS_CHANNEL(xs) != chan) { continue; } j++; isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%jx bus reset set at %s:%u", XS_CHANNEL(xs), XS_TGT(xs), (uintmax_t)XS_LUN(xs), __func__, __LINE__); XS_SETERR(xs, HBA_BUSRESET); } if (j) { isp_prt(isp, ISP_LOGERR, lipd, chan, j); } } } break; case ASYNC_LOOP_UP: /* * This is a broadcast event that has to be sent across * all active channels. */ for (chan = 0; chan < isp->isp_nchan; chan++) { fcparam *fcp = FCPARAM(isp, chan); if (fcp->role == ISP_ROLE_NONE) { continue; } ISP_SET_SENDMARKER(isp, chan, 1); fcp->isp_fwstate = FW_CONFIG_WAIT; fcp->isp_loopstate = LOOP_LIP_RCVD; ISP_MARK_PORTDB(isp, chan, 1); isp_async(isp, ISPASYNC_LOOP_UP, chan); #ifdef ISP_TARGET_MODE if (isp_target_async(isp, chan, mbox)) { acked = 1; } #endif } break; case ASYNC_LOOP_DOWN: /* * This is a broadcast event that has to be sent across * all active channels. */ for (chan = 0; chan < isp->isp_nchan; chan++) { fcparam *fcp = FCPARAM(isp, chan); if (fcp->role == ISP_ROLE_NONE) { continue; } ISP_SET_SENDMARKER(isp, chan, 1); fcp->isp_fwstate = FW_CONFIG_WAIT; fcp->isp_loopstate = LOOP_NIL; ISP_MARK_PORTDB(isp, chan, 1); isp_async(isp, ISPASYNC_LOOP_DOWN, chan); #ifdef ISP_TARGET_MODE if (isp_target_async(isp, chan, mbox)) { acked = 1; } #endif } break; case ASYNC_LOOP_RESET: /* * This is a broadcast event that has to be sent across * all active channels. */ for (chan = 0; chan < isp->isp_nchan; chan++) { fcparam *fcp = FCPARAM(isp, chan); if (fcp->role == ISP_ROLE_NONE) { continue; } ISP_SET_SENDMARKER(isp, chan, 1); fcp->isp_fwstate = FW_CONFIG_WAIT; fcp->isp_loopstate = LOOP_NIL; ISP_MARK_PORTDB(isp, chan, 1); isp_async(isp, ISPASYNC_LOOP_RESET, chan); #ifdef ISP_TARGET_MODE if (isp_target_async(isp, chan, mbox)) { acked = 1; } #endif } break; case ASYNC_PDB_CHANGED: { int echan, nphdl, nlstate, reason; if (IS_24XX(isp)) { nphdl = ISP_READ(isp, OUTMAILBOX1); nlstate = ISP_READ(isp, OUTMAILBOX2); reason = ISP_READ(isp, OUTMAILBOX3) >> 8; GET_24XX_BUS(isp, chan, "ASYNC_CHANGE_NOTIFY"); echan = (nphdl == NIL_HANDLE) ?
isp->isp_nchan - 1 : chan; } else { nphdl = NIL_HANDLE; nlstate = reason = 0; chan = echan = 0; } for (; chan <= echan; chan++) { fcparam *fcp = FCPARAM(isp, chan); if (fcp->role == ISP_ROLE_NONE) { continue; } ISP_SET_SENDMARKER(isp, chan, 1); fcp->isp_loopstate = LOOP_PDB_RCVD; ISP_MARK_PORTDB(isp, chan, 1); isp_async(isp, ISPASYNC_CHANGE_NOTIFY, chan, ISPASYNC_CHANGE_PDB, nphdl, nlstate, reason); } break; } case ASYNC_CHANGE_NOTIFY: { int lochan, hichan; if (ISP_FW_NEWER_THAN(isp, 4, 0, 25) && ISP_CAP_MULTI_ID(isp)) { GET_24XX_BUS(isp, chan, "ASYNC_CHANGE_NOTIFY"); lochan = chan; hichan = chan + 1; } else { lochan = 0; hichan = isp->isp_nchan; } for (chan = lochan; chan < hichan; chan++) { fcparam *fcp = FCPARAM(isp, chan); if (fcp->role == ISP_ROLE_NONE) { continue; } if (fcp->isp_topo == TOPO_F_PORT) { fcp->isp_loopstate = LOOP_LSCAN_DONE; } else { fcp->isp_loopstate = LOOP_PDB_RCVD; } ISP_MARK_PORTDB(isp, chan, 1); isp_async(isp, ISPASYNC_CHANGE_NOTIFY, chan, ISPASYNC_CHANGE_SNS); } break; } case ASYNC_CONNMODE: /* * This only applies to 2100 and 2200 cards */ if (!IS_2200(isp) && !IS_2100(isp)) { isp_prt(isp, ISP_LOGWARN, "bad card for ASYNC_CONNMODE event"); break; } chan = 0; mbox = ISP_READ(isp, OUTMAILBOX1); ISP_MARK_PORTDB(isp, chan, 1); switch (mbox) { case ISP_CONN_LOOP: isp_prt(isp, ISP_LOGINFO, "Point-to-Point -> Loop mode"); break; case ISP_CONN_PTP: isp_prt(isp, ISP_LOGINFO, "Loop -> Point-to-Point mode"); break; case ISP_CONN_BADLIP: isp_prt(isp, ISP_LOGWARN, "Point-to-Point -> Loop mode (BAD LIP)"); break; case ISP_CONN_FATAL: isp->isp_dead = 1; isp->isp_state = ISP_CRASHED; isp_prt(isp, ISP_LOGERR, "FATAL CONNECTION ERROR"); isp_async(isp, ISPASYNC_FW_CRASH); return (-1); case ISP_CONN_LOOPBACK: isp_prt(isp, ISP_LOGWARN, "Looped Back in Point-to-Point mode"); break; default: isp_prt(isp, ISP_LOGWARN, "Unknown connection mode (0x%x)", mbox); break; } isp_async(isp, ISPASYNC_CHANGE_NOTIFY, chan, ISPASYNC_CHANGE_OTHER); FCPARAM(isp, chan)->sendmarker = 1; FCPARAM(isp, chan)->isp_fwstate = FW_CONFIG_WAIT; FCPARAM(isp, chan)->isp_loopstate = LOOP_LIP_RCVD; break; case ASYNC_RCV_ERR: if (IS_24XX(isp)) { isp_prt(isp, ISP_LOGWARN, "Receive Error"); } else { isp_prt(isp, ISP_LOGWARN, "unexpected ASYNC_RCV_ERR"); } break; case ASYNC_RJT_SENT: /* same as ASYNC_QFULL_SENT */ if (IS_24XX(isp)) { isp_prt(isp, ISP_LOGTDEBUG0, "LS_RJT sent"); break; } else if (IS_2200(isp)) { isp_prt(isp, ISP_LOGTDEBUG0, "QFULL sent"); break; } /* FALLTHROUGH */ default: isp_prt(isp, ISP_LOGWARN, "Unknown Async Code 0x%x", mbox); break; } if (mbox != ASYNC_CTIO_DONE && mbox != ASYNC_CMD_CMPLT) { isp->isp_intoasync++; } return (acked); } /* * Handle other response entries. A pointer to the request queue output * index is here in case we want to eat several entries at once, although * this is not used currently. */ static int isp_handle_other_response(ispsoftc_t *isp, int type, isphdr_t *hp, uint32_t *optrp) { + isp_ridacq_t rid; + int chan, c; + switch (type) { case RQSTYPE_STATUS_CONT: isp_prt(isp, ISP_LOG_WARN1, "Ignored Continuation Response"); return (1); case RQSTYPE_MARKER: isp_prt(isp, ISP_LOG_WARN1, "Marker Response"); return (1); + case RQSTYPE_RPT_ID_ACQ: + isp_get_ridacq(isp, (isp_ridacq_t *)hp, &rid); + if (rid.ridacq_format == 0) { + for (chan = 0; chan < isp->isp_nchan; chan++) { + fcparam *fcp = FCPARAM(isp, chan); + if (fcp->role == ISP_ROLE_NONE) + continue; + c = (chan == 0) ?
127 : (chan - 1); + if (rid.ridacq_map[c / 16] & (1 << (c % 16))) + isp_async(isp, ISPASYNC_CHANGE_NOTIFY, + chan, ISPASYNC_CHANGE_OTHER); + } + } else { + isp_async(isp, ISPASYNC_CHANGE_NOTIFY, + rid.ridacq_vp_index, ISPASYNC_CHANGE_OTHER); + } + return (1); case RQSTYPE_ATIO: case RQSTYPE_CTIO: case RQSTYPE_ENABLE_LUN: case RQSTYPE_MODIFY_LUN: case RQSTYPE_NOTIFY: case RQSTYPE_NOTIFY_ACK: case RQSTYPE_CTIO1: case RQSTYPE_ATIO2: case RQSTYPE_CTIO2: case RQSTYPE_CTIO3: case RQSTYPE_CTIO7: case RQSTYPE_ABTS_RCVD: case RQSTYPE_ABTS_RSP: isp->isp_rsltccmplt++; /* count as a response completion */ #ifdef ISP_TARGET_MODE if (isp_target_notify(isp, (ispstatusreq_t *) hp, optrp)) { return (1); } #endif - /* FALLTHROUGH */ - case RQSTYPE_RPT_ID_ACQ: - if (IS_24XX(isp)) { - isp_ridacq_t rid; - isp_get_ridacq(isp, (isp_ridacq_t *)hp, &rid); - if (rid.ridacq_format == 0) { - } - return (1); - } /* FALLTHROUGH */ case RQSTYPE_REQUEST: default: ISP_DELAY(100); if (type != isp_get_response_type(isp, hp)) { /* * This is questionable- we're just papering over * something we've seen on SMP linux in target * mode- we don't really know what's happening * here that causes us to think we've gotten * an entry, but that either the entry isn't * filled out yet or our CPU read data is stale. */ isp_prt(isp, ISP_LOGINFO, "unstable type in response queue"); return (-1); } isp_prt(isp, ISP_LOGWARN, "Unhandled Response Type 0x%x", isp_get_response_type(isp, hp)); return (0); } } static void isp_parse_status(ispsoftc_t *isp, ispstatusreq_t *sp, XS_T *xs, long *rp) { switch (sp->req_completion_status & 0xff) { case RQCS_COMPLETE: if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_NOERROR); } return; case RQCS_INCOMPLETE: if ((sp->req_state_flags & RQSF_GOT_TARGET) == 0) { isp_xs_prt(isp, xs, ISP_LOG_WARN1, "Selection Timeout @ %s:%d", __func__, __LINE__); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_SELTIMEOUT); *rp = XS_XFRLEN(xs); } return; } isp_xs_prt(isp, xs, ISP_LOGERR, "Command Incomplete, state 0x%x", sp->req_state_flags); break; case RQCS_DMA_ERROR: isp_xs_prt(isp, xs, ISP_LOGERR, "DMA Error"); *rp = XS_XFRLEN(xs); break; case RQCS_TRANSPORT_ERROR: { char buf[172]; ISP_SNPRINTF(buf, sizeof (buf), "states=>"); if (sp->req_state_flags & RQSF_GOT_BUS) { ISP_SNPRINTF(buf, sizeof (buf), "%s GOT_BUS", buf); } if (sp->req_state_flags & RQSF_GOT_TARGET) { ISP_SNPRINTF(buf, sizeof (buf), "%s GOT_TGT", buf); } if (sp->req_state_flags & RQSF_SENT_CDB) { ISP_SNPRINTF(buf, sizeof (buf), "%s SENT_CDB", buf); } if (sp->req_state_flags & RQSF_XFRD_DATA) { ISP_SNPRINTF(buf, sizeof (buf), "%s XFRD_DATA", buf); } if (sp->req_state_flags & RQSF_GOT_STATUS) { ISP_SNPRINTF(buf, sizeof (buf), "%s GOT_STS", buf); } if (sp->req_state_flags & RQSF_GOT_SENSE) { ISP_SNPRINTF(buf, sizeof (buf), "%s GOT_SNS", buf); } if (sp->req_state_flags & RQSF_XFER_COMPLETE) { ISP_SNPRINTF(buf, sizeof (buf), "%s XFR_CMPLT", buf); } ISP_SNPRINTF(buf, sizeof (buf), "%s\nstatus=>", buf); if (sp->req_status_flags & RQSTF_DISCONNECT) { ISP_SNPRINTF(buf, sizeof (buf), "%s Disconnect", buf); } if (sp->req_status_flags & RQSTF_SYNCHRONOUS) { ISP_SNPRINTF(buf, sizeof (buf), "%s Sync_xfr", buf); } if (sp->req_status_flags & RQSTF_PARITY_ERROR) { ISP_SNPRINTF(buf, sizeof (buf), "%s Parity", buf); } if (sp->req_status_flags & RQSTF_BUS_RESET) { ISP_SNPRINTF(buf, sizeof (buf), "%s Bus_Reset", buf); } if (sp->req_status_flags & RQSTF_DEVICE_RESET) { ISP_SNPRINTF(buf, sizeof (buf), "%s Device_Reset", buf); } if (sp->req_status_flags & RQSTF_ABORTED) { ISP_SNPRINTF(buf, sizeof (buf), "%s 
Aborted", buf); } if (sp->req_status_flags & RQSTF_TIMEOUT) { ISP_SNPRINTF(buf, sizeof (buf), "%s Timeout", buf); } if (sp->req_status_flags & RQSTF_NEGOTIATION) { ISP_SNPRINTF(buf, sizeof (buf), "%s Negotiation", buf); } isp_xs_prt(isp, xs, ISP_LOGERR, "Transport Error: %s", buf); *rp = XS_XFRLEN(xs); break; } case RQCS_RESET_OCCURRED: { int chan; isp_xs_prt(isp, xs, ISP_LOGWARN, "Bus Reset destroyed command"); for (chan = 0; chan < isp->isp_nchan; chan++) { FCPARAM(isp, chan)->sendmarker = 1; } if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_BUSRESET); } *rp = XS_XFRLEN(xs); return; } case RQCS_ABORTED: isp_xs_prt(isp, xs, ISP_LOGERR, "Command Aborted"); ISP_SET_SENDMARKER(isp, XS_CHANNEL(xs), 1); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_ABORTED); } return; case RQCS_TIMEOUT: isp_xs_prt(isp, xs, ISP_LOGWARN, "Command timed out"); /* * XXX: Check to see if we logged out of the device. */ if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_CMDTIMEOUT); } return; case RQCS_DATA_OVERRUN: XS_SET_RESID(xs, sp->req_resid); isp_xs_prt(isp, xs, ISP_LOGERR, "data overrun (%ld)", (long) XS_GET_RESID(xs)); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_DATAOVR); } return; case RQCS_COMMAND_OVERRUN: isp_xs_prt(isp, xs, ISP_LOGERR, "command overrun"); break; case RQCS_STATUS_OVERRUN: isp_xs_prt(isp, xs, ISP_LOGERR, "status overrun"); break; case RQCS_BAD_MESSAGE: isp_xs_prt(isp, xs, ISP_LOGERR, "msg not COMMAND COMPLETE after status"); break; case RQCS_NO_MESSAGE_OUT: isp_xs_prt(isp, xs, ISP_LOGERR, "No MESSAGE OUT phase after selection"); break; case RQCS_EXT_ID_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "EXTENDED IDENTIFY failed"); break; case RQCS_IDE_MSG_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "INITIATOR DETECTED ERROR rejected"); break; case RQCS_ABORT_MSG_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "ABORT OPERATION rejected"); break; case RQCS_REJECT_MSG_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "MESSAGE REJECT rejected"); break; case RQCS_NOP_MSG_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "NOP rejected"); break; case RQCS_PARITY_ERROR_MSG_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "MESSAGE PARITY ERROR rejected"); break; case RQCS_DEVICE_RESET_MSG_FAILED: isp_xs_prt(isp, xs, ISP_LOGWARN, "BUS DEVICE RESET rejected"); break; case RQCS_ID_MSG_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "IDENTIFY rejected"); break; case RQCS_UNEXP_BUS_FREE: isp_xs_prt(isp, xs, ISP_LOGERR, "Unexpected Bus Free"); break; case RQCS_DATA_UNDERRUN: { if (IS_FC(isp)) { int ru_marked = (sp->req_scsi_status & RQCS_RU) != 0; if (!ru_marked || sp->req_resid > XS_XFRLEN(xs)) { isp_xs_prt(isp, xs, ISP_LOGWARN, bun, XS_XFRLEN(xs), sp->req_resid, (ru_marked)? "marked" : "not marked"); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_BOTCH); } return; } } XS_SET_RESID(xs, sp->req_resid); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_NOERROR); } return; } case RQCS_XACT_ERR1: isp_xs_prt(isp, xs, ISP_LOGERR, "HBA attempted queued transaction with disconnect not set"); break; case RQCS_XACT_ERR2: isp_xs_prt(isp, xs, ISP_LOGERR, "HBA attempted queued transaction to target routine %jx", (uintmax_t)XS_LUN(xs)); break; case RQCS_XACT_ERR3: isp_xs_prt(isp, xs, ISP_LOGERR, "HBA attempted queued cmd when queueing disabled"); break; case RQCS_BAD_ENTRY: isp_prt(isp, ISP_LOGERR, "Invalid IOCB entry type detected"); break; case RQCS_QUEUE_FULL: isp_xs_prt(isp, xs, ISP_LOG_WARN1, "internal queues full status 0x%x", *XS_STSP(xs)); /* * If QFULL or some other status byte is set, then this * isn't an error, per se. * * Unfortunately, some QLogic f/w writers have, in * some cases, ommitted to *set* status to QFULL. 
*/ #if 0 if (*XS_STSP(xs) != SCSI_GOOD && XS_NOERR(xs)) { XS_SETERR(xs, HBA_NOERROR); return; } #endif *XS_STSP(xs) = SCSI_QFULL; XS_SETERR(xs, HBA_NOERROR); return; case RQCS_PHASE_SKIPPED: isp_xs_prt(isp, xs, ISP_LOGERR, "SCSI phase skipped"); break; case RQCS_ARQS_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "Auto Request Sense Failed"); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_ARQFAIL); } return; case RQCS_WIDE_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "Wide Negotiation Failed"); if (IS_SCSI(isp)) { sdparam *sdp = SDPARAM(isp, XS_CHANNEL(xs)); sdp->isp_devparam[XS_TGT(xs)].goal_flags &= ~DPARM_WIDE; sdp->isp_devparam[XS_TGT(xs)].dev_update = 1; sdp->update = 1; } if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_NOERROR); } return; case RQCS_SYNCXFER_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "SDTR Message Failed"); if (IS_SCSI(isp)) { sdparam *sdp = SDPARAM(isp, XS_CHANNEL(xs)); sdp += XS_CHANNEL(xs); sdp->isp_devparam[XS_TGT(xs)].goal_flags &= ~DPARM_SYNC; sdp->isp_devparam[XS_TGT(xs)].dev_update = 1; sdp->update = 1; } break; case RQCS_LVD_BUSERR: isp_xs_prt(isp, xs, ISP_LOGERR, "Bad LVD condition"); break; case RQCS_PORT_UNAVAILABLE: /* * No such port on the loop. Moral equivalent of SELTIMEO */ case RQCS_PORT_LOGGED_OUT: { const char *reason; uint8_t sts = sp->req_completion_status & 0xff; /* * It was there (maybe)- treat as a selection timeout. */ if (sts == RQCS_PORT_UNAVAILABLE) { reason = "unavailable"; } else { reason = "logout"; } isp_prt(isp, ISP_LOGINFO, "port %s for target %d", reason, XS_TGT(xs)); /* * If we're on a local loop, force a LIP (which is overkill) * to force a re-login of this unit. If we're on fabric, * then we'll have to log in again as a matter of course. */ if (FCPARAM(isp, 0)->isp_topo == TOPO_NL_PORT || FCPARAM(isp, 0)->isp_topo == TOPO_FL_PORT) { mbreg_t mbs; MBSINIT(&mbs, MBOX_INIT_LIP, MBLOGALL, 0); if (ISP_CAP_2KLOGIN(isp)) { mbs.ibits = (1 << 10); } isp_mboxcmd_qnw(isp, &mbs, 1); } if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_SELTIMEOUT); } return; } case RQCS_PORT_CHANGED: isp_prt(isp, ISP_LOGWARN, "port changed for target %d", XS_TGT(xs)); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_SELTIMEOUT); } return; case RQCS_PORT_BUSY: isp_prt(isp, ISP_LOGWARN, "port busy for target %d", XS_TGT(xs)); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_TGTBSY); } return; default: isp_prt(isp, ISP_LOGERR, "Unknown Completion Status 0x%x", sp->req_completion_status); break; } if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_BOTCH); } } static void isp_parse_status_24xx(ispsoftc_t *isp, isp24xx_statusreq_t *sp, XS_T *xs, long *rp) { int ru_marked, sv_marked; int chan = XS_CHANNEL(xs); switch (sp->req_completion_status) { case RQCS_COMPLETE: if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_NOERROR); } return; case RQCS_DMA_ERROR: isp_xs_prt(isp, xs, ISP_LOGERR, "DMA error"); break; case RQCS_TRANSPORT_ERROR: isp_xs_prt(isp, xs, ISP_LOGERR, "Transport Error"); break; case RQCS_RESET_OCCURRED: isp_xs_prt(isp, xs, ISP_LOGWARN, "reset destroyed command"); FCPARAM(isp, chan)->sendmarker = 1; if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_BUSRESET); } return; case RQCS_ABORTED: isp_xs_prt(isp, xs, ISP_LOGERR, "Command Aborted"); FCPARAM(isp, chan)->sendmarker = 1; if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_ABORTED); } return; case RQCS_TIMEOUT: isp_xs_prt(isp, xs, ISP_LOGWARN, "Command Timed Out"); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_CMDTIMEOUT); } return; case RQCS_DATA_OVERRUN: XS_SET_RESID(xs, sp->req_resid); isp_xs_prt(isp, xs, ISP_LOGERR, "Data Overrun"); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_DATAOVR); } return; case RQCS_24XX_DRE: /* data 
reassembly error */ isp_prt(isp, ISP_LOGERR, "Chan %d data reassembly error for target %d", chan, XS_TGT(xs)); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_ABORTED); } *rp = XS_XFRLEN(xs); return; case RQCS_24XX_TABORT: /* aborted by target */ isp_prt(isp, ISP_LOGERR, "Chan %d target %d sent ABTS", chan, XS_TGT(xs)); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_ABORTED); } return; case RQCS_DATA_UNDERRUN: ru_marked = (sp->req_scsi_status & RQCS_RU) != 0; /* * We can get an underrun w/o things being marked * if we got a non-zero status. */ sv_marked = (sp->req_scsi_status & (RQCS_SV|RQCS_RV)) != 0; if ((ru_marked == 0 && sv_marked == 0) || (sp->req_resid > XS_XFRLEN(xs))) { isp_xs_prt(isp, xs, ISP_LOGWARN, bun, XS_XFRLEN(xs), sp->req_resid, (ru_marked)? "marked" : "not marked"); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_BOTCH); } return; } XS_SET_RESID(xs, sp->req_resid); isp_xs_prt(isp, xs, ISP_LOG_WARN1, "Data Underrun (%d) for command 0x%x", sp->req_resid, XS_CDBP(xs)[0] & 0xff); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_NOERROR); } return; case RQCS_PORT_UNAVAILABLE: /* * No such port on the loop. Moral equivalent of SELTIMEO */ case RQCS_PORT_LOGGED_OUT: { const char *reason; uint8_t sts = sp->req_completion_status & 0xff; /* * It was there (maybe)- treat as a selection timeout. */ if (sts == RQCS_PORT_UNAVAILABLE) { reason = "unavailable"; } else { reason = "logout"; } isp_prt(isp, ISP_LOGINFO, "Chan %d port %s for target %d", chan, reason, XS_TGT(xs)); /* * There is no MBOX_INIT_LIP for the 24XX. */ if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_SELTIMEOUT); } return; } case RQCS_PORT_CHANGED: isp_prt(isp, ISP_LOGWARN, "port changed for target %d chan %d", XS_TGT(xs), chan); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_SELTIMEOUT); } return; case RQCS_24XX_ENOMEM: /* f/w resource unavailable */ isp_prt(isp, ISP_LOGWARN, "f/w resource unavailable for target %d chan %d", XS_TGT(xs), chan); if (XS_NOERR(xs)) { *XS_STSP(xs) = SCSI_BUSY; XS_SETERR(xs, HBA_TGTBSY); } return; case RQCS_24XX_TMO: /* task management overrun */ isp_prt(isp, ISP_LOGWARN, "command for target %d overlapped task management for chan %d", XS_TGT(xs), chan); if (XS_NOERR(xs)) { *XS_STSP(xs) = SCSI_BUSY; XS_SETERR(xs, HBA_TGTBSY); } return; default: isp_prt(isp, ISP_LOGERR, "Unknown Completion Status 0x%x on chan %d", sp->req_completion_status, chan); break; } if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_BOTCH); } } static void isp_fastpost_complete(ispsoftc_t *isp, uint32_t fph) { XS_T *xs; if (fph == 0) { return; } xs = isp_find_xs(isp, fph); if (xs == NULL) { isp_prt(isp, ISP_LOGWARN, "Command for fast post handle 0x%x not found", fph); return; } isp_destroy_handle(isp, fph); /* * Since we don't have a result queue entry item, * we must believe that SCSI status is zero and * that all data transferred. */ XS_SET_RESID(xs, 0); *XS_STSP(xs) = SCSI_GOOD; if (XS_XFRLEN(xs)) { ISP_DMAFREE(isp, xs, fph); } if (isp->isp_nactive) { isp->isp_nactive--; } isp->isp_fphccmplt++; isp_done(xs); } static int isp_mbox_continue(ispsoftc_t *isp) { mbreg_t mbs; uint16_t *ptr; uint32_t offset; switch (isp->isp_lastmbxcmd) { case MBOX_WRITE_RAM_WORD: case MBOX_READ_RAM_WORD: case MBOX_WRITE_RAM_WORD_EXTENDED: case MBOX_READ_RAM_WORD_EXTENDED: break; default: return (1); } if (isp->isp_mboxtmp[0] != MBOX_COMMAND_COMPLETE) { isp->isp_mbxwrk0 = 0; return (-1); } /* * Clear the previous interrupt. 
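* Multi-word RAM reads and writes are issued one mailbox command per
* word, and this function is re-entered from the completion interrupt
* for each word, so the pending RISC interrupt must be acknowledged
* before the next word is started.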
*/ if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_CLEAR_RISC_INT); } else { ISP_WRITE(isp, HCCR, HCCR_CMD_CLEAR_RISC_INT); ISP_WRITE(isp, BIU_SEMA, 0); } /* * Continue with next word. */ ISP_MEMZERO(&mbs, sizeof (mbs)); ptr = isp->isp_mbxworkp; switch (isp->isp_lastmbxcmd) { case MBOX_WRITE_RAM_WORD: mbs.param[1] = isp->isp_mbxwrk1++; mbs.param[2] = *ptr++; break; case MBOX_READ_RAM_WORD: *ptr++ = isp->isp_mboxtmp[2]; mbs.param[1] = isp->isp_mbxwrk1++; break; case MBOX_WRITE_RAM_WORD_EXTENDED: if (IS_24XX(isp)) { uint32_t *lptr = (uint32_t *)ptr; mbs.param[2] = lptr[0]; mbs.param[3] = lptr[0] >> 16; lptr++; ptr = (uint16_t *)lptr; } else { mbs.param[2] = *ptr++; } offset = isp->isp_mbxwrk1; offset |= isp->isp_mbxwrk8 << 16; mbs.param[1] = offset; mbs.param[8] = offset >> 16; offset++; isp->isp_mbxwrk1 = offset; isp->isp_mbxwrk8 = offset >> 16; break; case MBOX_READ_RAM_WORD_EXTENDED: if (IS_24XX(isp)) { uint32_t *lptr = (uint32_t *)ptr; uint32_t val = isp->isp_mboxtmp[2]; val |= (isp->isp_mboxtmp[3]) << 16; *lptr++ = val; ptr = (uint16_t *)lptr; } else { *ptr++ = isp->isp_mboxtmp[2]; } offset = isp->isp_mbxwrk1; offset |= isp->isp_mbxwrk8 << 16; mbs.param[1] = offset; mbs.param[8] = offset >> 16; offset++; isp->isp_mbxwrk1 = offset; isp->isp_mbxwrk8 = offset >> 16; break; } isp->isp_mbxworkp = ptr; isp->isp_mbxwrk0--; mbs.param[0] = isp->isp_lastmbxcmd; mbs.logval = MBLOGALL; isp_mboxcmd_qnw(isp, &mbs, 0); return (0); } #define ISP_SCSI_IBITS(op) (mbpscsi[((op)<<1)]) #define ISP_SCSI_OBITS(op) (mbpscsi[((op)<<1) + 1]) #define ISP_SCSI_OPMAP(in, out) in, out static const uint8_t mbpscsi[] = { ISP_SCSI_OPMAP(0x01, 0x01), /* 0x00: MBOX_NO_OP */ ISP_SCSI_OPMAP(0x1f, 0x01), /* 0x01: MBOX_LOAD_RAM */ ISP_SCSI_OPMAP(0x03, 0x01), /* 0x02: MBOX_EXEC_FIRMWARE */ ISP_SCSI_OPMAP(0x1f, 0x01), /* 0x03: MBOX_DUMP_RAM */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x04: MBOX_WRITE_RAM_WORD */ ISP_SCSI_OPMAP(0x03, 0x07), /* 0x05: MBOX_READ_RAM_WORD */ ISP_SCSI_OPMAP(0x3f, 0x3f), /* 0x06: MBOX_MAILBOX_REG_TEST */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x07: MBOX_VERIFY_CHECKSUM */ ISP_SCSI_OPMAP(0x01, 0x0f), /* 0x08: MBOX_ABOUT_FIRMWARE */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x09: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x0a: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x0b: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x0c: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x0d: */ ISP_SCSI_OPMAP(0x01, 0x05), /* 0x0e: MBOX_CHECK_FIRMWARE */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x0f: */ ISP_SCSI_OPMAP(0x1f, 0x1f), /* 0x10: MBOX_INIT_REQ_QUEUE */ ISP_SCSI_OPMAP(0x3f, 0x3f), /* 0x11: MBOX_INIT_RES_QUEUE */ ISP_SCSI_OPMAP(0x0f, 0x0f), /* 0x12: MBOX_EXECUTE_IOCB */ ISP_SCSI_OPMAP(0x03, 0x03), /* 0x13: MBOX_WAKE_UP */ ISP_SCSI_OPMAP(0x01, 0x3f), /* 0x14: MBOX_STOP_FIRMWARE */ ISP_SCSI_OPMAP(0x0f, 0x0f), /* 0x15: MBOX_ABORT */ ISP_SCSI_OPMAP(0x03, 0x03), /* 0x16: MBOX_ABORT_DEVICE */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x17: MBOX_ABORT_TARGET */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x18: MBOX_BUS_RESET */ ISP_SCSI_OPMAP(0x03, 0x07), /* 0x19: MBOX_STOP_QUEUE */ ISP_SCSI_OPMAP(0x03, 0x07), /* 0x1a: MBOX_START_QUEUE */ ISP_SCSI_OPMAP(0x03, 0x07), /* 0x1b: MBOX_SINGLE_STEP_QUEUE */ ISP_SCSI_OPMAP(0x03, 0x07), /* 0x1c: MBOX_ABORT_QUEUE */ ISP_SCSI_OPMAP(0x03, 0x4f), /* 0x1d: MBOX_GET_DEV_QUEUE_STATUS */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x1e: */ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x1f: MBOX_GET_FIRMWARE_STATUS */ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x20: MBOX_GET_INIT_SCSI_ID */ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x21: MBOX_GET_SELECT_TIMEOUT */ ISP_SCSI_OPMAP(0x01, 0xc7), /* 0x22: MBOX_GET_RETRY_COUNT 
*/ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x23: MBOX_GET_TAG_AGE_LIMIT */ ISP_SCSI_OPMAP(0x01, 0x03), /* 0x24: MBOX_GET_CLOCK_RATE */ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x25: MBOX_GET_ACT_NEG_STATE */ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x26: MBOX_GET_ASYNC_DATA_SETUP_TIME */ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x27: MBOX_GET_PCI_PARAMS */ ISP_SCSI_OPMAP(0x03, 0x4f), /* 0x28: MBOX_GET_TARGET_PARAMS */ ISP_SCSI_OPMAP(0x03, 0x0f), /* 0x29: MBOX_GET_DEV_QUEUE_PARAMS */ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x2a: MBOX_GET_RESET_DELAY_PARAMS */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x2b: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x2c: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x2d: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x2e: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x2f: */ ISP_SCSI_OPMAP(0x03, 0x03), /* 0x30: MBOX_SET_INIT_SCSI_ID */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x31: MBOX_SET_SELECT_TIMEOUT */ ISP_SCSI_OPMAP(0xc7, 0xc7), /* 0x32: MBOX_SET_RETRY_COUNT */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x33: MBOX_SET_TAG_AGE_LIMIT */ ISP_SCSI_OPMAP(0x03, 0x03), /* 0x34: MBOX_SET_CLOCK_RATE */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x35: MBOX_SET_ACT_NEG_STATE */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x36: MBOX_SET_ASYNC_DATA_SETUP_TIME */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x37: MBOX_SET_PCI_CONTROL_PARAMS */ ISP_SCSI_OPMAP(0x4f, 0x4f), /* 0x38: MBOX_SET_TARGET_PARAMS */ ISP_SCSI_OPMAP(0x0f, 0x0f), /* 0x39: MBOX_SET_DEV_QUEUE_PARAMS */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x3a: MBOX_SET_RESET_DELAY_PARAMS */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x3b: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x3c: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x3d: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x3e: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x3f: */ ISP_SCSI_OPMAP(0x01, 0x03), /* 0x40: MBOX_RETURN_BIOS_BLOCK_ADDR */ ISP_SCSI_OPMAP(0x3f, 0x01), /* 0x41: MBOX_WRITE_FOUR_RAM_WORDS */ ISP_SCSI_OPMAP(0x03, 0x07), /* 0x42: MBOX_EXEC_BIOS_IOCB */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x43: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x44: */ ISP_SCSI_OPMAP(0x03, 0x03), /* 0x45: SET SYSTEM PARAMETER */ ISP_SCSI_OPMAP(0x01, 0x03), /* 0x46: GET SYSTEM PARAMETER */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x47: */ ISP_SCSI_OPMAP(0x01, 0xcf), /* 0x48: GET SCAM CONFIGURATION */ ISP_SCSI_OPMAP(0xcf, 0xcf), /* 0x49: SET SCAM CONFIGURATION */ ISP_SCSI_OPMAP(0x03, 0x03), /* 0x4a: MBOX_SET_FIRMWARE_FEATURES */ ISP_SCSI_OPMAP(0x01, 0x03), /* 0x4b: MBOX_GET_FIRMWARE_FEATURES */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x4c: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x4d: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x4e: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x4f: */ ISP_SCSI_OPMAP(0xdf, 0xdf), /* 0x50: LOAD RAM A64 */ ISP_SCSI_OPMAP(0xdf, 0xdf), /* 0x51: DUMP RAM A64 */ ISP_SCSI_OPMAP(0xdf, 0xff), /* 0x52: INITIALIZE REQUEST QUEUE A64 */ ISP_SCSI_OPMAP(0xef, 0xff), /* 0x53: INITIALIZE RESPONSE QUEUE A64 */ ISP_SCSI_OPMAP(0xcf, 0x01), /* 0x54: EXECUTE COMMAND IOCB A64 */ ISP_SCSI_OPMAP(0x07, 0x01), /* 0x55: ENABLE TARGET MODE */ ISP_SCSI_OPMAP(0x03, 0x0f), /* 0x56: GET TARGET STATUS */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x57: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x58: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x59: */ ISP_SCSI_OPMAP(0x03, 0x03), /* 0x5a: SET DATA OVERRUN RECOVERY MODE */ ISP_SCSI_OPMAP(0x01, 0x03), /* 0x5b: GET DATA OVERRUN RECOVERY MODE */ ISP_SCSI_OPMAP(0x0f, 0x0f), /* 0x5c: SET HOST DATA */ ISP_SCSI_OPMAP(0x01, 0x01) /* 0x5d: GET HOST DATA */ }; #define MAX_SCSI_OPCODE 0x5d static const char *scsi_mbcmd_names[] = { "NO-OP", "LOAD RAM", "EXEC FIRMWARE", "DUMP RAM", "WRITE RAM WORD", "READ RAM WORD", "MAILBOX REG TEST", "VERIFY CHECKSUM", "ABOUT FIRMWARE", NULL, NULL, NULL, NULL, NULL, "CHECK FIRMWARE", NULL,
"INIT REQUEST QUEUE", "INIT RESULT QUEUE", "EXECUTE IOCB", "WAKE UP", "STOP FIRMWARE", "ABORT", "ABORT DEVICE", "ABORT TARGET", "BUS RESET", "STOP QUEUE", "START QUEUE", "SINGLE STEP QUEUE", "ABORT QUEUE", "GET DEV QUEUE STATUS", NULL, "GET FIRMWARE STATUS", "GET INIT SCSI ID", "GET SELECT TIMEOUT", "GET RETRY COUNT", "GET TAG AGE LIMIT", "GET CLOCK RATE", "GET ACT NEG STATE", "GET ASYNC DATA SETUP TIME", "GET PCI PARAMS", "GET TARGET PARAMS", "GET DEV QUEUE PARAMS", "GET RESET DELAY PARAMS", NULL, NULL, NULL, NULL, NULL, "SET INIT SCSI ID", "SET SELECT TIMEOUT", "SET RETRY COUNT", "SET TAG AGE LIMIT", "SET CLOCK RATE", "SET ACT NEG STATE", "SET ASYNC DATA SETUP TIME", "SET PCI CONTROL PARAMS", "SET TARGET PARAMS", "SET DEV QUEUE PARAMS", "SET RESET DELAY PARAMS", NULL, NULL, NULL, NULL, NULL, "RETURN BIOS BLOCK ADDR", "WRITE FOUR RAM WORDS", "EXEC BIOS IOCB", NULL, NULL, "SET SYSTEM PARAMETER", "GET SYSTEM PARAMETER", NULL, "GET SCAM CONFIGURATION", "SET SCAM CONFIGURATION", "SET FIRMWARE FEATURES", "GET FIRMWARE FEATURES", NULL, NULL, NULL, NULL, "LOAD RAM A64", "DUMP RAM A64", "INITIALIZE REQUEST QUEUE A64", "INITIALIZE RESPONSE QUEUE A64", "EXECUTE IOCB A64", "ENABLE TARGET MODE", "GET TARGET MODE STATE", NULL, NULL, NULL, "SET DATA OVERRUN RECOVERY MODE", "GET DATA OVERRUN RECOVERY MODE", "SET HOST DATA", "GET NOST DATA", }; #define ISP_FC_IBITS(op) ((mbpfc[((op)<<3) + 0] << 24) | (mbpfc[((op)<<3) + 1] << 16) | (mbpfc[((op)<<3) + 2] << 8) | (mbpfc[((op)<<3) + 3])) #define ISP_FC_OBITS(op) ((mbpfc[((op)<<3) + 4] << 24) | (mbpfc[((op)<<3) + 5] << 16) | (mbpfc[((op)<<3) + 6] << 8) | (mbpfc[((op)<<3) + 7])) #define ISP_FC_OPMAP(in0, out0) 0, 0, 0, in0, 0, 0, 0, out0 #define ISP_FC_OPMAP_HALF(in1, in0, out1, out0) 0, 0, in1, in0, 0, 0, out1, out0 #define ISP_FC_OPMAP_FULL(in3, in2, in1, in0, out3, out2, out1, out0) in3, in2, in1, in0, out3, out2, out1, out0 static const uint32_t mbpfc[] = { ISP_FC_OPMAP(0x01, 0x01), /* 0x00: MBOX_NO_OP */ ISP_FC_OPMAP(0x1f, 0x01), /* 0x01: MBOX_LOAD_RAM */ ISP_FC_OPMAP(0x0f, 0x01), /* 0x02: MBOX_EXEC_FIRMWARE */ ISP_FC_OPMAP(0xdf, 0x01), /* 0x03: MBOX_DUMP_RAM */ ISP_FC_OPMAP(0x07, 0x07), /* 0x04: MBOX_WRITE_RAM_WORD */ ISP_FC_OPMAP(0x03, 0x07), /* 0x05: MBOX_READ_RAM_WORD */ ISP_FC_OPMAP_FULL(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff), /* 0x06: MBOX_MAILBOX_REG_TEST */ ISP_FC_OPMAP(0x07, 0x07), /* 0x07: MBOX_VERIFY_CHECKSUM */ ISP_FC_OPMAP_FULL(0x0, 0x0, 0x0, 0x01, 0x0, 0x3, 0x80, 0x7f), /* 0x08: MBOX_ABOUT_FIRMWARE */ ISP_FC_OPMAP(0xdf, 0x01), /* 0x09: MBOX_LOAD_RISC_RAM_2100 */ ISP_FC_OPMAP(0xdf, 0x01), /* 0x0a: DUMP RAM */ ISP_FC_OPMAP_HALF(0x1, 0xff, 0x0, 0x01), /* 0x0b: MBOX_LOAD_RISC_RAM */ ISP_FC_OPMAP(0x00, 0x00), /* 0x0c: */ ISP_FC_OPMAP_HALF(0x1, 0x0f, 0x0, 0x01), /* 0x0d: MBOX_WRITE_RAM_WORD_EXTENDED */ ISP_FC_OPMAP(0x01, 0x05), /* 0x0e: MBOX_CHECK_FIRMWARE */ ISP_FC_OPMAP_HALF(0x1, 0x03, 0x0, 0x0d), /* 0x0f: MBOX_READ_RAM_WORD_EXTENDED */ ISP_FC_OPMAP(0x1f, 0x11), /* 0x10: MBOX_INIT_REQ_QUEUE */ ISP_FC_OPMAP(0x2f, 0x21), /* 0x11: MBOX_INIT_RES_QUEUE */ ISP_FC_OPMAP(0x0f, 0x01), /* 0x12: MBOX_EXECUTE_IOCB */ ISP_FC_OPMAP(0x03, 0x03), /* 0x13: MBOX_WAKE_UP */ ISP_FC_OPMAP(0x01, 0xff), /* 0x14: MBOX_STOP_FIRMWARE */ ISP_FC_OPMAP(0x4f, 0x01), /* 0x15: MBOX_ABORT */ ISP_FC_OPMAP(0x07, 0x01), /* 0x16: MBOX_ABORT_DEVICE */ ISP_FC_OPMAP(0x07, 0x01), /* 0x17: MBOX_ABORT_TARGET */ ISP_FC_OPMAP(0x03, 0x03), /* 0x18: MBOX_BUS_RESET */ ISP_FC_OPMAP(0x07, 0x05), /* 0x19: MBOX_STOP_QUEUE */ ISP_FC_OPMAP(0x07, 0x05), /* 0x1a: MBOX_START_QUEUE */ 
ISP_FC_OPMAP(0x07, 0x05), /* 0x1b: MBOX_SINGLE_STEP_QUEUE */ ISP_FC_OPMAP(0x07, 0x05), /* 0x1c: MBOX_ABORT_QUEUE */ ISP_FC_OPMAP(0x07, 0x03), /* 0x1d: MBOX_GET_DEV_QUEUE_STATUS */ ISP_FC_OPMAP(0x00, 0x00), /* 0x1e: */ ISP_FC_OPMAP(0x01, 0x07), /* 0x1f: MBOX_GET_FIRMWARE_STATUS */ ISP_FC_OPMAP_HALF(0x2, 0x01, 0x7e, 0xcf), /* 0x20: MBOX_GET_LOOP_ID */ ISP_FC_OPMAP(0x00, 0x00), /* 0x21: */ ISP_FC_OPMAP(0x01, 0x07), /* 0x22: MBOX_GET_RETRY_COUNT */ ISP_FC_OPMAP(0x00, 0x00), /* 0x23: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x24: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x25: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x26: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x27: */ ISP_FC_OPMAP(0x01, 0x03), /* 0x28: MBOX_GET_FIRMWARE_OPTIONS */ ISP_FC_OPMAP(0x03, 0x07), /* 0x29: MBOX_GET_PORT_QUEUE_PARAMS */ ISP_FC_OPMAP(0x00, 0x00), /* 0x2a: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x2b: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x2c: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x2d: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x2e: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x2f: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x30: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x31: */ ISP_FC_OPMAP(0x07, 0x07), /* 0x32: MBOX_SET_RETRY_COUNT */ ISP_FC_OPMAP(0x00, 0x00), /* 0x33: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x34: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x35: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x36: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x37: */ ISP_FC_OPMAP(0x0f, 0x01), /* 0x38: MBOX_SET_FIRMWARE_OPTIONS */ ISP_FC_OPMAP(0x0f, 0x07), /* 0x39: MBOX_SET_PORT_QUEUE_PARAMS */ ISP_FC_OPMAP(0x00, 0x00), /* 0x3a: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x3b: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x3c: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x3d: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x3e: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x3f: */ ISP_FC_OPMAP(0x03, 0x01), /* 0x40: MBOX_LOOP_PORT_BYPASS */ ISP_FC_OPMAP(0x03, 0x01), /* 0x41: MBOX_LOOP_PORT_ENABLE */ ISP_FC_OPMAP_HALF(0x0, 0x01, 0x3, 0xcf), /* 0x42: MBOX_GET_RESOURCE_COUNT */ ISP_FC_OPMAP(0x01, 0x01), /* 0x43: MBOX_REQUEST_OFFLINE_MODE */ ISP_FC_OPMAP(0x00, 0x00), /* 0x44: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x45: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x46: */ ISP_FC_OPMAP(0xcf, 0x03), /* 0x47: GET PORT_DATABASE ENHANCED */ ISP_FC_OPMAP(0xcf, 0x0f), /* 0x48: MBOX_INIT_FIRMWARE_MULTI_ID */ ISP_FC_OPMAP(0xcd, 0x01), /* 0x49: MBOX_GET_VP_DATABASE */ ISP_FC_OPMAP_HALF(0x2, 0xcd, 0x0, 0x01), /* 0x4a: MBOX_GET_VP_DATABASE_ENTRY */ ISP_FC_OPMAP(0x00, 0x00), /* 0x4b: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x4c: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x4d: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x4e: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x4f: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x50: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x51: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x52: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x53: */ ISP_FC_OPMAP(0xcf, 0x01), /* 0x54: EXECUTE IOCB A64 */ ISP_FC_OPMAP(0x00, 0x00), /* 0x55: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x56: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x57: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x58: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x59: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x5a: */ ISP_FC_OPMAP(0x03, 0x01), /* 0x5b: MBOX_DRIVER_HEARTBEAT */ ISP_FC_OPMAP(0xcf, 0x01), /* 0x5c: MBOX_FW_HEARTBEAT */ ISP_FC_OPMAP(0x07, 0x03), /* 0x5d: MBOX_GET_SET_DATA_RATE */ ISP_FC_OPMAP(0x00, 0x00), /* 0x5e: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x5f: */ ISP_FC_OPMAP(0xcf, 0x0f), /* 0x60: MBOX_INIT_FIRMWARE */ ISP_FC_OPMAP(0x00, 0x00), /* 0x61: */ ISP_FC_OPMAP(0x01, 0x01), /* 0x62: MBOX_INIT_LIP */ ISP_FC_OPMAP(0xcd, 0x03), /* 0x63: MBOX_GET_FC_AL_POSITION_MAP */ ISP_FC_OPMAP(0xcf, 0x01), /* 0x64: MBOX_GET_PORT_DB */ ISP_FC_OPMAP(0x07, 0x01), /* 0x65: MBOX_CLEAR_ACA */ ISP_FC_OPMAP(0x07, 0x01), /* 
0x66: MBOX_TARGET_RESET */ ISP_FC_OPMAP(0x07, 0x01), /* 0x67: MBOX_CLEAR_TASK_SET */ ISP_FC_OPMAP(0x07, 0x01), /* 0x68: MBOX_ABORT_TASK_SET */ ISP_FC_OPMAP(0x01, 0x07), /* 0x69: MBOX_GET_FW_STATE */ ISP_FC_OPMAP_HALF(0x6, 0x03, 0x0, 0xcf), /* 0x6a: MBOX_GET_PORT_NAME */ ISP_FC_OPMAP(0xcf, 0x01), /* 0x6b: MBOX_GET_LINK_STATUS */ ISP_FC_OPMAP(0x0f, 0x01), /* 0x6c: MBOX_INIT_LIP_RESET */ ISP_FC_OPMAP(0x00, 0x00), /* 0x6d: */ ISP_FC_OPMAP(0xcf, 0x03), /* 0x6e: MBOX_SEND_SNS */ ISP_FC_OPMAP(0x0f, 0x07), /* 0x6f: MBOX_FABRIC_LOGIN */ ISP_FC_OPMAP(0x03, 0x01), /* 0x70: MBOX_SEND_CHANGE_REQUEST */ ISP_FC_OPMAP(0x03, 0x03), /* 0x71: MBOX_FABRIC_LOGOUT */ ISP_FC_OPMAP(0x0f, 0x0f), /* 0x72: MBOX_INIT_LIP_LOGIN */ ISP_FC_OPMAP(0x00, 0x00), /* 0x73: */ ISP_FC_OPMAP(0x07, 0x01), /* 0x74: LOGIN LOOP PORT */ ISP_FC_OPMAP_HALF(0x03, 0xcf, 0x00, 0x07), /* 0x75: GET PORT/NODE NAME LIST */ ISP_FC_OPMAP(0x4f, 0x01), /* 0x76: SET VENDOR ID */ ISP_FC_OPMAP(0xcd, 0x01), /* 0x77: INITIALIZE IP MAILBOX */ ISP_FC_OPMAP(0x00, 0x00), /* 0x78: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x79: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x7a: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x7b: */ ISP_FC_OPMAP_HALF(0x03, 0x4f, 0x00, 0x07), /* 0x7c: Get ID List */ ISP_FC_OPMAP(0xcf, 0x01), /* 0x7d: SEND LFA */ ISP_FC_OPMAP(0x0f, 0x01) /* 0x7e: LUN RESET */ }; #define MAX_FC_OPCODE 0x7e /* * Footnotes * * (1): this sets bits 21..16 in mailbox register #8, which we nominally * do not access at this time in the core driver. The caller is * responsible for setting this register first (Gross!). The assumption * is that we won't overflow. */ static const char *fc_mbcmd_names[] = { "NO-OP", "LOAD RAM", "EXEC FIRMWARE", "DUMP RAM", "WRITE RAM WORD", "READ RAM WORD", "MAILBOX REG TEST", "VERIFY CHECKSUM", "ABOUT FIRMWARE", "LOAD RAM (2100)", "DUMP RAM", "LOAD RISC RAM", NULL, "WRITE RAM WORD EXTENDED", "CHECK FIRMWARE", "READ RAM WORD EXTENDED", "INIT REQUEST QUEUE", "INIT RESULT QUEUE", "EXECUTE IOCB", "WAKE UP", "STOP FIRMWARE", "ABORT", "ABORT DEVICE", "ABORT TARGET", "BUS RESET", "STOP QUEUE", "START QUEUE", "SINGLE STEP QUEUE", "ABORT QUEUE", "GET DEV QUEUE STATUS", NULL, "GET FIRMWARE STATUS", "GET LOOP ID", NULL, "GET RETRY COUNT", NULL, NULL, NULL, NULL, NULL, "GET FIRMWARE OPTIONS", "GET PORT QUEUE PARAMS", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "SET RETRY COUNT", NULL, NULL, NULL, NULL, NULL, "SET FIRMWARE OPTIONS", "SET PORT QUEUE PARAMS", NULL, NULL, NULL, NULL, NULL, NULL, "LOOP PORT BYPASS", "LOOP PORT ENABLE", "GET RESOURCE COUNT", "REQUEST NON PARTICIPATING MODE", NULL, NULL, NULL, "GET PORT DATABASE ENHANCED", "INIT FIRMWARE MULTI ID", "GET VP DATABASE", "GET VP DATABASE ENTRY", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "EXECUTE IOCB A64", NULL, NULL, NULL, NULL, NULL, NULL, "DRIVER HEARTBEAT", NULL, "GET/SET DATA RATE", NULL, NULL, "INIT FIRMWARE", NULL, "INIT LIP", "GET FC-AL POSITION MAP", "GET PORT DATABASE", "CLEAR ACA", "TARGET RESET", "CLEAR TASK SET", "ABORT TASK SET", "GET FW STATE", "GET PORT NAME", "GET LINK STATUS", "INIT LIP RESET", NULL, "SEND SNS", "FABRIC LOGIN", "SEND CHANGE REQUEST", "FABRIC LOGOUT", "INIT LIP LOGIN", NULL, "LOGIN LOOP PORT", "GET PORT/NODE NAME LIST", "SET VENDOR ID", "INITIALIZE IP MAILBOX", NULL, NULL, NULL, NULL, "Get ID List", "SEND LFA", "Lun RESET" }; static void isp_mboxcmd_qnw(ispsoftc_t *isp, mbreg_t *mbp, int nodelay) { unsigned int ibits, obits, box, opcode; opcode = mbp->param[0]; if (IS_FC(isp)) { ibits = ISP_FC_IBITS(opcode); obits = ISP_FC_OBITS(opcode); } else { ibits = 
ISP_SCSI_IBITS(opcode); obits = ISP_SCSI_OBITS(opcode); } ibits |= mbp->ibits; obits |= mbp->obits; for (box = 0; box < ISP_NMBOX(isp); box++) { if (ibits & (1 << box)) { ISP_WRITE(isp, MBOX_OFF(box), mbp->param[box]); } if (nodelay == 0) { isp->isp_mboxtmp[box] = mbp->param[box] = 0; } } if (nodelay == 0) { isp->isp_lastmbxcmd = opcode; isp->isp_obits = obits; isp->isp_mboxbsy = 1; } if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_SET_HOST_INT); } else { ISP_WRITE(isp, HCCR, HCCR_CMD_SET_HOST_INT); } /* * Oddly enough, if we're not delaying for an answer, * delay a bit to give the f/w a chance to pick up the * command. */ if (nodelay) { ISP_DELAY(1000); } } static void isp_mboxcmd(ispsoftc_t *isp, mbreg_t *mbp) { const char *cname, *xname, *sname; char tname[16], mname[16]; unsigned int ibits, obits, box, opcode; opcode = mbp->param[0]; if (IS_FC(isp)) { if (opcode > MAX_FC_OPCODE) { mbp->param[0] = MBOX_INVALID_COMMAND; isp_prt(isp, ISP_LOGERR, "Unknown Command 0x%x", opcode); return; } cname = fc_mbcmd_names[opcode]; ibits = ISP_FC_IBITS(opcode); obits = ISP_FC_OBITS(opcode); } else { if (opcode > MAX_SCSI_OPCODE) { mbp->param[0] = MBOX_INVALID_COMMAND; isp_prt(isp, ISP_LOGERR, "Unknown Command 0x%x", opcode); return; } cname = scsi_mbcmd_names[opcode]; ibits = ISP_SCSI_IBITS(opcode); obits = ISP_SCSI_OBITS(opcode); } if (cname == NULL) { cname = tname; ISP_SNPRINTF(tname, sizeof tname, "opcode %x", opcode); } isp_prt(isp, ISP_LOGDEBUG3, "Mailbox Command '%s'", cname); /* * Pick up any additional bits that the caller might have set. */ ibits |= mbp->ibits; obits |= mbp->obits; /* * Mask any bits that the caller wants us to mask */ ibits &= mbp->ibitm; obits &= mbp->obitm; if (ibits == 0 && obits == 0) { mbp->param[0] = MBOX_COMMAND_PARAM_ERROR; isp_prt(isp, ISP_LOGERR, "no parameters for 0x%x", opcode); return; } /* * Get exclusive usage of mailbox registers. */ if (MBOX_ACQUIRE(isp)) { mbp->param[0] = MBOX_REGS_BUSY; goto out; } for (box = 0; box < ISP_NMBOX(isp); box++) { if (ibits & (1 << box)) { isp_prt(isp, ISP_LOGDEBUG3, "IN mbox %d = 0x%04x", box, mbp->param[box]); ISP_WRITE(isp, MBOX_OFF(box), mbp->param[box]); } isp->isp_mboxtmp[box] = mbp->param[box] = 0; } isp->isp_lastmbxcmd = opcode; /* * We assume that we can't overwrite a previous command. */ isp->isp_obits = obits; isp->isp_mboxbsy = 1; /* * Set Host Interrupt condition so that RISC will pick up mailbox regs. */ if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_SET_HOST_INT); } else { ISP_WRITE(isp, HCCR, HCCR_CMD_SET_HOST_INT); } /* * While we haven't finished the command, spin our wheels here. */ MBOX_WAIT_COMPLETE(isp, mbp); /* * Did the command time out? */ if (mbp->param[0] == MBOX_TIMEOUT) { isp->isp_mboxbsy = 0; MBOX_RELEASE(isp); goto out; } /* * Copy back output registers. 
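* Only the mailbox registers named in the obits mask for this opcode
* (plus any extra bits the caller supplied) are copied out; every
* register was zeroed in isp_mboxtmp when the command was started.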
*/ for (box = 0; box < ISP_NMBOX(isp); box++) { if (obits & (1 << box)) { mbp->param[box] = isp->isp_mboxtmp[box]; isp_prt(isp, ISP_LOGDEBUG3, "OUT mbox %d = 0x%04x", box, mbp->param[box]); } } isp->isp_mboxbsy = 0; MBOX_RELEASE(isp); out: if (mbp->logval == 0 || mbp->param[0] == MBOX_COMMAND_COMPLETE) return; if ((mbp->param[0] & 0xbfe0) == 0 && (mbp->logval & MBLOGMASK(mbp->param[0])) == 0) return; xname = NULL; sname = ""; switch (mbp->param[0]) { case MBOX_INVALID_COMMAND: xname = "INVALID COMMAND"; break; case MBOX_HOST_INTERFACE_ERROR: xname = "HOST INTERFACE ERROR"; break; case MBOX_TEST_FAILED: xname = "TEST FAILED"; break; case MBOX_COMMAND_ERROR: xname = "COMMAND ERROR"; ISP_SNPRINTF(mname, sizeof(mname), " subcode 0x%x", mbp->param[1]); sname = mname; break; case MBOX_COMMAND_PARAM_ERROR: xname = "COMMAND PARAMETER ERROR"; break; case MBOX_PORT_ID_USED: xname = "PORT ID ALREADY IN USE"; break; case MBOX_LOOP_ID_USED: xname = "LOOP ID ALREADY IN USE"; break; case MBOX_ALL_IDS_USED: xname = "ALL LOOP IDS IN USE"; break; case MBOX_NOT_LOGGED_IN: xname = "NOT LOGGED IN"; break; case MBOX_LINK_DOWN_ERROR: xname = "LINK DOWN ERROR"; break; case MBOX_LOOPBACK_ERROR: xname = "LOOPBACK ERROR"; break; case MBOX_CHECKSUM_ERROR: xname = "CHECKSUM ERROR"; break; case MBOX_INVALID_PRODUCT_KEY: xname = "INVALID PRODUCT KEY"; break; case MBOX_REGS_BUSY: xname = "REGISTERS BUSY"; break; case MBOX_TIMEOUT: xname = "TIMEOUT"; break; default: ISP_SNPRINTF(mname, sizeof mname, "error 0x%x", mbp->param[0]); xname = mname; break; } if (xname) { isp_prt(isp, ISP_LOGALL, "Mailbox Command '%s' failed (%s%s)", cname, xname, sname); } } static void isp_fw_state(ispsoftc_t *isp, int chan) { if (IS_FC(isp)) { mbreg_t mbs; fcparam *fcp = FCPARAM(isp, chan); MBSINIT(&mbs, MBOX_GET_FW_STATE, MBLOGALL, 0); isp_mboxcmd(isp, &mbs); if (mbs.param[0] == MBOX_COMMAND_COMPLETE) { fcp->isp_fwstate = mbs.param[1]; } } } static void isp_spi_update(ispsoftc_t *isp, int chan) { int tgt; mbreg_t mbs; sdparam *sdp; if (IS_FC(isp)) { /* * There are no 'per-bus' settings for Fibre Channel. */ return; } sdp = SDPARAM(isp, chan); sdp->update = 0; for (tgt = 0; tgt < MAX_TARGETS; tgt++) { uint16_t flags, period, offset; int get; if (sdp->isp_devparam[tgt].dev_enable == 0) { sdp->isp_devparam[tgt].dev_update = 0; sdp->isp_devparam[tgt].dev_refresh = 0; isp_prt(isp, ISP_LOGDEBUG0, "skipping target %d bus %d update", tgt, chan); continue; } /* * If the goal is to update the status of the device, * take what's in goal_flags and try and set the device * toward that. Otherwise, if we're just refreshing the * current device state, get the current parameters. */ MBSINIT(&mbs, 0, MBLOGALL, 0); /* * Refresh overrides set */ if (sdp->isp_devparam[tgt].dev_refresh) { mbs.param[0] = MBOX_GET_TARGET_PARAMS; get = 1; } else if (sdp->isp_devparam[tgt].dev_update) { mbs.param[0] = MBOX_SET_TARGET_PARAMS; /* * Make sure goal_flags has "Renegotiate on Error" * on and "Freeze Queue on Error" off. */ sdp->isp_devparam[tgt].goal_flags |= DPARM_RENEG; sdp->isp_devparam[tgt].goal_flags &= ~DPARM_QFRZ; mbs.param[2] = sdp->isp_devparam[tgt].goal_flags; /* * Insist that PARITY must be enabled * if SYNC or WIDE is enabled. */ if ((mbs.param[2] & (DPARM_SYNC|DPARM_WIDE)) != 0) { mbs.param[2] |= DPARM_PARITY; } if (mbs.param[2] & DPARM_SYNC) { mbs.param[3] = (sdp->isp_devparam[tgt].goal_offset << 8) | (sdp->isp_devparam[tgt].goal_period); } /* * A command completion later that has * RQSTF_NEGOTIATION set can cause * the dev_refresh/announce cycle also. 
* * Note: It is really important to update our current * flags with at least the state of TAG capabilities- * otherwise we might try and send a tagged command * when we have it all turned off. So change it here * to say that current already matches goal. */ sdp->isp_devparam[tgt].actv_flags &= ~DPARM_TQING; sdp->isp_devparam[tgt].actv_flags |= (sdp->isp_devparam[tgt].goal_flags & DPARM_TQING); isp_prt(isp, ISP_LOGDEBUG0, "bus %d set tgt %d flags 0x%x off 0x%x period 0x%x", chan, tgt, mbs.param[2], mbs.param[3] >> 8, mbs.param[3] & 0xff); get = 0; } else { continue; } mbs.param[1] = (chan << 15) | (tgt << 8); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { continue; } if (get == 0) { sdp->sendmarker = 1; sdp->isp_devparam[tgt].dev_update = 0; sdp->isp_devparam[tgt].dev_refresh = 1; } else { sdp->isp_devparam[tgt].dev_refresh = 0; flags = mbs.param[2]; period = mbs.param[3] & 0xff; offset = mbs.param[3] >> 8; sdp->isp_devparam[tgt].actv_flags = flags; sdp->isp_devparam[tgt].actv_period = period; sdp->isp_devparam[tgt].actv_offset = offset; isp_async(isp, ISPASYNC_NEW_TGT_PARAMS, chan, tgt); } } for (tgt = 0; tgt < MAX_TARGETS; tgt++) { if (sdp->isp_devparam[tgt].dev_update || sdp->isp_devparam[tgt].dev_refresh) { sdp->update = 1; break; } } } static void isp_setdfltsdparm(ispsoftc_t *isp) { int tgt; sdparam *sdp, *sdp1; sdp = SDPARAM(isp, 0); sdp->role = GET_DEFAULT_ROLE(isp, 0); if (IS_DUALBUS(isp)) { sdp1 = sdp + 1; sdp1->role = GET_DEFAULT_ROLE(isp, 1); } else { sdp1 = NULL; } /* * Establish some default parameters. */ sdp->isp_cmd_dma_burst_enable = 0; sdp->isp_data_dma_burst_enabl = 1; sdp->isp_fifo_threshold = 0; sdp->isp_initiator_id = DEFAULT_IID(isp, 0); if (isp->isp_type >= ISP_HA_SCSI_1040) { sdp->isp_async_data_setup = 9; } else { sdp->isp_async_data_setup = 6; } sdp->isp_selection_timeout = 250; sdp->isp_max_queue_depth = MAXISPREQUEST(isp); sdp->isp_tag_aging = 8; sdp->isp_bus_reset_delay = 5; /* * Don't retry selection, busy or queue full automatically- reflect * these back to us. */ sdp->isp_retry_count = 0; sdp->isp_retry_delay = 0; for (tgt = 0; tgt < MAX_TARGETS; tgt++) { sdp->isp_devparam[tgt].exc_throttle = ISP_EXEC_THROTTLE; sdp->isp_devparam[tgt].dev_enable = 1; } /* * The trick here is to establish a default for the default (honk!) * state (goal_flags). Then try and get the current status from * the card to fill in the current state. We don't, in fact, set * the default to the SAFE default state- that's not the goal state. */ for (tgt = 0; tgt < MAX_TARGETS; tgt++) { uint8_t off, per; sdp->isp_devparam[tgt].actv_offset = 0; sdp->isp_devparam[tgt].actv_period = 0; sdp->isp_devparam[tgt].actv_flags = 0; sdp->isp_devparam[tgt].goal_flags = sdp->isp_devparam[tgt].nvrm_flags = DPARM_DEFAULT; /* * We default to Wide/Fast for versions less than a 1040 * (unless it's SBus). 
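* The ladder below picks the fastest ISP_xxM_SYNCPARMS offset/period
* pair the chip generation supports: 80M for Ultra3, 40M for Ultra2,
* 20M for a 1240, and 10M (or the 1040 flavor of 20M) otherwise.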
*/ if (IS_ULTRA3(isp)) { off = ISP_80M_SYNCPARMS >> 8; per = ISP_80M_SYNCPARMS & 0xff; } else if (IS_ULTRA2(isp)) { off = ISP_40M_SYNCPARMS >> 8; per = ISP_40M_SYNCPARMS & 0xff; } else if (IS_1240(isp)) { off = ISP_20M_SYNCPARMS >> 8; per = ISP_20M_SYNCPARMS & 0xff; } else if ((isp->isp_bustype == ISP_BT_SBUS && isp->isp_type < ISP_HA_SCSI_1020A) || (isp->isp_bustype == ISP_BT_PCI && isp->isp_type < ISP_HA_SCSI_1040) || (isp->isp_clock && isp->isp_clock < 60) || (sdp->isp_ultramode == 0)) { off = ISP_10M_SYNCPARMS >> 8; per = ISP_10M_SYNCPARMS & 0xff; } else { off = ISP_20M_SYNCPARMS_1040 >> 8; per = ISP_20M_SYNCPARMS_1040 & 0xff; } sdp->isp_devparam[tgt].goal_offset = sdp->isp_devparam[tgt].nvrm_offset = off; sdp->isp_devparam[tgt].goal_period = sdp->isp_devparam[tgt].nvrm_period = per; } /* * If we're a dual bus card, just copy the data over */ if (sdp1) { *sdp1 = *sdp; sdp1->isp_initiator_id = DEFAULT_IID(isp, 1); } /* * If we've not been told to avoid reading NVRAM, try and read it. * If we're successful reading it, we can then return because NVRAM * will tell us what the desired settings are. Otherwise, we establish * some reasonable 'fake' nvram and goal defaults. */ if ((isp->isp_confopts & ISP_CFG_NONVRAM) == 0) { mbreg_t mbs; if (isp_read_nvram(isp, 0) == 0) { if (IS_DUALBUS(isp)) { if (isp_read_nvram(isp, 1) == 0) { return; } } } MBSINIT(&mbs, MBOX_GET_ACT_NEG_STATE, MBLOGNONE, 0); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { sdp->isp_req_ack_active_neg = 1; sdp->isp_data_line_active_neg = 1; if (sdp1) { sdp1->isp_req_ack_active_neg = 1; sdp1->isp_data_line_active_neg = 1; } } else { sdp->isp_req_ack_active_neg = (mbs.param[1] >> 4) & 0x1; sdp->isp_data_line_active_neg = (mbs.param[1] >> 5) & 0x1; if (sdp1) { sdp1->isp_req_ack_active_neg = (mbs.param[2] >> 4) & 0x1; sdp1->isp_data_line_active_neg = (mbs.param[2] >> 5) & 0x1; } } } } static void isp_setdfltfcparm(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); /* * Establish some default parameters. */ fcp->role = GET_DEFAULT_ROLE(isp, chan); fcp->isp_maxalloc = ICB_DFLT_ALLOC; fcp->isp_retry_delay = ICB_DFLT_RDELAY; fcp->isp_retry_count = ICB_DFLT_RCOUNT; fcp->isp_loopid = DEFAULT_LOOPID(isp, chan); fcp->isp_wwnn_nvram = DEFAULT_NODEWWN(isp, chan); fcp->isp_wwpn_nvram = DEFAULT_PORTWWN(isp, chan); fcp->isp_fwoptions = 0; fcp->isp_lasthdl = NIL_HANDLE; if (IS_24XX(isp)) { fcp->isp_fwoptions |= ICB2400_OPT1_FAIRNESS; fcp->isp_fwoptions |= ICB2400_OPT1_HARD_ADDRESS; if (isp->isp_confopts & ISP_CFG_FULL_DUPLEX) { fcp->isp_fwoptions |= ICB2400_OPT1_FULL_DUPLEX; } fcp->isp_fwoptions |= ICB2400_OPT1_BOTH_WWNS; } else { fcp->isp_fwoptions |= ICBOPT_FAIRNESS; fcp->isp_fwoptions |= ICBOPT_PDBCHANGE_AE; fcp->isp_fwoptions |= ICBOPT_HARD_ADDRESS; if (isp->isp_confopts & ISP_CFG_FULL_DUPLEX) { fcp->isp_fwoptions |= ICBOPT_FULL_DUPLEX; } /* * Make sure this is turned off now until we get * extended options from NVRAM */ fcp->isp_fwoptions &= ~ICBOPT_EXTENDED; } /* * Now try and read NVRAM unless told to not do so. * This will set fcparam's isp_wwnn_nvram && isp_wwpn_nvram. */ if ((isp->isp_confopts & ISP_CFG_NONVRAM) == 0) { int i, j = 0; /* * Give a couple of tries at reading NVRAM. 
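* Reads of the serial NVRAM can fail transiently, hence the second
* attempt; if both tries fail, ISP_CFG_NONVRAM is set below and we
* run with the defaults established above.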
*/ for (i = 0; i < 2; i++) { j = isp_read_nvram(isp, chan); if (j == 0) { break; } } if (j) { isp->isp_confopts |= ISP_CFG_NONVRAM; } } fcp->isp_wwnn = ACTIVE_NODEWWN(isp, chan); fcp->isp_wwpn = ACTIVE_PORTWWN(isp, chan); isp_prt(isp, ISP_LOGCONFIG, "Chan %d 0x%08x%08x/0x%08x%08x Role %s", chan, (uint32_t) (fcp->isp_wwnn >> 32), (uint32_t) (fcp->isp_wwnn), (uint32_t) (fcp->isp_wwpn >> 32), (uint32_t) (fcp->isp_wwpn), isp_class3_roles[fcp->role]); } /* * Re-initialize the ISP and complete all orphaned commands * with a 'botched' notice. The reset/init routines should * not disturb an already active list of commands. */ int isp_reinit(ispsoftc_t *isp, int do_load_defaults) { int i, res = 0; if (isp->isp_state != ISP_RESETSTATE) isp_reset(isp, do_load_defaults); if (isp->isp_state != ISP_RESETSTATE) { res = EIO; isp_prt(isp, ISP_LOGERR, "%s: cannot reset card", __func__); ISP_DISABLE_INTS(isp); goto cleanup; } isp_init(isp); if (isp->isp_state > ISP_RESETSTATE && isp->isp_state != ISP_RUNSTATE) { res = EIO; isp_prt(isp, ISP_LOGERR, "%s: cannot init card", __func__); ISP_DISABLE_INTS(isp); if (IS_FC(isp)) { /* * If we're in ISP_ROLE_NONE, turn off the lasers. */ if (!IS_24XX(isp)) { ISP_WRITE(isp, BIU2100_CSR, BIU2100_FPM0_REGS); ISP_WRITE(isp, FPM_DIAG_CONFIG, FPM_SOFT_RESET); ISP_WRITE(isp, BIU2100_CSR, BIU2100_FB_REGS); ISP_WRITE(isp, FBM_CMD, FBMCMD_FIFO_RESET_ALL); ISP_WRITE(isp, BIU2100_CSR, BIU2100_RISC_REGS); } } } cleanup: isp->isp_nactive = 0; isp_clear_commands(isp); if (IS_FC(isp)) { for (i = 0; i < isp->isp_nchan; i++) ISP_MARK_PORTDB(isp, i, -1); } return (res); } /* * NVRAM Routines */ static int isp_read_nvram(ispsoftc_t *isp, int bus) { int i, amt, retval; uint8_t csum, minversion; union { uint8_t _x[ISP2400_NVRAM_SIZE]; uint16_t _s[ISP2400_NVRAM_SIZE>>1]; } _n; #define nvram_data _n._x #define nvram_words _n._s if (IS_24XX(isp)) { return (isp_read_nvram_2400(isp, nvram_data)); } else if (IS_FC(isp)) { amt = ISP2100_NVRAM_SIZE; minversion = 1; } else if (IS_ULTRA2(isp)) { amt = ISP1080_NVRAM_SIZE; minversion = 0; } else { amt = ISP_NVRAM_SIZE; minversion = 2; } for (i = 0; i < amt>>1; i++) { isp_rdnvram_word(isp, i, &nvram_words[i]); } if (nvram_data[0] != 'I' || nvram_data[1] != 'S' || nvram_data[2] != 'P') { if (isp->isp_bustype != ISP_BT_SBUS) { isp_prt(isp, ISP_LOGWARN, "invalid NVRAM header"); isp_prt(isp, ISP_LOGDEBUG0, "%x %x %x", nvram_data[0], nvram_data[1], nvram_data[2]); } retval = -1; goto out; } for (csum = 0, i = 0; i < amt; i++) { csum += nvram_data[i]; } if (csum != 0) { isp_prt(isp, ISP_LOGWARN, "invalid NVRAM checksum"); retval = -1; goto out; } if (ISP_NVRAM_VERSION(nvram_data) < minversion) { isp_prt(isp, ISP_LOGWARN, "version %d NVRAM not understood", ISP_NVRAM_VERSION(nvram_data)); retval = -1; goto out; } if (IS_ULTRA3(isp)) { isp_parse_nvram_12160(isp, bus, nvram_data); } else if (IS_1080(isp)) { isp_parse_nvram_1080(isp, bus, nvram_data); } else if (IS_1280(isp) || IS_1240(isp)) { isp_parse_nvram_1080(isp, bus, nvram_data); } else if (IS_SCSI(isp)) { isp_parse_nvram_1020(isp, nvram_data); } else { isp_parse_nvram_2100(isp, nvram_data); } retval = 0; out: return (retval); #undef nvram_data #undef nvram_words } static int isp_read_nvram_2400(ispsoftc_t *isp, uint8_t *nvram_data) { int retval = 0; uint32_t addr, csum, lwrds, *dptr; if (isp->isp_port) { addr = ISP2400_NVRAM_PORT1_ADDR; } else { addr = ISP2400_NVRAM_PORT0_ADDR; } dptr = (uint32_t *) nvram_data; for (lwrds = 0; lwrds < ISP2400_NVRAM_SIZE >> 2; lwrds++) { isp_rd_2400_nvram(isp, addr++, 
dptr++); } if (nvram_data[0] != 'I' || nvram_data[1] != 'S' || nvram_data[2] != 'P') { isp_prt(isp, ISP_LOGWARN, "invalid NVRAM header (%x %x %x)", nvram_data[0], nvram_data[1], nvram_data[2]); retval = -1; goto out; } dptr = (uint32_t *) nvram_data; for (csum = 0, lwrds = 0; lwrds < ISP2400_NVRAM_SIZE >> 2; lwrds++) { uint32_t tmp; ISP_IOXGET_32(isp, &dptr[lwrds], tmp); csum += tmp; } if (csum != 0) { isp_prt(isp, ISP_LOGWARN, "invalid NVRAM checksum"); retval = -1; goto out; } isp_parse_nvram_2400(isp, nvram_data); out: return (retval); } static void isp_rdnvram_word(ispsoftc_t *isp, int wo, uint16_t *rp) { int i, cbits; uint16_t bit, rqst, junk; ISP_WRITE(isp, BIU_NVRAM, BIU_NVRAM_SELECT); ISP_DELAY(10); ISP_WRITE(isp, BIU_NVRAM, BIU_NVRAM_SELECT|BIU_NVRAM_CLOCK); ISP_DELAY(10); if (IS_FC(isp)) { wo &= ((ISP2100_NVRAM_SIZE >> 1) - 1); if (IS_2312(isp) && isp->isp_port) { wo += 128; } rqst = (ISP_NVRAM_READ << 8) | wo; cbits = 10; } else if (IS_ULTRA2(isp)) { wo &= ((ISP1080_NVRAM_SIZE >> 1) - 1); rqst = (ISP_NVRAM_READ << 8) | wo; cbits = 10; } else { wo &= ((ISP_NVRAM_SIZE >> 1) - 1); rqst = (ISP_NVRAM_READ << 6) | wo; cbits = 8; } /* * Clock the word select request out... */ for (i = cbits; i >= 0; i--) { if ((rqst >> i) & 1) { bit = BIU_NVRAM_SELECT | BIU_NVRAM_DATAOUT; } else { bit = BIU_NVRAM_SELECT; } ISP_WRITE(isp, BIU_NVRAM, bit); ISP_DELAY(10); junk = ISP_READ(isp, BIU_NVRAM); /* force PCI flush */ ISP_WRITE(isp, BIU_NVRAM, bit | BIU_NVRAM_CLOCK); ISP_DELAY(10); junk = ISP_READ(isp, BIU_NVRAM); /* force PCI flush */ ISP_WRITE(isp, BIU_NVRAM, bit); ISP_DELAY(10); junk = ISP_READ(isp, BIU_NVRAM); /* force PCI flush */ } /* * Now read the result back in (bits come back in MSB format). */ *rp = 0; for (i = 0; i < 16; i++) { uint16_t rv; *rp <<= 1; ISP_WRITE(isp, BIU_NVRAM, BIU_NVRAM_SELECT|BIU_NVRAM_CLOCK); ISP_DELAY(10); rv = ISP_READ(isp, BIU_NVRAM); if (rv & BIU_NVRAM_DATAIN) { *rp |= 1; } ISP_DELAY(10); ISP_WRITE(isp, BIU_NVRAM, BIU_NVRAM_SELECT); ISP_DELAY(10); junk = ISP_READ(isp, BIU_NVRAM); /* force PCI flush */ } ISP_WRITE(isp, BIU_NVRAM, 0); ISP_DELAY(10); junk = ISP_READ(isp, BIU_NVRAM); /* force PCI flush */ ISP_SWIZZLE_NVRAM_WORD(isp, rp); } static void isp_rd_2400_nvram(ispsoftc_t *isp, uint32_t addr, uint32_t *rp) { int loops = 0; uint32_t base = 0x7ffe0000; uint32_t tmp = 0; if (IS_25XX(isp)) { base = 0x7ff00000 | 0x48000; } ISP_WRITE(isp, BIU2400_FLASH_ADDR, base | addr); for (loops = 0; loops < 5000; loops++) { ISP_DELAY(10); tmp = ISP_READ(isp, BIU2400_FLASH_ADDR); if ((tmp & (1U << 31)) != 0) { break; } } if (tmp & (1U << 31)) { *rp = ISP_READ(isp, BIU2400_FLASH_DATA); ISP_SWIZZLE_NVRAM_LONG(isp, rp); } else { *rp = 0xffffffff; } } static void isp_parse_nvram_1020(ispsoftc_t *isp, uint8_t *nvram_data) { sdparam *sdp = SDPARAM(isp, 0); int tgt; sdp->isp_fifo_threshold = ISP_NVRAM_FIFO_THRESHOLD(nvram_data) | (ISP_NVRAM_FIFO_THRESHOLD_128(nvram_data) << 2); if ((isp->isp_confopts & ISP_CFG_OWNLOOPID) == 0) sdp->isp_initiator_id = ISP_NVRAM_INITIATOR_ID(nvram_data); sdp->isp_bus_reset_delay = ISP_NVRAM_BUS_RESET_DELAY(nvram_data); sdp->isp_retry_count = ISP_NVRAM_BUS_RETRY_COUNT(nvram_data); sdp->isp_retry_delay = ISP_NVRAM_BUS_RETRY_DELAY(nvram_data); sdp->isp_async_data_setup = ISP_NVRAM_ASYNC_DATA_SETUP_TIME(nvram_data); if (isp->isp_type >= ISP_HA_SCSI_1040) { if (sdp->isp_async_data_setup < 9) { sdp->isp_async_data_setup = 9; } } else { if (sdp->isp_async_data_setup != 6) { sdp->isp_async_data_setup = 6; } } sdp->isp_req_ack_active_neg = 
ISP_NVRAM_REQ_ACK_ACTIVE_NEGATION(nvram_data); sdp->isp_data_line_active_neg = ISP_NVRAM_DATA_LINE_ACTIVE_NEGATION(nvram_data); sdp->isp_data_dma_burst_enabl = ISP_NVRAM_DATA_DMA_BURST_ENABLE(nvram_data); sdp->isp_cmd_dma_burst_enable = ISP_NVRAM_CMD_DMA_BURST_ENABLE(nvram_data); sdp->isp_tag_aging = ISP_NVRAM_TAG_AGE_LIMIT(nvram_data); sdp->isp_selection_timeout = ISP_NVRAM_SELECTION_TIMEOUT(nvram_data); sdp->isp_max_queue_depth = ISP_NVRAM_MAX_QUEUE_DEPTH(nvram_data); sdp->isp_fast_mttr = ISP_NVRAM_FAST_MTTR_ENABLE(nvram_data); for (tgt = 0; tgt < MAX_TARGETS; tgt++) { sdp->isp_devparam[tgt].dev_enable = ISP_NVRAM_TGT_DEVICE_ENABLE(nvram_data, tgt); sdp->isp_devparam[tgt].exc_throttle = ISP_NVRAM_TGT_EXEC_THROTTLE(nvram_data, tgt); sdp->isp_devparam[tgt].nvrm_offset = ISP_NVRAM_TGT_SYNC_OFFSET(nvram_data, tgt); sdp->isp_devparam[tgt].nvrm_period = ISP_NVRAM_TGT_SYNC_PERIOD(nvram_data, tgt); /* * We probably shouldn't lie about this, but it * makes it much safer if we limit NVRAM values * to sanity. */ if (isp->isp_type < ISP_HA_SCSI_1040) { /* * If we're not ultra, we can't possibly * be a shorter period than this. */ if (sdp->isp_devparam[tgt].nvrm_period < 0x19) { sdp->isp_devparam[tgt].nvrm_period = 0x19; } if (sdp->isp_devparam[tgt].nvrm_offset > 0xc) { sdp->isp_devparam[tgt].nvrm_offset = 0x0c; } } else { if (sdp->isp_devparam[tgt].nvrm_offset > 0x8) { sdp->isp_devparam[tgt].nvrm_offset = 0x8; } } sdp->isp_devparam[tgt].nvrm_flags = 0; if (ISP_NVRAM_TGT_RENEG(nvram_data, tgt)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_RENEG; sdp->isp_devparam[tgt].nvrm_flags |= DPARM_ARQ; if (ISP_NVRAM_TGT_TQING(nvram_data, tgt)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_TQING; if (ISP_NVRAM_TGT_SYNC(nvram_data, tgt)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_SYNC; if (ISP_NVRAM_TGT_WIDE(nvram_data, tgt)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_WIDE; if (ISP_NVRAM_TGT_PARITY(nvram_data, tgt)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_PARITY; if (ISP_NVRAM_TGT_DISC(nvram_data, tgt)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_DISC; sdp->isp_devparam[tgt].actv_flags = 0; /* we don't know */ sdp->isp_devparam[tgt].goal_offset = sdp->isp_devparam[tgt].nvrm_offset; sdp->isp_devparam[tgt].goal_period = sdp->isp_devparam[tgt].nvrm_period; sdp->isp_devparam[tgt].goal_flags = sdp->isp_devparam[tgt].nvrm_flags; } } static void isp_parse_nvram_1080(ispsoftc_t *isp, int bus, uint8_t *nvram_data) { sdparam *sdp = SDPARAM(isp, bus); int tgt; sdp->isp_fifo_threshold = ISP1080_NVRAM_FIFO_THRESHOLD(nvram_data); if ((isp->isp_confopts & ISP_CFG_OWNLOOPID) == 0) sdp->isp_initiator_id = ISP1080_NVRAM_INITIATOR_ID(nvram_data, bus); sdp->isp_bus_reset_delay = ISP1080_NVRAM_BUS_RESET_DELAY(nvram_data, bus); sdp->isp_retry_count = ISP1080_NVRAM_BUS_RETRY_COUNT(nvram_data, bus); sdp->isp_retry_delay = ISP1080_NVRAM_BUS_RETRY_DELAY(nvram_data, bus); sdp->isp_async_data_setup = ISP1080_NVRAM_ASYNC_DATA_SETUP_TIME(nvram_data, bus); sdp->isp_req_ack_active_neg = ISP1080_NVRAM_REQ_ACK_ACTIVE_NEGATION(nvram_data, bus); sdp->isp_data_line_active_neg = ISP1080_NVRAM_DATA_LINE_ACTIVE_NEGATION(nvram_data, bus); sdp->isp_data_dma_burst_enabl = ISP1080_NVRAM_BURST_ENABLE(nvram_data); sdp->isp_cmd_dma_burst_enable = ISP1080_NVRAM_BURST_ENABLE(nvram_data); sdp->isp_selection_timeout = ISP1080_NVRAM_SELECTION_TIMEOUT(nvram_data, bus); sdp->isp_max_queue_depth = ISP1080_NVRAM_MAX_QUEUE_DEPTH(nvram_data, bus); for (tgt = 0; tgt < MAX_TARGETS; tgt++) { sdp->isp_devparam[tgt].dev_enable = ISP1080_NVRAM_TGT_DEVICE_ENABLE(nvram_data,
tgt, bus); sdp->isp_devparam[tgt].exc_throttle = ISP1080_NVRAM_TGT_EXEC_THROTTLE(nvram_data, tgt, bus); sdp->isp_devparam[tgt].nvrm_offset = ISP1080_NVRAM_TGT_SYNC_OFFSET(nvram_data, tgt, bus); sdp->isp_devparam[tgt].nvrm_period = ISP1080_NVRAM_TGT_SYNC_PERIOD(nvram_data, tgt, bus); sdp->isp_devparam[tgt].nvrm_flags = 0; if (ISP1080_NVRAM_TGT_RENEG(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_RENEG; sdp->isp_devparam[tgt].nvrm_flags |= DPARM_ARQ; if (ISP1080_NVRAM_TGT_TQING(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_TQING; if (ISP1080_NVRAM_TGT_SYNC(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_SYNC; if (ISP1080_NVRAM_TGT_WIDE(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_WIDE; if (ISP1080_NVRAM_TGT_PARITY(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_PARITY; if (ISP1080_NVRAM_TGT_DISC(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_DISC; sdp->isp_devparam[tgt].actv_flags = 0; sdp->isp_devparam[tgt].goal_offset = sdp->isp_devparam[tgt].nvrm_offset; sdp->isp_devparam[tgt].goal_period = sdp->isp_devparam[tgt].nvrm_period; sdp->isp_devparam[tgt].goal_flags = sdp->isp_devparam[tgt].nvrm_flags; } } static void isp_parse_nvram_12160(ispsoftc_t *isp, int bus, uint8_t *nvram_data) { sdparam *sdp = SDPARAM(isp, bus); int tgt; sdp->isp_fifo_threshold = ISP12160_NVRAM_FIFO_THRESHOLD(nvram_data); if ((isp->isp_confopts & ISP_CFG_OWNLOOPID) == 0) sdp->isp_initiator_id = ISP12160_NVRAM_INITIATOR_ID(nvram_data, bus); sdp->isp_bus_reset_delay = ISP12160_NVRAM_BUS_RESET_DELAY(nvram_data, bus); sdp->isp_retry_count = ISP12160_NVRAM_BUS_RETRY_COUNT(nvram_data, bus); sdp->isp_retry_delay = ISP12160_NVRAM_BUS_RETRY_DELAY(nvram_data, bus); sdp->isp_async_data_setup = ISP12160_NVRAM_ASYNC_DATA_SETUP_TIME(nvram_data, bus); sdp->isp_req_ack_active_neg = ISP12160_NVRAM_REQ_ACK_ACTIVE_NEGATION(nvram_data, bus); sdp->isp_data_line_active_neg = ISP12160_NVRAM_DATA_LINE_ACTIVE_NEGATION(nvram_data, bus); sdp->isp_data_dma_burst_enabl = ISP12160_NVRAM_BURST_ENABLE(nvram_data); sdp->isp_cmd_dma_burst_enable = ISP12160_NVRAM_BURST_ENABLE(nvram_data); sdp->isp_selection_timeout = ISP12160_NVRAM_SELECTION_TIMEOUT(nvram_data, bus); sdp->isp_max_queue_depth = ISP12160_NVRAM_MAX_QUEUE_DEPTH(nvram_data, bus); for (tgt = 0; tgt < MAX_TARGETS; tgt++) { sdp->isp_devparam[tgt].dev_enable = ISP12160_NVRAM_TGT_DEVICE_ENABLE(nvram_data, tgt, bus); sdp->isp_devparam[tgt].exc_throttle = ISP12160_NVRAM_TGT_EXEC_THROTTLE(nvram_data, tgt, bus); sdp->isp_devparam[tgt].nvrm_offset = ISP12160_NVRAM_TGT_SYNC_OFFSET(nvram_data, tgt, bus); sdp->isp_devparam[tgt].nvrm_period = ISP12160_NVRAM_TGT_SYNC_PERIOD(nvram_data, tgt, bus); sdp->isp_devparam[tgt].nvrm_flags = 0; if (ISP12160_NVRAM_TGT_RENEG(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_RENEG; sdp->isp_devparam[tgt].nvrm_flags |= DPARM_ARQ; if (ISP12160_NVRAM_TGT_TQING(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_TQING; if (ISP12160_NVRAM_TGT_SYNC(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_SYNC; if (ISP12160_NVRAM_TGT_WIDE(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_WIDE; if (ISP12160_NVRAM_TGT_PARITY(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_PARITY; if (ISP12160_NVRAM_TGT_DISC(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_DISC; sdp->isp_devparam[tgt].actv_flags = 0; sdp->isp_devparam[tgt].goal_offset = sdp->isp_devparam[tgt].nvrm_offset; 
sdp->isp_devparam[tgt].goal_period = sdp->isp_devparam[tgt].nvrm_period; sdp->isp_devparam[tgt].goal_flags = sdp->isp_devparam[tgt].nvrm_flags; } } static void isp_parse_nvram_2100(ispsoftc_t *isp, uint8_t *nvram_data) { fcparam *fcp = FCPARAM(isp, 0); uint64_t wwn; /* * There is NVRAM storage for both Port and Node entities- * but the Node entity appears to be unused on all the cards * I can find. However, we should account for this being set * at some point in the future. * * Qlogic WWNs have an NAA of 2, but usually nothing shows up in * bits 48..60. In the case of the 2202, it appears that they do * use bit 48 to distinguish between the two instances on the card. * The 2204, which I've never seen, *probably* extends this method. */ wwn = ISP2100_NVRAM_PORT_NAME(nvram_data); if (wwn) { isp_prt(isp, ISP_LOGCONFIG, "NVRAM Port WWN 0x%08x%08x", (uint32_t) (wwn >> 32), (uint32_t) (wwn)); if ((wwn >> 60) == 0) { wwn |= (((uint64_t) 2)<< 60); } } fcp->isp_wwpn_nvram = wwn; if (IS_2200(isp) || IS_23XX(isp)) { wwn = ISP2100_NVRAM_NODE_NAME(nvram_data); if (wwn) { isp_prt(isp, ISP_LOGCONFIG, "NVRAM Node WWN 0x%08x%08x", (uint32_t) (wwn >> 32), (uint32_t) (wwn)); if ((wwn >> 60) == 0) { wwn |= (((uint64_t) 2)<< 60); } } else { wwn = fcp->isp_wwpn_nvram & ~((uint64_t) 0xfff << 48); } } else { wwn &= ~((uint64_t) 0xfff << 48); } fcp->isp_wwnn_nvram = wwn; fcp->isp_maxalloc = ISP2100_NVRAM_MAXIOCBALLOCATION(nvram_data); if ((isp->isp_confopts & ISP_CFG_OWNFSZ) == 0) { DEFAULT_FRAMESIZE(isp) = ISP2100_NVRAM_MAXFRAMELENGTH(nvram_data); } fcp->isp_retry_delay = ISP2100_NVRAM_RETRY_DELAY(nvram_data); fcp->isp_retry_count = ISP2100_NVRAM_RETRY_COUNT(nvram_data); if ((isp->isp_confopts & ISP_CFG_OWNLOOPID) == 0) { fcp->isp_loopid = ISP2100_NVRAM_HARDLOOPID(nvram_data); } if ((isp->isp_confopts & ISP_CFG_OWNEXCTHROTTLE) == 0) { DEFAULT_EXEC_THROTTLE(isp) = ISP2100_NVRAM_EXECUTION_THROTTLE(nvram_data); } fcp->isp_fwoptions = ISP2100_NVRAM_OPTIONS(nvram_data); isp_prt(isp, ISP_LOGDEBUG0, "NVRAM 0x%08x%08x 0x%08x%08x maxalloc %d maxframelen %d", (uint32_t) (fcp->isp_wwnn_nvram >> 32), (uint32_t) fcp->isp_wwnn_nvram, (uint32_t) (fcp->isp_wwpn_nvram >> 32), (uint32_t) fcp->isp_wwpn_nvram, ISP2100_NVRAM_MAXIOCBALLOCATION(nvram_data), ISP2100_NVRAM_MAXFRAMELENGTH(nvram_data)); isp_prt(isp, ISP_LOGDEBUG0, "execthrottle %d fwoptions 0x%x hardloop %d tov %d", ISP2100_NVRAM_EXECUTION_THROTTLE(nvram_data), ISP2100_NVRAM_OPTIONS(nvram_data), ISP2100_NVRAM_HARDLOOPID(nvram_data), ISP2100_NVRAM_TOV(nvram_data)); fcp->isp_xfwoptions = ISP2100_XFW_OPTIONS(nvram_data); fcp->isp_zfwoptions = ISP2100_ZFW_OPTIONS(nvram_data); isp_prt(isp, ISP_LOGDEBUG0, "xfwoptions 0x%x zfw options 0x%x", ISP2100_XFW_OPTIONS(nvram_data), ISP2100_ZFW_OPTIONS(nvram_data)); } static void isp_parse_nvram_2400(ispsoftc_t *isp, uint8_t *nvram_data) { fcparam *fcp = FCPARAM(isp, 0); uint64_t wwn; isp_prt(isp, ISP_LOGDEBUG0, "NVRAM 0x%08x%08x 0x%08x%08x exchg_cnt %d maxframelen %d", (uint32_t) (ISP2400_NVRAM_NODE_NAME(nvram_data) >> 32), (uint32_t) (ISP2400_NVRAM_NODE_NAME(nvram_data)), (uint32_t) (ISP2400_NVRAM_PORT_NAME(nvram_data) >> 32), (uint32_t) (ISP2400_NVRAM_PORT_NAME(nvram_data)), ISP2400_NVRAM_EXCHANGE_COUNT(nvram_data), ISP2400_NVRAM_MAXFRAMELENGTH(nvram_data)); isp_prt(isp, ISP_LOGDEBUG0, "NVRAM execthr %d loopid %d fwopt1 0x%x fwopt2 0x%x fwopt3 0x%x", ISP2400_NVRAM_EXECUTION_THROTTLE(nvram_data), ISP2400_NVRAM_HARDLOOPID(nvram_data), ISP2400_NVRAM_FIRMWARE_OPTIONS1(nvram_data), ISP2400_NVRAM_FIRMWARE_OPTIONS2(nvram_data), 
ISP2400_NVRAM_FIRMWARE_OPTIONS3(nvram_data)); wwn = ISP2400_NVRAM_PORT_NAME(nvram_data); fcp->isp_wwpn_nvram = wwn; wwn = ISP2400_NVRAM_NODE_NAME(nvram_data); if (wwn) { if ((wwn >> 60) != 2 && (wwn >> 60) != 5) { wwn = 0; } } if (wwn == 0 && (fcp->isp_wwpn_nvram >> 60) == 2) { wwn = fcp->isp_wwpn_nvram; wwn &= ~((uint64_t) 0xfff << 48); } fcp->isp_wwnn_nvram = wwn; if (ISP2400_NVRAM_EXCHANGE_COUNT(nvram_data)) { fcp->isp_maxalloc = ISP2400_NVRAM_EXCHANGE_COUNT(nvram_data); } if ((isp->isp_confopts & ISP_CFG_OWNFSZ) == 0) { DEFAULT_FRAMESIZE(isp) = ISP2400_NVRAM_MAXFRAMELENGTH(nvram_data); } if ((isp->isp_confopts & ISP_CFG_OWNLOOPID) == 0) { fcp->isp_loopid = ISP2400_NVRAM_HARDLOOPID(nvram_data); } if ((isp->isp_confopts & ISP_CFG_OWNEXCTHROTTLE) == 0) { DEFAULT_EXEC_THROTTLE(isp) = ISP2400_NVRAM_EXECUTION_THROTTLE(nvram_data); } fcp->isp_fwoptions = ISP2400_NVRAM_FIRMWARE_OPTIONS1(nvram_data); fcp->isp_xfwoptions = ISP2400_NVRAM_FIRMWARE_OPTIONS2(nvram_data); fcp->isp_zfwoptions = ISP2400_NVRAM_FIRMWARE_OPTIONS3(nvram_data); } Index: projects/powernv/dev/isp/isp_freebsd.c =================================================================== --- projects/powernv/dev/isp/isp_freebsd.c (revision 290990) +++ projects/powernv/dev/isp/isp_freebsd.c (revision 290991) @@ -1,5592 +1,5580 @@ /*- * Copyright (c) 1997-2009 by Matthew Jacob * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Platform (FreeBSD) dependent common attachment code for Qlogic adapters. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #if __FreeBSD_version < 800002 #define THREAD_CREATE kthread_create #else #define THREAD_CREATE kproc_create #endif MODULE_VERSION(isp, 1); MODULE_DEPEND(isp, cam, 1, 1, 1); int isp_announced = 0; int isp_fabric_hysteresis = 5; int isp_loop_down_limit = 60; /* default loop down limit */ int isp_quickboot_time = 7; /* don't wait more than N secs for loop up */ int isp_gone_device_time = 30; /* grace time before reporting device lost */ static const char prom3[] = "Chan %d [%u] PortID 0x%06x Departed because of %s"; static void isp_freeze_loopdown(ispsoftc_t *, int, char *); static d_ioctl_t ispioctl; static void isp_intr_enable(void *); static void isp_cam_async(void *, uint32_t, struct cam_path *, void *); static void isp_poll(struct cam_sim *); static timeout_t isp_watchdog; static timeout_t isp_gdt; static task_fn_t isp_gdt_task; static timeout_t isp_ldt; static task_fn_t isp_ldt_task; static void isp_kthread(void *); static void isp_action(struct cam_sim *, union ccb *); static int isp_timer_count; static void isp_timer(void *); static struct cdevsw isp_cdevsw = { .d_version = D_VERSION, .d_ioctl = ispioctl, .d_name = "isp", }; static int isp_role_sysctl(SYSCTL_HANDLER_ARGS) { ispsoftc_t *isp = (ispsoftc_t *)arg1; int chan = arg2; int error, old, value; value = FCPARAM(isp, chan)->role; error = sysctl_handle_int(oidp, &value, 0, req); if ((error != 0) || (req->newptr == NULL)) return (error); if (value < ISP_ROLE_NONE || value > ISP_ROLE_BOTH) return (EINVAL); ISP_LOCK(isp); old = FCPARAM(isp, chan)->role; /* We don't allow target mode switch from here. */ value = (old & ISP_ROLE_TARGET) | (value & ISP_ROLE_INITIATOR); /* If nothing has changed -- we are done. */ if (value == old) { ISP_UNLOCK(isp); return (0); } /* Actually change the role. */ error = isp_control(isp, ISPCTL_CHANGE_ROLE, chan, value); ISP_UNLOCK(isp); return (error); } static int isp_attach_chan(ispsoftc_t *isp, struct cam_devq *devq, int chan) { struct ccb_setasync csa; struct cam_sim *sim; struct cam_path *path; /* * Construct our SIM entry. 
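 *
 * The calls below are the standard CAM attachment sequence: allocate
 * the SIM with cam_sim_alloc(), register it as a bus with
 * xpt_bus_register(), build a wildcard path for it with
 * xpt_create_path(), and then hang an AC_LOST_DEVICE async callback
 * off that path so we hear about departing devices.  Each step
 * unwinds the earlier ones on failure.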
*/ sim = cam_sim_alloc(isp_action, isp_poll, "isp", isp, device_get_unit(isp->isp_dev), &isp->isp_osinfo.lock, isp->isp_maxcmds, isp->isp_maxcmds, devq); if (sim == NULL) { return (ENOMEM); } ISP_LOCK(isp); if (xpt_bus_register(sim, isp->isp_dev, chan) != CAM_SUCCESS) { ISP_UNLOCK(isp); cam_sim_free(sim, FALSE); return (EIO); } ISP_UNLOCK(isp); if (xpt_create_path(&path, NULL, cam_sim_path(sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { ISP_LOCK(isp); xpt_bus_deregister(cam_sim_path(sim)); ISP_UNLOCK(isp); cam_sim_free(sim, FALSE); return (ENXIO); } xpt_setup_ccb(&csa.ccb_h, path, 5); csa.ccb_h.func_code = XPT_SASYNC_CB; csa.event_enable = AC_LOST_DEVICE; csa.callback = isp_cam_async; csa.callback_arg = sim; ISP_LOCK(isp); xpt_action((union ccb *)&csa); ISP_UNLOCK(isp); if (IS_SCSI(isp)) { struct isp_spi *spi = ISP_SPI_PC(isp, chan); spi->sim = sim; spi->path = path; } else { fcparam *fcp = FCPARAM(isp, chan); struct isp_fc *fc = ISP_FC_PC(isp, chan); struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(isp->isp_osinfo.dev); struct sysctl_oid *tree = device_get_sysctl_tree(isp->isp_osinfo.dev); char name[16]; ISP_LOCK(isp); fc->sim = sim; fc->path = path; fc->isp = isp; fc->ready = 1; callout_init_mtx(&fc->ldt, &isp->isp_osinfo.lock, 0); callout_init_mtx(&fc->gdt, &isp->isp_osinfo.lock, 0); TASK_INIT(&fc->ltask, 1, isp_ldt_task, fc); TASK_INIT(&fc->gtask, 1, isp_gdt_task, fc); /* * We start by being "loop down" if we have an initiator role */ if (fcp->role & ISP_ROLE_INITIATOR) { isp_freeze_loopdown(isp, chan, "isp_attach"); callout_reset(&fc->ldt, isp_quickboot_time * hz, isp_ldt, fc); isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Starting Initial Loop Down Timer @ %lu", (unsigned long) time_uptime); } ISP_UNLOCK(isp); if (THREAD_CREATE(isp_kthread, fc, &fc->kproc, 0, 0, "%s: fc_thrd%d", device_get_nameunit(isp->isp_osinfo.dev), chan)) { xpt_free_path(fc->path); ISP_LOCK(isp); if (callout_active(&fc->ldt)) callout_stop(&fc->ldt); xpt_bus_deregister(cam_sim_path(fc->sim)); ISP_UNLOCK(isp); cam_sim_free(fc->sim, FALSE); return (ENOMEM); } fc->num_threads += 1; if (chan > 0) { snprintf(name, sizeof(name), "chan%d", chan); tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, name, CTLFLAG_RW, 0, "Virtual channel"); } SYSCTL_ADD_QUAD(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "wwnn", CTLFLAG_RD, &fcp->isp_wwnn, "World Wide Node Name"); SYSCTL_ADD_QUAD(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "wwpn", CTLFLAG_RD, &fcp->isp_wwpn, "World Wide Port Name"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "loop_down_limit", CTLFLAG_RW, &fc->loop_down_limit, 0, "Loop Down Limit"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "gone_device_time", CTLFLAG_RW, &fc->gone_device_time, 0, "Gone Device Time"); #if defined(ISP_TARGET_MODE) && defined(DEBUG) SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "inject_lost_data_frame", CTLFLAG_RW, &fc->inject_lost_data_frame, 0, "Cause a Lost Frame on a Read"); #endif SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "role", CTLTYPE_INT | CTLFLAG_RW, isp, chan, isp_role_sysctl, "I", "Current role"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "speed", CTLFLAG_RD, &fcp->isp_gbspeed, 0, "Connection speed in gigabits"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "linkstate", CTLFLAG_RD, &fcp->isp_linkstate, 0, "Link state"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "fwstate", CTLFLAG_RD, &fcp->isp_fwstate, 0, "Firmware state"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "loopstate", 
CTLFLAG_RD, &fcp->isp_loopstate, 0, "Loop state"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "topo", CTLFLAG_RD, &fcp->isp_topo, 0, "Connection topology"); } return (0); } static void isp_detach_chan(ispsoftc_t *isp, int chan) { struct cam_sim *sim; struct cam_path *path; struct ccb_setasync csa; int *num_threads; ISP_GET_PC(isp, chan, sim, sim); ISP_GET_PC(isp, chan, path, path); ISP_GET_PC_ADDR(isp, chan, num_threads, num_threads); xpt_setup_ccb(&csa.ccb_h, path, 5); csa.ccb_h.func_code = XPT_SASYNC_CB; csa.event_enable = 0; csa.callback = isp_cam_async; csa.callback_arg = sim; xpt_action((union ccb *)&csa); xpt_free_path(path); xpt_bus_deregister(cam_sim_path(sim)); cam_sim_free(sim, FALSE); /* Wait for the channel's spawned threads to exit. */ wakeup(isp->isp_osinfo.pc.ptr); while (*num_threads != 0) mtx_sleep(isp, &isp->isp_osinfo.lock, PRIBIO, "isp_reap", 100); } int isp_attach(ispsoftc_t *isp) { const char *nu = device_get_nameunit(isp->isp_osinfo.dev); int du = device_get_unit(isp->isp_dev); int chan; isp->isp_osinfo.ehook.ich_func = isp_intr_enable; isp->isp_osinfo.ehook.ich_arg = isp; /* * Haha. Set this first, because if we're loaded as a module isp_intr_enable * will be called right away, which will clear isp_osinfo.ehook_active, * which would be unwise to then set again later. */ isp->isp_osinfo.ehook_active = 1; if (config_intrhook_establish(&isp->isp_osinfo.ehook) != 0) { isp_prt(isp, ISP_LOGERR, "could not establish interrupt enable hook"); return (-EIO); } /* * Create the device queue for our SIM(s). */ isp->isp_osinfo.devq = cam_simq_alloc(isp->isp_maxcmds); if (isp->isp_osinfo.devq == NULL) { config_intrhook_disestablish(&isp->isp_osinfo.ehook); return (EIO); } for (chan = 0; chan < isp->isp_nchan; chan++) { if (isp_attach_chan(isp, isp->isp_osinfo.devq, chan)) { goto unwind; } } callout_init_mtx(&isp->isp_osinfo.tmo, &isp->isp_osinfo.lock, 0); isp_timer_count = hz >> 2; callout_reset(&isp->isp_osinfo.tmo, isp_timer_count, isp_timer, isp); isp->isp_osinfo.timer_active = 1; isp->isp_osinfo.cdev = make_dev(&isp_cdevsw, du, UID_ROOT, GID_OPERATOR, 0600, "%s", nu); if (isp->isp_osinfo.cdev) { isp->isp_osinfo.cdev->si_drv1 = isp; } return (0); unwind: while (--chan >= 0) { struct cam_sim *sim; struct cam_path *path; ISP_GET_PC(isp, chan, sim, sim); ISP_GET_PC(isp, chan, path, path); xpt_free_path(path); ISP_LOCK(isp); xpt_bus_deregister(cam_sim_path(sim)); ISP_UNLOCK(isp); cam_sim_free(sim, FALSE); } if (isp->isp_osinfo.ehook_active) { config_intrhook_disestablish(&isp->isp_osinfo.ehook); isp->isp_osinfo.ehook_active = 0; } if (isp->isp_osinfo.cdev) { destroy_dev(isp->isp_osinfo.cdev); isp->isp_osinfo.cdev = NULL; } cam_simq_free(isp->isp_osinfo.devq); isp->isp_osinfo.devq = NULL; return (-1); } int isp_detach(ispsoftc_t *isp) { struct cam_sim *sim; int chan; ISP_LOCK(isp); for (chan = isp->isp_nchan - 1; chan >= 0; chan -= 1) { ISP_GET_PC(isp, chan, sim, sim); if (sim->refcount > 2) { ISP_UNLOCK(isp); return (EBUSY); } } /* Tell spawned threads that we're exiting.
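 * Each channel's worker thread (isp_kthread) is expected to notice
 * isp_osinfo.is_exiting and terminate; isp_detach_chan() then sleeps
 * until that channel's num_threads count drains to zero.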
*/ isp->isp_osinfo.is_exiting = 1; if (isp->isp_osinfo.timer_active) { callout_stop(&isp->isp_osinfo.tmo); isp->isp_osinfo.timer_active = 0; } for (chan = isp->isp_nchan - 1; chan >= 0; chan -= 1) isp_detach_chan(isp, chan); ISP_UNLOCK(isp); if (isp->isp_osinfo.cdev) { destroy_dev(isp->isp_osinfo.cdev); isp->isp_osinfo.cdev = NULL; } if (isp->isp_osinfo.ehook_active) { config_intrhook_disestablish(&isp->isp_osinfo.ehook); isp->isp_osinfo.ehook_active = 0; } if (isp->isp_osinfo.devq != NULL) { cam_simq_free(isp->isp_osinfo.devq); isp->isp_osinfo.devq = NULL; } return (0); } static void isp_freeze_loopdown(ispsoftc_t *isp, int chan, char *msg) { if (IS_FC(isp)) { struct isp_fc *fc = ISP_FC_PC(isp, chan); if (fc->simqfrozen == 0) { isp_prt(isp, ISP_LOGDEBUG0, "Chan %d %s -- freeze simq (loopdown)", chan, msg); fc->simqfrozen = SIMQFRZ_LOOPDOWN; #if __FreeBSD_version >= 1000039 xpt_hold_boot(); #endif xpt_freeze_simq(fc->sim, 1); } else { isp_prt(isp, ISP_LOGDEBUG0, "Chan %d %s -- mark frozen (loopdown)", chan, msg); fc->simqfrozen |= SIMQFRZ_LOOPDOWN; } } } static void isp_unfreeze_loopdown(ispsoftc_t *isp, int chan) { if (IS_FC(isp)) { struct isp_fc *fc = ISP_FC_PC(isp, chan); int wasfrozen = fc->simqfrozen & SIMQFRZ_LOOPDOWN; fc->simqfrozen &= ~SIMQFRZ_LOOPDOWN; if (wasfrozen && fc->simqfrozen == 0) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d releasing simq", __func__, chan); xpt_release_simq(fc->sim, 1); #if __FreeBSD_version >= 1000039 xpt_release_boot(); #endif } } } static int ispioctl(struct cdev *dev, u_long c, caddr_t addr, int flags, struct thread *td) { ispsoftc_t *isp; int nr, chan, retval = ENOTTY; isp = dev->si_drv1; switch (c) { case ISP_SDBLEV: { int olddblev = isp->isp_dblev; isp->isp_dblev = *(int *)addr; *(int *)addr = olddblev; retval = 0; break; } case ISP_GETROLE: chan = *(int *)addr; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } if (IS_FC(isp)) { *(int *)addr = FCPARAM(isp, chan)->role; } else { *(int *)addr = SDPARAM(isp, chan)->role; } retval = 0; break; case ISP_SETROLE: nr = *(int *)addr; chan = nr >> 8; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } nr &= 0xff; if (nr & ~(ISP_ROLE_INITIATOR|ISP_ROLE_TARGET)) { retval = EINVAL; break; } ISP_LOCK(isp); if (IS_FC(isp)) *(int *)addr = FCPARAM(isp, chan)->role; else *(int *)addr = SDPARAM(isp, chan)->role; retval = isp_control(isp, ISPCTL_CHANGE_ROLE, chan, nr); ISP_UNLOCK(isp); retval = 0; break; case ISP_RESETHBA: ISP_LOCK(isp); isp_reinit(isp, 0); ISP_UNLOCK(isp); retval = 0; break; case ISP_RESCAN: if (IS_FC(isp)) { chan = *(int *)addr; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } ISP_LOCK(isp); if (isp_fc_runstate(isp, chan, 5 * 1000000)) { retval = EIO; } else { retval = 0; } ISP_UNLOCK(isp); } break; case ISP_FC_LIP: if (IS_FC(isp)) { chan = *(int *)addr; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } ISP_LOCK(isp); if (isp_control(isp, ISPCTL_SEND_LIP, chan)) { retval = EIO; } else { retval = 0; } ISP_UNLOCK(isp); } break; case ISP_FC_GETDINFO: { struct isp_fc_device *ifc = (struct isp_fc_device *) addr; fcportdb_t *lp; if (IS_SCSI(isp)) { break; } if (ifc->loopid >= MAX_FC_TARG) { retval = EINVAL; break; } lp = &FCPARAM(isp, ifc->chan)->portdb[ifc->loopid]; if (lp->state != FC_PORTDB_STATE_NIL) { ifc->role = (lp->prli_word3 & SVC3_ROLE_MASK) >> SVC3_ROLE_SHIFT; ifc->loopid = lp->handle; ifc->portid = lp->portid; ifc->node_wwn = lp->node_wwn; ifc->port_wwn = lp->port_wwn; retval = 0; } else { retval = ENODEV; } 
break; } case ISP_GET_STATS: { isp_stats_t *sp = (isp_stats_t *) addr; ISP_MEMZERO(sp, sizeof (*sp)); sp->isp_stat_version = ISP_STATS_VERSION; sp->isp_type = isp->isp_type; sp->isp_revision = isp->isp_revision; ISP_LOCK(isp); sp->isp_stats[ISP_INTCNT] = isp->isp_intcnt; sp->isp_stats[ISP_INTBOGUS] = isp->isp_intbogus; sp->isp_stats[ISP_INTMBOXC] = isp->isp_intmboxc; sp->isp_stats[ISP_INGOASYNC] = isp->isp_intoasync; sp->isp_stats[ISP_RSLTCCMPLT] = isp->isp_rsltccmplt; sp->isp_stats[ISP_FPHCCMCPLT] = isp->isp_fphccmplt; sp->isp_stats[ISP_RSCCHIWAT] = isp->isp_rscchiwater; sp->isp_stats[ISP_FPCCHIWAT] = isp->isp_fpcchiwater; ISP_UNLOCK(isp); retval = 0; break; } case ISP_CLR_STATS: ISP_LOCK(isp); isp->isp_intcnt = 0; isp->isp_intbogus = 0; isp->isp_intmboxc = 0; isp->isp_intoasync = 0; isp->isp_rsltccmplt = 0; isp->isp_fphccmplt = 0; isp->isp_rscchiwater = 0; isp->isp_fpcchiwater = 0; ISP_UNLOCK(isp); retval = 0; break; case ISP_FC_GETHINFO: { struct isp_hba_device *hba = (struct isp_hba_device *) addr; int chan = hba->fc_channel; if (chan < 0 || chan >= isp->isp_nchan) { retval = ENXIO; break; } hba->fc_fw_major = ISP_FW_MAJORX(isp->isp_fwrev); hba->fc_fw_minor = ISP_FW_MINORX(isp->isp_fwrev); hba->fc_fw_micro = ISP_FW_MICROX(isp->isp_fwrev); hba->fc_nchannels = isp->isp_nchan; if (IS_FC(isp)) { hba->fc_nports = MAX_FC_TARG; hba->fc_speed = FCPARAM(isp, hba->fc_channel)->isp_gbspeed; hba->fc_topology = FCPARAM(isp, chan)->isp_topo + 1; hba->fc_loopid = FCPARAM(isp, chan)->isp_loopid; hba->nvram_node_wwn = FCPARAM(isp, chan)->isp_wwnn_nvram; hba->nvram_port_wwn = FCPARAM(isp, chan)->isp_wwpn_nvram; hba->active_node_wwn = FCPARAM(isp, chan)->isp_wwnn; hba->active_port_wwn = FCPARAM(isp, chan)->isp_wwpn; } else { hba->fc_nports = MAX_TARGETS; hba->fc_speed = 0; hba->fc_topology = 0; hba->nvram_node_wwn = 0ull; hba->nvram_port_wwn = 0ull; hba->active_node_wwn = 0ull; hba->active_port_wwn = 0ull; } retval = 0; break; } case ISP_TSK_MGMT: { int needmarker; struct isp_fc_tsk_mgmt *fct = (struct isp_fc_tsk_mgmt *) addr; uint16_t loopid; mbreg_t mbs; if (IS_SCSI(isp)) { break; } chan = fct->chan; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } needmarker = retval = 0; loopid = fct->loopid; ISP_LOCK(isp); if (IS_24XX(isp)) { uint8_t local[QENTRY_LEN]; isp24xx_tmf_t *tmf; isp24xx_statusreq_t *sp; fcparam *fcp = FCPARAM(isp, chan); fcportdb_t *lp; int i; for (i = 0; i < MAX_FC_TARG; i++) { lp = &fcp->portdb[i]; if (lp->handle == loopid) { break; } } if (i == MAX_FC_TARG) { retval = ENXIO; ISP_UNLOCK(isp); break; } /* XXX VALIDATE LP XXX */ tmf = (isp24xx_tmf_t *) local; ISP_MEMZERO(tmf, QENTRY_LEN); tmf->tmf_header.rqs_entry_type = RQSTYPE_TSK_MGMT; tmf->tmf_header.rqs_entry_count = 1; tmf->tmf_nphdl = lp->handle; tmf->tmf_delay = 2; tmf->tmf_timeout = 2; tmf->tmf_tidlo = lp->portid; tmf->tmf_tidhi = lp->portid >> 16; tmf->tmf_vpidx = ISP_GET_VPIDX(isp, chan); tmf->tmf_lun[1] = fct->lun & 0xff; if (fct->lun >= 256) { tmf->tmf_lun[0] = 0x40 | (fct->lun >> 8); } switch (fct->action) { case IPT_CLEAR_ACA: tmf->tmf_flags = ISP24XX_TMF_CLEAR_ACA; break; case IPT_TARGET_RESET: tmf->tmf_flags = ISP24XX_TMF_TARGET_RESET; needmarker = 1; break; case IPT_LUN_RESET: tmf->tmf_flags = ISP24XX_TMF_LUN_RESET; needmarker = 1; break; case IPT_CLEAR_TASK_SET: tmf->tmf_flags = ISP24XX_TMF_CLEAR_TASK_SET; needmarker = 1; break; case IPT_ABORT_TASK_SET: tmf->tmf_flags = ISP24XX_TMF_ABORT_TASK_SET; needmarker = 1; break; default: retval = EINVAL; break; } if (retval) { ISP_UNLOCK(isp); break; } 
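			/*
			 * Execute the task management function by hand: the
			 * TMF IOCB is built in a local buffer, copied into the
			 * channel's DMA scratch area, and handed to the
			 * firmware with the EXEC_COMMAND_IOCB_A64 mailbox
			 * command (the scratch DMA address is split across
			 * mailbox registers 2/3/6/7).  The completion status
			 * IOCB is then read back from the second QENTRY_LEN
			 * slot of the same scratch area.
			 */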
MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 5000000); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); if (FC_SCRATCH_ACQUIRE(isp, chan)) { ISP_UNLOCK(isp); retval = ENOMEM; break; } isp_put_24xx_tmf(isp, tmf, fcp->isp_scratch); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, QENTRY_LEN, chan); sp = (isp24xx_statusreq_t *) local; sp->req_completion_status = 1; retval = isp_control(isp, ISPCTL_RUN_MBOXCMD, &mbs); MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); isp_get_24xx_response(isp, &((isp24xx_statusreq_t *)fcp->isp_scratch)[1], sp); FC_SCRATCH_RELEASE(isp, chan); if (retval || sp->req_completion_status != 0) { retval = EIO; } if (retval == 0) { if (needmarker) { fcp->sendmarker = 1; } } } else { MBSINIT(&mbs, 0, MBLOGALL, 0); if (ISP_CAP_2KLOGIN(isp) == 0) { loopid <<= 8; } switch (fct->action) { case IPT_CLEAR_ACA: mbs.param[0] = MBOX_CLEAR_ACA; mbs.param[1] = loopid; mbs.param[2] = fct->lun; break; case IPT_TARGET_RESET: mbs.param[0] = MBOX_TARGET_RESET; mbs.param[1] = loopid; needmarker = 1; break; case IPT_LUN_RESET: mbs.param[0] = MBOX_LUN_RESET; mbs.param[1] = loopid; mbs.param[2] = fct->lun; needmarker = 1; break; case IPT_CLEAR_TASK_SET: mbs.param[0] = MBOX_CLEAR_TASK_SET; mbs.param[1] = loopid; mbs.param[2] = fct->lun; needmarker = 1; break; case IPT_ABORT_TASK_SET: mbs.param[0] = MBOX_ABORT_TASK_SET; mbs.param[1] = loopid; mbs.param[2] = fct->lun; needmarker = 1; break; default: retval = EINVAL; break; } if (retval == 0) { if (needmarker) { FCPARAM(isp, chan)->sendmarker = 1; } retval = isp_control(isp, ISPCTL_RUN_MBOXCMD, &mbs); if (retval) { retval = EIO; } } } ISP_UNLOCK(isp); break; } default: break; } return (retval); } static void isp_intr_enable(void *arg) { int chan; ispsoftc_t *isp = arg; ISP_LOCK(isp); for (chan = 0; chan < isp->isp_nchan; chan++) { if (IS_FC(isp)) { if (FCPARAM(isp, chan)->role != ISP_ROLE_NONE) { ISP_ENABLE_INTS(isp); break; } } else { if (SDPARAM(isp, chan)->role != ISP_ROLE_NONE) { ISP_ENABLE_INTS(isp); break; } } } isp->isp_osinfo.ehook_active = 0; ISP_UNLOCK(isp); /* Release our hook so that the boot can continue.
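 * Note that ehook_active was already cleared above while the lock was
 * held, so isp_detach() will not try to disestablish the hook a
 * second time.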
*/ config_intrhook_disestablish(&isp->isp_osinfo.ehook); } /* * Local Inlines */ static ISP_INLINE int isp_get_pcmd(ispsoftc_t *, union ccb *); static ISP_INLINE void isp_free_pcmd(ispsoftc_t *, union ccb *); static ISP_INLINE int isp_get_pcmd(ispsoftc_t *isp, union ccb *ccb) { ISP_PCMD(ccb) = isp->isp_osinfo.pcmd_free; if (ISP_PCMD(ccb) == NULL) { return (-1); } isp->isp_osinfo.pcmd_free = ((struct isp_pcmd *)ISP_PCMD(ccb))->next; return (0); } static ISP_INLINE void isp_free_pcmd(ispsoftc_t *isp, union ccb *ccb) { if (ISP_PCMD(ccb)) { #ifdef ISP_TARGET_MODE PISP_PCMD(ccb)->datalen = 0; PISP_PCMD(ccb)->totslen = 0; PISP_PCMD(ccb)->cumslen = 0; PISP_PCMD(ccb)->crn = 0; #endif PISP_PCMD(ccb)->next = isp->isp_osinfo.pcmd_free; isp->isp_osinfo.pcmd_free = ISP_PCMD(ccb); ISP_PCMD(ccb) = NULL; } } /* * Put the target mode functions here, because some are inlines */ #ifdef ISP_TARGET_MODE static ISP_INLINE void isp_tmlock(ispsoftc_t *, const char *); static ISP_INLINE void isp_tmunlk(ispsoftc_t *); static ISP_INLINE int is_any_lun_enabled(ispsoftc_t *, int); static ISP_INLINE int is_lun_enabled(ispsoftc_t *, int, lun_id_t); static ISP_INLINE tstate_t *get_lun_statep(ispsoftc_t *, int, lun_id_t); static ISP_INLINE tstate_t *get_lun_statep_from_tag(ispsoftc_t *, int, uint32_t); static ISP_INLINE void rls_lun_statep(ispsoftc_t *, tstate_t *); static ISP_INLINE inot_private_data_t *get_ntp_from_tagdata(ispsoftc_t *, uint32_t, uint32_t, tstate_t **); static ISP_INLINE atio_private_data_t *isp_get_atpd(ispsoftc_t *, tstate_t *, uint32_t); static ISP_INLINE atio_private_data_t *isp_find_atpd(ispsoftc_t *, tstate_t *, uint32_t); static ISP_INLINE void isp_put_atpd(ispsoftc_t *, tstate_t *, atio_private_data_t *); static ISP_INLINE inot_private_data_t *isp_get_ntpd(ispsoftc_t *, tstate_t *); static ISP_INLINE inot_private_data_t *isp_find_ntpd(ispsoftc_t *, tstate_t *, uint32_t, uint32_t); static ISP_INLINE void isp_put_ntpd(ispsoftc_t *, tstate_t *, inot_private_data_t *); static cam_status create_lun_state(ispsoftc_t *, int, struct cam_path *, tstate_t **); static void destroy_lun_state(ispsoftc_t *, tstate_t *); static void isp_enable_lun(ispsoftc_t *, union ccb *); static cam_status isp_enable_deferred_luns(ispsoftc_t *, int); static cam_status isp_enable_deferred(ispsoftc_t *, int, lun_id_t); static void isp_disable_lun(ispsoftc_t *, union ccb *); static int isp_enable_target_mode(ispsoftc_t *, int); static int isp_disable_target_mode(ispsoftc_t *, int); static void isp_ledone(ispsoftc_t *, lun_entry_t *); static timeout_t isp_refire_putback_atio; static timeout_t isp_refire_notify_ack; static void isp_complete_ctio(union ccb *); static void isp_target_putback_atio(union ccb *); enum Start_Ctio_How { FROM_CAM, FROM_TIMER, FROM_SRR, FROM_CTIO_DONE }; static void isp_target_start_ctio(ispsoftc_t *, union ccb *, enum Start_Ctio_How); static void isp_handle_platform_atio(ispsoftc_t *, at_entry_t *); static void isp_handle_platform_atio2(ispsoftc_t *, at2_entry_t *); static void isp_handle_platform_atio7(ispsoftc_t *, at7_entry_t *); static void isp_handle_platform_ctio(ispsoftc_t *, void *); static void isp_handle_platform_notify_scsi(ispsoftc_t *, in_entry_t *); static void isp_handle_platform_notify_fc(ispsoftc_t *, in_fcentry_t *); static void isp_handle_platform_notify_24xx(ispsoftc_t *, in_fcentry_24xx_t *); static int isp_handle_platform_target_notify_ack(ispsoftc_t *, isp_notify_t *); static void isp_handle_platform_target_tmf(ispsoftc_t *, isp_notify_t *); static void 
isp_target_mark_aborted(ispsoftc_t *, union ccb *); static void isp_target_mark_aborted_early(ispsoftc_t *, tstate_t *, uint32_t); static ISP_INLINE void isp_tmlock(ispsoftc_t *isp, const char *msg) { while (isp->isp_osinfo.tmbusy) { isp->isp_osinfo.tmwanted = 1; mtx_sleep(isp, &isp->isp_lock, PRIBIO, msg, 0); } isp->isp_osinfo.tmbusy = 1; } static ISP_INLINE void isp_tmunlk(ispsoftc_t *isp) { isp->isp_osinfo.tmbusy = 0; if (isp->isp_osinfo.tmwanted) { isp->isp_osinfo.tmwanted = 0; wakeup(isp); } } static ISP_INLINE int is_any_lun_enabled(ispsoftc_t *isp, int bus) { struct tslist *lhp; int i; for (i = 0; i < LUN_HASH_SIZE; i++) { ISP_GET_PC_ADDR(isp, bus, lun_hash[i], lhp); if (SLIST_FIRST(lhp)) return (1); } return (0); } static ISP_INLINE int is_lun_enabled(ispsoftc_t *isp, int bus, lun_id_t lun) { tstate_t *tptr; struct tslist *lhp; ISP_GET_PC_ADDR(isp, bus, lun_hash[LUN_HASH_FUNC(lun)], lhp); SLIST_FOREACH(tptr, lhp, next) { if (tptr->ts_lun == lun) { return (1); } } return (0); } static void dump_tstates(ispsoftc_t *isp, int bus) { int i, j; struct tslist *lhp; tstate_t *tptr = NULL; if (bus >= isp->isp_nchan) { return; } for (i = 0; i < LUN_HASH_SIZE; i++) { ISP_GET_PC_ADDR(isp, bus, lun_hash[i], lhp); j = 0; SLIST_FOREACH(tptr, lhp, next) { xpt_print(tptr->owner, "[%d, %d] atio_cnt=%d inot_cnt=%d\n", i, j, tptr->atio_count, tptr->inot_count); j++; } } } static ISP_INLINE tstate_t * get_lun_statep(ispsoftc_t *isp, int bus, lun_id_t lun) { tstate_t *tptr = NULL; struct tslist *lhp; if (bus < isp->isp_nchan) { ISP_GET_PC_ADDR(isp, bus, lun_hash[LUN_HASH_FUNC(lun)], lhp); SLIST_FOREACH(tptr, lhp, next) { if (tptr->ts_lun == lun) { tptr->hold++; return (tptr); } } } return (NULL); } static ISP_INLINE tstate_t * get_lun_statep_from_tag(ispsoftc_t *isp, int bus, uint32_t tagval) { tstate_t *tptr = NULL; atio_private_data_t *atp; struct tslist *lhp; int i; if (bus < isp->isp_nchan && tagval != 0) { for (i = 0; i < LUN_HASH_SIZE; i++) { ISP_GET_PC_ADDR(isp, bus, lun_hash[i], lhp); SLIST_FOREACH(tptr, lhp, next) { atp = isp_find_atpd(isp, tptr, tagval); if (atp) { tptr->hold++; return (tptr); } } } } return (NULL); } static ISP_INLINE inot_private_data_t * get_ntp_from_tagdata(ispsoftc_t *isp, uint32_t tag_id, uint32_t seq_id, tstate_t **rslt) { inot_private_data_t *ntp; tstate_t *tptr; struct tslist *lhp; int bus, i; for (bus = 0; bus < isp->isp_nchan; bus++) { for (i = 0; i < LUN_HASH_SIZE; i++) { ISP_GET_PC_ADDR(isp, bus, lun_hash[i], lhp); SLIST_FOREACH(tptr, lhp, next) { ntp = isp_find_ntpd(isp, tptr, tag_id, seq_id); if (ntp) { *rslt = tptr; tptr->hold++; return (ntp); } } } } return (NULL); } static ISP_INLINE void rls_lun_statep(ispsoftc_t *isp, tstate_t *tptr) { KASSERT((tptr->hold), ("tptr not held")); tptr->hold--; } static void isp_tmcmd_restart(ispsoftc_t *isp) { inot_private_data_t *ntp; inot_private_data_t *restart_queue; tstate_t *tptr; union ccb *ccb; struct tslist *lhp; int bus, i; for (bus = 0; bus < isp->isp_nchan; bus++) { for (i = 0; i < LUN_HASH_SIZE; i++) { ISP_GET_PC_ADDR(isp, bus, lun_hash[i], lhp); SLIST_FOREACH(tptr, lhp, next) { if ((restart_queue = tptr->restart_queue) != NULL) tptr->restart_queue = NULL; while (restart_queue) { ntp = restart_queue; restart_queue = ntp->rd.nt.nt_hba; if (IS_24XX(isp)) { isp_prt(isp, ISP_LOGTDEBUG0, "%s: restarting resrc deprived %x", __func__, ((at7_entry_t *)ntp->rd.data)->at_rxid); isp_handle_platform_atio7(isp, (at7_entry_t *) ntp->rd.data); } else { isp_prt(isp, ISP_LOGTDEBUG0, "%s: restarting resrc deprived %x", __func__, 
((at2_entry_t *)ntp->rd.data)->at_rxid); isp_handle_platform_atio2(isp, (at2_entry_t *) ntp->rd.data); } isp_put_ntpd(isp, tptr, ntp); if (tptr->restart_queue && restart_queue != NULL) { ntp = tptr->restart_queue; tptr->restart_queue = restart_queue; while (restart_queue->rd.nt.nt_hba) { restart_queue = restart_queue->rd.nt.nt_hba; } restart_queue->rd.nt.nt_hba = ntp; break; } } /* * We only need to do this once per tptr */ if (!TAILQ_EMPTY(&tptr->waitq)) { ccb = (union ccb *)TAILQ_LAST(&tptr->waitq, isp_ccbq); TAILQ_REMOVE(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); isp_target_start_ctio(isp, ccb, FROM_TIMER); } } } } } static ISP_INLINE atio_private_data_t * isp_get_atpd(ispsoftc_t *isp, tstate_t *tptr, uint32_t tag) { atio_private_data_t *atp; atp = LIST_FIRST(&tptr->atfree); if (atp) { LIST_REMOVE(atp, next); atp->tag = tag; LIST_INSERT_HEAD(&tptr->atused[ATPDPHASH(tag)], atp, next); } return (atp); } static ISP_INLINE atio_private_data_t * isp_find_atpd(ispsoftc_t *isp, tstate_t *tptr, uint32_t tag) { atio_private_data_t *atp; LIST_FOREACH(atp, &tptr->atused[ATPDPHASH(tag)], next) { if (atp->tag == tag) return (atp); } return (NULL); } static ISP_INLINE void isp_put_atpd(ispsoftc_t *isp, tstate_t *tptr, atio_private_data_t *atp) { if (atp->ests) { isp_put_ecmd(isp, atp->ests); } LIST_REMOVE(atp, next); memset(atp, 0, sizeof (*atp)); LIST_INSERT_HEAD(&tptr->atfree, atp, next); } static void isp_dump_atpd(ispsoftc_t *isp, tstate_t *tptr) { atio_private_data_t *atp; const char *states[8] = { "Free", "ATIO", "CAM", "CTIO", "LAST_CTIO", "PDON", "?6", "7" }; for (atp = tptr->atpool; atp < &tptr->atpool[ATPDPSIZE]; atp++) { xpt_print(tptr->owner, "ATP: [0x%x] origdlen %u bytes_xfrd %u lun %u nphdl 0x%04x s_id 0x%06x d_id 0x%06x oxid 0x%04x state %s\n", atp->tag, atp->orig_datalen, atp->bytes_xfered, atp->lun, atp->nphdl, atp->sid, atp->portid, atp->oxid, states[atp->state & 0x7]); } } static ISP_INLINE inot_private_data_t * isp_get_ntpd(ispsoftc_t *isp, tstate_t *tptr) { inot_private_data_t *ntp; ntp = tptr->ntfree; if (ntp) { tptr->ntfree = ntp->next; } return (ntp); } static ISP_INLINE inot_private_data_t * isp_find_ntpd(ispsoftc_t *isp, tstate_t *tptr, uint32_t tag_id, uint32_t seq_id) { inot_private_data_t *ntp; for (ntp = tptr->ntpool; ntp < &tptr->ntpool[ATPDPSIZE]; ntp++) { if (ntp->rd.tag_id == tag_id && ntp->rd.seq_id == seq_id) { return (ntp); } } return (NULL); } static ISP_INLINE void isp_put_ntpd(ispsoftc_t *isp, tstate_t *tptr, inot_private_data_t *ntp) { ntp->rd.tag_id = ntp->rd.seq_id = 0; ntp->next = tptr->ntfree; tptr->ntfree = ntp; } static cam_status create_lun_state(ispsoftc_t *isp, int bus, struct cam_path *path, tstate_t **rslt) { cam_status status; lun_id_t lun; struct tslist *lhp; tstate_t *tptr; int i; lun = xpt_path_lun_id(path); if (lun != CAM_LUN_WILDCARD) { if (ISP_MAX_LUNS(isp) > 0 && lun >= ISP_MAX_LUNS(isp)) { return (CAM_LUN_INVALID); } } if (is_lun_enabled(isp, bus, lun)) { return (CAM_LUN_ALRDY_ENA); } tptr = malloc(sizeof (tstate_t), M_DEVBUF, M_NOWAIT|M_ZERO); if (tptr == NULL) { return (CAM_RESRC_UNAVAIL); } tptr->ts_lun = lun; status = xpt_create_path(&tptr->owner, NULL, xpt_path_path_id(path), xpt_path_target_id(path), lun); if (status != CAM_REQ_CMP) { free(tptr, M_DEVBUF); return (status); } SLIST_INIT(&tptr->atios); SLIST_INIT(&tptr->inots); TAILQ_INIT(&tptr->waitq); LIST_INIT(&tptr->atfree); for (i = ATPDPSIZE-1; i >= 0; i--) LIST_INSERT_HEAD(&tptr->atfree, &tptr->atpool[i], next); for (i = 0; i < ATPDPHASHSIZE; i++) LIST_INIT(&tptr->atused[i]); 
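	/*
	 * Thread the immediate-notify pool into a singly linked free list.
	 * Since the tstate was allocated M_ZERO, the last element's next
	 * pointer is already NULL, so after this loop the layout is:
	 *
	 *	ntfree -> ntpool[0] -> ... -> ntpool[ATPDPSIZE-1] -> NULL
	 *
	 * isp_get_ntpd() pops entries from the head of this list and
	 * isp_put_ntpd() pushes them back.
	 */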
for (i = 0; i < ATPDPSIZE-1; i++) tptr->ntpool[i].next = &tptr->ntpool[i+1]; tptr->ntfree = tptr->ntpool; tptr->hold = 1; ISP_GET_PC_ADDR(isp, bus, lun_hash[LUN_HASH_FUNC(lun)], lhp); SLIST_INSERT_HEAD(lhp, tptr, next); *rslt = tptr; ISP_PATH_PRT(isp, ISP_LOGTDEBUG0, path, "created tstate\n"); return (CAM_REQ_CMP); } static ISP_INLINE void destroy_lun_state(ispsoftc_t *isp, tstate_t *tptr) { union ccb *ccb; struct tslist *lhp; KASSERT((tptr->hold != 0), ("tptr is not held")); KASSERT((tptr->hold == 1), ("tptr still held (%d)", tptr->hold)); do { ccb = (union ccb *)SLIST_FIRST(&tptr->atios); if (ccb) { SLIST_REMOVE_HEAD(&tptr->atios, sim_links.sle); ccb->ccb_h.status = CAM_REQ_ABORTED; xpt_done(ccb); } } while (ccb); do { ccb = (union ccb *)SLIST_FIRST(&tptr->inots); if (ccb) { SLIST_REMOVE_HEAD(&tptr->inots, sim_links.sle); ccb->ccb_h.status = CAM_REQ_ABORTED; xpt_done(ccb); } } while (ccb); ISP_GET_PC_ADDR(isp, cam_sim_bus(xpt_path_sim(tptr->owner)), lun_hash[LUN_HASH_FUNC(tptr->ts_lun)], lhp); SLIST_REMOVE(lhp, tptr, tstate, next); ISP_PATH_PRT(isp, ISP_LOGTDEBUG0, tptr->owner, "destroyed tstate\n"); xpt_free_path(tptr->owner); free(tptr, M_DEVBUF); } /* * Enable a lun. */ static void isp_enable_lun(ispsoftc_t *isp, union ccb *ccb) { tstate_t *tptr = NULL; int bus, tm_enabled, target_role; target_id_t target; lun_id_t lun; /* * We only support either a wildcard target/lun or a target ID of zero and a non-wildcard lun */ bus = XS_CHANNEL(ccb); target = ccb->ccb_h.target_id; lun = ccb->ccb_h.target_lun; ISP_PATH_PRT(isp, ISP_LOGTDEBUG0|ISP_LOGCONFIG, ccb->ccb_h.path, "enabling lun %jx\n", (uintmax_t)lun); if (target == CAM_TARGET_WILDCARD && lun != CAM_LUN_WILDCARD) { ccb->ccb_h.status = CAM_LUN_INVALID; xpt_done(ccb); return; } if (target != CAM_TARGET_WILDCARD && lun == CAM_LUN_WILDCARD) { ccb->ccb_h.status = CAM_LUN_INVALID; xpt_done(ccb); return; } if (isp->isp_dblev & ISP_LOGTDEBUG0) { xpt_print(ccb->ccb_h.path, "enabling lun 0x%jx on channel %d\n", (uintmax_t)lun, bus); } /* * Wait until we're not busy with the lun enables subsystem */ isp_tmlock(isp, "isp_enable_lun"); /* * This is as good a place as any to check f/w capabilities. */ if (IS_FC(isp)) { if (ISP_CAP_TMODE(isp) == 0) { xpt_print(ccb->ccb_h.path, "firmware does not support target mode\n"); ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; goto done; } /* * We *could* handle non-SCCLUN f/w, but we'd have to * dork with our already fragile enable/disable code. */ if (ISP_CAP_SCCFW(isp) == 0) { xpt_print(ccb->ccb_h.path, "firmware not SCCLUN capable\n"); ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; goto done; } target_role = (FCPARAM(isp, bus)->role & ISP_ROLE_TARGET) != 0; } else { target_role = (SDPARAM(isp, bus)->role & ISP_ROLE_TARGET) != 0; } /* * Create the state pointer. * It should not already exist. */ tptr = get_lun_statep(isp, bus, lun); if (tptr) { ccb->ccb_h.status = CAM_LUN_ALRDY_ENA; goto done; } ccb->ccb_h.status = create_lun_state(isp, bus, ccb->ccb_h.path, &tptr); if (ccb->ccb_h.status != CAM_REQ_CMP) { goto done; } /* * We have a tricky maneuver to perform here. * * If target mode isn't already enabled here, * *and* our current role includes target mode, * we enable target mode here. * */ ISP_GET_PC(isp, bus, tm_enabled, tm_enabled); if (tm_enabled == 0 && target_role != 0) { if (isp_enable_target_mode(isp, bus)) { ccb->ccb_h.status = CAM_REQ_CMP_ERR; destroy_lun_state(isp, tptr); tptr = NULL; goto done; } tm_enabled = 1; } /* * Now check to see whether this bus is in target mode already.
* * If not, a later role change into target mode will finish the job. */ if (tm_enabled == 0) { ISP_SET_PC(isp, bus, tm_enable_defer, 1); ccb->ccb_h.status = CAM_REQ_CMP; xpt_print(ccb->ccb_h.path, "Target Mode not enabled yet- lun enable deferred\n"); goto done1; } /* * Enable the lun. */ ccb->ccb_h.status = isp_enable_deferred(isp, bus, lun); done: if (ccb->ccb_h.status != CAM_REQ_CMP) { if (tptr) { destroy_lun_state(isp, tptr); tptr = NULL; } } else { tptr->enabled = 1; } done1: if (tptr) { rls_lun_statep(isp, tptr); } /* * And we're outta here.... */ isp_tmunlk(isp); xpt_done(ccb); } static cam_status isp_enable_deferred_luns(ispsoftc_t *isp, int bus) { tstate_t *tptr = NULL; struct tslist *lhp; int i, n; ISP_GET_PC(isp, bus, tm_enabled, i); if (i == 1) { return (CAM_REQ_CMP); } ISP_GET_PC(isp, bus, tm_enable_defer, i); if (i == 0) { return (CAM_REQ_CMP); } /* * If this succeeds, it will set tm_enable */ if (isp_enable_target_mode(isp, bus)) { return (CAM_REQ_CMP_ERR); } isp_tmlock(isp, "isp_enable_deferred_luns"); for (n = i = 0; i < LUN_HASH_SIZE; i++) { ISP_GET_PC_ADDR(isp, bus, lun_hash[i], lhp); SLIST_FOREACH(tptr, lhp, next) { tptr->hold++; if (tptr->enabled == 0) { if (isp_enable_deferred(isp, bus, tptr->ts_lun) == CAM_REQ_CMP) { tptr->enabled = 1; n++; } } else { n++; } tptr->hold--; } } isp_tmunlk(isp); if (n == 0) { return (CAM_REQ_CMP_ERR); } ISP_SET_PC(isp, bus, tm_enable_defer, 0); return (CAM_REQ_CMP); } static cam_status isp_enable_deferred(ispsoftc_t *isp, int bus, lun_id_t lun) { cam_status status; int luns_already_enabled; ISP_GET_PC(isp, bus, tm_luns_enabled, luns_already_enabled); isp_prt(isp, ISP_LOGTINFO, "%s: bus %d lun %jx luns_enabled %d", __func__, bus, (uintmax_t)lun, luns_already_enabled); if (IS_23XX(isp) || IS_24XX(isp) || (IS_FC(isp) && luns_already_enabled)) { status = CAM_REQ_CMP; } else { int cmd_cnt, not_cnt; if (IS_23XX(isp)) { cmd_cnt = DFLT_CMND_CNT; not_cnt = DFLT_INOT_CNT; } else { cmd_cnt = 64; not_cnt = 8; } status = CAM_REQ_INPROG; isp->isp_osinfo.rptr = &status; if (isp_lun_cmd(isp, RQSTYPE_ENABLE_LUN, bus, lun == CAM_LUN_WILDCARD? 0 : lun, cmd_cnt, not_cnt)) { status = CAM_RESRC_UNAVAIL; } else { mtx_sleep(&status, &isp->isp_lock, PRIBIO, "isp_enable_deferred", 0); } isp->isp_osinfo.rptr = NULL; } if (status == CAM_REQ_CMP) { ISP_SET_PC(isp, bus, tm_luns_enabled, 1); isp_prt(isp, ISP_LOGCONFIG|ISP_LOGTINFO, "bus %d lun %jx now enabled for target mode", bus, (uintmax_t)lun); } return (status); } static void isp_disable_lun(ispsoftc_t *isp, union ccb *ccb) { tstate_t *tptr = NULL; int bus; cam_status status; target_id_t target; lun_id_t lun; bus = XS_CHANNEL(ccb); target = ccb->ccb_h.target_id; lun = ccb->ccb_h.target_lun; ISP_PATH_PRT(isp, ISP_LOGTDEBUG0|ISP_LOGCONFIG, ccb->ccb_h.path, "disabling lun %jx\n", (uintmax_t)lun); if (target == CAM_TARGET_WILDCARD && lun != CAM_LUN_WILDCARD) { ccb->ccb_h.status = CAM_LUN_INVALID; xpt_done(ccb); return; } if (target != CAM_TARGET_WILDCARD && lun == CAM_LUN_WILDCARD) { ccb->ccb_h.status = CAM_LUN_INVALID; xpt_done(ccb); return; } /* * See if we're busy disabling a lun now. */ isp_tmlock(isp, "isp_disable_lun"); status = CAM_REQ_INPROG; /* * Find the state pointer. */ if ((tptr = get_lun_statep(isp, bus, lun)) == NULL) { status = CAM_PATH_INVALID; goto done; } /* * If we're a 23XX or 24XX card, we're done. */ if (IS_23XX(isp) || IS_24XX(isp)) { status = CAM_REQ_CMP; goto done; } /* * For SCC FW, we only deal with lun zero.
*/ if (IS_FC(isp) && lun > 0) { status = CAM_REQ_CMP; goto done; } isp->isp_osinfo.rptr = &status; if (isp_lun_cmd(isp, RQSTYPE_ENABLE_LUN, bus, lun, 0, 0)) { status = CAM_RESRC_UNAVAIL; } else { mtx_sleep(&status, &isp->isp_lock, PRIBIO, "isp_disable_lun", 0); } isp->isp_osinfo.rptr = NULL; done: if (status == CAM_REQ_CMP) { tptr->enabled = 0; if (is_any_lun_enabled(isp, bus) == 0) { if (isp_disable_target_mode(isp, bus)) { status = CAM_REQ_CMP_ERR; } } } ccb->ccb_h.status = status; if (status == CAM_REQ_CMP) { destroy_lun_state(isp, tptr); xpt_print(ccb->ccb_h.path, "lun now disabled for target mode\n"); } else { if (tptr) rls_lun_statep(isp, tptr); } isp_tmunlk(isp); xpt_done(ccb); } static int isp_enable_target_mode(ispsoftc_t *isp, int bus) { int tm_enabled; ISP_GET_PC(isp, bus, tm_enabled, tm_enabled); if (tm_enabled != 0) { return (0); } if (IS_SCSI(isp)) { mbreg_t mbs; MBSINIT(&mbs, MBOX_ENABLE_TARGET_MODE, MBLOGALL, 0); mbs.param[0] = MBOX_ENABLE_TARGET_MODE; mbs.param[1] = ENABLE_TARGET_FLAG|ENABLE_TQING_FLAG; mbs.param[2] = bus << 7; if (isp_control(isp, ISPCTL_RUN_MBOXCMD, &mbs) < 0 || mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, "Unable to enable Target Role on Bus %d", bus); return (EIO); } } ISP_SET_PC(isp, bus, tm_enabled, 1); isp_prt(isp, ISP_LOGINFO, "Target Role enabled on Bus %d", bus); return (0); } static int isp_disable_target_mode(ispsoftc_t *isp, int bus) { int tm_enabled; ISP_GET_PC(isp, bus, tm_enabled, tm_enabled); if (tm_enabled == 0) { return (0); } if (IS_SCSI(isp)) { mbreg_t mbs; MBSINIT(&mbs, MBOX_ENABLE_TARGET_MODE, MBLOGALL, 0); mbs.param[2] = bus << 7; if (isp_control(isp, ISPCTL_RUN_MBOXCMD, &mbs) < 0 || mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, "Unable to disable Target Role on Bus %d", bus); return (EIO); } } ISP_SET_PC(isp, bus, tm_enabled, 0); isp_prt(isp, ISP_LOGINFO, "Target Role disabled on Bus %d", bus); return (0); } static void isp_ledone(ispsoftc_t *isp, lun_entry_t *lep) { uint32_t *rptr; rptr = isp->isp_osinfo.rptr; if (lep->le_status != LUN_OK) { isp_prt(isp, ISP_LOGERR, "ENABLE/MODIFY LUN returned 0x%x", lep->le_status); if (rptr) { *rptr = CAM_REQ_CMP_ERR; wakeup_one(rptr); } } else { if (rptr) { *rptr = CAM_REQ_CMP; wakeup_one(rptr); } } } static void isp_target_start_ctio(ispsoftc_t *isp, union ccb *ccb, enum Start_Ctio_How how) { int fctape, sendstatus, resid; tstate_t *tptr; fcparam *fcp; atio_private_data_t *atp; struct ccb_scsiio *cso; uint32_t dmaresult, handle, xfrlen, sense_length, tmp; uint8_t local[QENTRY_LEN]; tptr = get_lun_statep(isp, XS_CHANNEL(ccb), XS_LUN(ccb)); if (tptr == NULL) { tptr = get_lun_statep(isp, XS_CHANNEL(ccb), CAM_LUN_WILDCARD); if (tptr == NULL) { isp_prt(isp, ISP_LOGERR, "%s: [0x%x] cannot find tstate pointer", __func__, ccb->csio.tag_id); ccb->ccb_h.status = CAM_DEV_NOT_THERE; xpt_done(ccb); return; } } isp_prt(isp, ISP_LOGTDEBUG0, "%s: ENTRY[0x%x] how %u xfrlen %u sendstatus %d sense_len %u", __func__, ccb->csio.tag_id, how, ccb->csio.dxfer_len, (ccb->ccb_h.flags & CAM_SEND_STATUS) != 0, ((ccb->ccb_h.flags & CAM_SEND_SENSE)? 
ccb->csio.sense_len : 0)); switch (how) { case FROM_TIMER: case FROM_CAM: /* * Insert at the tail of the list, if any, waiting CTIO CCBs */ TAILQ_INSERT_TAIL(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); break; case FROM_SRR: case FROM_CTIO_DONE: TAILQ_INSERT_HEAD(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); break; } while (TAILQ_FIRST(&tptr->waitq) != NULL) { ccb = (union ccb *) TAILQ_FIRST(&tptr->waitq); TAILQ_REMOVE(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); cso = &ccb->csio; xfrlen = cso->dxfer_len; if (xfrlen == 0) { if ((ccb->ccb_h.flags & CAM_SEND_STATUS) == 0) { ISP_PATH_PRT(isp, ISP_LOGERR, ccb->ccb_h.path, "a data transfer length of zero but no status to send is wrong\n"); ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); continue; } } atp = isp_find_atpd(isp, tptr, cso->tag_id); if (atp == NULL) { isp_prt(isp, ISP_LOGERR, "%s: [0x%x] cannot find private data adjunct in %s", __func__, cso->tag_id, __func__); isp_dump_atpd(isp, tptr); ccb->ccb_h.status = CAM_REQ_CMP_ERR; xpt_done(ccb); continue; } /* * Is this command a dead duck? */ if (atp->dead) { isp_prt(isp, ISP_LOGERR, "%s: [0x%x] not sending a CTIO for a dead command", __func__, cso->tag_id); ccb->ccb_h.status = CAM_REQ_ABORTED; xpt_done(ccb); continue; } /* * Check to make sure we're still in target mode. */ fcp = FCPARAM(isp, XS_CHANNEL(ccb)); if ((fcp->role & ISP_ROLE_TARGET) == 0) { isp_prt(isp, ISP_LOGERR, "%s: [0x%x] stopping sending a CTIO because we're no longer in target mode", __func__, cso->tag_id); ccb->ccb_h.status = CAM_PROVIDE_FAIL; xpt_done(ccb); continue; } /* * We're only handling ATPD_CCB_OUTSTANDING outstanding CCB at a time (one of which * could be split into two CTIOs to split data and status). */ if (atp->ctcnt >= ATPD_CCB_OUTSTANDING) { isp_prt(isp, ISP_LOGTINFO, "[0x%x] handling only %d CCBs at a time (flags for this ccb: 0x%x)", cso->tag_id, ATPD_CCB_OUTSTANDING, ccb->ccb_h.flags); TAILQ_INSERT_HEAD(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); break; } /* * Does the initiator expect FC-Tape style responses? */ if ((atp->word3 & PRLI_WD3_RETRY) && fcp->fctape_enabled) { fctape = 1; } else { fctape = 0; } /* * If we already did the data xfer portion of a CTIO that sends data * and status, don't do it again and do the status portion now. */ if (atp->sendst) { isp_prt(isp, ISP_LOGTINFO, "[0x%x] now sending synthesized status orig_dl=%u xfered=%u bit=%u", cso->tag_id, atp->orig_datalen, atp->bytes_xfered, atp->bytes_in_transit); xfrlen = 0; /* we already did the data transfer */ atp->sendst = 0; } if (ccb->ccb_h.flags & CAM_SEND_STATUS) { sendstatus = 1; } else { sendstatus = 0; } if (ccb->ccb_h.flags & CAM_SEND_SENSE) { KASSERT((sendstatus != 0), ("how can you have CAM_SEND_SENSE w/o CAM_SEND_STATUS?")); /* * Sense length is not the entire sense data structure size. Periph * drivers don't seem to be setting sense_len to reflect the actual * size. We'll peek inside to get the right amount. 
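 * Whatever we find is clamped just below to what an external FCP
 * response IU can carry (XCMD_SIZE less the fixed response header),
 * since that is the most we can ever ship back to the initiator.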
*/ sense_length = cso->sense_len; /* * This 'cannot' happen */ if (sense_length > (XCMD_SIZE - MIN_FCP_RESPONSE_SIZE)) { sense_length = XCMD_SIZE - MIN_FCP_RESPONSE_SIZE; } } else { sense_length = 0; } memset(local, 0, QENTRY_LEN); /* * Check for overflow */ tmp = atp->bytes_xfered + atp->bytes_in_transit + xfrlen; if (tmp > atp->orig_datalen) { isp_prt(isp, ISP_LOGERR, "%s: [0x%x] data overflow by %u bytes", __func__, cso->tag_id, tmp - atp->orig_datalen); ccb->ccb_h.status = CAM_DATA_RUN_ERR; xpt_done(ccb); continue; } if (IS_24XX(isp)) { ct7_entry_t *cto = (ct7_entry_t *) local; cto->ct_header.rqs_entry_type = RQSTYPE_CTIO7; cto->ct_header.rqs_entry_count = 1; cto->ct_header.rqs_seqno |= ATPD_SEQ_NOTIFY_CAM; ATPD_SET_SEQNO(cto, atp); cto->ct_nphdl = atp->nphdl; cto->ct_rxid = atp->tag; cto->ct_iid_lo = atp->portid; cto->ct_iid_hi = atp->portid >> 16; cto->ct_oxid = atp->oxid; cto->ct_vpidx = ISP_GET_VPIDX(isp, XS_CHANNEL(ccb)); cto->ct_timeout = 120; cto->ct_flags = atp->tattr << CT7_TASK_ATTR_SHIFT; /* * Mode 1, status, no data. Only possible when we are sending status, have * no data to transfer, and any sense data can fit into a ct7_entry_t. * * Mode 2, status, no data. We have to use this in the case that * the sense data won't fit into a ct7_entry_t. * */ if (sendstatus && xfrlen == 0) { cto->ct_flags |= CT7_SENDSTATUS | CT7_NO_DATA; resid = atp->orig_datalen - atp->bytes_xfered - atp->bytes_in_transit; if (sense_length <= MAXRESPLEN_24XX) { if (resid < 0) { cto->ct_resid = -resid; } else if (resid > 0) { cto->ct_resid = resid; } cto->ct_flags |= CT7_FLAG_MODE1; cto->ct_scsi_status = cso->scsi_status; if (resid < 0) { cto->ct_scsi_status |= (FCP_RESID_OVERFLOW << 8); } else if (resid > 0) { cto->ct_scsi_status |= (FCP_RESID_UNDERFLOW << 8); } if (fctape) { cto->ct_flags |= CT7_CONFIRM|CT7_EXPLCT_CONF; } if (sense_length) { cto->ct_scsi_status |= (FCP_SNSLEN_VALID << 8); cto->rsp.m1.ct_resplen = cto->ct_senselen = sense_length; memcpy(cto->rsp.m1.ct_resp, &cso->sense_data, sense_length); } } else { bus_addr_t addr; char buf[XCMD_SIZE]; fcp_rsp_iu_t *rp; if (atp->ests == NULL) { atp->ests = isp_get_ecmd(isp); if (atp->ests == NULL) { TAILQ_INSERT_HEAD(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); break; } } memset(buf, 0, sizeof (buf)); rp = (fcp_rsp_iu_t *)buf; if (fctape) { cto->ct_flags |= CT7_CONFIRM|CT7_EXPLCT_CONF; rp->fcp_rsp_bits |= FCP_CONF_REQ; } cto->ct_flags |= CT7_FLAG_MODE2; rp->fcp_rsp_scsi_status = cso->scsi_status; if (resid < 0) { rp->fcp_rsp_resid = -resid; rp->fcp_rsp_bits |= FCP_RESID_OVERFLOW; } else if (resid > 0) { rp->fcp_rsp_resid = resid; rp->fcp_rsp_bits |= FCP_RESID_UNDERFLOW; } if (sense_length) { rp->fcp_rsp_snslen = sense_length; cto->ct_senselen = sense_length; rp->fcp_rsp_bits |= FCP_SNSLEN_VALID; isp_put_fcp_rsp_iu(isp, rp, atp->ests); memcpy(((fcp_rsp_iu_t *)atp->ests)->fcp_rsp_extra, &cso->sense_data, sense_length); } else { isp_put_fcp_rsp_iu(isp, rp, atp->ests); } if (isp->isp_dblev & ISP_LOGTDEBUG1) { isp_print_bytes(isp, "FCP Response Frame After Swizzling", MIN_FCP_RESPONSE_SIZE + sense_length, atp->ests); } addr = isp->isp_osinfo.ecmd_dma; addr += ((((isp_ecmd_t *)atp->ests) - isp->isp_osinfo.ecmd_base) * XCMD_SIZE); isp_prt(isp, ISP_LOGTDEBUG0, "%s: ests base %p vaddr %p ecmd_dma %jx addr %jx len %u", __func__, isp->isp_osinfo.ecmd_base, atp->ests, (uintmax_t) isp->isp_osinfo.ecmd_dma, (uintmax_t)addr, MIN_FCP_RESPONSE_SIZE + sense_length); cto->rsp.m2.ct_datalen = MIN_FCP_RESPONSE_SIZE + sense_length; cto->rsp.m2.ct_fcp_rsp_iudata.ds_base 
= DMA_LO32(addr); cto->rsp.m2.ct_fcp_rsp_iudata.ds_basehi = DMA_HI32(addr); cto->rsp.m2.ct_fcp_rsp_iudata.ds_count = MIN_FCP_RESPONSE_SIZE + sense_length; } if (sense_length) { isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO7[0x%x] seq %u nc %d CDB0=%x sstatus=0x%x flags=0x%x resid=%d slen %u sense: %x %x/%x/%x", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cto->ct_scsi_status, cto->ct_flags, cto->ct_resid, sense_length, cso->sense_data.error_code, cso->sense_data.sense_buf[1], cso->sense_data.sense_buf[11], cso->sense_data.sense_buf[12]); } else { isp_prt(isp, ISP_LOGDEBUG0, "%s: CTIO7[0x%x] seq %u nc %d CDB0=%x sstatus=0x%x flags=0x%x resid=%d", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cto->ct_scsi_status, cto->ct_flags, cto->ct_resid); } atp->state = ATPD_STATE_LAST_CTIO; } /* * Mode 0 data transfers, *possibly* with status. */ if (xfrlen != 0) { cto->ct_flags |= CT7_FLAG_MODE0; if ((cso->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { cto->ct_flags |= CT7_DATA_IN; } else { cto->ct_flags |= CT7_DATA_OUT; } cto->rsp.m0.reloff = atp->bytes_xfered + atp->bytes_in_transit; cto->rsp.m0.ct_xfrlen = xfrlen; #ifdef DEBUG if (ISP_FC_PC(isp, XS_CHANNEL(ccb))->inject_lost_data_frame && xfrlen > ISP_FC_PC(isp, XS_CHANNEL(ccb))->inject_lost_data_frame) { isp_prt(isp, ISP_LOGWARN, "%s: truncating data frame with xfrlen %d to %d", __func__, xfrlen, xfrlen - (xfrlen >> 2)); ISP_FC_PC(isp, XS_CHANNEL(ccb))->inject_lost_data_frame = 0; cto->rsp.m0.ct_xfrlen -= xfrlen >> 2; } #endif if (sendstatus) { resid = atp->orig_datalen - atp->bytes_xfered - xfrlen; if (cso->scsi_status == SCSI_STATUS_OK && resid == 0 /* && fctape == 0 */) { cto->ct_flags |= CT7_SENDSTATUS; atp->state = ATPD_STATE_LAST_CTIO; if (fctape) { cto->ct_flags |= CT7_CONFIRM|CT7_EXPLCT_CONF; } } else { atp->sendst = 1; /* send status later */ cto->ct_header.rqs_seqno &= ~ATPD_SEQ_NOTIFY_CAM; atp->state = ATPD_STATE_CTIO; } } else { atp->state = ATPD_STATE_CTIO; } isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO7[0x%x] seq %u nc %d CDB0=%x sstatus=0x%x flags=0x%x xfrlen=%u off=%u", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cto->ct_scsi_status, cto->ct_flags, xfrlen, atp->bytes_xfered); } } else if (IS_FC(isp)) { ct2_entry_t *cto = (ct2_entry_t *) local; if (isp->isp_osinfo.sixtyfourbit) cto->ct_header.rqs_entry_type = RQSTYPE_CTIO3; else cto->ct_header.rqs_entry_type = RQSTYPE_CTIO2; cto->ct_header.rqs_entry_count = 1; cto->ct_header.rqs_seqno |= ATPD_SEQ_NOTIFY_CAM; ATPD_SET_SEQNO(cto, atp); if (ISP_CAP_2KLOGIN(isp)) { ((ct2e_entry_t *)cto)->ct_iid = atp->nphdl; } else { cto->ct_iid = atp->nphdl; if (ISP_CAP_SCCFW(isp) == 0) { cto->ct_lun = ccb->ccb_h.target_lun; } } cto->ct_timeout = 10; cto->ct_rxid = cso->tag_id; /* * Mode 1, status, no data. Only possible when we are sending status, have * no data to transfer, and the sense length can fit into a ct2_entry_t. * * Mode 2, status, no data. We have to use this in the case the response * length won't fit into a ct2_entry_t. * * We'll fill out this structure with information as if this were a * Mode 1. The hardware layer will create the Mode 2 FCP RSP IU as * needed based upon this.
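 *
 * (In short: Mode 1 embeds the status and any short sense data
 * directly in the CTIO, while Mode 2 points the chip at an externally
 * built FCP RSP IU through the ct_fcp_rsp_iudata descriptor when the
 * response is too large to embed.)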
*/ if (sendstatus && xfrlen == 0) { cto->ct_flags |= CT2_SENDSTATUS | CT2_NO_DATA; resid = atp->orig_datalen - atp->bytes_xfered - atp->bytes_in_transit; if (sense_length <= MAXRESPLEN) { if (resid < 0) { cto->ct_resid = -resid; } else if (resid > 0) { cto->ct_resid = resid; } cto->ct_flags |= CT2_FLAG_MODE1; cto->rsp.m1.ct_scsi_status = cso->scsi_status; if (resid < 0) { cto->rsp.m1.ct_scsi_status |= CT2_DATA_OVER; } else if (resid > 0) { cto->rsp.m1.ct_scsi_status |= CT2_DATA_UNDER; } if (fctape) { cto->ct_flags |= CT2_CONFIRM; } if (sense_length) { cto->rsp.m1.ct_scsi_status |= CT2_SNSLEN_VALID; cto->rsp.m1.ct_resplen = cto->rsp.m1.ct_senselen = sense_length; memcpy(cto->rsp.m1.ct_resp, &cso->sense_data, sense_length); } } else { bus_addr_t addr; char buf[XCMD_SIZE]; fcp_rsp_iu_t *rp; if (atp->ests == NULL) { atp->ests = isp_get_ecmd(isp); if (atp->ests == NULL) { TAILQ_INSERT_HEAD(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); break; } } memset(buf, 0, sizeof (buf)); rp = (fcp_rsp_iu_t *)buf; if (fctape) { cto->ct_flags |= CT2_CONFIRM; rp->fcp_rsp_bits |= FCP_CONF_REQ; } cto->ct_flags |= CT2_FLAG_MODE2; rp->fcp_rsp_scsi_status = cso->scsi_status; if (resid < 0) { rp->fcp_rsp_resid = -resid; rp->fcp_rsp_bits |= FCP_RESID_OVERFLOW; } else if (resid > 0) { rp->fcp_rsp_resid = resid; rp->fcp_rsp_bits |= FCP_RESID_UNDERFLOW; } if (sense_length) { rp->fcp_rsp_snslen = sense_length; rp->fcp_rsp_bits |= FCP_SNSLEN_VALID; isp_put_fcp_rsp_iu(isp, rp, atp->ests); memcpy(((fcp_rsp_iu_t *)atp->ests)->fcp_rsp_extra, &cso->sense_data, sense_length); } else { isp_put_fcp_rsp_iu(isp, rp, atp->ests); } if (isp->isp_dblev & ISP_LOGTDEBUG1) { isp_print_bytes(isp, "FCP Response Frame After Swizzling", MIN_FCP_RESPONSE_SIZE + sense_length, atp->ests); } addr = isp->isp_osinfo.ecmd_dma; addr += ((((isp_ecmd_t *)atp->ests) - isp->isp_osinfo.ecmd_base) * XCMD_SIZE); isp_prt(isp, ISP_LOGTDEBUG0, "%s: ests base %p vaddr %p ecmd_dma %jx addr %jx len %u", __func__, isp->isp_osinfo.ecmd_base, atp->ests, (uintmax_t) isp->isp_osinfo.ecmd_dma, (uintmax_t)addr, MIN_FCP_RESPONSE_SIZE + sense_length); cto->rsp.m2.ct_datalen = MIN_FCP_RESPONSE_SIZE + sense_length; if (isp->isp_osinfo.sixtyfourbit) { cto->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base = DMA_LO32(addr); cto->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi = DMA_HI32(addr); cto->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count = MIN_FCP_RESPONSE_SIZE + sense_length; } else { cto->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base = DMA_LO32(addr); cto->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count = MIN_FCP_RESPONSE_SIZE + sense_length; } } if (sense_length) { isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO2[0x%x] seq %u nc %d CDB0=%x sstatus=0x%x flags=0x%x resid=%d sense: %x %x/%x/%x", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cso->scsi_status, cto->ct_flags, cto->ct_resid, cso->sense_data.error_code, cso->sense_data.sense_buf[1], cso->sense_data.sense_buf[11], cso->sense_data.sense_buf[12]); } else { isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO2[0x%x] seq %u nc %d CDB0=%x sstatus=0x%x flags=0x%x resid=%d", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cso->scsi_status, cto->ct_flags, cto->ct_resid); } atp->state = ATPD_STATE_LAST_CTIO; } if (xfrlen != 0) { cto->ct_flags |= CT2_FLAG_MODE0; if ((cso->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { cto->ct_flags |= CT2_DATA_IN; } else { cto->ct_flags |= CT2_DATA_OUT; } cto->ct_reloff = atp->bytes_xfered + atp->bytes_in_transit; cto->rsp.m0.ct_xfrlen = xfrlen; if (sendstatus) { resid = atp->orig_datalen - 
atp->bytes_xfered - xfrlen; if (cso->scsi_status == SCSI_STATUS_OK && resid == 0 /*&& fctape == 0*/) { cto->ct_flags |= CT2_SENDSTATUS; atp->state = ATPD_STATE_LAST_CTIO; if (fctape) { cto->ct_flags |= CT2_CONFIRM; } } else { atp->sendst = 1; /* send status later */ cto->ct_header.rqs_seqno &= ~ATPD_SEQ_NOTIFY_CAM; atp->state = ATPD_STATE_CTIO; } } else { atp->state = ATPD_STATE_CTIO; } } isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO2[%x] seq %u nc %d CDB0=%x scsi status %x flags %x resid %d xfrlen %u offset %u", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cso->scsi_status, cto->ct_flags, cto->ct_resid, cso->dxfer_len, atp->bytes_xfered); } else { ct_entry_t *cto = (ct_entry_t *) local; cto->ct_header.rqs_entry_type = RQSTYPE_CTIO; cto->ct_header.rqs_entry_count = 1; cto->ct_header.rqs_seqno |= ATPD_SEQ_NOTIFY_CAM; ATPD_SET_SEQNO(cto, atp); cto->ct_iid = cso->init_id; cto->ct_iid |= XS_CHANNEL(ccb) << 7; cto->ct_tgt = ccb->ccb_h.target_id; cto->ct_lun = ccb->ccb_h.target_lun; cto->ct_fwhandle = cso->tag_id; if (atp->rxid) { cto->ct_tag_val = atp->rxid; cto->ct_flags |= CT_TQAE; } if (ccb->ccb_h.flags & CAM_DIS_DISCONNECT) { cto->ct_flags |= CT_NODISC; } if (cso->dxfer_len == 0) { cto->ct_flags |= CT_NO_DATA; } else if ((cso->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { cto->ct_flags |= CT_DATA_IN; } else { cto->ct_flags |= CT_DATA_OUT; } if (ccb->ccb_h.flags & CAM_SEND_STATUS) { cto->ct_flags |= CT_SENDSTATUS|CT_CCINCR; cto->ct_scsi_status = cso->scsi_status; cto->ct_resid = atp->orig_datalen - atp->bytes_xfered - atp->bytes_in_transit - xfrlen; isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO[%x] seq %u nc %d scsi status %x resid %d tag_id %x", __func__, cto->ct_fwhandle, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), cso->scsi_status, cso->resid, cso->tag_id); } ccb->ccb_h.flags &= ~CAM_SEND_SENSE; cto->ct_timeout = 10; } if (isp_get_pcmd(isp, ccb)) { ISP_PATH_PRT(isp, ISP_LOGWARN, ccb->ccb_h.path, "out of PCMDs\n"); TAILQ_INSERT_HEAD(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); break; } if (isp_allocate_xs_tgt(isp, ccb, &handle)) { ISP_PATH_PRT(isp, ISP_LOGWARN, ccb->ccb_h.path, "No XFLIST pointers for %s\n", __func__); TAILQ_INSERT_HEAD(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); isp_free_pcmd(isp, ccb); break; } atp->bytes_in_transit += xfrlen; PISP_PCMD(ccb)->datalen = xfrlen; /* * Call the dma setup routines for this entry (and any subsequent * CTIOs) if there's data to move, and then tell the f/w it's got * new things to play with. As with isp_start's usage of DMA setup, * any swizzling is done in the machine dependent layer. Because * of this, we put the request onto the queue area first in native * format. 
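 *
 * In outline, the steps below are (a sketch, same names as the code):
 *
 *	cto->ct_syshandle = handle;	- handle from isp_allocate_xs_tgt()
 *	dmaresult = ISP_DMASETUP(isp, cso, (ispreq_t *) local);
 *	CMD_QUEUED -> mark CCB CAM_REQ_INPROG|CAM_SIM_QUEUED, bump counts
 *	CMD_EAGAIN -> tear down handle and PCMD, requeue on tptr->waitq
 *	otherwise  -> tear down handle and PCMD, fail with CAM_REQ_CMP_ERR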
*/ if (IS_24XX(isp)) { ct7_entry_t *cto = (ct7_entry_t *) local; cto->ct_syshandle = handle; } else if (IS_FC(isp)) { ct2_entry_t *cto = (ct2_entry_t *) local; cto->ct_syshandle = handle; } else { ct_entry_t *cto = (ct_entry_t *) local; cto->ct_syshandle = handle; } dmaresult = ISP_DMASETUP(isp, cso, (ispreq_t *) local); if (dmaresult != CMD_QUEUED) { isp_destroy_tgt_handle(isp, handle); isp_free_pcmd(isp, ccb); if (dmaresult == CMD_EAGAIN) { TAILQ_INSERT_HEAD(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); break; } ccb->ccb_h.status = CAM_REQ_CMP_ERR; xpt_done(ccb); continue; } isp->isp_nactive++; ccb->ccb_h.status = CAM_REQ_INPROG | CAM_SIM_QUEUED; if (xfrlen) { ccb->ccb_h.spriv_field0 = atp->bytes_xfered; } else { ccb->ccb_h.spriv_field0 = ~0; } atp->ctcnt++; atp->seqno++; } rls_lun_statep(isp, tptr); } static void isp_refire_putback_atio(void *arg) { union ccb *ccb = arg; ISP_ASSERT_LOCKED((ispsoftc_t *)XS_ISP(ccb)); isp_target_putback_atio(ccb); } static void isp_refire_notify_ack(void *arg) { isp_tna_t *tp = arg; ispsoftc_t *isp = tp->isp; ISP_ASSERT_LOCKED(isp); if (isp_notify_ack(isp, tp->not)) { callout_schedule(&tp->timer, 5); } else { free(tp, M_DEVBUF); } } static void isp_target_putback_atio(union ccb *ccb) { ispsoftc_t *isp; struct ccb_scsiio *cso; void *qe; isp = XS_ISP(ccb); qe = isp_getrqentry(isp); if (qe == NULL) { xpt_print(ccb->ccb_h.path, "%s: Request Queue Overflow\n", __func__); callout_reset(&PISP_PCMD(ccb)->wdog, 10, isp_refire_putback_atio, ccb); return; } memset(qe, 0, QENTRY_LEN); cso = &ccb->csio; if (IS_FC(isp)) { at2_entry_t local, *at = &local; ISP_MEMZERO(at, sizeof (at2_entry_t)); at->at_header.rqs_entry_type = RQSTYPE_ATIO2; at->at_header.rqs_entry_count = 1; if (ISP_CAP_SCCFW(isp)) { at->at_scclun = (uint16_t) ccb->ccb_h.target_lun; #if __FreeBSD_version < 1000700 if (at->at_scclun >= 256) at->at_scclun |= 0x4000; #endif } else { at->at_lun = (uint8_t) ccb->ccb_h.target_lun; } at->at_status = CT_OK; at->at_rxid = cso->tag_id; at->at_iid = cso->ccb_h.target_id; isp_put_atio2(isp, at, qe); } else { at_entry_t local, *at = &local; ISP_MEMZERO(at, sizeof (at_entry_t)); at->at_header.rqs_entry_type = RQSTYPE_ATIO; at->at_header.rqs_entry_count = 1; at->at_iid = cso->init_id; at->at_iid |= XS_CHANNEL(ccb) << 7; at->at_tgt = cso->ccb_h.target_id; at->at_lun = cso->ccb_h.target_lun; at->at_status = CT_OK; at->at_tag_val = AT_GET_TAG(cso->tag_id); at->at_handle = AT_GET_HANDLE(cso->tag_id); isp_put_atio(isp, at, qe); } ISP_TDQE(isp, "isp_target_putback_atio", isp->isp_reqidx, qe); ISP_SYNC_REQUEST(isp); isp_complete_ctio(ccb); } static void isp_complete_ctio(union ccb *ccb) { if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_INPROG) { ccb->ccb_h.status &= ~CAM_SIM_QUEUED; xpt_done(ccb); } } /* * Handle ATIO stuff that the generic code can't. * This means handling CDBs. */ static void isp_handle_platform_atio(ispsoftc_t *isp, at_entry_t *aep) { tstate_t *tptr; int status, bus; struct ccb_accept_tio *atiop; atio_private_data_t *atp; /* * The firmware status (except for the QLTM_SVALID bit) * indicates why this ATIO was sent to us. * * If QLTM_SVALID is set, the firmware has recommended Sense Data. * * If the DISCONNECTS DISABLED bit is set in the flags field, * we're still connected on the SCSI bus. */ status = aep->at_status; if ((status & ~QLTM_SVALID) == AT_PHASE_ERROR) { /* * Bus Phase Sequence error. We should have sense data * suggested by the f/w. I'm not sure quite yet what * to do about this for CAM. 
*/ isp_prt(isp, ISP_LOGWARN, "PHASE ERROR"); isp_endcmd(isp, aep, SCSI_STATUS_BUSY, 0); return; } if ((status & ~QLTM_SVALID) != AT_CDB) { isp_prt(isp, ISP_LOGWARN, "bad atio (0x%x) leaked to platform", status); isp_endcmd(isp, aep, SCSI_STATUS_BUSY, 0); return; } bus = GET_BUS_VAL(aep->at_iid); tptr = get_lun_statep(isp, bus, aep->at_lun); if (tptr == NULL) { tptr = get_lun_statep(isp, bus, CAM_LUN_WILDCARD); if (tptr == NULL) { /* * Because we can't autofeed sense data back with * a command for parallel SCSI, we can't give back * a CHECK CONDITION. We'll give back a BUSY status * instead. This works out okay because the only * time we should, in fact, get this, is in the * case that somebody configured us without the * blackhole driver, so they get what they deserve. */ isp_endcmd(isp, aep, SCSI_STATUS_BUSY, 0); return; } } atp = isp_get_atpd(isp, tptr, aep->at_handle); atiop = (struct ccb_accept_tio *) SLIST_FIRST(&tptr->atios); if (atiop == NULL || atp == NULL) { /* * Because we can't autofeed sense data back with * a command for parallel SCSI, we can't give back * a CHECK CONDITION. We'll give back a QUEUE FULL status * instead. This works out okay because the only time we * should, in fact, get this, is in the case that we've * run out of ATIOS. */ xpt_print(tptr->owner, "no %s for lun %d from initiator %d\n", (atp == NULL && atiop == NULL)? "ATIOs *or* ATPS" : ((atp == NULL)? "ATPs" : "ATIOs"), aep->at_lun, aep->at_iid); isp_endcmd(isp, aep, SCSI_STATUS_BUSY, 0); if (atp) { isp_put_atpd(isp, tptr, atp); } rls_lun_statep(isp, tptr); return; } atp->rxid = aep->at_tag_val; atp->state = ATPD_STATE_ATIO; SLIST_REMOVE_HEAD(&tptr->atios, sim_links.sle); tptr->atio_count--; ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, atiop->ccb_h.path, "Take FREE ATIO count now %d\n", tptr->atio_count); atiop->ccb_h.target_id = aep->at_tgt; atiop->ccb_h.target_lun = aep->at_lun; if (aep->at_flags & AT_NODISC) { atiop->ccb_h.flags |= CAM_DIS_DISCONNECT; } else { atiop->ccb_h.flags &= ~CAM_DIS_DISCONNECT; } if (status & QLTM_SVALID) { size_t amt = ISP_MIN(QLTM_SENSELEN, sizeof (atiop->sense_data)); atiop->sense_len = amt; ISP_MEMCPY(&atiop->sense_data, aep->at_sense, amt); } else { atiop->sense_len = 0; } atiop->init_id = GET_IID_VAL(aep->at_iid); atiop->cdb_len = aep->at_cdblen; ISP_MEMCPY(atiop->cdb_io.cdb_bytes, aep->at_cdb, aep->at_cdblen); atiop->ccb_h.status = CAM_CDB_RECVD; /* * Construct a tag 'id' based upon tag value (which may be 0..255) * and the handle (which we have to preserve). */ atiop->tag_id = atp->tag; if (aep->at_flags & AT_TQAE) { atiop->tag_action = aep->at_tag_type; atiop->ccb_h.status |= CAM_TAG_ACTION_VALID; } atp->orig_datalen = 0; atp->bytes_xfered = 0; atp->lun = aep->at_lun; atp->nphdl = aep->at_iid; atp->portid = PORT_NONE; atp->oxid = 0; atp->cdb0 = atiop->cdb_io.cdb_bytes[0]; atp->tattr = aep->at_tag_type; atp->state = ATPD_STATE_CAM; isp_prt(isp, ISP_LOGTDEBUG0, "ATIO[0x%x] CDB=0x%x lun %d", aep->at_tag_val, atp->cdb0, atp->lun); rls_lun_statep(isp, tptr); } static void isp_handle_platform_atio2(ispsoftc_t *isp, at2_entry_t *aep) { lun_id_t lun; fcportdb_t *lp; tstate_t *tptr; struct ccb_accept_tio *atiop; uint16_t nphdl; atio_private_data_t *atp; inot_private_data_t *ntp; /* * The firmware status (except for the QLTM_SVALID bit) * indicates why this ATIO was sent to us. * * If QLTM_SVALID is set, the firmware has recommended Sense Data. 
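 *
 * On the parallel SCSI side (isp_handle_platform_atio() above) that
 * suggested sense is copied through to CAM:
 *
 *	amt = ISP_MIN(QLTM_SENSELEN, sizeof (atiop->sense_data));
 *	ISP_MEMCPY(&atiop->sense_data, aep->at_sense, amt);
 *
 * For ATIO2 the bit is only masked off of at_status for the status
 * check below; no sense data is passed up.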
*/ if ((aep->at_status & ~QLTM_SVALID) != AT_CDB) { isp_prt(isp, ISP_LOGWARN, "bogus atio (0x%x) leaked to platform", aep->at_status); isp_endcmd(isp, aep, SCSI_STATUS_BUSY, 0); return; } if (ISP_CAP_SCCFW(isp)) { lun = aep->at_scclun; #if __FreeBSD_version < 1000700 lun &= 0x3fff; #endif } else { lun = aep->at_lun; } if (ISP_CAP_2KLOGIN(isp)) { nphdl = ((at2e_entry_t *)aep)->at_iid; } else { nphdl = aep->at_iid; } tptr = get_lun_statep(isp, 0, lun); if (tptr == NULL) { tptr = get_lun_statep(isp, 0, CAM_LUN_WILDCARD); if (tptr == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: [0x%x] no state pointer for lun %jx or wildcard", __func__, aep->at_rxid, (uintmax_t)lun); if (lun == 0) { isp_endcmd(isp, aep, SCSI_STATUS_BUSY, 0); } else { isp_endcmd(isp, aep, SCSI_STATUS_CHECK_COND | ECMD_SVALID | (0x5 << 12) | (0x25 << 16), 0); } return; } } /* * Start any commands pending resources first. */ if (tptr->restart_queue) { inot_private_data_t *restart_queue = tptr->restart_queue; tptr->restart_queue = NULL; while (restart_queue) { ntp = restart_queue; restart_queue = ntp->rd.nt.nt_hba; isp_prt(isp, ISP_LOGTDEBUG0, "%s: restarting resrc deprived %x", __func__, ((at2_entry_t *)ntp->rd.data)->at_rxid); isp_handle_platform_atio2(isp, (at2_entry_t *) ntp->rd.data); isp_put_ntpd(isp, tptr, ntp); /* * If a recursion caused the restart queue to start to fill again, * stop and splice the new list on top of the old list and restore * it and go to noresrc. */ if (tptr->restart_queue) { ntp = tptr->restart_queue; tptr->restart_queue = restart_queue; while (restart_queue->rd.nt.nt_hba) { restart_queue = restart_queue->rd.nt.nt_hba; } restart_queue->rd.nt.nt_hba = ntp; goto noresrc; } } } atiop = (struct ccb_accept_tio *) SLIST_FIRST(&tptr->atios); if (atiop == NULL) { goto noresrc; } atp = isp_get_atpd(isp, tptr, aep->at_rxid); if (atp == NULL) { goto noresrc; } atp->state = ATPD_STATE_ATIO; SLIST_REMOVE_HEAD(&tptr->atios, sim_links.sle); tptr->atio_count--; isp_prt(isp, ISP_LOGTDEBUG2, "Take FREE ATIO count now %d", tptr->atio_count); atiop->ccb_h.target_id = FCPARAM(isp, 0)->isp_loopid; atiop->ccb_h.target_lun = lun; /* * We don't get 'suggested' sense data as we do with SCSI cards. */ atiop->sense_len = 0; /* * If we're not in the port database, add ourselves. */ if (IS_2100(isp)) atiop->init_id = nphdl; else { if ((isp_find_pdb_by_handle(isp, 0, nphdl, &lp) == 0 || lp->state == FC_PORTDB_STATE_ZOMBIE)) { uint64_t wwpn = (((uint64_t) aep->at_wwpn[0]) << 48) | (((uint64_t) aep->at_wwpn[1]) << 32) | (((uint64_t) aep->at_wwpn[2]) << 16) | (((uint64_t) aep->at_wwpn[3]) << 0); isp_add_wwn_entry(isp, 0, wwpn, INI_NONE, nphdl, PORT_ANY, 0); isp_find_pdb_by_handle(isp, 0, nphdl, &lp); } atiop->init_id = FC_PORTDB_TGT(isp, 0, lp); } atiop->cdb_len = ATIO2_CDBLEN; ISP_MEMCPY(atiop->cdb_io.cdb_bytes, aep->at_cdb, ATIO2_CDBLEN); atiop->ccb_h.status = CAM_CDB_RECVD; atiop->tag_id = atp->tag; switch (aep->at_taskflags & ATIO2_TC_ATTR_MASK) { case ATIO2_TC_ATTR_SIMPLEQ: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_SIMPLE_Q_TAG; break; case ATIO2_TC_ATTR_HEADOFQ: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_HEAD_OF_Q_TAG; break; case ATIO2_TC_ATTR_ORDERED: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_ORDERED_Q_TAG; break; case ATIO2_TC_ATTR_ACAQ: /* ?? 
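 * ACA queueing has no CAM tag action mapping here, so it is handled
 * like an untagged command.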
*/ case ATIO2_TC_ATTR_UNTAGGED: default: atiop->tag_action = 0; break; } atp->orig_datalen = aep->at_datalen; atp->bytes_xfered = 0; atp->lun = lun; atp->nphdl = nphdl; atp->sid = PORT_ANY; atp->oxid = aep->at_oxid; atp->cdb0 = aep->at_cdb[0]; atp->tattr = aep->at_taskflags & ATIO2_TC_ATTR_MASK; atp->state = ATPD_STATE_CAM; xpt_done((union ccb *)atiop); isp_prt(isp, ISP_LOGTDEBUG0, "ATIO2[0x%x] CDB=0x%x lun %jx datalen %u", aep->at_rxid, atp->cdb0, (uintmax_t)lun, atp->orig_datalen); rls_lun_statep(isp, tptr); return; noresrc: ntp = isp_get_ntpd(isp, tptr); if (ntp == NULL) { rls_lun_statep(isp, tptr); isp_endcmd(isp, aep, nphdl, 0, SCSI_STATUS_BUSY, 0); return; } memcpy(ntp->rd.data, aep, QENTRY_LEN); ntp->rd.nt.nt_hba = tptr->restart_queue; tptr->restart_queue = ntp; rls_lun_statep(isp, tptr); } static void isp_handle_platform_atio7(ispsoftc_t *isp, at7_entry_t *aep) { int cdbxlen; lun_id_t lun; uint16_t chan, nphdl = NIL_HANDLE; uint32_t did, sid; fcportdb_t *lp; tstate_t *tptr; struct ccb_accept_tio *atiop; atio_private_data_t *atp = NULL; atio_private_data_t *oatp; inot_private_data_t *ntp; did = (aep->at_hdr.d_id[0] << 16) | (aep->at_hdr.d_id[1] << 8) | aep->at_hdr.d_id[2]; sid = (aep->at_hdr.s_id[0] << 16) | (aep->at_hdr.s_id[1] << 8) | aep->at_hdr.s_id[2]; #if __FreeBSD_version >= 1000700 lun = CAM_EXTLUN_BYTE_SWIZZLE(be64dec(aep->at_cmnd.fcp_cmnd_lun)); #else lun = ((aep->at_cmnd.fcp_cmnd_lun[0] & 0x3f) << 8) | aep->at_cmnd.fcp_cmnd_lun[1]; #endif /* * Find the N-port handle and Virtual Port Index for this command. * * If we can't, we're somewhat in trouble because we can't actually respond w/o that information. * We also, as a matter of course, need to know the WWN of the initiator too. */ if (ISP_CAP_MULTI_ID(isp) && isp->isp_nchan > 1) { /* * Find the right channel based upon D_ID */ isp_find_chan_by_did(isp, did, &chan); if (chan == ISP_NOCHAN) { NANOTIME_T now; /* * If we don't recognize our own D_ID, terminate the exchange unless we're within 2 seconds of startup. * It's a bit tricky here as we need to stash this command *somewhere*. */ GET_NANOTIME(&now); if (NANOTIME_SUB(&isp->isp_init_time, &now) > 2000000000ULL) { isp_prt(isp, ISP_LOGWARN, "%s: [RX_ID 0x%x] D_ID %x not found on any channel- dropping", __func__, aep->at_rxid, did); isp_endcmd(isp, aep, NIL_HANDLE, ISP_NOCHAN, ECMD_TERMINATE, 0); return; } tptr = get_lun_statep(isp, 0, 0); if (tptr == NULL) { tptr = get_lun_statep(isp, 0, CAM_LUN_WILDCARD); if (tptr == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: [RX_ID 0x%x] D_ID %x not found on any channel and no tptr- dropping", __func__, aep->at_rxid, did); isp_endcmd(isp, aep, NIL_HANDLE, ISP_NOCHAN, ECMD_TERMINATE, 0); return; } } isp_prt(isp, ISP_LOGWARN, "%s: [RX_ID 0x%x] D_ID %x not found on any channel- deferring", __func__, aep->at_rxid, did); goto noresrc; } isp_prt(isp, ISP_LOGTDEBUG0, "%s: [RX_ID 0x%x] D_ID 0x%06x found on Chan %d for S_ID 0x%06x", __func__, aep->at_rxid, did, chan, sid); } else { chan = 0; } /* * Find the PDB entry for this initiator */ if (isp_find_pdb_by_sid(isp, chan, sid, &lp) == 0) { /* * If we're not in the port database, terminate the exchange.
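 *
 * (The 24-bit S_ID being looked up was assembled byte-wise from the
 * FC header above; e.g. s_id[] = { 0x01, 0x02, 0x03 } yields
 * sid == 0x010203, which is the key isp_find_pdb_by_sid() matches on.)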
*/ isp_prt(isp, ISP_LOGTINFO, "%s: [RX_ID 0x%x] D_ID 0x%06x found on Chan %d for S_ID 0x%06x wasn't in PDB already", __func__, aep->at_rxid, did, chan, sid); isp_dump_portdb(isp, chan); isp_endcmd(isp, aep, NIL_HANDLE, chan, ECMD_TERMINATE, 0); return; } nphdl = lp->handle; /* * Get the tstate pointer */ tptr = get_lun_statep(isp, chan, lun); if (tptr == NULL) { tptr = get_lun_statep(isp, chan, CAM_LUN_WILDCARD); if (tptr == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: [0x%x] no state pointer for lun %jx or wildcard", __func__, aep->at_rxid, (uintmax_t)lun); if (lun == 0) { isp_endcmd(isp, aep, nphdl, chan, SCSI_STATUS_BUSY, 0); } else { isp_endcmd(isp, aep, nphdl, chan, SCSI_STATUS_CHECK_COND | ECMD_SVALID | (0x5 << 12) | (0x25 << 16), 0); } return; } } /* * Start any commands pending resources first. */ if (tptr->restart_queue) { inot_private_data_t *restart_queue = tptr->restart_queue; tptr->restart_queue = NULL; while (restart_queue) { ntp = restart_queue; restart_queue = ntp->rd.nt.nt_hba; isp_prt(isp, ISP_LOGTDEBUG0, "%s: restarting resrc deprived %x", __func__, ((at7_entry_t *)ntp->rd.data)->at_rxid); isp_handle_platform_atio7(isp, (at7_entry_t *) ntp->rd.data); isp_put_ntpd(isp, tptr, ntp); /* * If a recursion caused the restart queue to start to fill again, * stop and splice the new list on top of the old list and restore * it and go to noresrc. */ if (tptr->restart_queue) { isp_prt(isp, ISP_LOGTDEBUG0, "%s: restart queue refilling", __func__); if (restart_queue) { ntp = tptr->restart_queue; tptr->restart_queue = restart_queue; while (restart_queue->rd.nt.nt_hba) { restart_queue = restart_queue->rd.nt.nt_hba; } restart_queue->rd.nt.nt_hba = ntp; } goto noresrc; } } } /* * If the f/w is out of resources, just send a BUSY status back. */ if (aep->at_rxid == AT7_NORESRC_RXID) { rls_lun_statep(isp, tptr); isp_endcmd(isp, aep, nphdl, chan, SCSI_BUSY, 0); return; } /* * If we're out of resources, just send a BUSY status back.
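 *
 * The deferral used by the noresrc: path below is a simple LIFO of
 * raw queue entries threaded through the notify private data:
 *
 *	memcpy(ntp->rd.data, aep, QENTRY_LEN);	  - stash the raw ATIO
 *	ntp->rd.nt.nt_hba = tptr->restart_queue;  - chain it on
 *	tptr->restart_queue = ntp;
 *
 * Each stashed entry is replayed through isp_handle_platform_atio7()
 * the next time an ATIO finds the queue non-empty (see above).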
*/ atiop = (struct ccb_accept_tio *) SLIST_FIRST(&tptr->atios); if (atiop == NULL) { isp_prt(isp, ISP_LOGTDEBUG0, "[0x%x] out of atios", aep->at_rxid); goto noresrc; } oatp = isp_find_atpd(isp, tptr, aep->at_rxid); if (oatp) { isp_prt(isp, ISP_LOGTDEBUG0, "[0x%x] tag wraparound in isp_handle_platform_atio7 (N-Port Handle 0x%04x S_ID 0x%04x OX_ID 0x%04x) oatp state %d", aep->at_rxid, nphdl, sid, aep->at_hdr.ox_id, oatp->state); /* * It's not a "no resource" condition- but we can treat it like one */ goto noresrc; } atp = isp_get_atpd(isp, tptr, aep->at_rxid); if (atp == NULL) { isp_prt(isp, ISP_LOGTDEBUG0, "[0x%x] out of atps", aep->at_rxid); goto noresrc; } atp->word3 = lp->prli_word3; atp->state = ATPD_STATE_ATIO; SLIST_REMOVE_HEAD(&tptr->atios, sim_links.sle); tptr->atio_count--; ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, atiop->ccb_h.path, "Take FREE ATIO count now %d\n", tptr->atio_count); atiop->init_id = FC_PORTDB_TGT(isp, chan, lp); atiop->ccb_h.target_id = FCPARAM(isp, chan)->isp_loopid; atiop->ccb_h.target_lun = lun; atiop->sense_len = 0; cdbxlen = aep->at_cmnd.fcp_cmnd_alen_datadir >> FCP_CMND_ADDTL_CDBLEN_SHIFT; if (cdbxlen) { isp_prt(isp, ISP_LOGWARN, "additional CDBLEN ignored"); } cdbxlen = sizeof (aep->at_cmnd.cdb_dl.sf.fcp_cmnd_cdb); ISP_MEMCPY(atiop->cdb_io.cdb_bytes, aep->at_cmnd.cdb_dl.sf.fcp_cmnd_cdb, cdbxlen); atiop->cdb_len = cdbxlen; atiop->ccb_h.status = CAM_CDB_RECVD; atiop->tag_id = atp->tag; switch (aep->at_cmnd.fcp_cmnd_task_attribute & FCP_CMND_TASK_ATTR_MASK) { case FCP_CMND_TASK_ATTR_SIMPLE: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_SIMPLE_Q_TAG; break; case FCP_CMND_TASK_ATTR_HEAD: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_HEAD_OF_Q_TAG; break; case FCP_CMND_TASK_ATTR_ORDERED: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_ORDERED_Q_TAG; break; default: /* FALLTHROUGH */ case FCP_CMND_TASK_ATTR_ACA: case FCP_CMND_TASK_ATTR_UNTAGGED: atiop->tag_action = 0; break; } atp->orig_datalen = aep->at_cmnd.cdb_dl.sf.fcp_cmnd_dl; atp->bytes_xfered = 0; atp->lun = lun; atp->nphdl = nphdl; atp->portid = sid; atp->oxid = aep->at_hdr.ox_id; atp->rxid = aep->at_hdr.rx_id; atp->cdb0 = atiop->cdb_io.cdb_bytes[0]; atp->tattr = aep->at_cmnd.fcp_cmnd_task_attribute & FCP_CMND_TASK_ATTR_MASK; atp->state = ATPD_STATE_CAM; isp_prt(isp, ISP_LOGTDEBUG0, "ATIO7[0x%x] CDB=0x%x lun %jx datalen %u", aep->at_rxid, atp->cdb0, (uintmax_t)lun, atp->orig_datalen); xpt_done((union ccb *)atiop); rls_lun_statep(isp, tptr); return; noresrc: if (atp) { isp_put_atpd(isp, tptr, atp); } ntp = isp_get_ntpd(isp, tptr); if (ntp == NULL) { rls_lun_statep(isp, tptr); isp_endcmd(isp, aep, nphdl, chan, SCSI_STATUS_BUSY, 0); return; } memcpy(ntp->rd.data, aep, QENTRY_LEN); ntp->rd.nt.nt_hba = tptr->restart_queue; tptr->restart_queue = ntp; rls_lun_statep(isp, tptr); } /* * Handle starting an SRR (sequence retransmit request) * We get here when we've gotten the immediate notify * and the return of all outstanding CTIOs for this * transaction.
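 *
 * The offset to restart from arrives split across two 16-bit fields
 * of the immediate notify; for example, in_srr_reloff_hi == 0x0001
 * and in_srr_reloff_lo == 0x8000 decode as
 *
 *	srr_off = 0x8000 | (0x0001 << 16);	- i.e. 0x18000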
*/ static void isp_handle_srr_start(ispsoftc_t *isp, tstate_t *tptr, atio_private_data_t *atp) { in_fcentry_24xx_t *inot; uint32_t srr_off, ccb_off, ccb_len, ccb_end; union ccb *ccb; inot = (in_fcentry_24xx_t *)atp->srr; srr_off = inot->in_srr_reloff_lo | (inot->in_srr_reloff_hi << 16); ccb = atp->srr_ccb; atp->srr_ccb = NULL; atp->nsrr++; if (ccb == NULL) { isp_prt(isp, ISP_LOGWARN, "SRR[0x%x] null ccb", atp->tag); goto fail; } ccb_off = ccb->ccb_h.spriv_field0; ccb_len = ccb->csio.dxfer_len; ccb_end = (ccb_off == ~0)? ~0 : ccb_off + ccb_len; switch (inot->in_srr_iu) { case R_CTL_INFO_SOLICITED_DATA: /* * We have to restart a FCP_DATA data out transaction */ atp->sendst = 0; atp->bytes_xfered = srr_off; if (ccb_len == 0) { isp_prt(isp, ISP_LOGWARN, "SRR[0x%x] SRR offset 0x%x but current CCB doesn't transfer data", atp->tag, srr_off); goto mdp; } if (srr_off < ccb_off || srr_off > ccb_end) { isp_prt(isp, ISP_LOGWARN, "SRR[0x%x] SRR offset 0x%x not covered by current CCB data range [0x%x..0x%x]", atp->tag, srr_off, ccb_off, ccb_end); goto mdp; } isp_prt(isp, ISP_LOGWARN, "SRR[0x%x] SRR offset 0x%x covered by current CCB data range [0x%x..0x%x]", atp->tag, srr_off, ccb_off, ccb_end); break; case R_CTL_INFO_COMMAND_STATUS: isp_prt(isp, ISP_LOGTINFO, "SRR[0x%x] Got an FCP RSP SRR- resending status", atp->tag); atp->sendst = 1; /* * We have to restart a FCP_RSP IU transaction */ break; case R_CTL_INFO_DATA_DESCRIPTOR: /* * We have to restart an FCP DATA in transaction */ isp_prt(isp, ISP_LOGWARN, "Got an FCP DATA IN SRR- dropping"); goto fail; default: isp_prt(isp, ISP_LOGWARN, "Got an unknown information (%x) SRR- dropping", inot->in_srr_iu); goto fail; } /* * We can't do anything until this is acked, so we might as well start it now. * We aren't going to do the usual asynchronous ack issuance because we need * to make sure this gets on the wire first.
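 *
 * (isp_notify_ack() here is the synchronous attempt; if it fails we
 * take the fail: path below, which pushes the ack through the
 * asynchronous ISPASYNC_TARGET_NOTIFY_ACK machinery and errors the
 * CCB out with CAM_REQ_CMP_ERR.)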
*/ if (isp_notify_ack(isp, inot)) { isp_prt(isp, ISP_LOGWARN, "could not push positive ack for SRR- you lose"); goto fail; } isp_target_start_ctio(isp, ccb, FROM_SRR); return; fail: inot->in_reserved = 1; isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_REQ_CMP_ERR; isp_complete_ctio(ccb); return; mdp: if (isp_notify_ack(isp, inot)) { isp_prt(isp, ISP_LOGWARN, "could not push positive ack for SRR- you lose"); goto fail; } ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status = CAM_MESSAGE_RECV; /* * This is not a strict interpretation of MDP, but it's close */ ccb->csio.msg_ptr = &ccb->csio.sense_data.sense_buf[SSD_FULL_SIZE - 16]; ccb->csio.msg_len = 7; ccb->csio.msg_ptr[0] = MSG_EXTENDED; ccb->csio.msg_ptr[1] = 5; ccb->csio.msg_ptr[2] = 0; /* modify data pointer */ ccb->csio.msg_ptr[3] = srr_off >> 24; ccb->csio.msg_ptr[4] = srr_off >> 16; ccb->csio.msg_ptr[5] = srr_off >> 8; ccb->csio.msg_ptr[6] = srr_off; isp_complete_ctio(ccb); } static void isp_handle_srr_notify(ispsoftc_t *isp, void *inot_raw) { tstate_t *tptr; in_fcentry_24xx_t *inot = inot_raw; atio_private_data_t *atp; uint32_t tag = inot->in_rxid; uint32_t bus = inot->in_vpidx; if (!IS_24XX(isp)) { isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot_raw); return; } tptr = get_lun_statep_from_tag(isp, bus, tag); if (tptr == NULL) { isp_prt(isp, ISP_LOGERR, "%s: cannot find tptr for tag %x in SRR Notify", __func__, tag); isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); return; } atp = isp_find_atpd(isp, tptr, tag); if (atp == NULL) { rls_lun_statep(isp, tptr); isp_prt(isp, ISP_LOGERR, "%s: cannot find adjunct for %x in SRR Notify", __func__, tag); isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); return; } atp->srr_notify_rcvd = 1; memcpy(atp->srr, inot, sizeof (atp->srr)); isp_prt(isp, ISP_LOGTINFO /* ISP_LOGTDEBUG0 */, "SRR[0x%x] inot->in_rxid flags 0x%x srr_iu=%x reloff 0x%x", inot->in_rxid, inot->in_flags, inot->in_srr_iu, inot->in_srr_reloff_lo | (inot->in_srr_reloff_hi << 16)); if (atp->srr_ccb) isp_handle_srr_start(isp, tptr, atp); rls_lun_statep(isp, tptr); } static void isp_handle_platform_ctio(ispsoftc_t *isp, void *arg) { union ccb *ccb; int sentstatus = 0, ok = 0, notify_cam = 0, resid = 0, failure = 0; tstate_t *tptr = NULL; atio_private_data_t *atp = NULL; int bus; uint32_t handle, moved_data = 0, data_requested; /* * CTIO handles are 16 bits. * CTIO2 and CTIO7 are 32 bits. */ if (IS_SCSI(isp)) { handle = ((ct_entry_t *)arg)->ct_syshandle; } else { handle = ((ct2_entry_t *)arg)->ct_syshandle; } ccb = isp_find_xs_tgt(isp, handle); if (ccb == NULL) { isp_print_bytes(isp, "null ccb in isp_handle_platform_ctio", QENTRY_LEN, arg); return; } isp_destroy_tgt_handle(isp, handle); data_requested = PISP_PCMD(ccb)->datalen; isp_free_pcmd(isp, ccb); if (isp->isp_nactive) { isp->isp_nactive--; } bus = XS_CHANNEL(ccb); tptr = get_lun_statep(isp, bus, XS_LUN(ccb)); if (tptr == NULL) { tptr = get_lun_statep(isp, bus, CAM_LUN_WILDCARD); } if (tptr == NULL) { isp_prt(isp, ISP_LOGERR, "%s: cannot find tptr for tag %x after I/O", __func__, ccb->csio.tag_id); return; } if (IS_24XX(isp)) { atp = isp_find_atpd(isp, tptr, ((ct7_entry_t *)arg)->ct_rxid); } else if (IS_FC(isp)) { atp = isp_find_atpd(isp, tptr, ((ct2_entry_t *)arg)->ct_rxid); } else { atp = isp_find_atpd(isp, tptr, ((ct_entry_t *)arg)->ct_fwhandle); } if (atp == NULL) { /* * XXX: isp_clear_commands() generates fake CTIO with zero * ct_rxid value, filling only ct_syshandle. 
Workaround * that using tag_id from the CCB, pointed by ct_syshandle. */ atp = isp_find_atpd(isp, tptr, ccb->csio.tag_id); } if (atp == NULL) { rls_lun_statep(isp, tptr); isp_prt(isp, ISP_LOGERR, "%s: cannot find adjunct for %x after I/O", __func__, ccb->csio.tag_id); return; } KASSERT((atp->ctcnt > 0), ("ctio count not greater than zero")); atp->bytes_in_transit -= data_requested; atp->ctcnt -= 1; ccb->ccb_h.status &= ~CAM_STATUS_MASK; if (IS_24XX(isp)) { ct7_entry_t *ct = arg; if (ct->ct_nphdl == CT7_SRR) { atp->srr_ccb = ccb; if (atp->srr_notify_rcvd) isp_handle_srr_start(isp, tptr, atp); rls_lun_statep(isp, tptr); return; } if (ct->ct_nphdl == CT_HBA_RESET) { failure = CAM_UNREC_HBA_ERROR; } else { sentstatus = ct->ct_flags & CT7_SENDSTATUS; ok = (ct->ct_nphdl == CT7_OK); notify_cam = (ct->ct_header.rqs_seqno & ATPD_SEQ_NOTIFY_CAM) != 0; if ((ct->ct_flags & CT7_DATAMASK) != CT7_NO_DATA) { resid = ct->ct_resid; moved_data = data_requested - resid; } } isp_prt(isp, ok? ISP_LOGTDEBUG0 : ISP_LOGWARN, "%s: CTIO7[%x] seq %u nc %d sts 0x%x flg 0x%x sns %d resid %d %s", __func__, ct->ct_rxid, ATPD_GET_SEQNO(ct), notify_cam, ct->ct_nphdl, ct->ct_flags, (ccb->ccb_h.status & CAM_SENT_SENSE) != 0, resid, sentstatus? "FIN" : "MID"); } else if (IS_FC(isp)) { ct2_entry_t *ct = arg; if (ct->ct_status == CT_SRR) { atp->srr_ccb = ccb; if (atp->srr_notify_rcvd) isp_handle_srr_start(isp, tptr, atp); rls_lun_statep(isp, tptr); isp_target_putback_atio(ccb); return; } if (ct->ct_status == CT_HBA_RESET) { failure = CAM_UNREC_HBA_ERROR; } else { sentstatus = ct->ct_flags & CT2_SENDSTATUS; ok = (ct->ct_status & ~QLTM_SVALID) == CT_OK; notify_cam = (ct->ct_header.rqs_seqno & ATPD_SEQ_NOTIFY_CAM) != 0; if ((ct->ct_flags & CT2_DATAMASK) != CT2_NO_DATA) { resid = ct->ct_resid; moved_data = data_requested - resid; } } isp_prt(isp, ok? ISP_LOGTDEBUG0 : ISP_LOGWARN, "%s: CTIO2[%x] seq %u nc %d sts 0x%x flg 0x%x sns %d resid %d %s", __func__, ct->ct_rxid, ATPD_GET_SEQNO(ct), notify_cam, ct->ct_status, ct->ct_flags, (ccb->ccb_h.status & CAM_SENT_SENSE) != 0, resid, sentstatus? "FIN" : "MID"); } else { ct_entry_t *ct = arg; if (ct->ct_status == (CT_HBA_RESET & 0xff)) { failure = CAM_UNREC_HBA_ERROR; } else { sentstatus = ct->ct_flags & CT_SENDSTATUS; ok = (ct->ct_status & ~QLTM_SVALID) == CT_OK; notify_cam = (ct->ct_header.rqs_seqno & ATPD_SEQ_NOTIFY_CAM) != 0; } if ((ct->ct_flags & CT_DATAMASK) != CT_NO_DATA) { resid = ct->ct_resid; moved_data = data_requested - resid; } isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO[%x] seq %u nc %d tag %x S_ID 0x%x lun %x sts %x flg %x resid %d %s", __func__, ct->ct_fwhandle, ATPD_GET_SEQNO(ct), notify_cam, ct->ct_tag_val, ct->ct_iid, ct->ct_lun, ct->ct_status, ct->ct_flags, resid, sentstatus? "FIN" : "MID"); } if (ok) { if (moved_data) { atp->bytes_xfered += moved_data; ccb->csio.resid = atp->orig_datalen - atp->bytes_xfered - atp->bytes_in_transit; } if (sentstatus && (ccb->ccb_h.flags & CAM_SEND_SENSE)) { ccb->ccb_h.status |= CAM_SENT_SENSE; } ccb->ccb_h.status |= CAM_REQ_CMP; } else { notify_cam = 1; if (failure == CAM_UNREC_HBA_ERROR) ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR; else ccb->ccb_h.status |= CAM_REQ_CMP_ERR; } atp->state = ATPD_STATE_PDON; rls_lun_statep(isp, tptr); /* * We never *not* notify CAM when there has been any error (ok == 0), * so we never need to do an ATIO putback if we're not notifying CAM. */ isp_prt(isp, ISP_LOGTDEBUG0, "%s CTIO[0x%x] done (ok=%d nc=%d nowsendstatus=%d ccb ss=%d)", (sentstatus)? 
" FINAL " : "MIDTERM ", atp->tag, ok, notify_cam, atp->sendst, (ccb->ccb_h.flags & CAM_SEND_STATUS) != 0); if (notify_cam == 0) { if (atp->sendst) { isp_target_start_ctio(isp, ccb, FROM_CTIO_DONE); } return; } /* * We're telling CAM we're done with this CTIO transaction. * * 24XX cards never need an ATIO put back. * * Other cards need one put back only on error. * In the latter case, a timeout will re-fire * and try again in case we didn't have * queue resources to do so at first. In any case, * once the putback is done we do the completion * call. */ if (ok || IS_24XX(isp)) { isp_complete_ctio(ccb); } else { isp_target_putback_atio(ccb); } } static void isp_handle_platform_notify_scsi(ispsoftc_t *isp, in_entry_t *inot) { isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); } static void isp_handle_platform_notify_fc(ispsoftc_t *isp, in_fcentry_t *inp) { int needack = 1; switch (inp->in_status) { case IN_PORT_LOGOUT: /* * XXX: Need to delete this initiator's WWN from the database * XXX: Need to send this LOGOUT upstream */ isp_prt(isp, ISP_LOGWARN, "port logout of S_ID 0x%x", inp->in_iid); break; case IN_PORT_CHANGED: isp_prt(isp, ISP_LOGWARN, "port changed for S_ID 0x%x", inp->in_iid); break; case IN_GLOBAL_LOGO: isp_del_all_wwn_entries(isp, 0); isp_prt(isp, ISP_LOGINFO, "all ports logged out"); break; case IN_ABORT_TASK: { tstate_t *tptr; uint16_t lun; uint32_t loopid, sid; uint64_t wwn; atio_private_data_t *atp; fcportdb_t *lp; struct ccb_immediate_notify *inot = NULL; if (ISP_CAP_SCCFW(isp)) { lun = inp->in_scclun; #if __FreeBSD_version < 1000700 lun &= 0x3fff; #endif } else { lun = inp->in_lun; } if (ISP_CAP_2KLOGIN(isp)) { loopid = ((in_fcentry_e_t *)inp)->in_iid; } else { loopid = inp->in_iid; } if (isp_find_pdb_by_handle(isp, 0, loopid, &lp)) { wwn = lp->port_wwn; sid = lp->portid; } else { wwn = INI_ANY; sid = PORT_ANY; } tptr = get_lun_statep(isp, 0, lun); if (tptr == NULL) { tptr = get_lun_statep(isp, 0, CAM_LUN_WILDCARD); if (tptr == NULL) { isp_prt(isp, ISP_LOGWARN, "ABORT TASK for lun %u- but no tstate", lun); return; } } atp = isp_find_atpd(isp, tptr, inp->in_seqid); if (atp) { inot = (struct ccb_immediate_notify *) SLIST_FIRST(&tptr->inots); isp_prt(isp, ISP_LOGTDEBUG0, "ABORT TASK RX_ID %x WWN 0x%016llx state %d", inp->in_seqid, (unsigned long long) wwn, atp->state); if (inot) { tptr->inot_count--; SLIST_REMOVE_HEAD(&tptr->inots, sim_links.sle); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, inot->ccb_h.path, "%s: Take FREE INOT count now %d\n", __func__, tptr->inot_count); } else { ISP_PATH_PRT(isp, ISP_LOGWARN, tptr->owner, "out of INOT structures\n"); } } else { ISP_PATH_PRT(isp, ISP_LOGWARN, tptr->owner, "abort task RX_ID %x from wwn 0x%016llx, state unknown\n", inp->in_seqid, wwn); } if (inot) { isp_notify_t tmp, *nt = &tmp; ISP_MEMZERO(nt, sizeof (isp_notify_t)); nt->nt_hba = isp; nt->nt_tgt = FCPARAM(isp, 0)->isp_wwpn; nt->nt_wwn = wwn; nt->nt_nphdl = loopid; nt->nt_sid = sid; nt->nt_did = PORT_ANY; nt->nt_lun = lun; nt->nt_need_ack = 1; nt->nt_channel = 0; nt->nt_ncode = NT_ABORT_TASK; nt->nt_lreserved = inot; isp_handle_platform_target_tmf(isp, nt); needack = 0; } rls_lun_statep(isp, tptr); break; } default: break; } if (needack) { isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inp); } } static void isp_handle_platform_notify_24xx(ispsoftc_t *isp, in_fcentry_24xx_t *inot) { uint16_t nphdl; uint16_t prli_options = 0; uint32_t portid; fcportdb_t *lp; char *msg = NULL; uint8_t *ptr = (uint8_t *)inot; uint64_t wwpn = INI_NONE, wwnn = INI_NONE; nphdl = inot->in_nphdl; if (nphdl != 
NIL_HANDLE) { portid = inot->in_portid_hi << 16 | inot->in_portid_lo; } else { portid = PORT_ANY; } switch (inot->in_status) { case IN24XX_ELS_RCVD: { char buf[16]; int chan = ISP_GET_VPIDX(isp, inot->in_vpidx); /* * Note that we're just getting notification that an ELS was received * (possibly with some associated information sent upstream). This is * *not* the same as being given the ELS frame to accept or reject. */ switch (inot->in_status_subcode) { case LOGO: msg = "LOGO"; wwpn = be64dec(&ptr[IN24XX_PLOGI_WWPN_OFF]); isp_del_wwn_entry(isp, chan, wwpn, nphdl, portid); break; case PRLO: msg = "PRLO"; break; case PLOGI: msg = "PLOGI"; wwnn = be64dec(&ptr[IN24XX_PLOGI_WWNN_OFF]); wwpn = be64dec(&ptr[IN24XX_PLOGI_WWPN_OFF]); isp_add_wwn_entry(isp, chan, wwpn, wwnn, nphdl, portid, prli_options); break; case PRLI: msg = "PRLI"; prli_options = inot->in_prli_options; if (inot->in_flags & IN24XX_FLAG_PN_NN_VALID) wwnn = be64dec(&ptr[IN24XX_PRLI_WWNN_OFF]); wwpn = be64dec(&ptr[IN24XX_PRLI_WWPN_OFF]); isp_add_wwn_entry(isp, chan, wwpn, wwnn, nphdl, portid, prli_options); break; case PDISC: msg = "PDISC"; break; case ADISC: msg = "ADISC"; break; default: ISP_SNPRINTF(buf, sizeof (buf), "ELS 0x%x", inot->in_status_subcode); msg = buf; break; } if (inot->in_flags & IN24XX_FLAG_PUREX_IOCB) { isp_prt(isp, ISP_LOGERR, "%s Chan %d ELS N-port handle %x PortID 0x%06x marked as needing a PUREX response", msg, chan, nphdl, portid); break; } isp_prt(isp, ISP_LOGTDEBUG0, "%s Chan %d ELS N-port handle %x PortID 0x%06x RX_ID 0x%x OX_ID 0x%x", msg, chan, nphdl, portid, inot->in_rxid, inot->in_oxid); isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); break; } case IN24XX_PORT_LOGOUT: msg = "PORT LOGOUT"; if (isp_find_pdb_by_handle(isp, ISP_GET_VPIDX(isp, inot->in_vpidx), nphdl, &lp)) { isp_del_wwn_entry(isp, ISP_GET_VPIDX(isp, inot->in_vpidx), lp->port_wwn, nphdl, lp->portid); } /* FALLTHROUGH */ case IN24XX_PORT_CHANGED: if (msg == NULL) msg = "PORT CHANGED"; /* FALLTHROUGH */ case IN24XX_LIP_RESET: if (msg == NULL) msg = "LIP RESET"; isp_prt(isp, ISP_LOGINFO, "Chan %d %s (sub-status 0x%x) for N-port handle 0x%x", ISP_GET_VPIDX(isp, inot->in_vpidx), msg, inot->in_status_subcode, nphdl); /* * All subcodes here are irrelevant. What is relevant * is that we need to terminate all active commands from * this initiator (known by N-port handle). */ /* XXX IMPLEMENT XXX */ isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); break; case IN24XX_SRR_RCVD: #ifdef ISP_TARGET_MODE isp_handle_srr_notify(isp, inot); break; #else if (msg == NULL) msg = "SRR RCVD"; /* FALLTHROUGH */ #endif case IN24XX_LINK_RESET: if (msg == NULL) msg = "LINK RESET"; case IN24XX_LINK_FAILED: if (msg == NULL) msg = "LINK FAILED"; default: isp_prt(isp, ISP_LOGWARN, "Chan %d %s", ISP_GET_VPIDX(isp, inot->in_vpidx), msg); isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); break; } } static int isp_handle_platform_target_notify_ack(ispsoftc_t *isp, isp_notify_t *mp) { if (isp->isp_state != ISP_RUNSTATE) { isp_prt(isp, ISP_LOGTINFO, "Notify Code 0x%x (qevalid=%d) acked- h/w not ready (dropping)", mp->nt_ncode, mp->nt_lreserved != NULL); return (0); } /* * This case is for a Task Management Function, which shows up as an ATIO7 entry. 
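 *
 * The FCP task attribute rides in the top nibble of at_ta_len, so the
 * Mode 1 response built below carries it across:
 *
 *	cto->ct_flags = CT7_SENDSTATUS|CT7_NOACK|CT7_NO_DATA|CT7_FLAG_MODE1;
 *	cto->ct_flags |= (aep->at_ta_len >> 12) << CT7_TASK_ATTR_SHIFT;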
*/ if (IS_24XX(isp) && mp->nt_lreserved && ((isphdr_t *)mp->nt_lreserved)->rqs_entry_type == RQSTYPE_ATIO) { ct7_entry_t local, *cto = &local; at7_entry_t *aep = (at7_entry_t *)mp->nt_lreserved; fcportdb_t *lp; uint32_t sid; uint16_t nphdl; sid = (aep->at_hdr.s_id[0] << 16) | (aep->at_hdr.s_id[1] << 8) | aep->at_hdr.s_id[2]; if (isp_find_pdb_by_sid(isp, mp->nt_channel, sid, &lp)) { nphdl = lp->handle; } else { nphdl = NIL_HANDLE; } ISP_MEMZERO(&local, sizeof (local)); cto->ct_header.rqs_entry_type = RQSTYPE_CTIO7; cto->ct_header.rqs_entry_count = 1; cto->ct_nphdl = nphdl; cto->ct_rxid = aep->at_rxid; cto->ct_vpidx = mp->nt_channel; cto->ct_iid_lo = sid; cto->ct_iid_hi = sid >> 16; cto->ct_oxid = aep->at_hdr.ox_id; cto->ct_flags = CT7_SENDSTATUS|CT7_NOACK|CT7_NO_DATA|CT7_FLAG_MODE1; cto->ct_flags |= (aep->at_ta_len >> 12) << CT7_TASK_ATTR_SHIFT; return (isp_target_put_entry(isp, &local)); } /* * This case is for a responding to an ABTS frame */ if (IS_24XX(isp) && mp->nt_lreserved && ((isphdr_t *)mp->nt_lreserved)->rqs_entry_type == RQSTYPE_ABTS_RCVD) { /* * Overload nt_need_ack here to mark whether we've terminated the associated command. */ if (mp->nt_need_ack) { uint8_t storage[QENTRY_LEN]; ct7_entry_t *cto = (ct7_entry_t *) storage; abts_t *abts = (abts_t *)mp->nt_lreserved; ISP_MEMZERO(cto, sizeof (ct7_entry_t)); isp_prt(isp, ISP_LOGTDEBUG0, "%s: [%x] terminating after ABTS received", __func__, abts->abts_rxid_task); cto->ct_header.rqs_entry_type = RQSTYPE_CTIO7; cto->ct_header.rqs_entry_count = 1; cto->ct_nphdl = mp->nt_nphdl; cto->ct_rxid = abts->abts_rxid_task; cto->ct_iid_lo = mp->nt_sid; cto->ct_iid_hi = mp->nt_sid >> 16; cto->ct_oxid = abts->abts_ox_id; cto->ct_vpidx = mp->nt_channel; cto->ct_flags = CT7_NOACK|CT7_TERMINATE; if (isp_target_put_entry(isp, cto)) { return (ENOMEM); } mp->nt_need_ack = 0; } if (isp_acknak_abts(isp, mp->nt_lreserved, 0) == ENOMEM) { return (ENOMEM); } else { return (0); } } /* * Handle logout cases here */ if (mp->nt_ncode == NT_GLOBAL_LOGOUT) { isp_del_all_wwn_entries(isp, mp->nt_channel); } if (mp->nt_ncode == NT_LOGOUT) { if (!IS_2100(isp) && IS_FC(isp)) { isp_del_wwn_entries(isp, mp); } } /* * General purpose acknowledgement */ if (mp->nt_need_ack) { isp_prt(isp, ISP_LOGTINFO, "Notify Code 0x%x (qevalid=%d) being acked", mp->nt_ncode, mp->nt_lreserved != NULL); /* * Don't need to use the guaranteed send because the caller can retry */ return (isp_notify_ack(isp, mp->nt_lreserved)); } return (0); } /* * Handle task management functions. * * We show up here with a notify structure filled out. * * The nt_lreserved tag points to the original queue entry */ static void isp_handle_platform_target_tmf(ispsoftc_t *isp, isp_notify_t *notify) { tstate_t *tptr; fcportdb_t *lp; struct ccb_immediate_notify *inot; inot_private_data_t *ntp = NULL; lun_id_t lun; isp_prt(isp, ISP_LOGTDEBUG0, "%s: code 0x%x sid 0x%x tagval 0x%016llx chan %d lun 0x%x", __func__, notify->nt_ncode, notify->nt_sid, (unsigned long long) notify->nt_tagval, notify->nt_channel, notify->nt_lun); /* * NB: This assignment is necessary because of tricky type conversion. * XXX: This is tricky and I need to check this. If the lun isn't known * XXX: for the task management function, it does not of necessity follow * XXX: that it should go up stream to the wildcard listener. 
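 *
 * Concretely, the conversion below amounts to
 *
 *	lun = (notify->nt_lun == LUN_ANY) ? CAM_LUN_WILDCARD : notify->nt_lun;
 *
 * spelled out as an if/else because of the differing types.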
*/ if (notify->nt_lun == LUN_ANY) { lun = CAM_LUN_WILDCARD; } else { lun = notify->nt_lun; } tptr = get_lun_statep(isp, notify->nt_channel, lun); if (tptr == NULL) { tptr = get_lun_statep(isp, notify->nt_channel, CAM_LUN_WILDCARD); if (tptr == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: no state pointer found for chan %d lun %#jx", __func__, notify->nt_channel, (uintmax_t)lun); goto bad; } } inot = (struct ccb_immediate_notify *) SLIST_FIRST(&tptr->inots); if (inot == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: out of immediate notify structures for chan %d lun %#jx", __func__, notify->nt_channel, (uintmax_t)lun); goto bad; } if (isp_find_pdb_by_sid(isp, notify->nt_channel, notify->nt_sid, &lp) == 0 && isp_find_pdb_by_handle(isp, notify->nt_channel, notify->nt_nphdl, &lp) == 0) { inot->initiator_id = CAM_TARGET_WILDCARD; } else { inot->initiator_id = FC_PORTDB_TGT(isp, notify->nt_channel, lp); } inot->seq_id = notify->nt_tagval; inot->tag_id = notify->nt_tagval >> 32; switch (notify->nt_ncode) { case NT_ABORT_TASK: isp_target_mark_aborted_early(isp, tptr, inot->tag_id); inot->arg = MSG_ABORT_TASK; break; case NT_ABORT_TASK_SET: isp_target_mark_aborted_early(isp, tptr, TAG_ANY); inot->arg = MSG_ABORT_TASK_SET; break; case NT_CLEAR_ACA: inot->arg = MSG_CLEAR_ACA; break; case NT_CLEAR_TASK_SET: inot->arg = MSG_CLEAR_TASK_SET; break; case NT_LUN_RESET: inot->arg = MSG_LOGICAL_UNIT_RESET; break; case NT_TARGET_RESET: inot->arg = MSG_TARGET_RESET; break; case NT_QUERY_TASK_SET: inot->arg = MSG_QUERY_TASK_SET; break; case NT_QUERY_ASYNC_EVENT: inot->arg = MSG_QUERY_ASYNC_EVENT; break; default: isp_prt(isp, ISP_LOGWARN, "%s: unknown TMF code 0x%x for chan %d lun %#jx", __func__, notify->nt_ncode, notify->nt_channel, (uintmax_t)lun); goto bad; } ntp = isp_get_ntpd(isp, tptr); if (ntp == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: out of inotify private structures", __func__); goto bad; } ISP_MEMCPY(&ntp->rd.nt, notify, sizeof (isp_notify_t)); if (notify->nt_lreserved) { ISP_MEMCPY(&ntp->rd.data, notify->nt_lreserved, QENTRY_LEN); ntp->rd.nt.nt_lreserved = &ntp->rd.data; } ntp->rd.seq_id = notify->nt_tagval; ntp->rd.tag_id = notify->nt_tagval >> 32; tptr->inot_count--; SLIST_REMOVE_HEAD(&tptr->inots, sim_links.sle); rls_lun_statep(isp, tptr); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, inot->ccb_h.path, "%s: Take FREE INOT count now %d\n", __func__, tptr->inot_count); inot->ccb_h.status = CAM_MESSAGE_RECV; xpt_done((union ccb *)inot); return; bad: if (tptr) { rls_lun_statep(isp, tptr); } if (notify->nt_need_ack && notify->nt_lreserved) { if (((isphdr_t *)notify->nt_lreserved)->rqs_entry_type == RQSTYPE_ABTS_RCVD) { if (isp_acknak_abts(isp, notify->nt_lreserved, ENOMEM)) { isp_prt(isp, ISP_LOGWARN, "you lose- unable to send an ACKNAK"); } } else { isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, notify->nt_lreserved); } } } /* * Find the associated private data and mark it as dead so * we don't try to work on it any further. 
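 *
 * Marking the adjunct dead is advisory: nothing is torn down here;
 * the flag is only meant to make later work on this tag fail rather
 * than touch a dead command.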
*/ static void isp_target_mark_aborted(ispsoftc_t *isp, union ccb *ccb) { tstate_t *tptr; atio_private_data_t *atp; union ccb *accb = ccb->cab.abort_ccb; tptr = get_lun_statep(isp, XS_CHANNEL(accb), XS_LUN(accb)); if (tptr == NULL) { tptr = get_lun_statep(isp, XS_CHANNEL(accb), CAM_LUN_WILDCARD); if (tptr == NULL) { ccb->ccb_h.status = CAM_REQ_INVALID; return; } } atp = isp_find_atpd(isp, tptr, accb->atio.tag_id); if (atp == NULL) { ccb->ccb_h.status = CAM_REQ_INVALID; } else { atp->dead = 1; ccb->ccb_h.status = CAM_REQ_CMP; } rls_lun_statep(isp, tptr); } static void isp_target_mark_aborted_early(ispsoftc_t *isp, tstate_t *tptr, uint32_t tag_id) { atio_private_data_t *atp; inot_private_data_t *restart_queue = tptr->restart_queue; /* * First, clean any commands pending restart */ tptr->restart_queue = NULL; while (restart_queue) { uint32_t this_tag_id; inot_private_data_t *ntp = restart_queue; restart_queue = ntp->rd.nt.nt_hba; if (IS_24XX(isp)) { this_tag_id = ((at7_entry_t *)ntp->rd.data)->at_rxid; } else { this_tag_id = ((at2_entry_t *)ntp->rd.data)->at_rxid; } if ((uint64_t)tag_id == TAG_ANY || tag_id == this_tag_id) { isp_put_ntpd(isp, tptr, ntp); } else { ntp->rd.nt.nt_hba = tptr->restart_queue; tptr->restart_queue = ntp; } } /* * Now mark other ones dead as well. */ for (atp = tptr->atpool; atp < &tptr->atpool[ATPDPSIZE]; atp++) { if ((uint64_t)tag_id == TAG_ANY || atp->tag == tag_id) { atp->dead = 1; } } } #endif static void isp_cam_async(void *cbarg, uint32_t code, struct cam_path *path, void *arg) { struct cam_sim *sim; int bus, tgt; ispsoftc_t *isp; sim = (struct cam_sim *)cbarg; isp = (ispsoftc_t *) cam_sim_softc(sim); bus = cam_sim_bus(sim); tgt = xpt_path_target_id(path); switch (code) { case AC_LOST_DEVICE: if (IS_SCSI(isp)) { uint16_t oflags, nflags; sdparam *sdp = SDPARAM(isp, bus); if (tgt >= 0) { nflags = sdp->isp_devparam[tgt].nvrm_flags; #ifndef ISP_TARGET_MODE nflags &= DPARM_SAFE_DFLT; if (isp->isp_loaded_fw) { nflags |= DPARM_NARROW | DPARM_ASYNC; } #else nflags = DPARM_DEFAULT; #endif oflags = sdp->isp_devparam[tgt].goal_flags; sdp->isp_devparam[tgt].goal_flags = nflags; sdp->isp_devparam[tgt].dev_update = 1; sdp->update = 1; (void) isp_control(isp, ISPCTL_UPDATE_PARAMS, bus); sdp->isp_devparam[tgt].goal_flags = oflags; } } break; default: isp_prt(isp, ISP_LOGWARN, "isp_cam_async: Code 0x%x", code); break; } } static void isp_poll(struct cam_sim *sim) { ispsoftc_t *isp = cam_sim_softc(sim); uint16_t isr, sema, info; if (ISP_READ_ISR(isp, &isr, &sema, &info)) isp_intr(isp, isr, sema, info); } static void isp_watchdog(void *arg) { struct ccb_scsiio *xs = arg; ispsoftc_t *isp; uint32_t ohandle = ISP_HANDLE_FREE, handle; isp = XS_ISP(xs); handle = isp_find_handle(isp, xs); /* * Hand crank the interrupt code just to be sure the command isn't stuck somewhere. */ if (handle != ISP_HANDLE_FREE) { uint16_t isr, sema, info; if (ISP_READ_ISR(isp, &isr, &sema, &info) != 0) isp_intr(isp, isr, sema, info); ohandle = handle; handle = isp_find_handle(isp, xs); } if (handle != ISP_HANDLE_FREE) { /* * Try and make sure the command is really dead before * we release the handle (and DMA resources) for reuse. * * If we are successful in aborting the command then * we're done here because we'll get the command returned * back separately. */ if (isp_control(isp, ISPCTL_ABORT_CMD, xs) == 0) { return; } /* * Note that after calling the above, the command may in * fact have been completed. 
*/ xs = isp_find_xs(isp, handle); /* * If the command no longer exists, then we won't * be able to find the xs again with this handle. */ if (xs == NULL) { return; } /* * After this point, the command is really dead. */ if (XS_XFRLEN(xs)) { ISP_DMAFREE(isp, xs, handle); } isp_destroy_handle(isp, handle); isp_prt(isp, ISP_LOGERR, "%s: timeout for handle 0x%x", __func__, handle); xs->ccb_h.status &= ~CAM_STATUS_MASK; xs->ccb_h.status |= CAM_CMD_TIMEOUT; isp_prt_endcmd(isp, xs); isp_done(xs); } else { if (ohandle != ISP_HANDLE_FREE) { isp_prt(isp, ISP_LOGWARN, "%s: timeout for handle 0x%x, recovered during interrupt", __func__, ohandle); } else { isp_prt(isp, ISP_LOGWARN, "%s: timeout for handle already free", __func__); } } } static void isp_make_here(ispsoftc_t *isp, fcportdb_t *fcp, int chan, int tgt) { union ccb *ccb; struct isp_fc *fc = ISP_FC_PC(isp, chan); /* * Allocate a CCB, create a wildcard path for this target and schedule a rescan. */ ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { isp_prt(isp, ISP_LOGWARN, "Chan %d unable to alloc CCB for rescan", chan); return; } if (xpt_create_path(&ccb->ccb_h.path, NULL, cam_sim_path(fc->sim), tgt, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { isp_prt(isp, ISP_LOGWARN, "unable to create path for rescan"); xpt_free_ccb(ccb); return; } xpt_rescan(ccb); } static void isp_make_gone(ispsoftc_t *isp, fcportdb_t *fcp, int chan, int tgt) { struct cam_path *tp; struct isp_fc *fc = ISP_FC_PC(isp, chan); if (xpt_create_path(&tp, NULL, cam_sim_path(fc->sim), tgt, CAM_LUN_WILDCARD) == CAM_REQ_CMP) { xpt_async(AC_LOST_DEVICE, tp, NULL); xpt_free_path(tp); } } /* * Gone Device Timer Function- when we have decided that a device has gone * away, we wait a specific period of time prior to telling the OS it has * gone away. * * This timer function fires once a second and then scans the port database * for devices that are marked dead but still have a virtual target assigned. * We decrement a counter for that port database entry, and when it hits zero, * we tell the OS the device has gone away. 
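 *
 * For example, with the timer re-armed every second, a ZOMBIE entry
 * whose gone_timer was left at 30 survives thirty passes of
 * isp_gdt_task() below before the lost-device/AC_CONTRACT work runs.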
*/ static void isp_gdt(void *arg) { struct isp_fc *fc = arg; taskqueue_enqueue(taskqueue_thread, &fc->gtask); } static void isp_gdt_task(void *arg, int pending) { struct isp_fc *fc = arg; ispsoftc_t *isp = fc->isp; int chan = fc - isp->isp_osinfo.pc.fc; fcportdb_t *lp; struct ac_contract ac; struct ac_device_changed *adc; int dbidx, more_to_do = 0; ISP_LOCK(isp); isp_prt(isp, ISP_LOGDEBUG0, "Chan %d GDT timer expired", chan); for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) { lp = &FCPARAM(isp, chan)->portdb[dbidx]; if (lp->state != FC_PORTDB_STATE_ZOMBIE) { continue; } if (lp->gone_timer != 0) { lp->gone_timer -= 1; more_to_do++; continue; } isp_prt(isp, ISP_LOGCONFIG, prom3, chan, dbidx, lp->portid, "Gone Device Timeout"); if (lp->is_target) { lp->is_target = 0; isp_make_gone(isp, lp, chan, dbidx); } if (lp->is_initiator) { lp->is_initiator = 0; ac.contract_number = AC_CONTRACT_DEV_CHG; adc = (struct ac_device_changed *) ac.contract_data; adc->wwpn = lp->port_wwn; adc->port = lp->portid; adc->target = dbidx; adc->arrived = 0; xpt_async(AC_CONTRACT, fc->path, &ac); } lp->state = FC_PORTDB_STATE_NIL; } if (fc->ready) { if (more_to_do) { callout_reset(&fc->gdt, hz, isp_gdt, fc); } else { callout_deactivate(&fc->gdt); isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Stopping Gone Device Timer @ %lu", chan, (unsigned long) time_uptime); } } ISP_UNLOCK(isp); } /* * Loop Down Timer Function- when the loop goes down, a timer is started, and * after it expires we come here and take all probational devices that * the OS knows about and tell the OS that they've gone away. * * We don't clear the devices out of our port database because, when the loop * comes back up, we have to do some actual cleanup with the chip at that * point (implicit PLOGO, e.g., to get the chip's port database state right). */ static void isp_ldt(void *arg) { struct isp_fc *fc = arg; taskqueue_enqueue(taskqueue_thread, &fc->ltask); } static void isp_ldt_task(void *arg, int pending) { struct isp_fc *fc = arg; ispsoftc_t *isp = fc->isp; int chan = fc - isp->isp_osinfo.pc.fc; fcportdb_t *lp; struct ac_contract ac; struct ac_device_changed *adc; int dbidx, i; ISP_LOCK(isp); isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Chan %d Loop Down Timer expired @ %lu", chan, (unsigned long) time_uptime); callout_deactivate(&fc->ldt); /* * Notify the OS of all targets that we now consider to have departed. */ for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) { lp = &FCPARAM(isp, chan)->portdb[dbidx]; if (lp->state == FC_PORTDB_STATE_NIL) continue; /* * XXX: CLEAN UP AND COMPLETE ANY PENDING COMMANDS FIRST! */ for (i = 0; i < isp->isp_maxcmds; i++) { struct ccb_scsiio *xs; if (!ISP_VALID_HANDLE(isp, isp->isp_xflist[i].handle)) { continue; } if ((xs = isp->isp_xflist[i].cmd) == NULL) { continue; } if (dbidx != XS_TGT(xs)) { continue; } isp_prt(isp, ISP_LOGWARN, "command handle 0x%x for %d.%d.%jx orphaned by loop down timeout", isp->isp_xflist[i].handle, chan, XS_TGT(xs), (uintmax_t)XS_LUN(xs)); } isp_prt(isp, ISP_LOGCONFIG, prom3, chan, dbidx, lp->portid, "Loop Down Timeout"); if (lp->is_target) { lp->is_target = 0; isp_make_gone(isp, lp, chan, dbidx); } if (lp->is_initiator) { lp->is_initiator = 0; ac.contract_number = AC_CONTRACT_DEV_CHG; adc = (struct ac_device_changed *) ac.contract_data; adc->wwpn = lp->port_wwn; adc->port = lp->portid; adc->target = dbidx; adc->arrived = 0; xpt_async(AC_CONTRACT, fc->path, &ac); } } isp_unfreeze_loopdown(isp, chan); /* * The loop down timer has expired. Wake up the kthread * to notice that fact (or make it false).
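 *
 * The kthread then polls with a rough back-off (seconds slept versus
 * accumulated loop down time, in the initiator role case):
 *
 *	down <  10 -> 1		down < 120 -> 20
 *	down <  30 -> 5		otherwise  -> 30
 *	down <  60 -> 10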
*/ fc->loop_dead = 1; fc->loop_down_time = fc->loop_down_limit+1; wakeup(fc); ISP_UNLOCK(isp); } static void isp_kthread(void *arg) { struct isp_fc *fc = arg; ispsoftc_t *isp = fc->isp; int chan = fc - isp->isp_osinfo.pc.fc; int slp = 0; mtx_lock(&isp->isp_osinfo.lock); while (isp->isp_osinfo.is_exiting == 0) { int lb, lim; isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d checking FC state", __func__, chan); lb = isp_fc_runstate(isp, chan, 250000); /* * Our action is different based upon whether we're supporting * Initiator mode or not. If we are, we might freeze the simq * when loop is down and set all sorts of different delays to * check again. * * If not, we simply just wait for loop to come up. */ if (lb && (FCPARAM(isp, chan)->role & ISP_ROLE_INITIATOR)) { /* * Increment loop down time by the last sleep interval */ fc->loop_down_time += slp; if (lb < 0) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d FC loop not up (down count %d)", __func__, chan, fc->loop_down_time); } else { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d FC got to %d (down count %d)", __func__, chan, lb, fc->loop_down_time); } /* * If we've never seen loop up and we've waited longer * than quickboot time, or we've seen loop up but we've * waited longer than loop_down_limit, give up and go * to sleep until loop comes up. */ if (FCPARAM(isp, chan)->loop_seen_once == 0) { lim = isp_quickboot_time; } else { lim = fc->loop_down_limit; } if (fc->loop_down_time >= lim) { isp_freeze_loopdown(isp, chan, "loop limit hit"); slp = 0; } else if (fc->loop_down_time < 10) { slp = 1; } else if (fc->loop_down_time < 30) { slp = 5; } else if (fc->loop_down_time < 60) { slp = 10; } else if (fc->loop_down_time < 120) { slp = 20; } else { slp = 30; } } else if (lb) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d FC Loop Down", __func__, chan); fc->loop_down_time += slp; if (fc->loop_down_time > 300) slp = 0; else slp = 60; } else { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d FC state OK", __func__, chan); fc->loop_down_time = 0; slp = 0; } /* * If this is past the first loop up or the loop is dead and if we'd frozen the simq, unfreeze it * now so that CAM can start sending us commands. * * If the FC state isn't okay yet, they'll hit that in isp_start which will freeze the queue again * or kill the commands, as appropriate. */ if (FCPARAM(isp, chan)->loop_seen_once || fc->loop_dead) { isp_unfreeze_loopdown(isp, chan); } isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d sleep time %d", __func__, chan, slp); msleep(fc, &isp->isp_osinfo.lock, PRIBIO, "ispf", slp * hz); /* * If slp is zero, we're waking up for the first time after * things have been okay. In this case, we set a deferral state * for all commands and delay hysteresis seconds before starting * the FC state evaluation. This gives the loop/fabric a chance * to settle. 
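 *
 * e.g. with fc->hysteresis set to 5 the pause below is
 * pause("ispt", 5 * hz), a five second settle delay before the next
 * pass of FC state evaluation.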
*/ if (slp == 0 && fc->hysteresis) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d sleep hysteresis ticks %d", __func__, chan, fc->hysteresis * hz); mtx_unlock(&isp->isp_osinfo.lock); pause("ispt", fc->hysteresis * hz); mtx_lock(&isp->isp_osinfo.lock); } } fc->num_threads -= 1; mtx_unlock(&isp->isp_osinfo.lock); kthread_exit(); } static void isp_action(struct cam_sim *sim, union ccb *ccb) { int bus, tgt, ts, error, lim; ispsoftc_t *isp; struct ccb_trans_settings *cts; CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE, ("isp_action\n")); isp = (ispsoftc_t *)cam_sim_softc(sim); mtx_assert(&isp->isp_lock, MA_OWNED); isp_prt(isp, ISP_LOGDEBUG2, "isp_action code %x", ccb->ccb_h.func_code); ISP_PCMD(ccb) = NULL; switch (ccb->ccb_h.func_code) { case XPT_SCSI_IO: /* Execute the requested I/O operation */ bus = XS_CHANNEL(ccb); /* * Do a couple of preliminary checks... */ if ((ccb->ccb_h.flags & CAM_CDB_POINTER) != 0) { if ((ccb->ccb_h.flags & CAM_CDB_PHYS) != 0) { ccb->ccb_h.status = CAM_REQ_INVALID; isp_done((struct ccb_scsiio *) ccb); break; } } ccb->csio.req_map = NULL; #ifdef DIAGNOSTIC if (ccb->ccb_h.target_id >= ISP_MAX_TARGETS(isp)) { xpt_print(ccb->ccb_h.path, "invalid target\n"); ccb->ccb_h.status = CAM_PATH_INVALID; } else if (ISP_MAX_LUNS(isp) > 0 && ccb->ccb_h.target_lun >= ISP_MAX_LUNS(isp)) { xpt_print(ccb->ccb_h.path, "invalid lun\n"); ccb->ccb_h.status = CAM_PATH_INVALID; } if (ccb->ccb_h.status == CAM_PATH_INVALID) { xpt_done(ccb); break; } #endif ccb->csio.scsi_status = SCSI_STATUS_OK; if (isp_get_pcmd(isp, ccb)) { isp_prt(isp, ISP_LOGWARN, "out of PCMDs"); cam_freeze_devq(ccb->ccb_h.path); cam_release_devq(ccb->ccb_h.path, RELSIM_RELEASE_AFTER_TIMEOUT, 0, 250, 0); ccb->ccb_h.status = CAM_REQUEUE_REQ; xpt_done(ccb); break; } error = isp_start((XS_T *) ccb); switch (error) { case CMD_QUEUED: ccb->ccb_h.status |= CAM_SIM_QUEUED; if (ccb->ccb_h.timeout == CAM_TIME_INFINITY) { break; } ts = ccb->ccb_h.timeout; if (ts == CAM_TIME_DEFAULT) { ts = 60*1000; } ts = isp_mstohz(ts); callout_reset(&PISP_PCMD(ccb)->wdog, ts, isp_watchdog, ccb); break; case CMD_RQLATER: /* * We get this result for FC devices if the loop state isn't ready yet * or if the device in question has gone zombie on us. * * If we've never seen Loop UP at all, we requeue this request and wait * for the initial loop up delay to expire. */ lim = ISP_FC_PC(isp, bus)->loop_down_limit; if (FCPARAM(isp, bus)->loop_seen_once == 0 || ISP_FC_PC(isp, bus)->loop_down_time >= lim) { if (FCPARAM(isp, bus)->loop_seen_once == 0) { isp_prt(isp, ISP_LOGDEBUG0, "%d.%jx loop not seen yet @ %lu", XS_TGT(ccb), (uintmax_t)XS_LUN(ccb), (unsigned long) time_uptime); } else { isp_prt(isp, ISP_LOGDEBUG0, "%d.%jx downtime (%d) > lim (%d)", XS_TGT(ccb), (uintmax_t)XS_LUN(ccb), ISP_FC_PC(isp, bus)->loop_down_time, lim); } ccb->ccb_h.status = CAM_SEL_TIMEOUT; isp_done((struct ccb_scsiio *) ccb); break; } isp_prt(isp, ISP_LOGDEBUG0, "%d.%jx retry later", XS_TGT(ccb), (uintmax_t)XS_LUN(ccb)); cam_freeze_devq(ccb->ccb_h.path); cam_release_devq(ccb->ccb_h.path, RELSIM_RELEASE_AFTER_TIMEOUT, 0, 1000, 0); ccb->ccb_h.status = CAM_REQUEUE_REQ; isp_free_pcmd(isp, ccb); xpt_done(ccb); break; case CMD_EAGAIN: isp_free_pcmd(isp, ccb); cam_freeze_devq(ccb->ccb_h.path); cam_release_devq(ccb->ccb_h.path, RELSIM_RELEASE_AFTER_TIMEOUT, 0, 100, 0); ccb->ccb_h.status = CAM_REQUEUE_REQ; xpt_done(ccb); break; case CMD_COMPLETE: isp_done((struct ccb_scsiio *) ccb); break; default: isp_prt(isp, ISP_LOGERR, "What's this? 
0x%x at %d in file %s", error, __LINE__, __FILE__); ccb->ccb_h.status = CAM_REQUEUE_REQ; isp_free_pcmd(isp, ccb); xpt_done(ccb); } break; #ifdef ISP_TARGET_MODE case XPT_EN_LUN: /* Enable/Disable LUN as a target */ if (ccb->cel.enable) { isp_enable_lun(isp, ccb); } else { isp_disable_lun(isp, ccb); } break; case XPT_IMMED_NOTIFY: case XPT_IMMEDIATE_NOTIFY: /* Add Immediate Notify Resource */ case XPT_ACCEPT_TARGET_IO: /* Add Accept Target IO Resource */ { tstate_t *tptr = get_lun_statep(isp, XS_CHANNEL(ccb), ccb->ccb_h.target_lun); if (tptr == NULL) { tptr = get_lun_statep(isp, XS_CHANNEL(ccb), CAM_LUN_WILDCARD); } if (tptr == NULL) { const char *str; uint32_t tag; if (ccb->ccb_h.func_code == XPT_IMMEDIATE_NOTIFY) { str = "XPT_IMMEDIATE_NOTIFY"; tag = ccb->cin1.seq_id; } else { tag = ccb->atio.tag_id; str = "XPT_ACCEPT_TARGET_IO"; } ISP_PATH_PRT(isp, ISP_LOGWARN, ccb->ccb_h.path, "%s: [0x%x] no state pointer found for %s\n", __func__, tag, str); dump_tstates(isp, XS_CHANNEL(ccb)); ccb->ccb_h.status = CAM_DEV_NOT_THERE; break; } ccb->ccb_h.spriv_field0 = 0; ccb->ccb_h.spriv_ptr1 = isp; if (ccb->ccb_h.func_code == XPT_ACCEPT_TARGET_IO) { if (ccb->atio.tag_id) { atio_private_data_t *atp = isp_find_atpd(isp, tptr, ccb->atio.tag_id); if (atp) { isp_put_atpd(isp, tptr, atp); } } tptr->atio_count++; SLIST_INSERT_HEAD(&tptr->atios, &ccb->ccb_h, sim_links.sle); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, ccb->ccb_h.path, "Put FREE ATIO (tag id 0x%x), count now %d\n", ccb->atio.tag_id, tptr->atio_count); ccb->atio.tag_id = 0; } else if (ccb->ccb_h.func_code == XPT_IMMEDIATE_NOTIFY) { if (ccb->cin1.tag_id) { inot_private_data_t *ntp = isp_find_ntpd(isp, tptr, ccb->cin1.tag_id, ccb->cin1.seq_id); if (ntp) { isp_put_ntpd(isp, tptr, ntp); } } tptr->inot_count++; SLIST_INSERT_HEAD(&tptr->inots, &ccb->ccb_h, sim_links.sle); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, ccb->ccb_h.path, "Put FREE INOT, (seq id 0x%x) count now %d\n", ccb->cin1.seq_id, tptr->inot_count); ccb->cin1.seq_id = 0; } else if (ccb->ccb_h.func_code == XPT_IMMED_NOTIFY) { tptr->inot_count++; SLIST_INSERT_HEAD(&tptr->inots, &ccb->ccb_h, sim_links.sle); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, ccb->ccb_h.path, "Put FREE INOT, (seq id 0x%x) count now %d\n", ccb->cin1.seq_id, tptr->inot_count); ccb->cin1.seq_id = 0; } rls_lun_statep(isp, tptr); ccb->ccb_h.status = CAM_REQ_INPROG; break; } case XPT_NOTIFY_ACK: ccb->ccb_h.status = CAM_REQ_CMP_ERR; break; case XPT_NOTIFY_ACKNOWLEDGE: /* notify ack */ { tstate_t *tptr; inot_private_data_t *ntp; /* * XXX: Because we cannot guarantee that the path information in the notify acknowledge ccb * XXX: matches that for the immediate notify, we have to *search* for the notify structure */ /* * All the relevant path information is in the associated immediate notify */ ISP_PATH_PRT(isp, ISP_LOGTDEBUG0, ccb->ccb_h.path, "%s: [0x%x] NOTIFY ACKNOWLEDGE for 0x%x seen\n", __func__, ccb->cna2.tag_id, ccb->cna2.seq_id); ntp = get_ntp_from_tagdata(isp, ccb->cna2.tag_id, ccb->cna2.seq_id, &tptr); if (ntp == NULL) { ISP_PATH_PRT(isp, ISP_LOGWARN, ccb->ccb_h.path, "%s: [0x%x] XPT_NOTIFY_ACKNOWLEDGE of 0x%x cannot find ntp private data\n", __func__, ccb->cna2.tag_id, ccb->cna2.seq_id); ccb->ccb_h.status = CAM_DEV_NOT_THERE; xpt_done(ccb); break; } if (isp_handle_platform_target_notify_ack(isp, &ntp->rd.nt)) { rls_lun_statep(isp, tptr); cam_freeze_devq(ccb->ccb_h.path); cam_release_devq(ccb->ccb_h.path, RELSIM_RELEASE_AFTER_TIMEOUT, 0, 1000, 0); ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_REQUEUE_REQ; break; } 
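/*
 * Success path: the notify ack went out, so return the inot private
 * data to its pool, drop the lun state reference, and complete the
 * CCB back to CAM.
 */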
isp_put_ntpd(isp, tptr, ntp); rls_lun_statep(isp, tptr); ccb->ccb_h.status = CAM_REQ_CMP; ISP_PATH_PRT(isp, ISP_LOGTDEBUG0, ccb->ccb_h.path, "%s: [0x%x] calling xpt_done for tag 0x%x\n", __func__, ccb->cna2.tag_id, ccb->cna2.seq_id); xpt_done(ccb); break; } case XPT_CONT_TARGET_IO: isp_target_start_ctio(isp, ccb, FROM_CAM); break; #endif case XPT_RESET_DEV: /* BDR the specified SCSI device */ - { - struct isp_fc *fc; - bus = cam_sim_bus(xpt_path_sim(ccb->ccb_h.path)); tgt = ccb->ccb_h.target_id; tgt |= (bus << 16); - if (IS_FC(isp)) - fc = ISP_FC_PC(isp, bus); - else - fc = NULL; error = isp_control(isp, ISPCTL_RESET_DEV, bus, tgt); if (error) { ccb->ccb_h.status = CAM_REQ_CMP_ERR; } else { /* * If we have a FC device, reset the Command * Reference Number, because the target will expect * that we re-start the CRN at 1 after a reset. */ - if (fc != NULL) - isp_fcp_reset_crn(fc, tgt, /*tgt_set*/ 1); + if (IS_FC(isp)) + isp_fcp_reset_crn(isp, bus, tgt, /*tgt_set*/ 1); ccb->ccb_h.status = CAM_REQ_CMP; } xpt_done(ccb); break; - } case XPT_ABORT: /* Abort the specified CCB */ { union ccb *accb = ccb->cab.abort_ccb; switch (accb->ccb_h.func_code) { #ifdef ISP_TARGET_MODE case XPT_ACCEPT_TARGET_IO: isp_target_mark_aborted(isp, ccb); break; #endif case XPT_SCSI_IO: error = isp_control(isp, ISPCTL_ABORT_CMD, accb); if (error) { ccb->ccb_h.status = CAM_UA_ABORT; } else { ccb->ccb_h.status = CAM_REQ_CMP; } break; default: ccb->ccb_h.status = CAM_REQ_INVALID; break; } /* * This is not a queued CCB, so the caller expects it to be * complete when control is returned. */ break; } #define IS_CURRENT_SETTINGS(c) (c->type == CTS_TYPE_CURRENT_SETTINGS) case XPT_SET_TRAN_SETTINGS: /* Nexus Settings */ cts = &ccb->cts; if (!IS_CURRENT_SETTINGS(cts)) { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); break; } tgt = cts->ccb_h.target_id; bus = cam_sim_bus(xpt_path_sim(cts->ccb_h.path)); if (IS_SCSI(isp)) { struct ccb_trans_settings_scsi *scsi = &cts->proto_specific.scsi; struct ccb_trans_settings_spi *spi = &cts->xport_specific.spi; sdparam *sdp = SDPARAM(isp, bus); uint16_t *dptr; if (spi->valid == 0 && scsi->valid == 0) { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; } /* * We always update (internally) from goal_flags * so any request to change settings just gets * vectored to that location. 
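 *
 * (Sketch of the flow: the CTS_*_VALID bits checked below gate which
 * DPARM_* flags in goal_flags get rewritten; a later
 * isp_control(isp, ISPCTL_UPDATE_PARAMS, bus) pushes goal_flags to
 * the chip, after which actv_flags reflects what was negotiated.)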
*/ dptr = &sdp->isp_devparam[tgt].goal_flags; if ((spi->valid & CTS_SPI_VALID_DISC) != 0) { if ((spi->flags & CTS_SPI_FLAGS_DISC_ENB) != 0) *dptr |= DPARM_DISC; else *dptr &= ~DPARM_DISC; } if ((scsi->valid & CTS_SCSI_VALID_TQ) != 0) { if ((scsi->flags & CTS_SCSI_FLAGS_TAG_ENB) != 0) *dptr |= DPARM_TQING; else *dptr &= ~DPARM_TQING; } if ((spi->valid & CTS_SPI_VALID_BUS_WIDTH) != 0) { if (spi->bus_width == MSG_EXT_WDTR_BUS_16_BIT) *dptr |= DPARM_WIDE; else *dptr &= ~DPARM_WIDE; } /* * XXX: FIX ME */ if ((spi->valid & CTS_SPI_VALID_SYNC_OFFSET) && (spi->valid & CTS_SPI_VALID_SYNC_RATE) && (spi->sync_period && spi->sync_offset)) { *dptr |= DPARM_SYNC; /* * XXX: CHECK FOR LEGALITY */ sdp->isp_devparam[tgt].goal_period = spi->sync_period; sdp->isp_devparam[tgt].goal_offset = spi->sync_offset; } else { *dptr &= ~DPARM_SYNC; } isp_prt(isp, ISP_LOGDEBUG0, "SET (%d.%d.%jx) to flags %x off %x per %x", bus, tgt, (uintmax_t)cts->ccb_h.target_lun, sdp->isp_devparam[tgt].goal_flags, sdp->isp_devparam[tgt].goal_offset, sdp->isp_devparam[tgt].goal_period); sdp->isp_devparam[tgt].dev_update = 1; sdp->update = 1; } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; case XPT_GET_TRAN_SETTINGS: cts = &ccb->cts; tgt = cts->ccb_h.target_id; bus = cam_sim_bus(xpt_path_sim(cts->ccb_h.path)); if (IS_FC(isp)) { fcparam *fcp = FCPARAM(isp, bus); struct ccb_trans_settings_scsi *scsi = &cts->proto_specific.scsi; struct ccb_trans_settings_fc *fc = &cts->xport_specific.fc; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_2; cts->transport = XPORT_FC; cts->transport_version = 0; scsi->valid = CTS_SCSI_VALID_TQ; scsi->flags = CTS_SCSI_FLAGS_TAG_ENB; fc->valid = CTS_FC_VALID_SPEED; fc->bitrate = 100000; fc->bitrate *= fcp->isp_gbspeed; if (tgt < MAX_FC_TARG) { fcportdb_t *lp = &fcp->portdb[tgt]; fc->wwnn = lp->node_wwn; fc->wwpn = lp->port_wwn; fc->port = lp->portid; fc->valid |= CTS_FC_VALID_WWNN | CTS_FC_VALID_WWPN | CTS_FC_VALID_PORT; } } else { struct ccb_trans_settings_scsi *scsi = &cts->proto_specific.scsi; struct ccb_trans_settings_spi *spi = &cts->xport_specific.spi; sdparam *sdp = SDPARAM(isp, bus); uint16_t dval, pval, oval; if (IS_CURRENT_SETTINGS(cts)) { sdp->isp_devparam[tgt].dev_refresh = 1; sdp->update = 1; (void) isp_control(isp, ISPCTL_UPDATE_PARAMS, bus); dval = sdp->isp_devparam[tgt].actv_flags; oval = sdp->isp_devparam[tgt].actv_offset; pval = sdp->isp_devparam[tgt].actv_period; } else { dval = sdp->isp_devparam[tgt].nvrm_flags; oval = sdp->isp_devparam[tgt].nvrm_offset; pval = sdp->isp_devparam[tgt].nvrm_period; } cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_2; cts->transport = XPORT_SPI; cts->transport_version = 2; spi->valid = 0; scsi->valid = 0; spi->flags = 0; scsi->flags = 0; if (dval & DPARM_DISC) { spi->flags |= CTS_SPI_FLAGS_DISC_ENB; } if ((dval & DPARM_SYNC) && oval && pval) { spi->sync_offset = oval; spi->sync_period = pval; } else { spi->sync_offset = 0; spi->sync_period = 0; } spi->valid |= CTS_SPI_VALID_SYNC_OFFSET; spi->valid |= CTS_SPI_VALID_SYNC_RATE; spi->valid |= CTS_SPI_VALID_BUS_WIDTH; if (dval & DPARM_WIDE) { spi->bus_width = MSG_EXT_WDTR_BUS_16_BIT; } else { spi->bus_width = MSG_EXT_WDTR_BUS_8_BIT; } if (cts->ccb_h.target_lun != CAM_LUN_WILDCARD) { scsi->valid = CTS_SCSI_VALID_TQ; if (dval & DPARM_TQING) { scsi->flags |= CTS_SCSI_FLAGS_TAG_ENB; } spi->valid |= CTS_SPI_VALID_DISC; } isp_prt(isp, ISP_LOGDEBUG0, "GET %s (%d.%d.%jx) to flags %x off %x per %x", IS_CURRENT_SETTINGS(cts)? 
"ACTIVE" : "NVRAM", bus, tgt, (uintmax_t)cts->ccb_h.target_lun, dval, oval, pval); } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; case XPT_CALC_GEOMETRY: cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); break; case XPT_RESET_BUS: /* Reset the specified bus */ bus = cam_sim_bus(sim); error = isp_control(isp, ISPCTL_RESET_BUS, bus); if (error) { ccb->ccb_h.status = CAM_REQ_CMP_ERR; xpt_done(ccb); break; } if (bootverbose) { xpt_print(ccb->ccb_h.path, "reset bus on channel %d\n", bus); } if (IS_FC(isp)) { xpt_async(AC_BUS_RESET, ISP_FC_PC(isp, bus)->path, 0); } else { xpt_async(AC_BUS_RESET, ISP_SPI_PC(isp, bus)->path, 0); } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; case XPT_TERM_IO: /* Terminate the I/O process */ ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); break; case XPT_SET_SIM_KNOB: /* Set SIM knobs */ { struct ccb_sim_knob *kp = &ccb->knob; fcparam *fcp; if (!IS_FC(isp)) { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); break; } bus = cam_sim_bus(xpt_path_sim(kp->ccb_h.path)); fcp = FCPARAM(isp, bus); if (kp->xport_specific.fc.valid & KNOB_VALID_ADDRESS) { fcp->isp_wwnn = ISP_FC_PC(isp, bus)->def_wwnn = kp->xport_specific.fc.wwnn; fcp->isp_wwpn = ISP_FC_PC(isp, bus)->def_wwpn = kp->xport_specific.fc.wwpn; isp_prt(isp, ISP_LOGALL, "Setting Channel %d wwns to 0x%jx 0x%jx", bus, fcp->isp_wwnn, fcp->isp_wwpn); } ccb->ccb_h.status = CAM_REQ_CMP; if (kp->xport_specific.fc.valid & KNOB_VALID_ROLE) { int rchange = 0; int newrole = 0; switch (kp->xport_specific.fc.role) { case KNOB_ROLE_NONE: if (fcp->role != ISP_ROLE_NONE) { rchange = 1; newrole = ISP_ROLE_NONE; } break; case KNOB_ROLE_TARGET: if (fcp->role != ISP_ROLE_TARGET) { rchange = 1; newrole = ISP_ROLE_TARGET; } break; case KNOB_ROLE_INITIATOR: if (fcp->role != ISP_ROLE_INITIATOR) { rchange = 1; newrole = ISP_ROLE_INITIATOR; } break; case KNOB_ROLE_BOTH: if (fcp->role != ISP_ROLE_BOTH) { rchange = 1; newrole = ISP_ROLE_BOTH; } break; } if (rchange) { ISP_PATH_PRT(isp, ISP_LOGCONFIG, ccb->ccb_h.path, "changing role on from %d to %d\n", fcp->role, newrole); #ifdef ISP_TARGET_MODE ISP_SET_PC(isp, bus, tm_enabled, 0); ISP_SET_PC(isp, bus, tm_luns_enabled, 0); #endif if (isp_control(isp, ISPCTL_CHANGE_ROLE, bus, newrole) != 0) { ccb->ccb_h.status = CAM_REQ_CMP_ERR; xpt_done(ccb); break; } #ifdef ISP_TARGET_MODE if (newrole == ISP_ROLE_TARGET || newrole == ISP_ROLE_BOTH) { /* * Give the new role a chance to complain and settle */ msleep(isp, &isp->isp_lock, PRIBIO, "taking a breather", 2); ccb->ccb_h.status = isp_enable_deferred_luns(isp, bus); } #endif } } xpt_done(ccb); break; } case XPT_GET_SIM_KNOB: /* Get SIM knobs */ { struct ccb_sim_knob *kp = &ccb->knob; if (IS_FC(isp)) { fcparam *fcp; bus = cam_sim_bus(xpt_path_sim(kp->ccb_h.path)); fcp = FCPARAM(isp, bus); kp->xport_specific.fc.wwnn = fcp->isp_wwnn; kp->xport_specific.fc.wwpn = fcp->isp_wwpn; switch (fcp->role) { case ISP_ROLE_NONE: kp->xport_specific.fc.role = KNOB_ROLE_NONE; break; case ISP_ROLE_TARGET: kp->xport_specific.fc.role = KNOB_ROLE_TARGET; break; case ISP_ROLE_INITIATOR: kp->xport_specific.fc.role = KNOB_ROLE_INITIATOR; break; case ISP_ROLE_BOTH: kp->xport_specific.fc.role = KNOB_ROLE_BOTH; break; } kp->xport_specific.fc.valid = KNOB_VALID_ADDRESS | KNOB_VALID_ROLE; ccb->ccb_h.status = CAM_REQ_CMP; } else { ccb->ccb_h.status = CAM_REQ_INVALID; } xpt_done(ccb); break; } case XPT_PATH_INQ: /* Path routing inquiry */ { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; #ifdef ISP_TARGET_MODE cpi->target_sprt = PIT_PROCESSOR | 
PIT_DISCONNECT | PIT_TERM_IO; #else cpi->target_sprt = 0; #endif cpi->hba_eng_cnt = 0; cpi->max_target = ISP_MAX_TARGETS(isp) - 1; cpi->max_lun = ISP_MAX_LUNS(isp) == 0 ? 255 : ISP_MAX_LUNS(isp) - 1; cpi->bus_id = cam_sim_bus(sim); if (isp->isp_osinfo.sixtyfourbit) cpi->maxio = (ISP_NSEG64_MAX - 1) * PAGE_SIZE; else cpi->maxio = (ISP_NSEG_MAX - 1) * PAGE_SIZE; bus = cam_sim_bus(xpt_path_sim(cpi->ccb_h.path)); if (IS_FC(isp)) { fcparam *fcp = FCPARAM(isp, bus); cpi->hba_misc = PIM_NOBUSRESET | PIM_UNMAPPED; #if __FreeBSD_version >= 1000700 cpi->hba_misc |= PIM_EXTLUNS; #endif #if __FreeBSD_version >= 1000039 cpi->hba_misc |= PIM_NOSCAN; #endif /* * Because our loop ID can shift from time to time, * make our initiator ID out of range of our bus. */ cpi->initiator_id = cpi->max_target + 1; /* * Set base transfer capabilities for Fibre Channel, for this HBA. */ if (IS_25XX(isp)) { cpi->base_transfer_speed = 8000000; } else if (IS_24XX(isp)) { cpi->base_transfer_speed = 4000000; } else if (IS_23XX(isp)) { cpi->base_transfer_speed = 2000000; } else { cpi->base_transfer_speed = 1000000; } cpi->hba_inquiry = PI_TAG_ABLE; cpi->transport = XPORT_FC; cpi->transport_version = 0; cpi->xport_specific.fc.wwnn = fcp->isp_wwnn; cpi->xport_specific.fc.wwpn = fcp->isp_wwpn; cpi->xport_specific.fc.port = fcp->isp_portid; cpi->xport_specific.fc.bitrate = fcp->isp_gbspeed * 1000; } else { sdparam *sdp = SDPARAM(isp, bus); cpi->hba_inquiry = PI_SDTR_ABLE|PI_TAG_ABLE|PI_WIDE_16; cpi->hba_misc = PIM_UNMAPPED; cpi->initiator_id = sdp->isp_initiator_id; cpi->base_transfer_speed = 3300; cpi->transport = XPORT_SPI; cpi->transport_version = 2; } cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_2; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, "Qlogic", HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; } default: ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); break; } } #define ISPDDB (CAM_DEBUG_INFO|CAM_DEBUG_TRACE|CAM_DEBUG_CDB) void isp_done(XS_T *sccb) { ispsoftc_t *isp = XS_ISP(sccb); uint32_t status; if (XS_NOERR(sccb)) XS_SETERR(sccb, CAM_REQ_CMP); if ((sccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP && (sccb->scsi_status != SCSI_STATUS_OK)) { sccb->ccb_h.status &= ~CAM_STATUS_MASK; if ((sccb->scsi_status == SCSI_STATUS_CHECK_COND) && (sccb->ccb_h.status & CAM_AUTOSNS_VALID) == 0) { sccb->ccb_h.status |= CAM_AUTOSENSE_FAIL; } else { sccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; } } sccb->ccb_h.status &= ~CAM_SIM_QUEUED; status = sccb->ccb_h.status & CAM_STATUS_MASK; if (status != CAM_REQ_CMP) { if (status != CAM_SEL_TIMEOUT) isp_prt(isp, ISP_LOGDEBUG0, "target %d lun %jx CAM status 0x%x SCSI status 0x%x", XS_TGT(sccb), (uintmax_t)XS_LUN(sccb), sccb->ccb_h.status, sccb->scsi_status); else if ((IS_FC(isp)) && (XS_TGT(sccb) < MAX_FC_TARG)) { fcparam *fcp; fcp = FCPARAM(isp, XS_CHANNEL(sccb)); fcp->portdb[XS_TGT(sccb)].is_target = 0; } if ((sccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { sccb->ccb_h.status |= CAM_DEV_QFRZN; xpt_freeze_devq(sccb->ccb_h.path, 1); } } if ((CAM_DEBUGGED(sccb->ccb_h.path, ISPDDB)) && (sccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { xpt_print(sccb->ccb_h.path, "cam completion status 0x%x\n", sccb->ccb_h.status); } if (ISP_PCMD(sccb)) { if (callout_active(&PISP_PCMD(sccb)->wdog)) callout_stop(&PISP_PCMD(sccb)->wdog); isp_free_pcmd(isp, (union ccb *) sccb); } xpt_done((union ccb *) sccb); } void isp_async(ispsoftc_t *isp, ispasync_t cmd, 
...) { int bus; static const char prom[] = "Chan %d [%d] WWPN 0x%16jx PortID 0x%06x handle 0x%x %s %s"; char buf[64]; char *msg = NULL; target_id_t tgt; fcportdb_t *lp; struct isp_fc *fc; struct cam_path *tmppath; struct ac_contract ac; struct ac_device_changed *adc; va_list ap; switch (cmd) { case ISPASYNC_NEW_TGT_PARAMS: { struct ccb_trans_settings_scsi *scsi; struct ccb_trans_settings_spi *spi; int flags, tgt; sdparam *sdp; struct ccb_trans_settings cts; memset(&cts, 0, sizeof (struct ccb_trans_settings)); va_start(ap, cmd); bus = va_arg(ap, int); tgt = va_arg(ap, int); va_end(ap); sdp = SDPARAM(isp, bus); if (xpt_create_path(&tmppath, NULL, cam_sim_path(ISP_SPI_PC(isp, bus)->sim), tgt, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { isp_prt(isp, ISP_LOGWARN, "isp_async cannot make temp path for %d.%d", tgt, bus); break; } flags = sdp->isp_devparam[tgt].actv_flags; cts.type = CTS_TYPE_CURRENT_SETTINGS; cts.protocol = PROTO_SCSI; cts.transport = XPORT_SPI; scsi = &cts.proto_specific.scsi; spi = &cts.xport_specific.spi; if (flags & DPARM_TQING) { scsi->valid |= CTS_SCSI_VALID_TQ; scsi->flags |= CTS_SCSI_FLAGS_TAG_ENB; } if (flags & DPARM_DISC) { spi->valid |= CTS_SPI_VALID_DISC; spi->flags |= CTS_SPI_FLAGS_DISC_ENB; } spi->flags |= CTS_SPI_VALID_BUS_WIDTH; if (flags & DPARM_WIDE) { spi->bus_width = MSG_EXT_WDTR_BUS_16_BIT; } else { spi->bus_width = MSG_EXT_WDTR_BUS_8_BIT; } if (flags & DPARM_SYNC) { spi->valid |= CTS_SPI_VALID_SYNC_RATE; spi->valid |= CTS_SPI_VALID_SYNC_OFFSET; spi->sync_period = sdp->isp_devparam[tgt].actv_period; spi->sync_offset = sdp->isp_devparam[tgt].actv_offset; } isp_prt(isp, ISP_LOGDEBUG2, "NEW_TGT_PARAMS bus %d tgt %d period %x offset %x flags %x", bus, tgt, sdp->isp_devparam[tgt].actv_period, sdp->isp_devparam[tgt].actv_offset, flags); xpt_setup_ccb(&cts.ccb_h, tmppath, 1); xpt_async(AC_TRANSFER_NEG, tmppath, &cts); xpt_free_path(tmppath); break; } case ISPASYNC_BUS_RESET: { va_start(ap, cmd); bus = va_arg(ap, int); va_end(ap); isp_prt(isp, ISP_LOGINFO, "SCSI bus reset on bus %d detected", bus); if (IS_FC(isp)) { xpt_async(AC_BUS_RESET, ISP_FC_PC(isp, bus)->path, NULL); } else { xpt_async(AC_BUS_RESET, ISP_SPI_PC(isp, bus)->path, NULL); } break; } case ISPASYNC_LIP: if (msg == NULL) msg = "LIP Received"; /* FALLTHROUGH */ case ISPASYNC_LOOP_RESET: if (msg == NULL) msg = "LOOP Reset"; /* FALLTHROUGH */ case ISPASYNC_LOOP_DOWN: { if (msg == NULL) msg = "LOOP Down"; va_start(ap, cmd); bus = va_arg(ap, int); va_end(ap); FCPARAM(isp, bus)->isp_linkstate = 0; fc = ISP_FC_PC(isp, bus); if (cmd == ISPASYNC_LOOP_DOWN && fc->ready) { /* * We don't do any simq freezing if we are only in target mode */ if (FCPARAM(isp, bus)->role & ISP_ROLE_INITIATOR) { if (fc->path) { isp_freeze_loopdown(isp, bus, msg); } } if (!callout_active(&fc->ldt)) { callout_reset(&fc->ldt, fc->loop_down_limit * hz, isp_ldt, fc); isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Starting Loop Down Timer @ %lu", (unsigned long) time_uptime); } } - isp_fcp_reset_crn(fc, /*tgt*/0, /*tgt_set*/ 0); + isp_fcp_reset_crn(isp, bus, /*tgt*/0, /*tgt_set*/ 0); isp_prt(isp, ISP_LOGINFO, "Chan %d: %s", bus, msg); break; } case ISPASYNC_LOOP_UP: va_start(ap, cmd); bus = va_arg(ap, int); va_end(ap); fc = ISP_FC_PC(isp, bus); /* * Now we just note that Loop has come up. We don't * actually do anything because we're waiting for a * Change Notify before activating the FC cleanup * thread to look at the state of the loop again. 
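 *
 * (Sequencing sketch: the ISPASYNC_CHANGE_NOTIFY handler below
 * cancels the loop down timer and wakeup()s the kthread, which then
 * calls isp_fc_runstate() to rescan the fabric.)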
*/ FCPARAM(isp, bus)->isp_linkstate = 1; fc->loop_dead = 0; fc->loop_down_time = 0; isp_prt(isp, ISP_LOGINFO, "Chan %d Loop UP", bus); break; case ISPASYNC_DEV_ARRIVED: va_start(ap, cmd); bus = va_arg(ap, int); lp = va_arg(ap, fcportdb_t *); va_end(ap); fc = ISP_FC_PC(isp, bus); tgt = FC_PORTDB_TGT(isp, bus, lp); isp_gen_role_str(buf, sizeof (buf), lp->prli_word3); isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "arrived"); if ((FCPARAM(isp, bus)->role & ISP_ROLE_INITIATOR) && (lp->prli_word3 & PRLI_WD3_TARGET_FUNCTION)) { lp->is_target = 1; - isp_fcp_reset_crn(fc, tgt, /*tgt_set*/ 1); + isp_fcp_reset_crn(isp, bus, tgt, /*tgt_set*/ 1); isp_make_here(isp, lp, bus, tgt); } if ((FCPARAM(isp, bus)->role & ISP_ROLE_TARGET) && (lp->prli_word3 & PRLI_WD3_INITIATOR_FUNCTION)) { lp->is_initiator = 1; ac.contract_number = AC_CONTRACT_DEV_CHG; adc = (struct ac_device_changed *) ac.contract_data; adc->wwpn = lp->port_wwn; adc->port = lp->portid; adc->target = tgt; adc->arrived = 1; xpt_async(AC_CONTRACT, fc->path, &ac); } break; case ISPASYNC_DEV_CHANGED: va_start(ap, cmd); bus = va_arg(ap, int); lp = va_arg(ap, fcportdb_t *); va_end(ap); fc = ISP_FC_PC(isp, bus); tgt = FC_PORTDB_TGT(isp, bus, lp); isp_gen_role_str(buf, sizeof (buf), lp->new_prli_word3); isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->new_portid, lp->handle, buf, "changed"); changed: if (lp->is_target != ((FCPARAM(isp, bus)->role & ISP_ROLE_INITIATOR) && (lp->new_prli_word3 & PRLI_WD3_TARGET_FUNCTION))) { lp->is_target = !lp->is_target; if (lp->is_target) { - isp_fcp_reset_crn(fc, tgt, /*tgt_set*/ 1); + isp_fcp_reset_crn(isp, bus, tgt, /*tgt_set*/ 1); isp_make_here(isp, lp, bus, tgt); } else { isp_make_gone(isp, lp, bus, tgt); - isp_fcp_reset_crn(fc, tgt, /*tgt_set*/ 1); + isp_fcp_reset_crn(isp, bus, tgt, /*tgt_set*/ 1); } } if (lp->is_initiator != ((FCPARAM(isp, bus)->role & ISP_ROLE_TARGET) && (lp->new_prli_word3 & PRLI_WD3_INITIATOR_FUNCTION))) { lp->is_initiator = !lp->is_initiator; ac.contract_number = AC_CONTRACT_DEV_CHG; adc = (struct ac_device_changed *) ac.contract_data; adc->wwpn = lp->port_wwn; adc->port = lp->portid; adc->target = tgt; adc->arrived = lp->is_initiator; xpt_async(AC_CONTRACT, fc->path, &ac); } break; case ISPASYNC_DEV_STAYED: va_start(ap, cmd); bus = va_arg(ap, int); lp = va_arg(ap, fcportdb_t *); va_end(ap); fc = ISP_FC_PC(isp, bus); tgt = FC_PORTDB_TGT(isp, bus, lp); isp_gen_role_str(buf, sizeof (buf), lp->prli_word3); isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "stayed"); goto changed; case ISPASYNC_DEV_GONE: va_start(ap, cmd); bus = va_arg(ap, int); lp = va_arg(ap, fcportdb_t *); va_end(ap); fc = ISP_FC_PC(isp, bus); tgt = FC_PORTDB_TGT(isp, bus, lp); /* * If this has a virtual target or initiator set the isp_gdt * timer running on it to delay its departure. 
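 *
 * (Illustrative timing: isp_gdt rearms once per second and
 * isp_gdt_task() decrements gone_timer on each pass, so a
 * gone_device_time of, say, 30 keeps the zombie entry around for
 * roughly 30 seconds before it is finally declared gone.)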
*/ isp_gen_role_str(buf, sizeof (buf), lp->prli_word3); if (lp->is_target || lp->is_initiator) { lp->state = FC_PORTDB_STATE_ZOMBIE; lp->gone_timer = fc->gone_device_time; isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "gone zombie"); if (fc->ready && !callout_active(&fc->gdt)) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Chan %d Starting Gone Device Timer with %u seconds time now %lu", bus, lp->gone_timer, (unsigned long)time_uptime); callout_reset(&fc->gdt, hz, isp_gdt, fc); } break; } isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "gone"); break; case ISPASYNC_CHANGE_NOTIFY: { char *msg; int evt, nphdl, nlstate, reason; va_start(ap, cmd); bus = va_arg(ap, int); evt = va_arg(ap, int); if (IS_24XX(isp) && evt == ISPASYNC_CHANGE_PDB) { nphdl = va_arg(ap, int); nlstate = va_arg(ap, int); reason = va_arg(ap, int); } else { nphdl = NIL_HANDLE; nlstate = reason = 0; } va_end(ap); fc = ISP_FC_PC(isp, bus); if (evt == ISPASYNC_CHANGE_PDB) { msg = "Port Database Changed"; } else if (evt == ISPASYNC_CHANGE_SNS) { msg = "Name Server Database Changed"; } else { msg = "Other Change Notify"; } /* * If the loop down timer is running, cancel it. */ if (fc->ready && callout_active(&fc->ldt)) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Stopping Loop Down Timer @ %lu", (unsigned long) time_uptime); callout_stop(&fc->ldt); } isp_prt(isp, ISP_LOGINFO, "Chan %d %s", bus, msg); if (FCPARAM(isp, bus)->role & ISP_ROLE_INITIATOR) { isp_freeze_loopdown(isp, bus, msg); } wakeup(fc); break; } #ifdef ISP_TARGET_MODE case ISPASYNC_TARGET_NOTIFY: { isp_notify_t *notify; va_start(ap, cmd); notify = va_arg(ap, isp_notify_t *); va_end(ap); switch (notify->nt_ncode) { case NT_ABORT_TASK: case NT_ABORT_TASK_SET: case NT_CLEAR_ACA: case NT_CLEAR_TASK_SET: case NT_LUN_RESET: case NT_TARGET_RESET: case NT_QUERY_TASK_SET: case NT_QUERY_ASYNC_EVENT: /* * These are task management functions. */ isp_handle_platform_target_tmf(isp, notify); break; case NT_BUS_RESET: case NT_LIP_RESET: case NT_LINK_UP: case NT_LINK_DOWN: case NT_HBA_RESET: /* * No action need be taken here. 
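 * (These link-level events are already surfaced through the
 * ISPASYNC_LOOP_* and ISPASYNC_BUS_RESET paths above, so the
 * target-mode notify handler can simply ignore them.)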
*/ break; case NT_GLOBAL_LOGOUT: case NT_LOGOUT: /* * This is device arrival/departure notification */ isp_handle_platform_target_notify_ack(isp, notify); break; default: isp_prt(isp, ISP_LOGALL, "target notify code 0x%x", notify->nt_ncode); isp_handle_platform_target_notify_ack(isp, notify); break; } break; } case ISPASYNC_TARGET_NOTIFY_ACK: { void *inot; va_start(ap, cmd); inot = va_arg(ap, void *); va_end(ap); if (isp_notify_ack(isp, inot)) { isp_tna_t *tp = malloc(sizeof (*tp), M_DEVBUF, M_NOWAIT); if (tp) { tp->isp = isp; if (inot) { memcpy(tp->data, inot, sizeof (tp->data)); tp->not = tp->data; } else { tp->not = NULL; } callout_init_mtx(&tp->timer, &isp->isp_lock, 0); callout_reset(&tp->timer, 5, isp_refire_notify_ack, tp); } else { isp_prt(isp, ISP_LOGERR, "you lose- cannot allocate a notify refire"); } } break; } case ISPASYNC_TARGET_ACTION: { isphdr_t *hp; va_start(ap, cmd); hp = va_arg(ap, isphdr_t *); va_end(ap); switch (hp->rqs_entry_type) { default: isp_prt(isp, ISP_LOGWARN, "%s: unhandled target action 0x%x", __func__, hp->rqs_entry_type); break; case RQSTYPE_NOTIFY: if (IS_SCSI(isp)) { isp_handle_platform_notify_scsi(isp, (in_entry_t *) hp); } else if (IS_24XX(isp)) { isp_handle_platform_notify_24xx(isp, (in_fcentry_24xx_t *) hp); } else { isp_handle_platform_notify_fc(isp, (in_fcentry_t *) hp); } break; case RQSTYPE_ATIO: if (IS_24XX(isp)) { isp_handle_platform_atio7(isp, (at7_entry_t *) hp); } else { isp_handle_platform_atio(isp, (at_entry_t *) hp); } break; case RQSTYPE_ATIO2: isp_handle_platform_atio2(isp, (at2_entry_t *) hp); break; case RQSTYPE_CTIO7: case RQSTYPE_CTIO3: case RQSTYPE_CTIO2: case RQSTYPE_CTIO: isp_handle_platform_ctio(isp, hp); break; case RQSTYPE_ABTS_RCVD: { abts_t *abts = (abts_t *)hp; isp_notify_t notify, *nt = ¬ify; tstate_t *tptr; fcportdb_t *lp; uint16_t chan; uint32_t sid, did; did = (abts->abts_did_hi << 16) | abts->abts_did_lo; sid = (abts->abts_sid_hi << 16) | abts->abts_sid_lo; ISP_MEMZERO(nt, sizeof (isp_notify_t)); nt->nt_hba = isp; nt->nt_did = did; nt->nt_nphdl = abts->abts_nphdl; nt->nt_sid = sid; isp_find_chan_by_did(isp, did, &chan); if (chan == ISP_NOCHAN) { nt->nt_tgt = TGT_ANY; } else { nt->nt_tgt = FCPARAM(isp, chan)->isp_wwpn; if (isp_find_pdb_by_handle(isp, chan, abts->abts_nphdl, &lp)) { nt->nt_wwn = lp->port_wwn; } else { nt->nt_wwn = INI_ANY; } } /* * Try hard to find the lun for this command. 
*/ tptr = get_lun_statep_from_tag(isp, chan, abts->abts_rxid_task); if (tptr) { nt->nt_lun = tptr->ts_lun; rls_lun_statep(isp, tptr); } else { nt->nt_lun = LUN_ANY; } nt->nt_need_ack = 1; nt->nt_tagval = abts->abts_rxid_task; nt->nt_tagval |= (((uint64_t) abts->abts_rxid_abts) << 32); if (abts->abts_rxid_task == ISP24XX_NO_TASK) { isp_prt(isp, ISP_LOGTINFO, "[0x%x] ABTS from N-Port handle 0x%x Port 0x%06x has no task id (rx_id 0x%04x ox_id 0x%04x)", abts->abts_rxid_abts, abts->abts_nphdl, sid, abts->abts_rx_id, abts->abts_ox_id); } else { isp_prt(isp, ISP_LOGTINFO, "[0x%x] ABTS from N-Port handle 0x%x Port 0x%06x for task 0x%x (rx_id 0x%04x ox_id 0x%04x)", abts->abts_rxid_abts, abts->abts_nphdl, sid, abts->abts_rxid_task, abts->abts_rx_id, abts->abts_ox_id); } nt->nt_channel = chan; nt->nt_ncode = NT_ABORT_TASK; nt->nt_lreserved = hp; isp_handle_platform_target_tmf(isp, nt); break; } case RQSTYPE_ENABLE_LUN: case RQSTYPE_MODIFY_LUN: isp_ledone(isp, (lun_entry_t *) hp); break; } break; } #endif case ISPASYNC_FW_CRASH: { uint16_t mbox1, mbox6; mbox1 = ISP_READ(isp, OUTMAILBOX1); if (IS_DUALBUS(isp)) { mbox6 = ISP_READ(isp, OUTMAILBOX6); } else { mbox6 = 0; } isp_prt(isp, ISP_LOGERR, "Internal Firmware Error on bus %d @ RISC Address 0x%x", mbox6, mbox1); mbox1 = isp->isp_osinfo.mbox_sleep_ok; isp->isp_osinfo.mbox_sleep_ok = 0; isp_reinit(isp, 1); isp->isp_osinfo.mbox_sleep_ok = mbox1; isp_async(isp, ISPASYNC_FW_RESTARTED, NULL); break; } default: isp_prt(isp, ISP_LOGERR, "unknown isp_async event %d", cmd); break; } } /* * Locks are held before coming here. */ void isp_uninit(ispsoftc_t *isp) { if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_RESET); } else { ISP_WRITE(isp, HCCR, HCCR_CMD_RESET); } ISP_DISABLE_INTS(isp); } /* * When we want to get the 'default' WWNs (when lacking NVRAM), we pick them * up from our platform default (defww{p|n}n) and morph them based upon * channel. * * When we want to get the 'active' WWNs, we get NVRAM WWNs and then morph them * based upon channel. */ uint64_t isp_default_wwn(ispsoftc_t * isp, int chan, int isactive, int iswwnn) { uint64_t seed; struct isp_fc *fc = ISP_FC_PC(isp, chan); /* * If we're asking for a active WWN, the default overrides get * returned, otherwise the NVRAM value is picked. * * If we're asking for a default WWN, we just pick the default override. */ if (isactive) { seed = iswwnn ? fc->def_wwnn : fc->def_wwpn; if (seed) { return (seed); } seed = iswwnn ? FCPARAM(isp, chan)->isp_wwnn_nvram : FCPARAM(isp, chan)->isp_wwpn_nvram; if (seed) { return (seed); } return (0x400000007F000009ull); } seed = iswwnn ? fc->def_wwnn : fc->def_wwpn; /* * For channel zero just return what we have. For either ACTIVE or * DEFAULT cases, we depend on default override of NVRAM values for * channel zero. */ if (chan == 0) { return (seed); } /* * For other channels, we are doing one of three things: * * 1. If what we have now is non-zero, return it. Otherwise we morph * values from channel 0. 2. If we're here for a WWPN we synthesize * it if Channel 0's wwpn has a type 2 NAA. 3. If we're here for a * WWNN we synthesize it if Channel 0's wwnn has a type 2 NAA. */ if (seed) { return (seed); } seed = iswwnn ? ISP_FC_PC(isp, 0)->def_wwnn : ISP_FC_PC(isp, 0)->def_wwpn; if (seed == 0) seed = iswwnn ? 
FCPARAM(isp, 0)->isp_wwnn_nvram : FCPARAM(isp, 0)->isp_wwpn_nvram; if (((seed >> 60) & 0xf) == 2) { /* * The type 2 NAA fields for QLogic cards appear to be laid out * thusly: * * bits 63..60 NAA == 2 bits 59..57 unused/zero bit 56 * port (1) or node (0) WWN distinguishor bit 48 * physical port on dual-port chips (23XX/24XX) * * This is somewhat nutty, particularly since bit 48 is * irrelevant as they assign separate serial numbers to * different physical ports anyway. * * We'll stick our channel number plus one first into bits * 57..59 and thence into bits 52..55 which allows for 8 bits * of channel which is comfortably more than our maximum * (126) now. */ seed &= ~0x0FF0000000000000ULL; if (iswwnn == 0) { seed |= ((uint64_t) (chan + 1) & 0xf) << 56; seed |= ((uint64_t) ((chan + 1) >> 4) & 0xf) << 52; } } else { seed = 0; } return (seed); } void isp_prt(ispsoftc_t *isp, int level, const char *fmt, ...) { int loc; char lbuf[200]; va_list ap; if (level != ISP_LOGALL && (level & isp->isp_dblev) == 0) { return; } snprintf(lbuf, sizeof (lbuf), "%s: ", device_get_nameunit(isp->isp_dev)); loc = strlen(lbuf); va_start(ap, fmt); vsnprintf(&lbuf[loc], sizeof (lbuf) - loc - 1, fmt, ap); va_end(ap); printf("%s\n", lbuf); } void isp_xs_prt(ispsoftc_t *isp, XS_T *xs, int level, const char *fmt, ...) { va_list ap; if (level != ISP_LOGALL && (level & isp->isp_dblev) == 0) { return; } xpt_print_path(xs->ccb_h.path); va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("\n"); } uint64_t isp_nanotime_sub(struct timespec *b, struct timespec *a) { uint64_t elapsed; struct timespec x = *b; timespecsub(&x, a); elapsed = GET_NANOSEC(&x); if (elapsed == 0) elapsed++; return (elapsed); } int isp_mbox_acquire(ispsoftc_t *isp) { if (isp->isp_osinfo.mboxbsy) { return (1); } else { isp->isp_osinfo.mboxcmd_done = 0; isp->isp_osinfo.mboxbsy = 1; return (0); } } void isp_mbox_wait_complete(ispsoftc_t *isp, mbreg_t *mbp) { unsigned int usecs = mbp->timeout; unsigned int max, olim, ilim; if (usecs == 0) { usecs = MBCMD_DEFAULT_TIMEOUT; } max = isp->isp_mbxwrk0 + 1; if (isp->isp_osinfo.mbox_sleep_ok) { unsigned int ms = (usecs + 999) / 1000; isp->isp_osinfo.mbox_sleep_ok = 0; isp->isp_osinfo.mbox_sleeping = 1; for (olim = 0; olim < max; olim++) { msleep(&isp->isp_mbxworkp, &isp->isp_osinfo.lock, PRIBIO, "ispmbx_sleep", isp_mstohz(ms)); if (isp->isp_osinfo.mboxcmd_done) { break; } } isp->isp_osinfo.mbox_sleep_ok = 1; isp->isp_osinfo.mbox_sleeping = 0; } else { for (olim = 0; olim < max; olim++) { for (ilim = 0; ilim < usecs; ilim += 100) { uint16_t isr, sema, info; if (isp->isp_osinfo.mboxcmd_done) { break; } if (ISP_READ_ISR(isp, &isr, &sema, &info)) { isp_intr(isp, isr, sema, info); if (isp->isp_osinfo.mboxcmd_done) { break; } } ISP_DELAY(100); } if (isp->isp_osinfo.mboxcmd_done) { break; } } } if (isp->isp_osinfo.mboxcmd_done == 0) { isp_prt(isp, ISP_LOGWARN, "%s Mailbox Command (0x%x) Timeout (%uus) (started @ %s:%d)", isp->isp_osinfo.mbox_sleep_ok? 
"Interrupting" : "Polled", isp->isp_lastmbxcmd, usecs, mbp->func, mbp->lineno); mbp->param[0] = MBOX_TIMEOUT; isp->isp_osinfo.mboxcmd_done = 1; } } void isp_mbox_notify_done(ispsoftc_t *isp) { if (isp->isp_osinfo.mbox_sleeping) { wakeup(&isp->isp_mbxworkp); } isp->isp_osinfo.mboxcmd_done = 1; } void isp_mbox_release(ispsoftc_t *isp) { isp->isp_osinfo.mboxbsy = 0; } int isp_fc_scratch_acquire(ispsoftc_t *isp, int chan) { int ret = 0; if (isp->isp_osinfo.pc.fc[chan].fcbsy) { ret = -1; } else { isp->isp_osinfo.pc.fc[chan].fcbsy = 1; } return (ret); } int isp_mstohz(int ms) { int hz; struct timeval t; t.tv_sec = ms / 1000; t.tv_usec = (ms % 1000) * 1000; hz = tvtohz(&t); if (hz < 0) { hz = 0x7fffffff; } if (hz == 0) { hz = 1; } return (hz); } void isp_platform_intr(void *arg) { ispsoftc_t *isp = arg; uint16_t isr, sema, info; ISP_LOCK(isp); isp->isp_intcnt++; if (ISP_READ_ISR(isp, &isr, &sema, &info)) isp_intr(isp, isr, sema, info); else isp->isp_intbogus++; ISP_UNLOCK(isp); } void isp_common_dmateardown(ispsoftc_t *isp, struct ccb_scsiio *csio, uint32_t hdl) { if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { bus_dmamap_sync(isp->isp_osinfo.dmat, PISP_PCMD(csio)->dmap, BUS_DMASYNC_POSTREAD); } else { bus_dmamap_sync(isp->isp_osinfo.dmat, PISP_PCMD(csio)->dmap, BUS_DMASYNC_POSTWRITE); } bus_dmamap_unload(isp->isp_osinfo.dmat, PISP_PCMD(csio)->dmap); } /* * Reset the command reference number for all LUNs on a specific target * (needed when a target arrives again) or for all targets on a port * (needed for events like a LIP). */ void -isp_fcp_reset_crn(struct isp_fc *fc, uint32_t tgt, int tgt_set) +isp_fcp_reset_crn(ispsoftc_t *isp, int chan, uint32_t tgt, int tgt_set) { - int i; + struct isp_fc *fc = ISP_FC_PC(isp, chan); struct isp_nexus *nxp; + int i; if (tgt_set == 0) - isp_prt(fc->isp, ISP_LOG_SANCFG, "resetting CRN on all targets"); + isp_prt(isp, ISP_LOGDEBUG0, + "Chan %d resetting CRN on all targets", chan); else - isp_prt(fc->isp, ISP_LOG_SANCFG, "resetting CRN target %u", tgt); + isp_prt(isp, ISP_LOGDEBUG0, + "Chan %d resetting CRN on target %u", chan, tgt); for (i = 0; i < NEXUS_HASH_WIDTH; i++) { - nxp = fc->nexus_hash[i]; - while (nxp) { - if ((tgt_set != 0) && (tgt == nxp->tgt)) + for (nxp = fc->nexus_hash[i]; nxp != NULL; nxp = nxp->next) { + if (tgt_set == 0 || tgt == nxp->tgt) nxp->crnseed = 0; - - nxp = nxp->next; } } } int isp_fcp_next_crn(ispsoftc_t *isp, uint8_t *crnp, XS_T *cmd) { lun_id_t lun; uint32_t chan, tgt; struct isp_fc *fc; struct isp_nexus *nxp; int idx; if (isp->isp_type < ISP_HA_FC_2300) return (0); chan = XS_CHANNEL(cmd); tgt = XS_TGT(cmd); lun = XS_LUN(cmd); fc = &isp->isp_osinfo.pc.fc[chan]; idx = NEXUS_HASH(tgt, lun); nxp = fc->nexus_hash[idx]; while (nxp) { if (nxp->tgt == tgt && nxp->lun == lun) break; nxp = nxp->next; } if (nxp == NULL) { nxp = fc->nexus_free_list; if (nxp == NULL) { nxp = malloc(sizeof (struct isp_nexus), M_DEVBUF, M_ZERO|M_NOWAIT); if (nxp == NULL) { return (-1); } } else { fc->nexus_free_list = nxp->next; } nxp->tgt = tgt; nxp->lun = lun; nxp->next = fc->nexus_hash[idx]; fc->nexus_hash[idx] = nxp; } - if (nxp) { - if (nxp->crnseed == 0) - nxp->crnseed = 1; - if (cmd) - PISP_PCMD(cmd)->crn = nxp->crnseed; - *crnp = nxp->crnseed++; - return (0); - } - return (-1); + if (nxp->crnseed == 0) + nxp->crnseed = 1; + PISP_PCMD(cmd)->crn = nxp->crnseed; + *crnp = nxp->crnseed++; + return (0); } /* * We enter with the lock held */ void isp_timer(void *arg) { ispsoftc_t *isp = arg; #ifdef ISP_TARGET_MODE isp_tmcmd_restart(isp); #endif 
callout_reset(&isp->isp_osinfo.tmo, isp_timer_count, isp_timer, isp); } isp_ecmd_t * isp_get_ecmd(ispsoftc_t *isp) { isp_ecmd_t *ecmd = isp->isp_osinfo.ecmd_free; if (ecmd) { isp->isp_osinfo.ecmd_free = ecmd->next; } return (ecmd); } void isp_put_ecmd(ispsoftc_t *isp, isp_ecmd_t *ecmd) { ecmd->next = isp->isp_osinfo.ecmd_free; isp->isp_osinfo.ecmd_free = ecmd; } Index: projects/powernv/dev/isp/isp_freebsd.h =================================================================== --- projects/powernv/dev/isp/isp_freebsd.h (revision 290990) +++ projects/powernv/dev/isp/isp_freebsd.h (revision 290991) @@ -1,779 +1,779 @@ /* $FreeBSD$ */ /*- * Qlogic ISP SCSI Host Adapter FreeBSD Wrapper Definitions * * Copyright (c) 1997-2008 by Matthew Jacob * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _ISP_FREEBSD_H #define _ISP_FREEBSD_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_ddb.h" #include "opt_isp.h" #define ISP_PLATFORM_VERSION_MAJOR 7 #define ISP_PLATFORM_VERSION_MINOR 10 /* * Efficiency- get rid of SBus code && tests unless we need them. 
*/ #ifdef __sparc64__ #define ISP_SBUS_SUPPORTED 1 #else #define ISP_SBUS_SUPPORTED 0 #endif #define ISP_IFLAGS INTR_TYPE_CAM | INTR_ENTROPY | INTR_MPSAFE #define N_XCMDS 64 #define XCMD_SIZE 512 struct ispsoftc; typedef union isp_ecmd { union isp_ecmd * next; uint8_t data[XCMD_SIZE]; } isp_ecmd_t; isp_ecmd_t * isp_get_ecmd(struct ispsoftc *); void isp_put_ecmd(struct ispsoftc *, isp_ecmd_t *); #ifdef ISP_TARGET_MODE /* Not quite right, but there was no bump for this change */ #if __FreeBSD_version < 225469 #define SDFIXED(x) (&x) #else #define SDFIXED(x) ((struct scsi_sense_data_fixed *)(&x)) #endif #define ISP_TARGET_FUNCTIONS 1 #define ATPDPSIZE 4096 #define ATPDPHASHSIZE 32 #define ATPDPHASH(x) ((((x) >> 24) ^ ((x) >> 16) ^ ((x) >> 8) ^ (x)) & \ ((ATPDPHASHSIZE) - 1)) #include typedef struct atio_private_data { LIST_ENTRY(atio_private_data) next; uint32_t orig_datalen; uint32_t bytes_xfered; uint32_t bytes_in_transit; uint32_t tag; /* typically f/w RX_ID */ uint32_t lun; uint32_t nphdl; uint32_t sid; uint32_t portid; uint16_t rxid; /* wire rxid */ uint16_t oxid; /* wire oxid */ uint16_t word3; /* PRLI word3 params */ uint16_t ctcnt; /* number of CTIOs currently active */ uint8_t seqno; /* CTIO sequence number */ uint32_t srr_notify_rcvd : 1, cdb0 : 8, sendst : 1, dead : 1, tattr : 3, state : 3; void * ests; /* * The current SRR notify copy */ uint8_t srr[64]; /* sb QENTRY_LEN, but order of definitions is wrong */ void * srr_ccb; uint32_t nsrr; } atio_private_data_t; #define ATPD_STATE_FREE 0 #define ATPD_STATE_ATIO 1 #define ATPD_STATE_CAM 2 #define ATPD_STATE_CTIO 3 #define ATPD_STATE_LAST_CTIO 4 #define ATPD_STATE_PDON 5 #define ATPD_CCB_OUTSTANDING 16 #define ATPD_SEQ_MASK 0x7f #define ATPD_SEQ_NOTIFY_CAM 0x80 #define ATPD_SET_SEQNO(hdrp, atp) ((isphdr_t *)hdrp)->rqs_seqno &= ~ATPD_SEQ_MASK, ((isphdr_t *)hdrp)->rqs_seqno |= (atp)->seqno #define ATPD_GET_SEQNO(hdrp) (((isphdr_t *)hdrp)->rqs_seqno & ATPD_SEQ_MASK) #define ATPD_GET_NCAM(hdrp) ((((isphdr_t *)hdrp)->rqs_seqno & ATPD_SEQ_NOTIFY_CAM) != 0) typedef union inot_private_data inot_private_data_t; union inot_private_data { inot_private_data_t *next; struct { isp_notify_t nt; /* must be first! */ uint8_t data[64]; /* sb QENTRY_LEN, but order of definitions is wrong */ uint32_t tag_id, seq_id; } rd; }; typedef struct isp_timed_notify_ack { void *isp; void *not; uint8_t data[64]; /* sb QENTRY_LEN, but order of definitions is wrong */ struct callout timer; } isp_tna_t; TAILQ_HEAD(isp_ccbq, ccb_hdr); typedef struct tstate { SLIST_ENTRY(tstate) next; lun_id_t ts_lun; struct cam_path *owner; struct isp_ccbq waitq; /* waiting CCBs */ struct ccb_hdr_slist atios; struct ccb_hdr_slist inots; uint32_t hold; uint32_t enabled : 1, atio_count : 15, inot_count : 15; inot_private_data_t * restart_queue; inot_private_data_t * ntfree; inot_private_data_t ntpool[ATPDPSIZE]; LIST_HEAD(, atio_private_data) atfree; LIST_HEAD(, atio_private_data) atused[ATPDPHASHSIZE]; atio_private_data_t atpool[ATPDPSIZE]; } tstate_t; #define LUN_HASH_SIZE 32 #define LUN_HASH_FUNC(lun) ((lun) & (LUN_HASH_SIZE - 1)) #endif /* * Per command info. 
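 * One isp_pcmd is attached to each active CCB through ISP_PCMD();
 * entries are recycled off the pcmd_free list by
 * isp_get_pcmd()/isp_free_pcmd().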
*/ struct isp_pcmd { struct isp_pcmd * next; bus_dmamap_t dmap; /* dma map for this command */ struct ispsoftc * isp; /* containing isp */ struct callout wdog; /* watchdog timer */ uint32_t datalen; /* data length for this command (target mode only) */ uint8_t totslen; /* sense length on status response */ uint8_t cumslen; /* sense length on status response */ uint8_t crn; /* command reference number */ }; #define ISP_PCMD(ccb) (ccb)->ccb_h.spriv_ptr1 #define PISP_PCMD(ccb) ((struct isp_pcmd *)ISP_PCMD(ccb)) /* * Per nexus info. */ struct isp_nexus { uint64_t lun; /* LUN for target */ uint32_t tgt; /* TGT for target */ uint8_t crnseed; /* next command reference number */ struct isp_nexus *next; }; #define NEXUS_HASH_WIDTH 32 #define INITIAL_NEXUS_COUNT MAX_FC_TARG #define NEXUS_HASH(tgt, lun) ((tgt + lun) % NEXUS_HASH_WIDTH) /* * Per channel information */ SLIST_HEAD(tslist, tstate); struct isp_fc { struct cam_sim *sim; struct cam_path *path; struct ispsoftc *isp; struct proc *kproc; bus_dma_tag_t tdmat; bus_dmamap_t tdmap; uint64_t def_wwpn; uint64_t def_wwnn; uint32_t loop_down_time; uint32_t loop_down_limit; uint32_t gone_device_time; /* * Per target/lun info- just to keep a per-ITL nexus crn count */ struct isp_nexus *nexus_hash[NEXUS_HASH_WIDTH]; struct isp_nexus *nexus_free_list; uint32_t #ifdef ISP_TARGET_MODE tm_luns_enabled : 1, tm_enable_defer : 1, tm_enabled : 1, #endif simqfrozen : 3, default_id : 8, hysteresis : 8, def_role : 2, /* default role */ gdt_running : 1, loop_dead : 1, fcbsy : 1, ready : 1; struct callout ldt; /* loop down timer */ struct callout gdt; /* gone device timer */ struct task ltask; struct task gtask; #ifdef ISP_TARGET_MODE struct tslist lun_hash[LUN_HASH_SIZE]; #if defined(DEBUG) unsigned int inject_lost_data_frame; #endif #endif int num_threads; }; struct isp_spi { struct cam_sim *sim; struct cam_path *path; uint32_t #ifdef ISP_TARGET_MODE tm_luns_enabled : 1, tm_enable_defer : 1, tm_enabled : 1, #endif simqfrozen : 3, def_role : 2, iid : 4; #ifdef ISP_TARGET_MODE struct tslist lun_hash[LUN_HASH_SIZE]; #endif int num_threads; }; struct isposinfo { /* * Linkage, locking, and identity */ struct mtx lock; device_t dev; struct cdev * cdev; struct intr_config_hook ehook; struct cam_devq * devq; /* * Firmware pointer */ const struct firmware * fw; /* * DMA related stuff */ bus_space_tag_t bus_tag; bus_dma_tag_t dmat; bus_space_handle_t bus_handle; bus_dma_tag_t cdmat; bus_dmamap_t cdmap; /* * Command and transaction related stuff */ struct isp_pcmd * pcmd_pool; struct isp_pcmd * pcmd_free; uint32_t #ifdef ISP_TARGET_MODE tmwanted : 1, tmbusy : 1, #else : 2, #endif sixtyfourbit : 1, /* sixtyfour bit platform */ timer_active : 1, autoconf : 1, ehook_active : 1, disabled : 1, mbox_sleeping : 1, mbox_sleep_ok : 1, mboxcmd_done : 1, mboxbsy : 1; struct callout tmo; /* general timer */ /* * misc- needs to be sorted better XXXXXX */ int framesize; int exec_throttle; int cont_max; #ifdef ISP_TARGET_MODE cam_status * rptr; #endif bus_addr_t ecmd_dma; isp_ecmd_t * ecmd_base; isp_ecmd_t * ecmd_free; /* * Per-type private storage... 
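 * pc.fc and pc.spi point at per-channel arrays, so the ISP_FC_PC()
 * and ISP_SPI_PC() accessors below simply index by channel number.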
*/ union { struct isp_fc *fc; struct isp_spi *spi; void *ptr; } pc; int is_exiting; }; #define ISP_FC_PC(isp, chan) (&(isp)->isp_osinfo.pc.fc[(chan)]) #define ISP_SPI_PC(isp, chan) (&(isp)->isp_osinfo.pc.spi[(chan)]) #define ISP_GET_PC(isp, chan, tag, rslt) \ if (IS_SCSI(isp)) { \ rslt = ISP_SPI_PC(isp, chan)-> tag; \ } else { \ rslt = ISP_FC_PC(isp, chan)-> tag; \ } #define ISP_GET_PC_ADDR(isp, chan, tag, rp) \ if (IS_SCSI(isp)) { \ rp = &ISP_SPI_PC(isp, chan)-> tag; \ } else { \ rp = &ISP_FC_PC(isp, chan)-> tag; \ } #define ISP_SET_PC(isp, chan, tag, val) \ if (IS_SCSI(isp)) { \ ISP_SPI_PC(isp, chan)-> tag = val; \ } else { \ ISP_FC_PC(isp, chan)-> tag = val; \ } #define FCP_NEXT_CRN isp_fcp_next_crn #define isp_lock isp_osinfo.lock #define isp_bus_tag isp_osinfo.bus_tag #define isp_bus_handle isp_osinfo.bus_handle /* * Locking macros... */ #define ISP_LOCK(isp) mtx_lock(&(isp)->isp_osinfo.lock) #define ISP_UNLOCK(isp) mtx_unlock(&(isp)->isp_osinfo.lock) #define ISP_ASSERT_LOCKED(isp) mtx_assert(&(isp)->isp_osinfo.lock, MA_OWNED) /* * Required Macros/Defines */ #define ISP_FC_SCRLEN 0x1000 #define ISP_MEMZERO(a, b) memset(a, 0, b) #define ISP_MEMCPY memcpy #define ISP_SNPRINTF snprintf #define ISP_DELAY(x) DELAY(x) #if __FreeBSD_version < 1000029 #define ISP_SLEEP(isp, x) msleep(&(isp)->isp_osinfo.is_exiting, \ &(isp)->isp_osinfo.lock, 0, "isp_sleep", ((x) + tick - 1) / tick) #else #define ISP_SLEEP(isp, x) msleep_sbt(&(isp)->isp_osinfo.is_exiting, \ &(isp)->isp_osinfo.lock, 0, "isp_sleep", (x) * SBT_1US, 0, 0) #endif #define ISP_MIN imin #ifndef DIAGNOSTIC #define ISP_INLINE __inline #else #define ISP_INLINE #endif #define NANOTIME_T struct timespec #define GET_NANOTIME nanotime #define GET_NANOSEC(x) ((x)->tv_sec * 1000000000 + (x)->tv_nsec) #define NANOTIME_SUB isp_nanotime_sub #define MAXISPREQUEST(isp) ((IS_FC(isp) || IS_ULTRA2(isp))? 
1024 : 256) #define MEMORYBARRIER(isp, type, offset, size, chan) \ switch (type) { \ case SYNC_SFORDEV: \ { \ struct isp_fc *fc = ISP_FC_PC(isp, chan); \ bus_dmamap_sync(fc->tdmat, fc->tdmap, \ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); \ break; \ } \ case SYNC_REQUEST: \ bus_dmamap_sync(isp->isp_osinfo.cdmat, \ isp->isp_osinfo.cdmap, \ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); \ break; \ case SYNC_SFORCPU: \ { \ struct isp_fc *fc = ISP_FC_PC(isp, chan); \ bus_dmamap_sync(fc->tdmat, fc->tdmap, \ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); \ break; \ } \ case SYNC_RESULT: \ bus_dmamap_sync(isp->isp_osinfo.cdmat, \ isp->isp_osinfo.cdmap, \ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); \ break; \ case SYNC_REG: \ bus_space_barrier(isp->isp_osinfo.bus_tag, \ isp->isp_osinfo.bus_handle, offset, size, \ BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); \ break; \ default: \ break; \ } #define MEMORYBARRIERW(isp, type, offset, size, chan) \ switch (type) { \ case SYNC_SFORDEV: \ { \ struct isp_fc *fc = ISP_FC_PC(isp, chan); \ bus_dmamap_sync(fc->tdmat, fc->tdmap, \ BUS_DMASYNC_PREWRITE); \ break; \ } \ case SYNC_REQUEST: \ bus_dmamap_sync(isp->isp_osinfo.cdmat, \ isp->isp_osinfo.cdmap, BUS_DMASYNC_PREWRITE); \ break; \ case SYNC_SFORCPU: \ { \ struct isp_fc *fc = ISP_FC_PC(isp, chan); \ bus_dmamap_sync(fc->tdmat, fc->tdmap, \ BUS_DMASYNC_POSTWRITE); \ break; \ } \ case SYNC_RESULT: \ bus_dmamap_sync(isp->isp_osinfo.cdmat, \ isp->isp_osinfo.cdmap, BUS_DMASYNC_POSTWRITE); \ break; \ case SYNC_REG: \ bus_space_barrier(isp->isp_osinfo.bus_tag, \ isp->isp_osinfo.bus_handle, offset, size, \ BUS_SPACE_BARRIER_WRITE); \ break; \ default: \ break; \ } #define MBOX_ACQUIRE isp_mbox_acquire #define MBOX_WAIT_COMPLETE isp_mbox_wait_complete #define MBOX_NOTIFY_COMPLETE isp_mbox_notify_done #define MBOX_RELEASE isp_mbox_release #define FC_SCRATCH_ACQUIRE isp_fc_scratch_acquire #define FC_SCRATCH_RELEASE(isp, chan) isp->isp_osinfo.pc.fc[chan].fcbsy = 0 #ifndef SCSI_GOOD #define SCSI_GOOD SCSI_STATUS_OK #endif #ifndef SCSI_CHECK #define SCSI_CHECK SCSI_STATUS_CHECK_COND #endif #ifndef SCSI_BUSY #define SCSI_BUSY SCSI_STATUS_BUSY #endif #ifndef SCSI_QFULL #define SCSI_QFULL SCSI_STATUS_QUEUE_FULL #endif #define XS_T struct ccb_scsiio #define XS_DMA_ADDR_T bus_addr_t #define XS_GET_DMA64_SEG(a, b, c) \ { \ ispds64_t *d = a; \ bus_dma_segment_t *e = b; \ uint32_t f = c; \ e += f; \ d->ds_base = DMA_LO32(e->ds_addr); \ d->ds_basehi = DMA_HI32(e->ds_addr); \ d->ds_count = e->ds_len; \ } #define XS_GET_DMA_SEG(a, b, c) \ { \ ispds_t *d = a; \ bus_dma_segment_t *e = b; \ uint32_t f = c; \ e += f; \ d->ds_base = DMA_LO32(e->ds_addr); \ d->ds_count = e->ds_len; \ } #define XS_ISP(ccb) cam_sim_softc(xpt_path_sim((ccb)->ccb_h.path)) #define XS_CHANNEL(ccb) cam_sim_bus(xpt_path_sim((ccb)->ccb_h.path)) #define XS_TGT(ccb) (ccb)->ccb_h.target_id #define XS_LUN(ccb) (ccb)->ccb_h.target_lun #define XS_CDBP(ccb) \ (((ccb)->ccb_h.flags & CAM_CDB_POINTER)? 
\ (ccb)->cdb_io.cdb_ptr : (ccb)->cdb_io.cdb_bytes) #define XS_CDBLEN(ccb) (ccb)->cdb_len #define XS_XFRLEN(ccb) (ccb)->dxfer_len #define XS_TIME(ccb) (ccb)->ccb_h.timeout #define XS_GET_RESID(ccb) (ccb)->resid #define XS_SET_RESID(ccb, r) (ccb)->resid = r #define XS_STSP(ccb) (&(ccb)->scsi_status) #define XS_SNSP(ccb) (&(ccb)->sense_data) #define XS_TOT_SNSLEN(ccb) ccb->sense_len #define XS_CUR_SNSLEN(ccb) (ccb->sense_len - ccb->sense_resid) #define XS_SNSKEY(ccb) (scsi_get_sense_key(&(ccb)->sense_data, \ ccb->sense_len - ccb->sense_resid, 1)) #define XS_SNSASC(ccb) (scsi_get_asc(&(ccb)->sense_data, \ ccb->sense_len - ccb->sense_resid, 1)) #define XS_SNSASCQ(ccb) (scsi_get_ascq(&(ccb)->sense_data, \ ccb->sense_len - ccb->sense_resid, 1)) #define XS_TAG_P(ccb) \ (((ccb)->ccb_h.flags & CAM_TAG_ACTION_VALID) && \ (ccb)->tag_action != CAM_TAG_ACTION_NONE) #define XS_TAG_TYPE(ccb) \ ((ccb->tag_action == MSG_SIMPLE_Q_TAG)? REQFLAG_STAG : \ ((ccb->tag_action == MSG_HEAD_OF_Q_TAG)? REQFLAG_HTAG : REQFLAG_OTAG)) #define XS_SETERR(ccb, v) (ccb)->ccb_h.status &= ~CAM_STATUS_MASK, \ (ccb)->ccb_h.status |= v # define HBA_NOERROR CAM_REQ_INPROG # define HBA_BOTCH CAM_UNREC_HBA_ERROR # define HBA_CMDTIMEOUT CAM_CMD_TIMEOUT # define HBA_SELTIMEOUT CAM_SEL_TIMEOUT # define HBA_TGTBSY CAM_SCSI_STATUS_ERROR # define HBA_BUSRESET CAM_SCSI_BUS_RESET # define HBA_ABORTED CAM_REQ_ABORTED # define HBA_DATAOVR CAM_DATA_RUN_ERR # define HBA_ARQFAIL CAM_AUTOSENSE_FAIL #define XS_ERR(ccb) ((ccb)->ccb_h.status & CAM_STATUS_MASK) #define XS_NOERR(ccb) (((ccb)->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_INPROG) #define XS_INITERR(ccb) XS_SETERR(ccb, CAM_REQ_INPROG), ccb->sense_resid = ccb->sense_len #define XS_SAVE_SENSE(xs, sense_ptr, totslen, slen) do { \ uint32_t tlen = slen; \ if (tlen > (xs)->sense_len) \ tlen = (xs)->sense_len; \ PISP_PCMD(xs)->totslen = imin((xs)->sense_len, totslen); \ PISP_PCMD(xs)->cumslen = tlen; \ memcpy(&(xs)->sense_data, sense_ptr, tlen); \ (xs)->sense_resid = (xs)->sense_len - tlen; \ (xs)->ccb_h.status |= CAM_AUTOSNS_VALID; \ } while (0) #define XS_SENSE_APPEND(xs, xsnsp, xsnsl) do { \ uint32_t off = PISP_PCMD(xs)->cumslen; \ uint8_t *ptr = &((uint8_t *)(&(xs)->sense_data))[off]; \ uint32_t amt = imin(xsnsl, PISP_PCMD(xs)->totslen - off); \ if (amt) { \ memcpy(ptr, xsnsp, amt); \ (xs)->sense_resid -= amt; \ PISP_PCMD(xs)->cumslen += amt; \ } \ } while (0) #define XS_SENSE_VALID(xs) (((xs)->ccb_h.status & CAM_AUTOSNS_VALID) != 0) #define DEFAULT_FRAMESIZE(isp) isp->isp_osinfo.framesize #define DEFAULT_EXEC_THROTTLE(isp) isp->isp_osinfo.exec_throttle #define GET_DEFAULT_ROLE(isp, chan) \ (IS_FC(isp)? ISP_FC_PC(isp, chan)->def_role : ISP_SPI_PC(isp, chan)->def_role) #define SET_DEFAULT_ROLE(isp, chan, val) \ if (IS_FC(isp)) { \ ISP_FC_PC(isp, chan)->def_role = val; \ } else { \ ISP_SPI_PC(isp, chan)->def_role = val; \ } #define DEFAULT_IID(isp, chan) isp->isp_osinfo.pc.spi[chan].iid #define DEFAULT_LOOPID(x, chan) isp->isp_osinfo.pc.fc[chan].default_id #define DEFAULT_NODEWWN(isp, chan) isp_default_wwn(isp, chan, 0, 1) #define DEFAULT_PORTWWN(isp, chan) isp_default_wwn(isp, chan, 0, 0) #define ACTIVE_NODEWWN(isp, chan) isp_default_wwn(isp, chan, 1, 1) #define ACTIVE_PORTWWN(isp, chan) isp_default_wwn(isp, chan, 1, 0) #if BYTE_ORDER == BIG_ENDIAN #ifdef ISP_SBUS_SUPPORTED #define ISP_IOXPUT_8(isp, s, d) *(d) = s #define ISP_IOXPUT_16(isp, s, d) \ *(d) = (isp->isp_bustype == ISP_BT_SBUS)? s : bswap16(s) #define ISP_IOXPUT_32(isp, s, d) \ *(d) = (isp->isp_bustype == ISP_BT_SBUS)? 
s : bswap32(s) #define ISP_IOXGET_8(isp, s, d) d = (*((uint8_t *)s)) #define ISP_IOXGET_16(isp, s, d) \ d = (isp->isp_bustype == ISP_BT_SBUS)? \ *((uint16_t *)s) : bswap16(*((uint16_t *)s)) #define ISP_IOXGET_32(isp, s, d) \ d = (isp->isp_bustype == ISP_BT_SBUS)? \ *((uint32_t *)s) : bswap32(*((uint32_t *)s)) #else /* ISP_SBUS_SUPPORTED */ #define ISP_IOXPUT_8(isp, s, d) *(d) = s #define ISP_IOXPUT_16(isp, s, d) *(d) = bswap16(s) #define ISP_IOXPUT_32(isp, s, d) *(d) = bswap32(s) #define ISP_IOXGET_8(isp, s, d) d = (*((uint8_t *)s)) #define ISP_IOXGET_16(isp, s, d) d = bswap16(*((uint16_t *)s)) #define ISP_IOXGET_32(isp, s, d) d = bswap32(*((uint32_t *)s)) #endif #define ISP_SWIZZLE_NVRAM_WORD(isp, rp) *rp = bswap16(*rp) #define ISP_SWIZZLE_NVRAM_LONG(isp, rp) *rp = bswap32(*rp) #define ISP_IOZGET_8(isp, s, d) d = (*((uint8_t *)s)) #define ISP_IOZGET_16(isp, s, d) d = (*((uint16_t *)s)) #define ISP_IOZGET_32(isp, s, d) d = (*((uint32_t *)s)) #define ISP_IOZPUT_8(isp, s, d) *(d) = s #define ISP_IOZPUT_16(isp, s, d) *(d) = s #define ISP_IOZPUT_32(isp, s, d) *(d) = s #else #define ISP_IOXPUT_8(isp, s, d) *(d) = s #define ISP_IOXPUT_16(isp, s, d) *(d) = s #define ISP_IOXPUT_32(isp, s, d) *(d) = s #define ISP_IOXGET_8(isp, s, d) d = *(s) #define ISP_IOXGET_16(isp, s, d) d = *(s) #define ISP_IOXGET_32(isp, s, d) d = *(s) #define ISP_SWIZZLE_NVRAM_WORD(isp, rp) #define ISP_SWIZZLE_NVRAM_LONG(isp, rp) #define ISP_IOZPUT_8(isp, s, d) *(d) = s #define ISP_IOZPUT_16(isp, s, d) *(d) = bswap16(s) #define ISP_IOZPUT_32(isp, s, d) *(d) = bswap32(s) #define ISP_IOZGET_8(isp, s, d) d = (*((uint8_t *)(s))) #define ISP_IOZGET_16(isp, s, d) d = bswap16(*((uint16_t *)(s))) #define ISP_IOZGET_32(isp, s, d) d = bswap32(*((uint32_t *)(s))) #endif #define ISP_SWAP16(isp, s) bswap16(s) #define ISP_SWAP32(isp, s) bswap32(s) /* * Includes of common header files */ #include #include #include /* * isp_osinfo definitions && shorthand */ #define SIMQFRZ_RESOURCE 0x1 #define SIMQFRZ_LOOPDOWN 0x2 #define SIMQFRZ_TIMED 0x4 #define isp_dev isp_osinfo.dev /* * prototypes for isp_pci && isp_freebsd to share */ extern int isp_attach(ispsoftc_t *); extern int isp_detach(ispsoftc_t *); extern void isp_uninit(ispsoftc_t *); extern uint64_t isp_default_wwn(ispsoftc_t *, int, int, int); /* * driver global data */ extern int isp_announced; extern int isp_fabric_hysteresis; extern int isp_loop_down_limit; extern int isp_gone_device_time; extern int isp_quickboot_time; /* * Platform private flags */ /* * Platform Library Functions */ void isp_prt(ispsoftc_t *, int level, const char *, ...) __printflike(3, 4); void isp_xs_prt(ispsoftc_t *, XS_T *, int level, const char *, ...)
__printflike(4, 5); uint64_t isp_nanotime_sub(struct timespec *, struct timespec *); int isp_mbox_acquire(ispsoftc_t *); void isp_mbox_wait_complete(ispsoftc_t *, mbreg_t *); void isp_mbox_notify_done(ispsoftc_t *); void isp_mbox_release(ispsoftc_t *); int isp_fc_scratch_acquire(ispsoftc_t *, int); int isp_mstohz(int); void isp_platform_intr(void *); void isp_common_dmateardown(ispsoftc_t *, struct ccb_scsiio *, uint32_t); -void isp_fcp_reset_crn(struct isp_fc *, uint32_t, int); +void isp_fcp_reset_crn(ispsoftc_t *, int, uint32_t, int); int isp_fcp_next_crn(ispsoftc_t *, uint8_t *, XS_T *); /* * Platform Version specific defines */ #define BUS_DMA_ROOTARG(x) bus_get_dma_tag(x) #define isp_dma_tag_create(a, b, c, d, e, f, g, h, i, j, k, z) \ bus_dma_tag_create(a, b, c, d, e, f, g, h, i, j, k, \ busdma_lock_mutex, &isp->isp_osinfo.lock, z) #define isp_setup_intr bus_setup_intr #define isp_sim_alloc(a, b, c, d, e, f, g, h) \ cam_sim_alloc(a, b, c, d, e, &(d)->isp_osinfo.lock, f, g, h) #define ISP_PATH_PRT(i, l, p, ...) \ if ((l) == ISP_LOGALL || ((l)& (i)->isp_dblev) != 0) { \ xpt_print(p, __VA_ARGS__); \ } /* * Platform specific inline functions */ /* * ISP General Library functions */ #include #endif /* _ISP_FREEBSD_H */ Index: projects/powernv/dev/isp/isp_library.c =================================================================== --- projects/powernv/dev/isp/isp_library.c (revision 290990) +++ projects/powernv/dev/isp/isp_library.c (revision 290991) @@ -1,4016 +1,4010 @@ /*- * Copyright (c) 1997-2009 by Matthew Jacob * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Qlogic Host Adapter Internal Library Functions */ #ifdef __NetBSD__ #include __KERNEL_RCSID(0, "$NetBSD$"); #include #endif #ifdef __FreeBSD__ #include __FBSDID("$FreeBSD$"); #include #endif #ifdef __OpenBSD__ #include #endif #ifdef __linux__ #include "isp_linux.h" #endif #ifdef __svr4__ #include "isp_solaris.h" #endif const char *isp_class3_roles[4] = { "None", "Target", "Initiator", "Target/Initiator" }; /* * Command shipping- finish off first queue entry and do dma mapping and additional segments as needed. * * Called with the first queue entry at least partially filled out. 
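 *
 * As a sketch only (a hypothetical caller; the local names here are not
 * part of this API), a 2400-series command could be shipped roughly as:
 *
 *	ispreqt7_t local;
 *
 *	ISP_MEMZERO(&local, QENTRY_LEN);
 *	local.req_header.rqs_entry_type = RQSTYPE_T7RQS;
 *	(fill in req_handle, req_nphdl, req_lun, req_cdb, req_time, ...)
 *	switch (isp_send_cmd(isp, &local, segp, nseg, totalcnt,
 *	    ISP_FROM_DEVICE, NULL)) {
 *	case CMD_QUEUED:	(IOCB and any continuations are queued)
 *	case CMD_EAGAIN:	(no request queue space; retry later)
 *	case CMD_COMPLETE:	(unrecognized entry type; fail the command)
 *	}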
*/ int isp_send_cmd(ispsoftc_t *isp, void *fqe, void *segp, uint32_t nsegs, uint32_t totalcnt, isp_ddir_t ddir, ispds64_t *ecmd) { uint8_t storage[QENTRY_LEN]; uint8_t type, nqe; uint32_t seg, curseg, seglim, nxt, nxtnxt, ddf; ispds_t *dsp = NULL; ispds64_t *dsp64 = NULL; void *qe0, *qe1; qe0 = isp_getrqentry(isp); if (qe0 == NULL) { return (CMD_EAGAIN); } nxt = ISP_NXT_QENTRY(isp->isp_reqidx, RQUEST_QUEUE_LEN(isp)); type = ((isphdr_t *)fqe)->rqs_entry_type; nqe = 1; /* * If we have no data to transmit, just copy the first IOCB and start it up. */ if (ddir == ISP_NOXFR) { if (type == RQSTYPE_T2RQS || type == RQSTYPE_T3RQS) { ddf = CT2_NO_DATA; } else { ddf = 0; } goto copy_and_sync; } /* * First figure out how many pieces of data to transfer and what kind and how many we can put into the first queue entry. */ switch (type) { case RQSTYPE_REQUEST: ddf = (ddir == ISP_TO_DEVICE)? REQFLAG_DATA_OUT : REQFLAG_DATA_IN; dsp = ((ispreq_t *)fqe)->req_dataseg; seglim = ISP_RQDSEG; break; case RQSTYPE_CMDONLY: ddf = (ddir == ISP_TO_DEVICE)? REQFLAG_DATA_OUT : REQFLAG_DATA_IN; seglim = 0; break; case RQSTYPE_T2RQS: ddf = (ddir == ISP_TO_DEVICE)? REQFLAG_DATA_OUT : REQFLAG_DATA_IN; dsp = ((ispreqt2_t *)fqe)->req_dataseg; seglim = ISP_RQDSEG_T2; break; case RQSTYPE_A64: ddf = (ddir == ISP_TO_DEVICE)? REQFLAG_DATA_OUT : REQFLAG_DATA_IN; dsp64 = ((ispreqt3_t *)fqe)->req_dataseg; seglim = ISP_RQDSEG_T3; break; case RQSTYPE_T3RQS: ddf = (ddir == ISP_TO_DEVICE)? REQFLAG_DATA_OUT : REQFLAG_DATA_IN; dsp64 = ((ispreqt3_t *)fqe)->req_dataseg; seglim = ISP_RQDSEG_T3; break; case RQSTYPE_T7RQS: ddf = (ddir == ISP_TO_DEVICE)? FCP_CMND_DATA_WRITE : FCP_CMND_DATA_READ; dsp64 = &((ispreqt7_t *)fqe)->req_dataseg; seglim = 1; break; default: return (CMD_COMPLETE); } if (seglim > nsegs) { seglim = nsegs; } for (seg = curseg = 0; curseg < seglim; curseg++) { if (dsp64) { XS_GET_DMA64_SEG(dsp64++, segp, seg++); } else { XS_GET_DMA_SEG(dsp++, segp, seg++); } } /* * Second, start building additional continuation segments as needed. */ while (seg < nsegs) { nxtnxt = ISP_NXT_QENTRY(nxt, RQUEST_QUEUE_LEN(isp)); if (nxtnxt == isp->isp_reqodx) { isp->isp_reqodx = ISP_READ(isp, isp->isp_rqstoutrp); if (nxtnxt == isp->isp_reqodx) return (CMD_EAGAIN); } ISP_MEMZERO(storage, QENTRY_LEN); qe1 = ISP_QUEUE_ENTRY(isp->isp_rquest, nxt); nxt = nxtnxt; if (dsp64) { ispcontreq64_t *crq = (ispcontreq64_t *) storage; seglim = ISP_CDSEG64; crq->req_header.rqs_entry_type = RQSTYPE_A64_CONT; crq->req_header.rqs_entry_count = 1; dsp64 = crq->req_dataseg; } else { ispcontreq_t *crq = (ispcontreq_t *) storage; seglim = ISP_CDSEG; crq->req_header.rqs_entry_type = RQSTYPE_DATASEG; crq->req_header.rqs_entry_count = 1; dsp = crq->req_dataseg; } if (seg + seglim > nsegs) { seglim = nsegs - seg; } for (curseg = 0; curseg < seglim; curseg++) { if (dsp64) { XS_GET_DMA64_SEG(dsp64++, segp, seg++); } else { XS_GET_DMA_SEG(dsp++, segp, seg++); } } if (dsp64) { isp_put_cont64_req(isp, (ispcontreq64_t *)storage, qe1); } else { isp_put_cont_req(isp, (ispcontreq_t *)storage, qe1); } if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "additional queue entry", QENTRY_LEN, storage); } nqe++; } copy_and_sync: ((isphdr_t *)fqe)->rqs_entry_count = nqe; switch (type) { case RQSTYPE_REQUEST: ((ispreq_t *)fqe)->req_flags |= ddf; /* * This is historical and not clear whether really needed. 
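 * (With ddir == ISP_NOXFR a command can arrive here with nsegs still zero;
 * the statement below forces the outgoing req_seg_count to at least one
 * before the IOCB is copied out.)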
*/ if (nsegs == 0) { nsegs = 1; } ((ispreq_t *)fqe)->req_seg_count = nsegs; isp_put_request(isp, fqe, qe0); break; case RQSTYPE_CMDONLY: ((ispreq_t *)fqe)->req_flags |= ddf; /* * This is historical and not clear whether really needed. */ if (nsegs == 0) { nsegs = 1; } ((ispextreq_t *)fqe)->req_seg_count = nsegs; isp_put_extended_request(isp, fqe, qe0); break; case RQSTYPE_T2RQS: ((ispreqt2_t *)fqe)->req_flags |= ddf; ((ispreqt2_t *)fqe)->req_seg_count = nsegs; ((ispreqt2_t *)fqe)->req_totalcnt = totalcnt; if (ISP_CAP_2KLOGIN(isp)) { isp_put_request_t2e(isp, fqe, qe0); } else { isp_put_request_t2(isp, fqe, qe0); } break; case RQSTYPE_A64: case RQSTYPE_T3RQS: ((ispreqt3_t *)fqe)->req_flags |= ddf; ((ispreqt3_t *)fqe)->req_seg_count = nsegs; ((ispreqt3_t *)fqe)->req_totalcnt = totalcnt; if (ISP_CAP_2KLOGIN(isp)) { isp_put_request_t3e(isp, fqe, qe0); } else { isp_put_request_t3(isp, fqe, qe0); } break; case RQSTYPE_T7RQS: ((ispreqt7_t *)fqe)->req_alen_datadir = ddf; ((ispreqt7_t *)fqe)->req_seg_count = nsegs; ((ispreqt7_t *)fqe)->req_dl = totalcnt; isp_put_request_t7(isp, fqe, qe0); break; default: return (CMD_COMPLETE); } if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "first queue entry", QENTRY_LEN, fqe); } ISP_ADD_REQUEST(isp, nxt); return (CMD_QUEUED); } int isp_allocate_xs(ispsoftc_t *isp, XS_T *xs, uint32_t *handlep) { isp_hdl_t *hdp; hdp = isp->isp_xffree; if (hdp == NULL) { return (-1); } isp->isp_xffree = hdp->cmd; hdp->cmd = xs; hdp->handle = (hdp - isp->isp_xflist); hdp->handle |= (ISP_HANDLE_INITIATOR << ISP_HANDLE_USAGE_SHIFT); hdp->handle |= (isp->isp_seqno++ << ISP_HANDLE_SEQ_SHIFT); *handlep = hdp->handle; return (0); } XS_T * isp_find_xs(ispsoftc_t *isp, uint32_t handle) { if (!ISP_VALID_INI_HANDLE(isp, handle)) { isp_prt(isp, ISP_LOGERR, "%s: bad handle 0x%x", __func__, handle); return (NULL); } return (isp->isp_xflist[(handle & ISP_HANDLE_CMD_MASK)].cmd); } uint32_t isp_find_handle(ispsoftc_t *isp, XS_T *xs) { uint32_t i, foundhdl = ISP_HANDLE_FREE; if (xs != NULL) { for (i = 0; i < isp->isp_maxcmds; i++) { if (isp->isp_xflist[i].cmd != xs) { continue; } foundhdl = isp->isp_xflist[i].handle; break; } } return (foundhdl); } uint32_t isp_handle_index(ispsoftc_t *isp, uint32_t handle) { if (!ISP_VALID_HANDLE(isp, handle)) { isp_prt(isp, ISP_LOGERR, "%s: bad handle 0x%x", __func__, handle); return (ISP_BAD_HANDLE_INDEX); } else { return (handle & ISP_HANDLE_CMD_MASK); } } void isp_destroy_handle(ispsoftc_t *isp, uint32_t handle) { if (!ISP_VALID_INI_HANDLE(isp, handle)) { isp_prt(isp, ISP_LOGERR, "%s: bad handle 0x%x", __func__, handle); } else { isp->isp_xflist[(handle & ISP_HANDLE_CMD_MASK)].handle = ISP_HANDLE_FREE; isp->isp_xflist[(handle & ISP_HANDLE_CMD_MASK)].cmd = isp->isp_xffree; isp->isp_xffree = &isp->isp_xflist[(handle & ISP_HANDLE_CMD_MASK)]; } } /* * Make sure we have space to put something on the request queue. * Return a pointer to that entry if we do. A side effect of this * function is to update the output index. The input index * stays the same. 
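 *
 * Typical usage, as a sketch (locking and error details elided):
 *
 *	void *qe = isp_getrqentry(isp);
 *	if (qe == NULL)
 *		return (CMD_EAGAIN);	(request queue is full)
 *	(build or copy the IOCB at qe)
 *	ISP_ADD_REQUEST(isp,
 *	    ISP_NXT_QENTRY(isp->isp_reqidx, RQUEST_QUEUE_LEN(isp)));
 *
 * where ISP_ADD_REQUEST advances the input index that this function
 * deliberately leaves alone.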
*/ void * isp_getrqentry(ispsoftc_t *isp) { uint32_t next; next = ISP_NXT_QENTRY(isp->isp_reqidx, RQUEST_QUEUE_LEN(isp)); if (next == isp->isp_reqodx) { isp->isp_reqodx = ISP_READ(isp, isp->isp_rqstoutrp); if (next == isp->isp_reqodx) return (NULL); } return (ISP_QUEUE_ENTRY(isp->isp_rquest, isp->isp_reqidx)); } #define TBA (4 * (((QENTRY_LEN >> 2) * 3) + 1) + 1) void isp_print_qentry(ispsoftc_t *isp, const char *msg, int idx, void *arg) { char buf[TBA]; int amt, i, j; uint8_t *ptr = arg; isp_prt(isp, ISP_LOGALL, "%s index %d=>", msg, idx); for (buf[0] = 0, amt = i = 0; i < 4; i++) { buf[0] = 0; ISP_SNPRINTF(buf, TBA, " "); for (j = 0; j < (QENTRY_LEN >> 2); j++) { ISP_SNPRINTF(buf, TBA, "%s %02x", buf, ptr[amt++] & 0xff); } isp_prt(isp, ISP_LOGALL, "%s", buf); } } void isp_print_bytes(ispsoftc_t *isp, const char *msg, int amt, void *arg) { char buf[128]; uint8_t *ptr = arg; int off; if (msg) isp_prt(isp, ISP_LOGALL, "%s:", msg); off = 0; buf[0] = 0; while (off < amt) { int j, to; to = off; for (j = 0; j < 16; j++) { ISP_SNPRINTF(buf, 128, "%s %02x", buf, ptr[off++] & 0xff); if (off == amt) { break; } } isp_prt(isp, ISP_LOGALL, "0x%08x:%s", to, buf); buf[0] = 0; } } /* * Do the common path to try and ensure that link is up, we've scanned * the fabric (if we're on a fabric), and that we've synchronized this * all with our own database and done the appropriate logins. * * We repeatedly check for firmware state and loop state after each * action because things may have changed while we were doing this. * Any failure or change of state causes us to return a nonzero value. * * We assume we enter here with any locks held. */ int isp_fc_runstate(ispsoftc_t *isp, int chan, int tval) { fcparam *fcp; fcp = FCPARAM(isp, chan); if (fcp->role == ISP_ROLE_NONE) { return (0); } if (fcp->isp_fwstate < FW_READY || fcp->isp_loopstate < LOOP_PDB_RCVD) { if (isp_control(isp, ISPCTL_FCLINK_TEST, chan, tval) != 0) { isp_prt(isp, ISP_LOG_SANCFG, "isp_fc_runstate: linktest failed for channel %d", chan); return (-1); } if (fcp->isp_fwstate != FW_READY || fcp->isp_loopstate < LOOP_PDB_RCVD) { isp_prt(isp, ISP_LOG_SANCFG, "isp_fc_runstate: f/w not ready for channel %d", chan); return (-1); } } if (isp_control(isp, ISPCTL_SCAN_LOOP, chan) != 0) { isp_prt(isp, ISP_LOG_SANCFG, "isp_fc_runstate: scan loop fails on channel %d", chan); return (LOOP_PDB_RCVD); } if (isp_control(isp, ISPCTL_SCAN_FABRIC, chan) != 0) { isp_prt(isp, ISP_LOG_SANCFG, "isp_fc_runstate: scan fabric fails on channel %d", chan); return (LOOP_LSCAN_DONE); } if (isp_control(isp, ISPCTL_PDB_SYNC, chan) != 0) { isp_prt(isp, ISP_LOG_SANCFG, "isp_fc_runstate: pdb_sync fails on channel %d", chan); return (LOOP_FSCAN_DONE); } if (fcp->isp_fwstate != FW_READY || fcp->isp_loopstate != LOOP_READY) { isp_prt(isp, ISP_LOG_SANCFG, "isp_fc_runstate: f/w not ready again on channel %d", chan); return (-1); } return (0); } /* * Fibre Channel Support routines */ void isp_dump_portdb(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); int i; for (i = 0; i < MAX_FC_TARG; i++) { char buf1[64], buf2[64]; const char *dbs[8] = { "NIL ", "PROB", "DEAD", "CHGD", "NEW ", "PVLD", "ZOMB", "VLD " }; fcportdb_t *lp = &fcp->portdb[i]; if (lp->state == FC_PORTDB_STATE_NIL) { continue; } isp_gen_role_str(buf1, sizeof (buf1), lp->prli_word3); isp_gen_role_str(buf2, sizeof (buf2), lp->new_prli_word3); isp_prt(isp, ISP_LOGALL, "Chan %d [%d]: hdl 0x%x %s al%d %s 0x%06x =>%s 0x%06x; WWNN 0x%08x%08x WWPN 0x%08x%08x", chan, i, lp->handle, dbs[lp->state], lp->autologin, buf1, 
lp->portid, buf2, lp->new_portid, (uint32_t) (lp->node_wwn >> 32), (uint32_t) (lp->node_wwn), (uint32_t) (lp->port_wwn >> 32), (uint32_t) (lp->port_wwn)); } } void isp_gen_role_str(char *buf, size_t len, uint16_t p3) { int nd = 0; buf[0] = '('; buf[1] = 0; if (p3 & PRLI_WD3_ENHANCED_DISCOVERY) { nd++; strlcat(buf, "EDisc", len); } if (p3 & PRLI_WD3_REC_SUPPORT) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "REC", len); } if (p3 & PRLI_WD3_TASK_RETRY_IDENTIFICATION_REQUESTED) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "RetryID", len); } if (p3 & PRLI_WD3_RETRY) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "Retry", len); } if (p3 & PRLI_WD3_CONFIRMED_COMPLETION_ALLOWED) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "CNFRM", len); } if (p3 & PRLI_WD3_DATA_OVERLAY_ALLOWED) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "DOver", len); } if (p3 & PRLI_WD3_INITIATOR_FUNCTION) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "INI", len); } if (p3 & PRLI_WD3_TARGET_FUNCTION) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "TGT", len); } if (p3 & PRLI_WD3_READ_FCP_XFER_RDY_DISABLED) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "RdXfrDis", len); } if (p3 & PRLI_WD3_WRITE_FCP_XFER_RDY_DISABLED) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "XfrDis", len); } strlcat(buf, ")", len); } const char * isp_fc_fw_statename(int state) { switch (state) { case FW_CONFIG_WAIT: return "Config Wait"; case FW_WAIT_AL_PA: return "Waiting for AL_PA"; case FW_WAIT_LOGIN: return "Wait Login"; case FW_READY: return "Ready"; case FW_LOSS_OF_SYNC: return "Loss Of Sync"; case FW_ERROR: return "Error"; case FW_REINIT: return "Re-Init"; case FW_NON_PART: return "Nonparticipating"; default: return "?????"; } } const char * isp_fc_loop_statename(int state) { switch (state) { case LOOP_NIL: return "NIL"; case LOOP_LIP_RCVD: return "LIP Received"; case LOOP_PDB_RCVD: return "PDB Received"; case LOOP_SCANNING_LOOP: return "Scanning"; case LOOP_LSCAN_DONE: return "Loop Scan Done"; case LOOP_SCANNING_FABRIC: return "Scanning Fabric"; case LOOP_FSCAN_DONE: return "Fabric Scan Done"; case LOOP_SYNCING_PDB: return "Syncing PDB"; case LOOP_READY: return "Ready"; default: return "?????"; } } const char * isp_fc_toponame(fcparam *fcp) { if (fcp->isp_fwstate != FW_READY) { return "Unavailable"; } switch (fcp->isp_topo) { case TOPO_NL_PORT: return "Private Loop"; case TOPO_FL_PORT: return "FL Port"; case TOPO_N_PORT: return "N-Port to N-Port"; case TOPO_F_PORT: return "F Port"; case TOPO_PTP_STUB: return "F Port (no FLOGI_ACC response)"; default: return "?????"; } } static int isp_fc_enable_vp(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); mbreg_t mbs; vp_modify_t *vp; uint8_t qe[QENTRY_LEN], *scp; ISP_MEMZERO(qe, QENTRY_LEN); if (FC_SCRATCH_ACQUIRE(isp, chan)) { return (EBUSY); } scp = fcp->isp_scratch; /* * Build a VP MODIFY command in memory */ vp = (vp_modify_t *) qe; vp->vp_mod_hdr.rqs_entry_type = RQSTYPE_VP_MODIFY; vp->vp_mod_hdr.rqs_entry_count = 1; vp->vp_mod_cnt = 1; vp->vp_mod_idx0 = chan; vp->vp_mod_cmd = VP_MODIFY_ENA; - vp->vp_mod_ports[0].options = ICB2400_VPOPT_ENABLED; + vp->vp_mod_ports[0].options = ICB2400_VPOPT_ENABLED | + ICB2400_VPOPT_ENA_SNSLOGIN; if (fcp->role & ISP_ROLE_INITIATOR) { vp->vp_mod_ports[0].options |= ICB2400_VPOPT_INI_ENABLE; } if ((fcp->role & ISP_ROLE_TARGET) == 0) { vp->vp_mod_ports[0].options |= ICB2400_VPOPT_TGT_DISABLE; } if (fcp->isp_loopid < LOCAL_LOOP_LIM) { vp->vp_mod_ports[0].loopid = fcp->isp_loopid; if 
(isp->isp_confopts & ISP_CFG_OWNLOOPID) vp->vp_mod_ports[0].options |= ICB2400_VPOPT_HARD_ADDRESS; else vp->vp_mod_ports[0].options |= ICB2400_VPOPT_PREV_ADDRESS; } MAKE_NODE_NAME_FROM_WWN(vp->vp_mod_ports[0].wwpn, fcp->isp_wwpn); MAKE_NODE_NAME_FROM_WWN(vp->vp_mod_ports[0].wwnn, fcp->isp_wwnn); isp_put_vp_modify(isp, vp, (vp_modify_t *) scp); /* * Build an EXEC IOCB A64 command that points to the VP MODIFY command */ MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 0); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, 2 * QENTRY_LEN, chan); isp_control(isp, ISPCTL_RUN_MBOXCMD, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { FC_SCRATCH_RELEASE(isp, chan); return (EIO); } MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); isp_get_vp_modify(isp, (vp_modify_t *)&scp[QENTRY_LEN], vp); FC_SCRATCH_RELEASE(isp, chan); if (vp->vp_mod_status != VP_STS_OK) { isp_prt(isp, ISP_LOGERR, "%s: VP_MODIFY of Chan %d failed with status %d", __func__, chan, vp->vp_mod_status); return (EIO); } return (0); } static int isp_fc_disable_vp(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); mbreg_t mbs; vp_ctrl_info_t *vp; uint8_t qe[QENTRY_LEN], *scp; ISP_MEMZERO(qe, QENTRY_LEN); if (FC_SCRATCH_ACQUIRE(isp, chan)) { return (EBUSY); } scp = fcp->isp_scratch; /* * Build a VP CTRL command in memory */ vp = (vp_ctrl_info_t *) qe; vp->vp_ctrl_hdr.rqs_entry_type = RQSTYPE_VP_CTRL; vp->vp_ctrl_hdr.rqs_entry_count = 1; if (ISP_CAP_VP0(isp)) { vp->vp_ctrl_status = 1; } else { vp->vp_ctrl_status = 0; chan--; /* VP0 cannot be controlled in this case. */ } vp->vp_ctrl_command = VP_CTRL_CMD_DISABLE_VP_LOGO_ALL; vp->vp_ctrl_vp_count = 1; vp->vp_ctrl_idmap[chan / 16] |= (1 << chan % 16); isp_put_vp_ctrl_info(isp, vp, (vp_ctrl_info_t *) scp); /* * Build an EXEC IOCB A64 command that points to the VP CTRL command */ MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 0); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, 2 * QENTRY_LEN, chan); isp_control(isp, ISPCTL_RUN_MBOXCMD, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { FC_SCRATCH_RELEASE(isp, chan); return (EIO); } MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); isp_get_vp_ctrl_info(isp, (vp_ctrl_info_t *)&scp[QENTRY_LEN], vp); FC_SCRATCH_RELEASE(isp, chan); if (vp->vp_ctrl_status != 0) { isp_prt(isp, ISP_LOGERR, "%s: VP_CTRL of Chan %d failed with status %d %d", __func__, chan, vp->vp_ctrl_status, vp->vp_ctrl_index_fail); return (EIO); } return (0); } /* * Change Roles */ int isp_fc_change_role(ispsoftc_t *isp, int chan, int new_role) { fcparam *fcp = FCPARAM(isp, chan); int i, was, res = 0; if (chan >= isp->isp_nchan) { isp_prt(isp, ISP_LOGWARN, "%s: bad channel %d", __func__, chan); return (ENXIO); } if (fcp->role == new_role) return (0); for (was = 0, i = 0; i < isp->isp_nchan; i++) { if (FCPARAM(isp, i)->role != ISP_ROLE_NONE) was++; } if (was == 0 || (was == 1 && fcp->role != ISP_ROLE_NONE)) { fcp->role = new_role; return (isp_reinit(isp, 0)); } if (fcp->role != ISP_ROLE_NONE) res = isp_fc_disable_vp(isp, chan); fcp->role = new_role; if (fcp->role != ISP_ROLE_NONE) res = isp_fc_enable_vp(isp, chan); return (res); } void isp_clear_commands(ispsoftc_t *isp) { uint32_t tmp;
isp_hdl_t *hdp; #ifdef ISP_TARGET_MODE isp_notify_t notify; #endif for (tmp = 0; isp->isp_xflist && tmp < isp->isp_maxcmds; tmp++) { XS_T *xs; hdp = &isp->isp_xflist[tmp]; if (hdp->handle == ISP_HANDLE_FREE) { continue; } xs = hdp->cmd; if (XS_XFRLEN(xs)) { ISP_DMAFREE(isp, xs, hdp->handle); XS_SET_RESID(xs, XS_XFRLEN(xs)); } else { XS_SET_RESID(xs, 0); } hdp->handle = 0; hdp->cmd = NULL; XS_SETERR(xs, HBA_BUSRESET); isp_done(xs); } #ifdef ISP_TARGET_MODE for (tmp = 0; isp->isp_tgtlist && tmp < isp->isp_maxcmds; tmp++) { uint8_t local[QENTRY_LEN]; hdp = &isp->isp_tgtlist[tmp]; if (hdp->handle == ISP_HANDLE_FREE) { continue; } ISP_DMAFREE(isp, hdp->cmd, hdp->handle); ISP_MEMZERO(local, QENTRY_LEN); if (IS_24XX(isp)) { ct7_entry_t *ctio = (ct7_entry_t *) local; ctio->ct_syshandle = hdp->handle; ctio->ct_nphdl = CT_HBA_RESET; ctio->ct_header.rqs_entry_type = RQSTYPE_CTIO7; } else if (IS_FC(isp)) { ct2_entry_t *ctio = (ct2_entry_t *) local; ctio->ct_syshandle = hdp->handle; ctio->ct_status = CT_HBA_RESET; ctio->ct_header.rqs_entry_type = RQSTYPE_CTIO2; } else { ct_entry_t *ctio = (ct_entry_t *) local; ctio->ct_syshandle = hdp->handle; ctio->ct_status = CT_HBA_RESET & 0xff; ctio->ct_header.rqs_entry_type = RQSTYPE_CTIO; } isp_async(isp, ISPASYNC_TARGET_ACTION, local); } for (tmp = 0; tmp < isp->isp_nchan; tmp++) { ISP_MEMZERO(&notify, sizeof (isp_notify_t)); notify.nt_ncode = NT_HBA_RESET; notify.nt_hba = isp; notify.nt_wwn = INI_ANY; notify.nt_nphdl = NIL_HANDLE; notify.nt_sid = PORT_ANY; notify.nt_did = PORT_ANY; notify.nt_tgt = TGT_ANY; notify.nt_channel = tmp; notify.nt_lun = LUN_ANY; notify.nt_tagval = TAG_ANY; isp_async(isp, ISPASYNC_TARGET_NOTIFY, &notify); } #endif } void isp_shutdown(ispsoftc_t *isp) { if (IS_FC(isp)) { if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_ICR, 0); ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_PAUSE); } else { ISP_WRITE(isp, BIU_ICR, 0); ISP_WRITE(isp, HCCR, HCCR_CMD_PAUSE); ISP_WRITE(isp, BIU2100_CSR, BIU2100_FPM0_REGS); ISP_WRITE(isp, FPM_DIAG_CONFIG, FPM_SOFT_RESET); ISP_WRITE(isp, BIU2100_CSR, BIU2100_FB_REGS); ISP_WRITE(isp, FBM_CMD, FBMCMD_FIFO_RESET_ALL); ISP_WRITE(isp, BIU2100_CSR, BIU2100_RISC_REGS); } } else { ISP_WRITE(isp, BIU_ICR, 0); ISP_WRITE(isp, HCCR, HCCR_CMD_PAUSE); } } /* * Functions to move stuff to a form that the QLogic RISC engine understands * and functions to move stuff back to a form the processor understands. * * Each platform is required to provide the 8, 16 and 32 bit * swizzle and unswizzle macros (ISP_IOX{PUT|GET}_{8,16,32}) * * The assumption is that swizzling and unswizzling is mostly done 'in place' * (with a few exceptions for efficiency).
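 *
 * For example, with the little-endian FreeBSD definitions shown earlier in
 * this change, the 16-bit pair effectively reduces to
 *
 *	ISP_IOXPUT_16(isp, s, d)	->	*(d) = s
 *	ISP_IOZPUT_16(isp, s, d)	->	*(d) = bswap16(s)
 *
 * i.e. the IOX accessors pass data through untouched and the IOZ accessors
 * (used by the 24xx paths, e.g. isp_get_cont_response below) byte-swap,
 * with the two roles reversed on a big-endian host.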
*/ #define ISP_IS_SBUS(isp) (ISP_SBUS_SUPPORTED && (isp)->isp_bustype == ISP_BT_SBUS) #define ASIZE(x) (sizeof (x) / sizeof (x[0])) /* * Swizzle/Copy Functions */ void isp_put_hdr(ispsoftc_t *isp, isphdr_t *hpsrc, isphdr_t *hpdst) { if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, hpsrc->rqs_entry_type, &hpdst->rqs_entry_count); ISP_IOXPUT_8(isp, hpsrc->rqs_entry_count, &hpdst->rqs_entry_type); ISP_IOXPUT_8(isp, hpsrc->rqs_seqno, &hpdst->rqs_flags); ISP_IOXPUT_8(isp, hpsrc->rqs_flags, &hpdst->rqs_seqno); } else { ISP_IOXPUT_8(isp, hpsrc->rqs_entry_type, &hpdst->rqs_entry_type); ISP_IOXPUT_8(isp, hpsrc->rqs_entry_count, &hpdst->rqs_entry_count); ISP_IOXPUT_8(isp, hpsrc->rqs_seqno, &hpdst->rqs_seqno); ISP_IOXPUT_8(isp, hpsrc->rqs_flags, &hpdst->rqs_flags); } } void isp_get_hdr(ispsoftc_t *isp, isphdr_t *hpsrc, isphdr_t *hpdst) { if (ISP_IS_SBUS(isp)) { ISP_IOXGET_8(isp, &hpsrc->rqs_entry_type, hpdst->rqs_entry_count); ISP_IOXGET_8(isp, &hpsrc->rqs_entry_count, hpdst->rqs_entry_type); ISP_IOXGET_8(isp, &hpsrc->rqs_seqno, hpdst->rqs_flags); ISP_IOXGET_8(isp, &hpsrc->rqs_flags, hpdst->rqs_seqno); } else { ISP_IOXGET_8(isp, &hpsrc->rqs_entry_type, hpdst->rqs_entry_type); ISP_IOXGET_8(isp, &hpsrc->rqs_entry_count, hpdst->rqs_entry_count); ISP_IOXGET_8(isp, &hpsrc->rqs_seqno, hpdst->rqs_seqno); ISP_IOXGET_8(isp, &hpsrc->rqs_flags, hpdst->rqs_flags); } } int isp_get_response_type(ispsoftc_t *isp, isphdr_t *hp) { uint8_t type; if (ISP_IS_SBUS(isp)) { ISP_IOXGET_8(isp, &hp->rqs_entry_count, type); } else { ISP_IOXGET_8(isp, &hp->rqs_entry_type, type); } return ((int)type); } void isp_put_request(ispsoftc_t *isp, ispreq_t *rqsrc, ispreq_t *rqdst) { int i; isp_put_hdr(isp, &rqsrc->req_header, &rqdst->req_header); ISP_IOXPUT_32(isp, rqsrc->req_handle, &rqdst->req_handle); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, rqsrc->req_lun_trn, &rqdst->req_target); ISP_IOXPUT_8(isp, rqsrc->req_target, &rqdst->req_lun_trn); } else { ISP_IOXPUT_8(isp, rqsrc->req_lun_trn, &rqdst->req_lun_trn); ISP_IOXPUT_8(isp, rqsrc->req_target, &rqdst->req_target); } ISP_IOXPUT_16(isp, rqsrc->req_cdblen, &rqdst->req_cdblen); ISP_IOXPUT_16(isp, rqsrc->req_flags, &rqdst->req_flags); ISP_IOXPUT_16(isp, rqsrc->req_time, &rqdst->req_time); ISP_IOXPUT_16(isp, rqsrc->req_seg_count, &rqdst->req_seg_count); for (i = 0; i < ASIZE(rqsrc->req_cdb); i++) { ISP_IOXPUT_8(isp, rqsrc->req_cdb[i], &rqdst->req_cdb[i]); } for (i = 0; i < ISP_RQDSEG; i++) { ISP_IOXPUT_32(isp, rqsrc->req_dataseg[i].ds_base, &rqdst->req_dataseg[i].ds_base); ISP_IOXPUT_32(isp, rqsrc->req_dataseg[i].ds_count, &rqdst->req_dataseg[i].ds_count); } } void isp_put_marker(ispsoftc_t *isp, isp_marker_t *src, isp_marker_t *dst) { int i; isp_put_hdr(isp, &src->mrk_header, &dst->mrk_header); ISP_IOXPUT_32(isp, src->mrk_handle, &dst->mrk_handle); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->mrk_reserved0, &dst->mrk_target); ISP_IOXPUT_8(isp, src->mrk_target, &dst->mrk_reserved0); } else { ISP_IOXPUT_8(isp, src->mrk_reserved0, &dst->mrk_reserved0); ISP_IOXPUT_8(isp, src->mrk_target, &dst->mrk_target); } ISP_IOXPUT_16(isp, src->mrk_modifier, &dst->mrk_modifier); ISP_IOXPUT_16(isp, src->mrk_flags, &dst->mrk_flags); ISP_IOXPUT_16(isp, src->mrk_lun, &dst->mrk_lun); for (i = 0; i < ASIZE(src->mrk_reserved1); i++) { ISP_IOXPUT_8(isp, src->mrk_reserved1[i], &dst->mrk_reserved1[i]); } } void isp_put_marker_24xx(ispsoftc_t *isp, isp_marker_24xx_t *src, isp_marker_24xx_t *dst) { int i; isp_put_hdr(isp, &src->mrk_header, &dst->mrk_header); ISP_IOXPUT_32(isp, src->mrk_handle, &dst->mrk_handle); 
ISP_IOXPUT_16(isp, src->mrk_nphdl, &dst->mrk_nphdl); ISP_IOXPUT_8(isp, src->mrk_modifier, &dst->mrk_modifier); ISP_IOXPUT_8(isp, src->mrk_reserved0, &dst->mrk_reserved0); ISP_IOXPUT_8(isp, src->mrk_reserved1, &dst->mrk_reserved1); ISP_IOXPUT_8(isp, src->mrk_vphdl, &dst->mrk_vphdl); ISP_IOXPUT_8(isp, src->mrk_reserved2, &dst->mrk_reserved2); for (i = 0; i < ASIZE(src->mrk_lun); i++) { ISP_IOXPUT_8(isp, src->mrk_lun[i], &dst->mrk_lun[i]); } for (i = 0; i < ASIZE(src->mrk_reserved3); i++) { ISP_IOXPUT_8(isp, src->mrk_reserved3[i], &dst->mrk_reserved3[i]); } } void isp_put_request_t2(ispsoftc_t *isp, ispreqt2_t *src, ispreqt2_t *dst) { int i; isp_put_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXPUT_32(isp, src->req_handle, &dst->req_handle); ISP_IOXPUT_8(isp, src->req_lun_trn, &dst->req_lun_trn); ISP_IOXPUT_8(isp, src->req_target, &dst->req_target); ISP_IOXPUT_16(isp, src->req_scclun, &dst->req_scclun); ISP_IOXPUT_16(isp, src->req_flags, &dst->req_flags); ISP_IOXPUT_16(isp, src->req_reserved, &dst->req_reserved); ISP_IOXPUT_16(isp, src->req_time, &dst->req_time); ISP_IOXPUT_16(isp, src->req_seg_count, &dst->req_seg_count); for (i = 0; i < ASIZE(src->req_cdb); i++) { ISP_IOXPUT_8(isp, src->req_cdb[i], &dst->req_cdb[i]); } ISP_IOXPUT_32(isp, src->req_totalcnt, &dst->req_totalcnt); for (i = 0; i < ISP_RQDSEG_T2; i++) { ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_base, &dst->req_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_count, &dst->req_dataseg[i].ds_count); } } void isp_put_request_t2e(ispsoftc_t *isp, ispreqt2e_t *src, ispreqt2e_t *dst) { int i; isp_put_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXPUT_32(isp, src->req_handle, &dst->req_handle); ISP_IOXPUT_16(isp, src->req_target, &dst->req_target); ISP_IOXPUT_16(isp, src->req_scclun, &dst->req_scclun); ISP_IOXPUT_16(isp, src->req_flags, &dst->req_flags); ISP_IOXPUT_16(isp, src->req_reserved, &dst->req_reserved); ISP_IOXPUT_16(isp, src->req_time, &dst->req_time); ISP_IOXPUT_16(isp, src->req_seg_count, &dst->req_seg_count); for (i = 0; i < ASIZE(src->req_cdb); i++) { ISP_IOXPUT_8(isp, src->req_cdb[i], &dst->req_cdb[i]); } ISP_IOXPUT_32(isp, src->req_totalcnt, &dst->req_totalcnt); for (i = 0; i < ISP_RQDSEG_T2; i++) { ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_base, &dst->req_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_count, &dst->req_dataseg[i].ds_count); } } void isp_put_request_t3(ispsoftc_t *isp, ispreqt3_t *src, ispreqt3_t *dst) { int i; isp_put_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXPUT_32(isp, src->req_handle, &dst->req_handle); ISP_IOXPUT_8(isp, src->req_lun_trn, &dst->req_lun_trn); ISP_IOXPUT_8(isp, src->req_target, &dst->req_target); ISP_IOXPUT_16(isp, src->req_scclun, &dst->req_scclun); ISP_IOXPUT_16(isp, src->req_flags, &dst->req_flags); ISP_IOXPUT_8(isp, src->req_crn, &dst->req_crn); ISP_IOXPUT_8(isp, src->req_reserved, &dst->req_reserved); ISP_IOXPUT_16(isp, src->req_time, &dst->req_time); ISP_IOXPUT_16(isp, src->req_seg_count, &dst->req_seg_count); for (i = 0; i < ASIZE(src->req_cdb); i++) { ISP_IOXPUT_8(isp, src->req_cdb[i], &dst->req_cdb[i]); } ISP_IOXPUT_32(isp, src->req_totalcnt, &dst->req_totalcnt); for (i = 0; i < ISP_RQDSEG_T3; i++) { ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_base, &dst->req_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_basehi, &dst->req_dataseg[i].ds_basehi); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_count, &dst->req_dataseg[i].ds_count); } } void isp_put_request_t3e(ispsoftc_t *isp, ispreqt3e_t *src, ispreqt3e_t *dst) { int 
i; isp_put_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXPUT_32(isp, src->req_handle, &dst->req_handle); ISP_IOXPUT_16(isp, src->req_target, &dst->req_target); ISP_IOXPUT_16(isp, src->req_scclun, &dst->req_scclun); ISP_IOXPUT_16(isp, src->req_flags, &dst->req_flags); ISP_IOXPUT_8(isp, src->req_crn, &dst->req_crn); ISP_IOXPUT_8(isp, src->req_reserved, &dst->req_reserved); ISP_IOXPUT_16(isp, src->req_time, &dst->req_time); ISP_IOXPUT_16(isp, src->req_seg_count, &dst->req_seg_count); for (i = 0; i < ASIZE(src->req_cdb); i++) { ISP_IOXPUT_8(isp, src->req_cdb[i], &dst->req_cdb[i]); } ISP_IOXPUT_32(isp, src->req_totalcnt, &dst->req_totalcnt); for (i = 0; i < ISP_RQDSEG_T3; i++) { ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_base, &dst->req_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_basehi, &dst->req_dataseg[i].ds_basehi); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_count, &dst->req_dataseg[i].ds_count); } } void isp_put_extended_request(ispsoftc_t *isp, ispextreq_t *src, ispextreq_t *dst) { int i; isp_put_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXPUT_32(isp, src->req_handle, &dst->req_handle); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->req_lun_trn, &dst->req_target); ISP_IOXPUT_8(isp, src->req_target, &dst->req_lun_trn); } else { ISP_IOXPUT_8(isp, src->req_lun_trn, &dst->req_lun_trn); ISP_IOXPUT_8(isp, src->req_target, &dst->req_target); } ISP_IOXPUT_16(isp, src->req_cdblen, &dst->req_cdblen); ISP_IOXPUT_16(isp, src->req_flags, &dst->req_flags); ISP_IOXPUT_16(isp, src->req_time, &dst->req_time); ISP_IOXPUT_16(isp, src->req_seg_count, &dst->req_seg_count); for (i = 0; i < ASIZE(src->req_cdb); i++) { ISP_IOXPUT_8(isp, src->req_cdb[i], &dst->req_cdb[i]); } } void isp_put_request_t7(ispsoftc_t *isp, ispreqt7_t *src, ispreqt7_t *dst) { int i; uint32_t *a, *b; isp_put_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXPUT_32(isp, src->req_handle, &dst->req_handle); ISP_IOXPUT_16(isp, src->req_nphdl, &dst->req_nphdl); ISP_IOXPUT_16(isp, src->req_time, &dst->req_time); ISP_IOXPUT_16(isp, src->req_seg_count, &dst->req_seg_count); ISP_IOXPUT_16(isp, src->req_reserved, &dst->req_reserved); a = (uint32_t *) src->req_lun; b = (uint32_t *) dst->req_lun; for (i = 0; i < (ASIZE(src->req_lun) >> 2); i++ ) { *b++ = ISP_SWAP32(isp, *a++); } ISP_IOXPUT_8(isp, src->req_alen_datadir, &dst->req_alen_datadir); ISP_IOXPUT_8(isp, src->req_task_management, &dst->req_task_management); ISP_IOXPUT_8(isp, src->req_task_attribute, &dst->req_task_attribute); ISP_IOXPUT_8(isp, src->req_crn, &dst->req_crn); a = (uint32_t *) src->req_cdb; b = (uint32_t *) dst->req_cdb; for (i = 0; i < (ASIZE(src->req_cdb) >> 2); i++) { *b++ = ISP_SWAP32(isp, *a++); } ISP_IOXPUT_32(isp, src->req_dl, &dst->req_dl); ISP_IOXPUT_16(isp, src->req_tidlo, &dst->req_tidlo); ISP_IOXPUT_8(isp, src->req_tidhi, &dst->req_tidhi); ISP_IOXPUT_8(isp, src->req_vpidx, &dst->req_vpidx); ISP_IOXPUT_32(isp, src->req_dataseg.ds_base, &dst->req_dataseg.ds_base); ISP_IOXPUT_32(isp, src->req_dataseg.ds_basehi, &dst->req_dataseg.ds_basehi); ISP_IOXPUT_32(isp, src->req_dataseg.ds_count, &dst->req_dataseg.ds_count); } void isp_put_24xx_tmf(ispsoftc_t *isp, isp24xx_tmf_t *src, isp24xx_tmf_t *dst) { int i; uint32_t *a, *b; isp_put_hdr(isp, &src->tmf_header, &dst->tmf_header); ISP_IOXPUT_32(isp, src->tmf_handle, &dst->tmf_handle); ISP_IOXPUT_16(isp, src->tmf_nphdl, &dst->tmf_nphdl); ISP_IOXPUT_16(isp, src->tmf_delay, &dst->tmf_delay); ISP_IOXPUT_16(isp, src->tmf_timeout, &dst->tmf_timeout); for (i = 0; i < ASIZE(src->tmf_reserved0); i++) { 
ISP_IOXPUT_8(isp, src->tmf_reserved0[i], &dst->tmf_reserved0[i]); } a = (uint32_t *) src->tmf_lun; b = (uint32_t *) dst->tmf_lun; for (i = 0; i < (ASIZE(src->tmf_lun) >> 2); i++ ) { *b++ = ISP_SWAP32(isp, *a++); } ISP_IOXPUT_32(isp, src->tmf_flags, &dst->tmf_flags); for (i = 0; i < ASIZE(src->tmf_reserved1); i++) { ISP_IOXPUT_8(isp, src->tmf_reserved1[i], &dst->tmf_reserved1[i]); } ISP_IOXPUT_16(isp, src->tmf_tidlo, &dst->tmf_tidlo); ISP_IOXPUT_8(isp, src->tmf_tidhi, &dst->tmf_tidhi); ISP_IOXPUT_8(isp, src->tmf_vpidx, &dst->tmf_vpidx); for (i = 0; i < ASIZE(src->tmf_reserved2); i++) { ISP_IOXPUT_8(isp, src->tmf_reserved2[i], &dst->tmf_reserved2[i]); } } void isp_put_24xx_abrt(ispsoftc_t *isp, isp24xx_abrt_t *src, isp24xx_abrt_t *dst) { int i; isp_put_hdr(isp, &src->abrt_header, &dst->abrt_header); ISP_IOXPUT_32(isp, src->abrt_handle, &dst->abrt_handle); ISP_IOXPUT_16(isp, src->abrt_nphdl, &dst->abrt_nphdl); ISP_IOXPUT_16(isp, src->abrt_options, &dst->abrt_options); ISP_IOXPUT_32(isp, src->abrt_cmd_handle, &dst->abrt_cmd_handle); ISP_IOXPUT_16(isp, src->abrt_queue_number, &dst->abrt_queue_number); for (i = 0; i < ASIZE(src->abrt_reserved); i++) { ISP_IOXPUT_8(isp, src->abrt_reserved[i], &dst->abrt_reserved[i]); } ISP_IOXPUT_16(isp, src->abrt_tidlo, &dst->abrt_tidlo); ISP_IOXPUT_8(isp, src->abrt_tidhi, &dst->abrt_tidhi); ISP_IOXPUT_8(isp, src->abrt_vpidx, &dst->abrt_vpidx); for (i = 0; i < ASIZE(src->abrt_reserved1); i++) { ISP_IOXPUT_8(isp, src->abrt_reserved1[i], &dst->abrt_reserved1[i]); } } void isp_put_cont_req(ispsoftc_t *isp, ispcontreq_t *src, ispcontreq_t *dst) { int i; isp_put_hdr(isp, &src->req_header, &dst->req_header); for (i = 0; i < ISP_CDSEG; i++) { ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_base, &dst->req_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_count, &dst->req_dataseg[i].ds_count); } } void isp_put_cont64_req(ispsoftc_t *isp, ispcontreq64_t *src, ispcontreq64_t *dst) { int i; isp_put_hdr(isp, &src->req_header, &dst->req_header); for (i = 0; i < ISP_CDSEG64; i++) { ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_base, &dst->req_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_basehi, &dst->req_dataseg[i].ds_basehi); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_count, &dst->req_dataseg[i].ds_count); } } void isp_get_response(ispsoftc_t *isp, ispstatusreq_t *src, ispstatusreq_t *dst) { int i; isp_get_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXGET_32(isp, &src->req_handle, dst->req_handle); ISP_IOXGET_16(isp, &src->req_scsi_status, dst->req_scsi_status); ISP_IOXGET_16(isp, &src->req_completion_status, dst->req_completion_status); ISP_IOXGET_16(isp, &src->req_state_flags, dst->req_state_flags); ISP_IOXGET_16(isp, &src->req_status_flags, dst->req_status_flags); ISP_IOXGET_16(isp, &src->req_time, dst->req_time); ISP_IOXGET_16(isp, &src->req_sense_len, dst->req_sense_len); ISP_IOXGET_32(isp, &src->req_resid, dst->req_resid); for (i = 0; i < sizeof (src->req_response); i++) { ISP_IOXGET_8(isp, &src->req_response[i], dst->req_response[i]); } for (i = 0; i < sizeof (src->req_sense_data); i++) { ISP_IOXGET_8(isp, &src->req_sense_data[i], dst->req_sense_data[i]); } } void isp_get_cont_response(ispsoftc_t *isp, ispstatus_cont_t *src, ispstatus_cont_t *dst) { int i; isp_get_hdr(isp, &src->req_header, &dst->req_header); if (IS_24XX(isp)) { uint32_t *a, *b; a = (uint32_t *) src->req_sense_data; b = (uint32_t *) dst->req_sense_data; for (i = 0; i < (sizeof (src->req_sense_data) / sizeof (uint32_t)); i++) { ISP_IOZGET_32(isp, a++, *b++); } } else { for (i 
= 0; i < sizeof (src->req_sense_data); i++) { ISP_IOXGET_8(isp, &src->req_sense_data[i], dst->req_sense_data[i]); } } } void isp_get_24xx_response(ispsoftc_t *isp, isp24xx_statusreq_t *src, isp24xx_statusreq_t *dst) { int i; uint32_t *s, *d; isp_get_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXGET_32(isp, &src->req_handle, dst->req_handle); ISP_IOXGET_16(isp, &src->req_completion_status, dst->req_completion_status); ISP_IOXGET_16(isp, &src->req_oxid, dst->req_oxid); ISP_IOXGET_32(isp, &src->req_resid, dst->req_resid); ISP_IOXGET_16(isp, &src->req_reserved0, dst->req_reserved0); ISP_IOXGET_16(isp, &src->req_state_flags, dst->req_state_flags); ISP_IOXGET_16(isp, &src->req_retry_delay, dst->req_retry_delay); ISP_IOXGET_16(isp, &src->req_scsi_status, dst->req_scsi_status); ISP_IOXGET_32(isp, &src->req_fcp_residual, dst->req_fcp_residual); ISP_IOXGET_32(isp, &src->req_sense_len, dst->req_sense_len); ISP_IOXGET_32(isp, &src->req_response_len, dst->req_response_len); s = (uint32_t *)src->req_rsp_sense; d = (uint32_t *)dst->req_rsp_sense; for (i = 0; i < (ASIZE(src->req_rsp_sense) >> 2); i++) { d[i] = ISP_SWAP32(isp, s[i]); } } void isp_get_24xx_abrt(ispsoftc_t *isp, isp24xx_abrt_t *src, isp24xx_abrt_t *dst) { int i; isp_get_hdr(isp, &src->abrt_header, &dst->abrt_header); ISP_IOXGET_32(isp, &src->abrt_handle, dst->abrt_handle); ISP_IOXGET_16(isp, &src->abrt_nphdl, dst->abrt_nphdl); ISP_IOXGET_16(isp, &src->abrt_options, dst->abrt_options); ISP_IOXGET_32(isp, &src->abrt_cmd_handle, dst->abrt_cmd_handle); ISP_IOXGET_16(isp, &src->abrt_queue_number, dst->abrt_queue_number); for (i = 0; i < ASIZE(src->abrt_reserved); i++) { ISP_IOXGET_8(isp, &src->abrt_reserved[i], dst->abrt_reserved[i]); } ISP_IOXGET_16(isp, &src->abrt_tidlo, dst->abrt_tidlo); ISP_IOXGET_8(isp, &src->abrt_tidhi, dst->abrt_tidhi); ISP_IOXGET_8(isp, &src->abrt_vpidx, dst->abrt_vpidx); for (i = 0; i < ASIZE(src->abrt_reserved1); i++) { ISP_IOXGET_8(isp, &src->abrt_reserved1[i], dst->abrt_reserved1[i]); } } void isp_get_rio1(ispsoftc_t *isp, isp_rio1_t *r1src, isp_rio1_t *r1dst) { const int lim = sizeof (r1dst->req_handles) / sizeof (r1dst->req_handles[0]); int i; isp_get_hdr(isp, &r1src->req_header, &r1dst->req_header); if (r1dst->req_header.rqs_seqno > lim) { r1dst->req_header.rqs_seqno = lim; } for (i = 0; i < r1dst->req_header.rqs_seqno; i++) { ISP_IOXGET_32(isp, &r1src->req_handles[i], r1dst->req_handles[i]); } while (i < lim) { r1dst->req_handles[i++] = 0; } } void isp_get_rio2(ispsoftc_t *isp, isp_rio2_t *r2src, isp_rio2_t *r2dst) { const int lim = sizeof (r2dst->req_handles) / sizeof (r2dst->req_handles[0]); int i; isp_get_hdr(isp, &r2src->req_header, &r2dst->req_header); if (r2dst->req_header.rqs_seqno > lim) { r2dst->req_header.rqs_seqno = lim; } for (i = 0; i < r2dst->req_header.rqs_seqno; i++) { ISP_IOXGET_16(isp, &r2src->req_handles[i], r2dst->req_handles[i]); } while (i < lim) { r2dst->req_handles[i++] = 0; } } void isp_put_icb(ispsoftc_t *isp, isp_icb_t *src, isp_icb_t *dst) { int i; if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->icb_version, &dst->icb_reserved0); ISP_IOXPUT_8(isp, src->icb_reserved0, &dst->icb_version); } else { ISP_IOXPUT_8(isp, src->icb_version, &dst->icb_version); ISP_IOXPUT_8(isp, src->icb_reserved0, &dst->icb_reserved0); } ISP_IOXPUT_16(isp, src->icb_fwoptions, &dst->icb_fwoptions); ISP_IOXPUT_16(isp, src->icb_maxfrmlen, &dst->icb_maxfrmlen); ISP_IOXPUT_16(isp, src->icb_maxalloc, &dst->icb_maxalloc); ISP_IOXPUT_16(isp, src->icb_execthrottle, &dst->icb_execthrottle); if (ISP_IS_SBUS(isp)) { 
ISP_IOXPUT_8(isp, src->icb_retry_count, &dst->icb_retry_delay); ISP_IOXPUT_8(isp, src->icb_retry_delay, &dst->icb_retry_count); } else { ISP_IOXPUT_8(isp, src->icb_retry_count, &dst->icb_retry_count); ISP_IOXPUT_8(isp, src->icb_retry_delay, &dst->icb_retry_delay); } for (i = 0; i < 8; i++) { ISP_IOXPUT_8(isp, src->icb_portname[i], &dst->icb_portname[i]); } ISP_IOXPUT_16(isp, src->icb_hardaddr, &dst->icb_hardaddr); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->icb_iqdevtype, &dst->icb_logintime); ISP_IOXPUT_8(isp, src->icb_logintime, &dst->icb_iqdevtype); } else { ISP_IOXPUT_8(isp, src->icb_iqdevtype, &dst->icb_iqdevtype); ISP_IOXPUT_8(isp, src->icb_logintime, &dst->icb_logintime); } for (i = 0; i < 8; i++) { ISP_IOXPUT_8(isp, src->icb_nodename[i], &dst->icb_nodename[i]); } ISP_IOXPUT_16(isp, src->icb_rqstout, &dst->icb_rqstout); ISP_IOXPUT_16(isp, src->icb_rspnsin, &dst->icb_rspnsin); ISP_IOXPUT_16(isp, src->icb_rqstqlen, &dst->icb_rqstqlen); ISP_IOXPUT_16(isp, src->icb_rsltqlen, &dst->icb_rsltqlen); for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->icb_rqstaddr[i], &dst->icb_rqstaddr[i]); } for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->icb_respaddr[i], &dst->icb_respaddr[i]); } ISP_IOXPUT_16(isp, src->icb_lunenables, &dst->icb_lunenables); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->icb_ccnt, &dst->icb_icnt); ISP_IOXPUT_8(isp, src->icb_icnt, &dst->icb_ccnt); } else { ISP_IOXPUT_8(isp, src->icb_ccnt, &dst->icb_ccnt); ISP_IOXPUT_8(isp, src->icb_icnt, &dst->icb_icnt); } ISP_IOXPUT_16(isp, src->icb_lunetimeout, &dst->icb_lunetimeout); ISP_IOXPUT_16(isp, src->icb_reserved1, &dst->icb_reserved1); ISP_IOXPUT_16(isp, src->icb_xfwoptions, &dst->icb_xfwoptions); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->icb_racctimer, &dst->icb_idelaytimer); ISP_IOXPUT_8(isp, src->icb_idelaytimer, &dst->icb_racctimer); } else { ISP_IOXPUT_8(isp, src->icb_racctimer, &dst->icb_racctimer); ISP_IOXPUT_8(isp, src->icb_idelaytimer, &dst->icb_idelaytimer); } ISP_IOXPUT_16(isp, src->icb_zfwoptions, &dst->icb_zfwoptions); } void isp_put_icb_2400(ispsoftc_t *isp, isp_icb_2400_t *src, isp_icb_2400_t *dst) { int i; ISP_IOXPUT_16(isp, src->icb_version, &dst->icb_version); ISP_IOXPUT_16(isp, src->icb_reserved0, &dst->icb_reserved0); ISP_IOXPUT_16(isp, src->icb_maxfrmlen, &dst->icb_maxfrmlen); ISP_IOXPUT_16(isp, src->icb_execthrottle, &dst->icb_execthrottle); ISP_IOXPUT_16(isp, src->icb_xchgcnt, &dst->icb_xchgcnt); ISP_IOXPUT_16(isp, src->icb_hardaddr, &dst->icb_hardaddr); for (i = 0; i < 8; i++) { ISP_IOXPUT_8(isp, src->icb_portname[i], &dst->icb_portname[i]); } for (i = 0; i < 8; i++) { ISP_IOXPUT_8(isp, src->icb_nodename[i], &dst->icb_nodename[i]); } ISP_IOXPUT_16(isp, src->icb_rspnsin, &dst->icb_rspnsin); ISP_IOXPUT_16(isp, src->icb_rqstout, &dst->icb_rqstout); ISP_IOXPUT_16(isp, src->icb_retry_count, &dst->icb_retry_count); ISP_IOXPUT_16(isp, src->icb_priout, &dst->icb_priout); ISP_IOXPUT_16(isp, src->icb_rsltqlen, &dst->icb_rsltqlen); ISP_IOXPUT_16(isp, src->icb_rqstqlen, &dst->icb_rqstqlen); ISP_IOXPUT_16(isp, src->icb_ldn_nols, &dst->icb_ldn_nols); ISP_IOXPUT_16(isp, src->icb_prqstqlen, &dst->icb_prqstqlen); for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->icb_rqstaddr[i], &dst->icb_rqstaddr[i]); } for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->icb_respaddr[i], &dst->icb_respaddr[i]); } for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->icb_priaddr[i], &dst->icb_priaddr[i]); } for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->icb_reserved1[i], &dst->icb_reserved1[i]); } ISP_IOXPUT_16(isp, src->icb_atio_in, 
&dst->icb_atio_in); ISP_IOXPUT_16(isp, src->icb_atioqlen, &dst->icb_atioqlen); for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->icb_atioqaddr[i], &dst->icb_atioqaddr[i]); } ISP_IOXPUT_16(isp, src->icb_idelaytimer, &dst->icb_idelaytimer); ISP_IOXPUT_16(isp, src->icb_logintime, &dst->icb_logintime); ISP_IOXPUT_32(isp, src->icb_fwoptions1, &dst->icb_fwoptions1); ISP_IOXPUT_32(isp, src->icb_fwoptions2, &dst->icb_fwoptions2); ISP_IOXPUT_32(isp, src->icb_fwoptions3, &dst->icb_fwoptions3); for (i = 0; i < 12; i++) { ISP_IOXPUT_16(isp, src->icb_reserved2[i], &dst->icb_reserved2[i]); } } void isp_put_icb_2400_vpinfo(ispsoftc_t *isp, isp_icb_2400_vpinfo_t *src, isp_icb_2400_vpinfo_t *dst) { ISP_IOXPUT_16(isp, src->vp_count, &dst->vp_count); ISP_IOXPUT_16(isp, src->vp_global_options, &dst->vp_global_options); } void isp_put_vp_port_info(ispsoftc_t *isp, vp_port_info_t *src, vp_port_info_t *dst) { int i; ISP_IOXPUT_16(isp, src->vp_port_status, &dst->vp_port_status); ISP_IOXPUT_8(isp, src->vp_port_options, &dst->vp_port_options); ISP_IOXPUT_8(isp, src->vp_port_loopid, &dst->vp_port_loopid); for (i = 0; i < 8; i++) { ISP_IOXPUT_8(isp, src->vp_port_portname[i], &dst->vp_port_portname[i]); } for (i = 0; i < 8; i++) { ISP_IOXPUT_8(isp, src->vp_port_nodename[i], &dst->vp_port_nodename[i]); } /* we never *put* portid_lo/portid_hi */ } void isp_get_vp_port_info(ispsoftc_t *isp, vp_port_info_t *src, vp_port_info_t *dst) { int i; ISP_IOXGET_16(isp, &src->vp_port_status, dst->vp_port_status); ISP_IOXGET_8(isp, &src->vp_port_options, dst->vp_port_options); ISP_IOXGET_8(isp, &src->vp_port_loopid, dst->vp_port_loopid); for (i = 0; i < ASIZE(src->vp_port_portname); i++) { ISP_IOXGET_8(isp, &src->vp_port_portname[i], dst->vp_port_portname[i]); } for (i = 0; i < ASIZE(src->vp_port_nodename); i++) { ISP_IOXGET_8(isp, &src->vp_port_nodename[i], dst->vp_port_nodename[i]); } ISP_IOXGET_16(isp, &src->vp_port_portid_lo, dst->vp_port_portid_lo); ISP_IOXGET_16(isp, &src->vp_port_portid_hi, dst->vp_port_portid_hi); } void isp_put_vp_ctrl_info(ispsoftc_t *isp, vp_ctrl_info_t *src, vp_ctrl_info_t *dst) { int i; isp_put_hdr(isp, &src->vp_ctrl_hdr, &dst->vp_ctrl_hdr); ISP_IOXPUT_32(isp, src->vp_ctrl_handle, &dst->vp_ctrl_handle); ISP_IOXPUT_16(isp, src->vp_ctrl_index_fail, &dst->vp_ctrl_index_fail); ISP_IOXPUT_16(isp, src->vp_ctrl_status, &dst->vp_ctrl_status); ISP_IOXPUT_16(isp, src->vp_ctrl_command, &dst->vp_ctrl_command); ISP_IOXPUT_16(isp, src->vp_ctrl_vp_count, &dst->vp_ctrl_vp_count); for (i = 0; i < ASIZE(src->vp_ctrl_idmap); i++) { ISP_IOXPUT_16(isp, src->vp_ctrl_idmap[i], &dst->vp_ctrl_idmap[i]); } for (i = 0; i < ASIZE(src->vp_ctrl_reserved); i++) { ISP_IOXPUT_16(isp, src->vp_ctrl_reserved[i], &dst->vp_ctrl_reserved[i]); } ISP_IOXPUT_16(isp, src->vp_ctrl_fcf_index, &dst->vp_ctrl_fcf_index); } void isp_get_vp_ctrl_info(ispsoftc_t *isp, vp_ctrl_info_t *src, vp_ctrl_info_t *dst) { int i; isp_get_hdr(isp, &src->vp_ctrl_hdr, &dst->vp_ctrl_hdr); ISP_IOXGET_32(isp, &src->vp_ctrl_handle, dst->vp_ctrl_handle); ISP_IOXGET_16(isp, &src->vp_ctrl_index_fail, dst->vp_ctrl_index_fail); ISP_IOXGET_16(isp, &src->vp_ctrl_status, dst->vp_ctrl_status); ISP_IOXGET_16(isp, &src->vp_ctrl_command, dst->vp_ctrl_command); ISP_IOXGET_16(isp, &src->vp_ctrl_vp_count, dst->vp_ctrl_vp_count); for (i = 0; i < ASIZE(src->vp_ctrl_idmap); i++) { ISP_IOXGET_16(isp, &src->vp_ctrl_idmap[i], dst->vp_ctrl_idmap[i]); } for (i = 0; i < ASIZE(src->vp_ctrl_reserved); i++) { ISP_IOXGET_16(isp, &src->vp_ctrl_reserved[i], dst->vp_ctrl_reserved[i]); } 
ISP_IOXGET_16(isp, &src->vp_ctrl_fcf_index, dst->vp_ctrl_fcf_index); } void isp_put_vp_modify(ispsoftc_t *isp, vp_modify_t *src, vp_modify_t *dst) { int i, j; isp_put_hdr(isp, &src->vp_mod_hdr, &dst->vp_mod_hdr); ISP_IOXPUT_32(isp, src->vp_mod_hdl, &dst->vp_mod_hdl); ISP_IOXPUT_16(isp, src->vp_mod_reserved0, &dst->vp_mod_reserved0); ISP_IOXPUT_16(isp, src->vp_mod_status, &dst->vp_mod_status); ISP_IOXPUT_8(isp, src->vp_mod_cmd, &dst->vp_mod_cmd); ISP_IOXPUT_8(isp, src->vp_mod_cnt, &dst->vp_mod_cnt); ISP_IOXPUT_8(isp, src->vp_mod_idx0, &dst->vp_mod_idx0); ISP_IOXPUT_8(isp, src->vp_mod_idx1, &dst->vp_mod_idx1); for (i = 0; i < ASIZE(src->vp_mod_ports); i++) { ISP_IOXPUT_8(isp, src->vp_mod_ports[i].options, &dst->vp_mod_ports[i].options); ISP_IOXPUT_8(isp, src->vp_mod_ports[i].loopid, &dst->vp_mod_ports[i].loopid); ISP_IOXPUT_16(isp, src->vp_mod_ports[i].reserved1, &dst->vp_mod_ports[i].reserved1); for (j = 0; j < ASIZE(src->vp_mod_ports[i].wwpn); j++) { ISP_IOXPUT_8(isp, src->vp_mod_ports[i].wwpn[j], &dst->vp_mod_ports[i].wwpn[j]); } for (j = 0; j < ASIZE(src->vp_mod_ports[i].wwnn); j++) { ISP_IOXPUT_8(isp, src->vp_mod_ports[i].wwnn[j], &dst->vp_mod_ports[i].wwnn[j]); } } for (i = 0; i < ASIZE(src->vp_mod_reserved2); i++) { ISP_IOXPUT_8(isp, src->vp_mod_reserved2[i], &dst->vp_mod_reserved2[i]); } } void isp_get_vp_modify(ispsoftc_t *isp, vp_modify_t *src, vp_modify_t *dst) { int i, j; isp_get_hdr(isp, &src->vp_mod_hdr, &dst->vp_mod_hdr); ISP_IOXGET_32(isp, &src->vp_mod_hdl, dst->vp_mod_hdl); ISP_IOXGET_16(isp, &src->vp_mod_reserved0, dst->vp_mod_reserved0); ISP_IOXGET_16(isp, &src->vp_mod_status, dst->vp_mod_status); ISP_IOXGET_8(isp, &src->vp_mod_cmd, dst->vp_mod_cmd); ISP_IOXGET_8(isp, &src->vp_mod_cnt, dst->vp_mod_cnt); ISP_IOXGET_8(isp, &src->vp_mod_idx0, dst->vp_mod_idx0); ISP_IOXGET_8(isp, &src->vp_mod_idx1, dst->vp_mod_idx1); for (i = 0; i < ASIZE(src->vp_mod_ports); i++) { ISP_IOXGET_8(isp, &src->vp_mod_ports[i].options, dst->vp_mod_ports[i].options); ISP_IOXGET_8(isp, &src->vp_mod_ports[i].loopid, dst->vp_mod_ports[i].loopid); ISP_IOXGET_16(isp, &src->vp_mod_ports[i].reserved1, dst->vp_mod_ports[i].reserved1); for (j = 0; j < ASIZE(src->vp_mod_ports[i].wwpn); j++) { ISP_IOXGET_8(isp, &src->vp_mod_ports[i].wwpn[j], dst->vp_mod_ports[i].wwpn[j]); } for (j = 0; j < ASIZE(src->vp_mod_ports[i].wwnn); j++) { ISP_IOXGET_8(isp, &src->vp_mod_ports[i].wwnn[j], dst->vp_mod_ports[i].wwnn[j]); } } for (i = 0; i < ASIZE(src->vp_mod_reserved2); i++) { ISP_IOXGET_8(isp, &src->vp_mod_reserved2[i], dst->vp_mod_reserved2[i]); } } void isp_get_pdb_21xx(ispsoftc_t *isp, isp_pdb_21xx_t *src, isp_pdb_21xx_t *dst) { int i; ISP_IOXGET_16(isp, &src->pdb_options, dst->pdb_options); ISP_IOXGET_8(isp, &src->pdb_mstate, dst->pdb_mstate); ISP_IOXGET_8(isp, &src->pdb_sstate, dst->pdb_sstate); for (i = 0; i < 4; i++) { ISP_IOXGET_8(isp, &src->pdb_hardaddr_bits[i], dst->pdb_hardaddr_bits[i]); } for (i = 0; i < 4; i++) { ISP_IOXGET_8(isp, &src->pdb_portid_bits[i], dst->pdb_portid_bits[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->pdb_nodename[i], dst->pdb_nodename[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->pdb_portname[i], dst->pdb_portname[i]); } ISP_IOXGET_16(isp, &src->pdb_execthrottle, dst->pdb_execthrottle); ISP_IOXGET_16(isp, &src->pdb_exec_count, dst->pdb_exec_count); ISP_IOXGET_8(isp, &src->pdb_retry_count, dst->pdb_retry_count); ISP_IOXGET_8(isp, &src->pdb_retry_delay, dst->pdb_retry_delay); ISP_IOXGET_16(isp, &src->pdb_resalloc, dst->pdb_resalloc); ISP_IOXGET_16(isp, 
&src->pdb_curalloc, dst->pdb_curalloc); ISP_IOXGET_16(isp, &src->pdb_qhead, dst->pdb_qhead); ISP_IOXGET_16(isp, &src->pdb_qtail, dst->pdb_qtail); ISP_IOXGET_16(isp, &src->pdb_tl_next, dst->pdb_tl_next); ISP_IOXGET_16(isp, &src->pdb_tl_last, dst->pdb_tl_last); ISP_IOXGET_16(isp, &src->pdb_features, dst->pdb_features); ISP_IOXGET_16(isp, &src->pdb_pconcurrnt, dst->pdb_pconcurrnt); ISP_IOXGET_16(isp, &src->pdb_roi, dst->pdb_roi); ISP_IOXGET_8(isp, &src->pdb_target, dst->pdb_target); ISP_IOXGET_8(isp, &src->pdb_initiator, dst->pdb_initiator); ISP_IOXGET_16(isp, &src->pdb_rdsiz, dst->pdb_rdsiz); ISP_IOXGET_16(isp, &src->pdb_ncseq, dst->pdb_ncseq); ISP_IOXGET_16(isp, &src->pdb_noseq, dst->pdb_noseq); ISP_IOXGET_16(isp, &src->pdb_labrtflg, dst->pdb_labrtflg); ISP_IOXGET_16(isp, &src->pdb_lstopflg, dst->pdb_lstopflg); ISP_IOXGET_16(isp, &src->pdb_sqhead, dst->pdb_sqhead); ISP_IOXGET_16(isp, &src->pdb_sqtail, dst->pdb_sqtail); ISP_IOXGET_16(isp, &src->pdb_ptimer, dst->pdb_ptimer); ISP_IOXGET_16(isp, &src->pdb_nxt_seqid, dst->pdb_nxt_seqid); ISP_IOXGET_16(isp, &src->pdb_fcount, dst->pdb_fcount); ISP_IOXGET_16(isp, &src->pdb_prli_len, dst->pdb_prli_len); ISP_IOXGET_16(isp, &src->pdb_prli_svc0, dst->pdb_prli_svc0); ISP_IOXGET_16(isp, &src->pdb_prli_svc3, dst->pdb_prli_svc3); ISP_IOXGET_16(isp, &src->pdb_loopid, dst->pdb_loopid); ISP_IOXGET_16(isp, &src->pdb_il_ptr, dst->pdb_il_ptr); ISP_IOXGET_16(isp, &src->pdb_sl_ptr, dst->pdb_sl_ptr); } void isp_get_pdb_24xx(ispsoftc_t *isp, isp_pdb_24xx_t *src, isp_pdb_24xx_t *dst) { int i; ISP_IOXGET_16(isp, &src->pdb_flags, dst->pdb_flags); ISP_IOXGET_8(isp, &src->pdb_curstate, dst->pdb_curstate); ISP_IOXGET_8(isp, &src->pdb_laststate, dst->pdb_laststate); for (i = 0; i < 4; i++) { ISP_IOXGET_8(isp, &src->pdb_hardaddr_bits[i], dst->pdb_hardaddr_bits[i]); } for (i = 0; i < 4; i++) { ISP_IOXGET_8(isp, &src->pdb_portid_bits[i], dst->pdb_portid_bits[i]); } ISP_IOXGET_16(isp, &src->pdb_retry_timer, dst->pdb_retry_timer); ISP_IOXGET_16(isp, &src->pdb_handle, dst->pdb_handle); ISP_IOXGET_16(isp, &src->pdb_rcv_dsize, dst->pdb_rcv_dsize); ISP_IOXGET_16(isp, &src->pdb_reserved0, dst->pdb_reserved0); ISP_IOXGET_16(isp, &src->pdb_prli_svc0, dst->pdb_prli_svc0); ISP_IOXGET_16(isp, &src->pdb_prli_svc3, dst->pdb_prli_svc3); for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->pdb_nodename[i], dst->pdb_nodename[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->pdb_portname[i], dst->pdb_portname[i]); } for (i = 0; i < 24; i++) { ISP_IOXGET_8(isp, &src->pdb_reserved1[i], dst->pdb_reserved1[i]); } } void isp_get_pnhle_21xx(ispsoftc_t *isp, isp_pnhle_21xx_t *src, isp_pnhle_21xx_t *dst) { ISP_IOXGET_16(isp, &src->pnhle_port_id_lo, dst->pnhle_port_id_lo); ISP_IOXGET_16(isp, &src->pnhle_port_id_hi_handle, dst->pnhle_port_id_hi_handle); } void isp_get_pnhle_23xx(ispsoftc_t *isp, isp_pnhle_23xx_t *src, isp_pnhle_23xx_t *dst) { ISP_IOXGET_16(isp, &src->pnhle_port_id_lo, dst->pnhle_port_id_lo); ISP_IOXGET_16(isp, &src->pnhle_port_id_hi, dst->pnhle_port_id_hi); ISP_IOXGET_16(isp, &src->pnhle_handle, dst->pnhle_handle); } void isp_get_pnhle_24xx(ispsoftc_t *isp, isp_pnhle_24xx_t *src, isp_pnhle_24xx_t *dst) { ISP_IOXGET_16(isp, &src->pnhle_port_id_lo, dst->pnhle_port_id_lo); ISP_IOXGET_16(isp, &src->pnhle_port_id_hi, dst->pnhle_port_id_hi); ISP_IOXGET_16(isp, &src->pnhle_handle, dst->pnhle_handle); ISP_IOXGET_16(isp, &src->pnhle_reserved, dst->pnhle_reserved); } void isp_get_pnnle(ispsoftc_t *isp, isp_pnnle_t *src, isp_pnnle_t *dst) { int i; for (i = 0; i < 8; i++) ISP_IOXGET_8(isp, 
&src->pnnle_name[i], dst->pnnle_name[i]); ISP_IOXGET_16(isp, &src->pnnle_handle, dst->pnnle_handle); ISP_IOXGET_16(isp, &src->pnnle_reserved, dst->pnnle_reserved); } /* * PLOGI/LOGO IOCB canonicalization */ void isp_get_plogx(ispsoftc_t *isp, isp_plogx_t *src, isp_plogx_t *dst) { int i; isp_get_hdr(isp, &src->plogx_header, &dst->plogx_header); ISP_IOXGET_32(isp, &src->plogx_handle, dst->plogx_handle); ISP_IOXGET_16(isp, &src->plogx_status, dst->plogx_status); ISP_IOXGET_16(isp, &src->plogx_nphdl, dst->plogx_nphdl); ISP_IOXGET_16(isp, &src->plogx_flags, dst->plogx_flags); ISP_IOXGET_16(isp, &src->plogx_vphdl, dst->plogx_vphdl); ISP_IOXGET_16(isp, &src->plogx_portlo, dst->plogx_portlo); ISP_IOXGET_16(isp, &src->plogx_rspsz_porthi, dst->plogx_rspsz_porthi); for (i = 0; i < 11; i++) { ISP_IOXGET_16(isp, &src->plogx_ioparm[i].lo16, dst->plogx_ioparm[i].lo16); ISP_IOXGET_16(isp, &src->plogx_ioparm[i].hi16, dst->plogx_ioparm[i].hi16); } } void isp_put_plogx(ispsoftc_t *isp, isp_plogx_t *src, isp_plogx_t *dst) { int i; isp_put_hdr(isp, &src->plogx_header, &dst->plogx_header); ISP_IOXPUT_32(isp, src->plogx_handle, &dst->plogx_handle); ISP_IOXPUT_16(isp, src->plogx_status, &dst->plogx_status); ISP_IOXPUT_16(isp, src->plogx_nphdl, &dst->plogx_nphdl); ISP_IOXPUT_16(isp, src->plogx_flags, &dst->plogx_flags); ISP_IOXPUT_16(isp, src->plogx_vphdl, &dst->plogx_vphdl); ISP_IOXPUT_16(isp, src->plogx_portlo, &dst->plogx_portlo); ISP_IOXPUT_16(isp, src->plogx_rspsz_porthi, &dst->plogx_rspsz_porthi); for (i = 0; i < 11; i++) { ISP_IOXPUT_16(isp, src->plogx_ioparm[i].lo16, &dst->plogx_ioparm[i].lo16); ISP_IOXPUT_16(isp, src->plogx_ioparm[i].hi16, &dst->plogx_ioparm[i].hi16); } } /* * Report ID canonicalization */ void isp_get_ridacq(ispsoftc_t *isp, isp_ridacq_t *src, isp_ridacq_t *dst) { int i; isp_get_hdr(isp, &src->ridacq_hdr, &dst->ridacq_hdr); ISP_IOXGET_32(isp, &src->ridacq_handle, dst->ridacq_handle); + ISP_IOXGET_8(isp, &src->ridacq_vp_acquired, dst->ridacq_vp_acquired); + ISP_IOXGET_8(isp, &src->ridacq_vp_setup, dst->ridacq_vp_setup); + ISP_IOXGET_8(isp, &src->ridacq_vp_index, dst->ridacq_vp_index); + ISP_IOXGET_8(isp, &src->ridacq_vp_status, dst->ridacq_vp_status); ISP_IOXGET_16(isp, &src->ridacq_vp_port_lo, dst->ridacq_vp_port_lo); ISP_IOXGET_8(isp, &src->ridacq_vp_port_hi, dst->ridacq_vp_port_hi); ISP_IOXGET_8(isp, &src->ridacq_format, dst->ridacq_format); for (i = 0; i < sizeof (src->ridacq_map) / sizeof (src->ridacq_map[0]); i++) { ISP_IOXGET_16(isp, &src->ridacq_map[i], dst->ridacq_map[i]); } for (i = 0; i < sizeof (src->ridacq_reserved1) / sizeof (src->ridacq_reserved1[0]); i++) { ISP_IOXGET_16(isp, &src->ridacq_reserved1[i], dst->ridacq_reserved1[i]); - } - if (dst->ridacq_format == 0) { - ISP_IOXGET_8(isp, &src->un.type0.ridacq_vp_acquired, dst->un.type0.ridacq_vp_acquired); - ISP_IOXGET_8(isp, &src->un.type0.ridacq_vp_setup, dst->un.type0.ridacq_vp_setup); - ISP_IOXGET_16(isp, &src->un.type0.ridacq_reserved0, dst->un.type0.ridacq_reserved0); - } else if (dst->ridacq_format == 1) { - ISP_IOXGET_16(isp, &src->un.type1.ridacq_vp_count, dst->un.type1.ridacq_vp_count); - ISP_IOXGET_8(isp, &src->un.type1.ridacq_vp_index, dst->un.type1.ridacq_vp_index); - ISP_IOXGET_8(isp, &src->un.type1.ridacq_vp_status, dst->un.type1.ridacq_vp_status); - } else { - ISP_MEMZERO(&dst->un, sizeof (dst->un)); } } /* * CT Passthru canonicalization */ void isp_get_ct_pt(ispsoftc_t *isp, isp_ct_pt_t *src, isp_ct_pt_t *dst) { int i; isp_get_hdr(isp, &src->ctp_header, &dst->ctp_header); ISP_IOXGET_32(isp, &src->ctp_handle, 
dst->ctp_handle); ISP_IOXGET_16(isp, &src->ctp_status, dst->ctp_status); ISP_IOXGET_16(isp, &src->ctp_nphdl, dst->ctp_nphdl); ISP_IOXGET_16(isp, &src->ctp_cmd_cnt, dst->ctp_cmd_cnt); ISP_IOXGET_8(isp, &src->ctp_vpidx, dst->ctp_vpidx); ISP_IOXGET_8(isp, &src->ctp_reserved0, dst->ctp_reserved0); ISP_IOXGET_16(isp, &src->ctp_time, dst->ctp_time); ISP_IOXGET_16(isp, &src->ctp_reserved1, dst->ctp_reserved1); ISP_IOXGET_16(isp, &src->ctp_rsp_cnt, dst->ctp_rsp_cnt); for (i = 0; i < 5; i++) { ISP_IOXGET_16(isp, &src->ctp_reserved2[i], dst->ctp_reserved2[i]); } ISP_IOXGET_32(isp, &src->ctp_rsp_bcnt, dst->ctp_rsp_bcnt); ISP_IOXGET_32(isp, &src->ctp_cmd_bcnt, dst->ctp_cmd_bcnt); for (i = 0; i < 2; i++) { ISP_IOXGET_32(isp, &src->ctp_dataseg[i].ds_base, dst->ctp_dataseg[i].ds_base); ISP_IOXGET_32(isp, &src->ctp_dataseg[i].ds_basehi, dst->ctp_dataseg[i].ds_basehi); ISP_IOXGET_32(isp, &src->ctp_dataseg[i].ds_count, dst->ctp_dataseg[i].ds_count); } } void isp_get_ms(ispsoftc_t *isp, isp_ms_t *src, isp_ms_t *dst) { int i; isp_get_hdr(isp, &src->ms_header, &dst->ms_header); ISP_IOXGET_32(isp, &src->ms_handle, dst->ms_handle); ISP_IOXGET_16(isp, &src->ms_nphdl, dst->ms_nphdl); ISP_IOXGET_16(isp, &src->ms_status, dst->ms_status); ISP_IOXGET_16(isp, &src->ms_flags, dst->ms_flags); ISP_IOXGET_16(isp, &src->ms_reserved1, dst->ms_reserved1); ISP_IOXGET_16(isp, &src->ms_time, dst->ms_time); ISP_IOXGET_16(isp, &src->ms_cmd_cnt, dst->ms_cmd_cnt); ISP_IOXGET_16(isp, &src->ms_tot_cnt, dst->ms_tot_cnt); ISP_IOXGET_8(isp, &src->ms_type, dst->ms_type); ISP_IOXGET_8(isp, &src->ms_r_ctl, dst->ms_r_ctl); ISP_IOXGET_16(isp, &src->ms_rxid, dst->ms_rxid); ISP_IOXGET_16(isp, &src->ms_reserved2, dst->ms_reserved2); ISP_IOXGET_32(isp, &src->ms_rsp_bcnt, dst->ms_rsp_bcnt); ISP_IOXGET_32(isp, &src->ms_cmd_bcnt, dst->ms_cmd_bcnt); for (i = 0; i < 2; i++) { ISP_IOXGET_32(isp, &src->ms_dataseg[i].ds_base, dst->ms_dataseg[i].ds_base); ISP_IOXGET_32(isp, &src->ms_dataseg[i].ds_basehi, dst->ms_dataseg[i].ds_basehi); ISP_IOXGET_32(isp, &src->ms_dataseg[i].ds_count, dst->ms_dataseg[i].ds_count); } } void isp_put_ct_pt(ispsoftc_t *isp, isp_ct_pt_t *src, isp_ct_pt_t *dst) { int i; isp_put_hdr(isp, &src->ctp_header, &dst->ctp_header); ISP_IOXPUT_32(isp, src->ctp_handle, &dst->ctp_handle); ISP_IOXPUT_16(isp, src->ctp_status, &dst->ctp_status); ISP_IOXPUT_16(isp, src->ctp_nphdl, &dst->ctp_nphdl); ISP_IOXPUT_16(isp, src->ctp_cmd_cnt, &dst->ctp_cmd_cnt); ISP_IOXPUT_8(isp, src->ctp_vpidx, &dst->ctp_vpidx); ISP_IOXPUT_8(isp, src->ctp_reserved0, &dst->ctp_reserved0); ISP_IOXPUT_16(isp, src->ctp_time, &dst->ctp_time); ISP_IOXPUT_16(isp, src->ctp_reserved1, &dst->ctp_reserved1); ISP_IOXPUT_16(isp, src->ctp_rsp_cnt, &dst->ctp_rsp_cnt); for (i = 0; i < 5; i++) { ISP_IOXPUT_16(isp, src->ctp_reserved2[i], &dst->ctp_reserved2[i]); } ISP_IOXPUT_32(isp, src->ctp_rsp_bcnt, &dst->ctp_rsp_bcnt); ISP_IOXPUT_32(isp, src->ctp_cmd_bcnt, &dst->ctp_cmd_bcnt); for (i = 0; i < 2; i++) { ISP_IOXPUT_32(isp, src->ctp_dataseg[i].ds_base, &dst->ctp_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->ctp_dataseg[i].ds_basehi, &dst->ctp_dataseg[i].ds_basehi); ISP_IOXPUT_32(isp, src->ctp_dataseg[i].ds_count, &dst->ctp_dataseg[i].ds_count); } } void isp_put_ms(ispsoftc_t *isp, isp_ms_t *src, isp_ms_t *dst) { int i; isp_put_hdr(isp, &src->ms_header, &dst->ms_header); ISP_IOXPUT_32(isp, src->ms_handle, &dst->ms_handle); ISP_IOXPUT_16(isp, src->ms_nphdl, &dst->ms_nphdl); ISP_IOXPUT_16(isp, src->ms_status, &dst->ms_status); ISP_IOXPUT_16(isp, src->ms_flags, &dst->ms_flags); 
ISP_IOXPUT_16(isp, src->ms_reserved1, &dst->ms_reserved1); ISP_IOXPUT_16(isp, src->ms_time, &dst->ms_time); ISP_IOXPUT_16(isp, src->ms_cmd_cnt, &dst->ms_cmd_cnt); ISP_IOXPUT_16(isp, src->ms_tot_cnt, &dst->ms_tot_cnt); ISP_IOXPUT_8(isp, src->ms_type, &dst->ms_type); ISP_IOXPUT_8(isp, src->ms_r_ctl, &dst->ms_r_ctl); ISP_IOXPUT_16(isp, src->ms_rxid, &dst->ms_rxid); ISP_IOXPUT_16(isp, src->ms_reserved2, &dst->ms_reserved2); ISP_IOXPUT_32(isp, src->ms_rsp_bcnt, &dst->ms_rsp_bcnt); ISP_IOXPUT_32(isp, src->ms_cmd_bcnt, &dst->ms_cmd_bcnt); for (i = 0; i < 2; i++) { ISP_IOXPUT_32(isp, src->ms_dataseg[i].ds_base, &dst->ms_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->ms_dataseg[i].ds_basehi, &dst->ms_dataseg[i].ds_basehi); ISP_IOXPUT_32(isp, src->ms_dataseg[i].ds_count, &dst->ms_dataseg[i].ds_count); } } /* * Generic SNS request - not particularly useful since the per-command data * isn't always 16 bit words. */ void isp_put_sns_request(ispsoftc_t *isp, sns_screq_t *src, sns_screq_t *dst) { int i, nw = (int) src->snscb_sblen; ISP_IOXPUT_16(isp, src->snscb_rblen, &dst->snscb_rblen); for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->snscb_addr[i], &dst->snscb_addr[i]); } ISP_IOXPUT_16(isp, src->snscb_sblen, &dst->snscb_sblen); for (i = 0; i < nw; i++) { ISP_IOXPUT_16(isp, src->snscb_data[i], &dst->snscb_data[i]); } } void isp_put_gid_ft_request(ispsoftc_t *isp, sns_gid_ft_req_t *src, sns_gid_ft_req_t *dst) { ISP_IOXPUT_16(isp, src->snscb_rblen, &dst->snscb_rblen); ISP_IOXPUT_16(isp, src->snscb_reserved0, &dst->snscb_reserved0); ISP_IOXPUT_16(isp, src->snscb_addr[0], &dst->snscb_addr[0]); ISP_IOXPUT_16(isp, src->snscb_addr[1], &dst->snscb_addr[1]); ISP_IOXPUT_16(isp, src->snscb_addr[2], &dst->snscb_addr[2]); ISP_IOXPUT_16(isp, src->snscb_addr[3], &dst->snscb_addr[3]); ISP_IOXPUT_16(isp, src->snscb_sblen, &dst->snscb_sblen); ISP_IOXPUT_16(isp, src->snscb_reserved1, &dst->snscb_reserved1); ISP_IOXPUT_16(isp, src->snscb_cmd, &dst->snscb_cmd); ISP_IOXPUT_16(isp, src->snscb_mword_div_2, &dst->snscb_mword_div_2); ISP_IOXPUT_32(isp, src->snscb_reserved3, &dst->snscb_reserved3); ISP_IOXPUT_32(isp, src->snscb_fc4_type, &dst->snscb_fc4_type); } void isp_put_gxn_id_request(ispsoftc_t *isp, sns_gxn_id_req_t *src, sns_gxn_id_req_t *dst) { ISP_IOXPUT_16(isp, src->snscb_rblen, &dst->snscb_rblen); ISP_IOXPUT_16(isp, src->snscb_reserved0, &dst->snscb_reserved0); ISP_IOXPUT_16(isp, src->snscb_addr[0], &dst->snscb_addr[0]); ISP_IOXPUT_16(isp, src->snscb_addr[1], &dst->snscb_addr[1]); ISP_IOXPUT_16(isp, src->snscb_addr[2], &dst->snscb_addr[2]); ISP_IOXPUT_16(isp, src->snscb_addr[3], &dst->snscb_addr[3]); ISP_IOXPUT_16(isp, src->snscb_sblen, &dst->snscb_sblen); ISP_IOXPUT_16(isp, src->snscb_reserved1, &dst->snscb_reserved1); ISP_IOXPUT_16(isp, src->snscb_cmd, &dst->snscb_cmd); ISP_IOXPUT_16(isp, src->snscb_reserved2, &dst->snscb_reserved2); ISP_IOXPUT_32(isp, src->snscb_reserved3, &dst->snscb_reserved3); ISP_IOXPUT_32(isp, src->snscb_portid, &dst->snscb_portid); } /* * Generic SNS response - not particularly useful since the per-command data * isn't always 16 bit words. 
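 * (The payload is really a byte stream: swapping on 16 bit boundaries is
 * only correct for fields that happen to be 16 bit words, which is why
 * most callers use the command-specific routines nearby instead.)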
*/ void isp_get_sns_response(ispsoftc_t *isp, sns_scrsp_t *src, sns_scrsp_t *dst, int nwords) { int i; isp_get_ct_hdr(isp, &src->snscb_cthdr, &dst->snscb_cthdr); ISP_IOXGET_8(isp, &src->snscb_port_type, dst->snscb_port_type); for (i = 0; i < 3; i++) { ISP_IOXGET_8(isp, &src->snscb_port_id[i], dst->snscb_port_id[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->snscb_portname[i], dst->snscb_portname[i]); } for (i = 0; i < nwords; i++) { ISP_IOXGET_16(isp, &src->snscb_data[i], dst->snscb_data[i]); } } void isp_get_gid_ft_response(ispsoftc_t *isp, sns_gid_ft_rsp_t *src, sns_gid_ft_rsp_t *dst, int nwords) { int i; isp_get_ct_hdr(isp, &src->snscb_cthdr, &dst->snscb_cthdr); for (i = 0; i < nwords; i++) { int j; ISP_IOXGET_8(isp, &src->snscb_ports[i].control, dst->snscb_ports[i].control); for (j = 0; j < 3; j++) { ISP_IOXGET_8(isp, &src->snscb_ports[i].portid[j], dst->snscb_ports[i].portid[j]); } if (dst->snscb_ports[i].control & 0x80) { break; } } } void isp_get_gxn_id_response(ispsoftc_t *isp, sns_gxn_id_rsp_t *src, sns_gxn_id_rsp_t *dst) { int i; isp_get_ct_hdr(isp, &src->snscb_cthdr, &dst->snscb_cthdr); for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->snscb_wwn[i], dst->snscb_wwn[i]); } } void isp_get_gff_id_response(ispsoftc_t *isp, sns_gff_id_rsp_t *src, sns_gff_id_rsp_t *dst) { int i; isp_get_ct_hdr(isp, &src->snscb_cthdr, &dst->snscb_cthdr); for (i = 0; i < 32; i++) { ISP_IOXGET_32(isp, &src->snscb_fc4_features[i], dst->snscb_fc4_features[i]); } } void isp_get_ga_nxt_response(ispsoftc_t *isp, sns_ga_nxt_rsp_t *src, sns_ga_nxt_rsp_t *dst) { int i; isp_get_ct_hdr(isp, &src->snscb_cthdr, &dst->snscb_cthdr); ISP_IOXGET_8(isp, &src->snscb_port_type, dst->snscb_port_type); for (i = 0; i < 3; i++) { ISP_IOXGET_8(isp, &src->snscb_port_id[i], dst->snscb_port_id[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->snscb_portname[i], dst->snscb_portname[i]); } ISP_IOXGET_8(isp, &src->snscb_pnlen, dst->snscb_pnlen); for (i = 0; i < 255; i++) { ISP_IOXGET_8(isp, &src->snscb_pname[i], dst->snscb_pname[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->snscb_nodename[i], dst->snscb_nodename[i]); } ISP_IOXGET_8(isp, &src->snscb_nnlen, dst->snscb_nnlen); for (i = 0; i < 255; i++) { ISP_IOXGET_8(isp, &src->snscb_nname[i], dst->snscb_nname[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->snscb_ipassoc[i], dst->snscb_ipassoc[i]); } for (i = 0; i < 16; i++) { ISP_IOXGET_8(isp, &src->snscb_ipaddr[i], dst->snscb_ipaddr[i]); } for (i = 0; i < 4; i++) { ISP_IOXGET_8(isp, &src->snscb_svc_class[i], dst->snscb_svc_class[i]); } for (i = 0; i < 32; i++) { ISP_IOXGET_8(isp, &src->snscb_fc4_types[i], dst->snscb_fc4_types[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->snscb_fpname[i], dst->snscb_fpname[i]); } ISP_IOXGET_8(isp, &src->snscb_reserved, dst->snscb_reserved); for (i = 0; i < 3; i++) { ISP_IOXGET_8(isp, &src->snscb_hardaddr[i], dst->snscb_hardaddr[i]); } } void isp_get_els(ispsoftc_t *isp, els_t *src, els_t *dst) { int i; isp_get_hdr(isp, &src->els_hdr, &dst->els_hdr); ISP_IOXGET_32(isp, &src->els_handle, dst->els_handle); ISP_IOXGET_16(isp, &src->els_status, dst->els_status); ISP_IOXGET_16(isp, &src->els_nphdl, dst->els_nphdl); ISP_IOXGET_16(isp, &src->els_xmit_dsd_count, dst->els_xmit_dsd_count); ISP_IOXGET_8(isp, &src->els_vphdl, dst->els_vphdl); ISP_IOXGET_8(isp, &src->els_sof, dst->els_sof); ISP_IOXGET_32(isp, &src->els_rxid, dst->els_rxid); ISP_IOXGET_16(isp, &src->els_recv_dsd_count, dst->els_recv_dsd_count); ISP_IOXGET_8(isp, &src->els_opcode, dst->els_opcode); 
    ISP_IOXGET_8(isp, &src->els_reserved1, dst->els_reserved1);
    ISP_IOXGET_8(isp, &src->els_did_lo, dst->els_did_lo);
    ISP_IOXGET_8(isp, &src->els_did_mid, dst->els_did_mid);
    ISP_IOXGET_8(isp, &src->els_did_hi, dst->els_did_hi);
    ISP_IOXGET_8(isp, &src->els_reserved2, dst->els_reserved2);
    ISP_IOXGET_16(isp, &src->els_reserved3, dst->els_reserved3);
    ISP_IOXGET_16(isp, &src->els_ctl_flags, dst->els_ctl_flags);
    ISP_IOXGET_32(isp, &src->els_bytecnt, dst->els_bytecnt);
    ISP_IOXGET_32(isp, &src->els_subcode1, dst->els_subcode1);
    ISP_IOXGET_32(isp, &src->els_subcode2, dst->els_subcode2);
    for (i = 0; i < 20; i++) {
        ISP_IOXGET_8(isp, &src->els_reserved4[i], dst->els_reserved4[i]);
    }
}

void
isp_put_els(ispsoftc_t *isp, els_t *src, els_t *dst)
{
    isp_put_hdr(isp, &src->els_hdr, &dst->els_hdr);
    ISP_IOXPUT_32(isp, src->els_handle, &dst->els_handle);
    ISP_IOXPUT_16(isp, src->els_status, &dst->els_status);
    ISP_IOXPUT_16(isp, src->els_nphdl, &dst->els_nphdl);
    ISP_IOXPUT_16(isp, src->els_xmit_dsd_count, &dst->els_xmit_dsd_count);
    ISP_IOXPUT_8(isp, src->els_vphdl, &dst->els_vphdl);
    ISP_IOXPUT_8(isp, src->els_sof, &dst->els_sof);
    ISP_IOXPUT_32(isp, src->els_rxid, &dst->els_rxid);
    ISP_IOXPUT_16(isp, src->els_recv_dsd_count, &dst->els_recv_dsd_count);
    ISP_IOXPUT_8(isp, src->els_opcode, &dst->els_opcode);
    ISP_IOXPUT_8(isp, src->els_reserved1, &dst->els_reserved1);
    ISP_IOXPUT_8(isp, src->els_did_lo, &dst->els_did_lo);
    ISP_IOXPUT_8(isp, src->els_did_mid, &dst->els_did_mid);
    ISP_IOXPUT_8(isp, src->els_did_hi, &dst->els_did_hi);
    ISP_IOXPUT_8(isp, src->els_reserved2, &dst->els_reserved2);
    ISP_IOXPUT_16(isp, src->els_reserved3, &dst->els_reserved3);
    ISP_IOXPUT_16(isp, src->els_ctl_flags, &dst->els_ctl_flags);
    ISP_IOXPUT_32(isp, src->els_recv_bytecnt, &dst->els_recv_bytecnt);
    ISP_IOXPUT_32(isp, src->els_xmit_bytecnt, &dst->els_xmit_bytecnt);
    ISP_IOXPUT_32(isp, src->els_xmit_dsd_length, &dst->els_xmit_dsd_length);
    ISP_IOXPUT_16(isp, src->els_xmit_dsd_a1500, &dst->els_xmit_dsd_a1500);
    ISP_IOXPUT_16(isp, src->els_xmit_dsd_a3116, &dst->els_xmit_dsd_a3116);
    ISP_IOXPUT_16(isp, src->els_xmit_dsd_a4732, &dst->els_xmit_dsd_a4732);
    ISP_IOXPUT_16(isp, src->els_xmit_dsd_a6348, &dst->els_xmit_dsd_a6348);
    ISP_IOXPUT_32(isp, src->els_recv_dsd_length, &dst->els_recv_dsd_length);
    ISP_IOXPUT_16(isp, src->els_recv_dsd_a1500, &dst->els_recv_dsd_a1500);
    ISP_IOXPUT_16(isp, src->els_recv_dsd_a3116, &dst->els_recv_dsd_a3116);
    ISP_IOXPUT_16(isp, src->els_recv_dsd_a4732, &dst->els_recv_dsd_a4732);
    ISP_IOXPUT_16(isp, src->els_recv_dsd_a6348, &dst->els_recv_dsd_a6348);
}

/*
 * FC Structure Canonicalization
 */
void
isp_get_fc_hdr(ispsoftc_t *isp, fc_hdr_t *src, fc_hdr_t *dst)
{
    ISP_IOZGET_8(isp, &src->r_ctl, dst->r_ctl);
    ISP_IOZGET_8(isp, &src->d_id[0], dst->d_id[0]);
    ISP_IOZGET_8(isp, &src->d_id[1], dst->d_id[1]);
    ISP_IOZGET_8(isp, &src->d_id[2], dst->d_id[2]);
    ISP_IOZGET_8(isp, &src->cs_ctl, dst->cs_ctl);
    ISP_IOZGET_8(isp, &src->s_id[0], dst->s_id[0]);
    ISP_IOZGET_8(isp, &src->s_id[1], dst->s_id[1]);
    ISP_IOZGET_8(isp, &src->s_id[2], dst->s_id[2]);
    ISP_IOZGET_8(isp, &src->type, dst->type);
    ISP_IOZGET_8(isp, &src->f_ctl[0], dst->f_ctl[0]);
    ISP_IOZGET_8(isp, &src->f_ctl[1], dst->f_ctl[1]);
    ISP_IOZGET_8(isp, &src->f_ctl[2], dst->f_ctl[2]);
    ISP_IOZGET_8(isp, &src->seq_id, dst->seq_id);
    ISP_IOZGET_8(isp, &src->df_ctl, dst->df_ctl);
    ISP_IOZGET_16(isp, &src->seq_cnt, dst->seq_cnt);
    ISP_IOZGET_16(isp, &src->ox_id, dst->ox_id);
    ISP_IOZGET_16(isp, &src->rx_id, dst->rx_id);
    ISP_IOZGET_32(isp, &src->parameter, dst->parameter);
}

void
isp_put_fc_hdr(ispsoftc_t *isp,
fc_hdr_t *src, fc_hdr_t *dst) { ISP_IOZPUT_8(isp, src->r_ctl, &dst->r_ctl); ISP_IOZPUT_8(isp, src->d_id[0], &dst->d_id[0]); ISP_IOZPUT_8(isp, src->d_id[1], &dst->d_id[1]); ISP_IOZPUT_8(isp, src->d_id[2], &dst->d_id[2]); ISP_IOZPUT_8(isp, src->cs_ctl, &dst->cs_ctl); ISP_IOZPUT_8(isp, src->s_id[0], &dst->s_id[0]); ISP_IOZPUT_8(isp, src->s_id[1], &dst->s_id[1]); ISP_IOZPUT_8(isp, src->s_id[2], &dst->s_id[2]); ISP_IOZPUT_8(isp, src->type, &dst->type); ISP_IOZPUT_8(isp, src->f_ctl[0], &dst->f_ctl[0]); ISP_IOZPUT_8(isp, src->f_ctl[1], &dst->f_ctl[1]); ISP_IOZPUT_8(isp, src->f_ctl[2], &dst->f_ctl[2]); ISP_IOZPUT_8(isp, src->seq_id, &dst->seq_id); ISP_IOZPUT_8(isp, src->df_ctl, &dst->df_ctl); ISP_IOZPUT_16(isp, src->seq_cnt, &dst->seq_cnt); ISP_IOZPUT_16(isp, src->ox_id, &dst->ox_id); ISP_IOZPUT_16(isp, src->rx_id, &dst->rx_id); ISP_IOZPUT_32(isp, src->parameter, &dst->parameter); } void isp_get_fcp_cmnd_iu(ispsoftc_t *isp, fcp_cmnd_iu_t *src, fcp_cmnd_iu_t *dst) { int i; for (i = 0; i < 8; i++) { ISP_IOZGET_8(isp, &src->fcp_cmnd_lun[i], dst->fcp_cmnd_lun[i]); } ISP_IOZGET_8(isp, &src->fcp_cmnd_crn, dst->fcp_cmnd_crn); ISP_IOZGET_8(isp, &src->fcp_cmnd_task_attribute, dst->fcp_cmnd_task_attribute); ISP_IOZGET_8(isp, &src->fcp_cmnd_task_management, dst->fcp_cmnd_task_management); ISP_IOZGET_8(isp, &src->fcp_cmnd_alen_datadir, dst->fcp_cmnd_alen_datadir); for (i = 0; i < 16; i++) { ISP_IOZGET_8(isp, &src->cdb_dl.sf.fcp_cmnd_cdb[i], dst->cdb_dl.sf.fcp_cmnd_cdb[i]); } ISP_IOZGET_32(isp, &src->cdb_dl.sf.fcp_cmnd_dl, dst->cdb_dl.sf.fcp_cmnd_dl); } void isp_put_rft_id(ispsoftc_t *isp, rft_id_t *src, rft_id_t *dst) { int i; isp_put_ct_hdr(isp, &src->rftid_hdr, &dst->rftid_hdr); ISP_IOZPUT_8(isp, src->rftid_reserved, &dst->rftid_reserved); for (i = 0; i < 3; i++) { ISP_IOZPUT_8(isp, src->rftid_portid[i], &dst->rftid_portid[i]); } for (i = 0; i < 8; i++) { ISP_IOZPUT_32(isp, src->rftid_fc4types[i], &dst->rftid_fc4types[i]); } } void isp_get_ct_hdr(ispsoftc_t *isp, ct_hdr_t *src, ct_hdr_t *dst) { ISP_IOZGET_8(isp, &src->ct_revision, dst->ct_revision); ISP_IOZGET_8(isp, &src->ct_in_id[0], dst->ct_in_id[0]); ISP_IOZGET_8(isp, &src->ct_in_id[1], dst->ct_in_id[1]); ISP_IOZGET_8(isp, &src->ct_in_id[2], dst->ct_in_id[2]); ISP_IOZGET_8(isp, &src->ct_fcs_type, dst->ct_fcs_type); ISP_IOZGET_8(isp, &src->ct_fcs_subtype, dst->ct_fcs_subtype); ISP_IOZGET_8(isp, &src->ct_options, dst->ct_options); ISP_IOZGET_8(isp, &src->ct_reserved0, dst->ct_reserved0); ISP_IOZGET_16(isp, &src->ct_cmd_resp, dst->ct_cmd_resp); ISP_IOZGET_16(isp, &src->ct_bcnt_resid, dst->ct_bcnt_resid); ISP_IOZGET_8(isp, &src->ct_reserved1, dst->ct_reserved1); ISP_IOZGET_8(isp, &src->ct_reason, dst->ct_reason); ISP_IOZGET_8(isp, &src->ct_explanation, dst->ct_explanation); ISP_IOZGET_8(isp, &src->ct_vunique, dst->ct_vunique); } void isp_put_ct_hdr(ispsoftc_t *isp, ct_hdr_t *src, ct_hdr_t *dst) { ISP_IOZPUT_8(isp, src->ct_revision, &dst->ct_revision); ISP_IOZPUT_8(isp, src->ct_in_id[0], &dst->ct_in_id[0]); ISP_IOZPUT_8(isp, src->ct_in_id[1], &dst->ct_in_id[1]); ISP_IOZPUT_8(isp, src->ct_in_id[2], &dst->ct_in_id[2]); ISP_IOZPUT_8(isp, src->ct_fcs_type, &dst->ct_fcs_type); ISP_IOZPUT_8(isp, src->ct_fcs_subtype, &dst->ct_fcs_subtype); ISP_IOZPUT_8(isp, src->ct_options, &dst->ct_options); ISP_IOZPUT_8(isp, src->ct_reserved0, &dst->ct_reserved0); ISP_IOZPUT_16(isp, src->ct_cmd_resp, &dst->ct_cmd_resp); ISP_IOZPUT_16(isp, src->ct_bcnt_resid, &dst->ct_bcnt_resid); ISP_IOZPUT_8(isp, src->ct_reserved1, &dst->ct_reserved1); ISP_IOZPUT_8(isp, src->ct_reason, 
&dst->ct_reason); ISP_IOZPUT_8(isp, src->ct_explanation, &dst->ct_explanation); ISP_IOZPUT_8(isp, src->ct_vunique, &dst->ct_vunique); } void isp_put_fcp_rsp_iu(ispsoftc_t *isp, fcp_rsp_iu_t *src, fcp_rsp_iu_t *dst) { int i; for (i = 0; i < ((sizeof (src->fcp_rsp_reserved))/(sizeof (src->fcp_rsp_reserved[0]))); i++) { ISP_IOZPUT_8(isp, src->fcp_rsp_reserved[i], &dst->fcp_rsp_reserved[i]); } ISP_IOZPUT_16(isp, src->fcp_rsp_status_qualifier, &dst->fcp_rsp_status_qualifier); ISP_IOZPUT_8(isp, src->fcp_rsp_bits, &dst->fcp_rsp_bits); ISP_IOZPUT_8(isp, src->fcp_rsp_scsi_status, &dst->fcp_rsp_scsi_status); ISP_IOZPUT_32(isp, src->fcp_rsp_resid, &dst->fcp_rsp_resid); ISP_IOZPUT_32(isp, src->fcp_rsp_snslen, &dst->fcp_rsp_snslen); ISP_IOZPUT_32(isp, src->fcp_rsp_rsplen, &dst->fcp_rsp_rsplen); } #ifdef ISP_TARGET_MODE /* * Command shipping- finish off first queue entry and do dma mapping and * additional segments as needed. * * Called with the first queue entry mostly filled out. * Our job here is to finish that and add additional data * segments if needed. * * We used to do synthetic entries to split data and status * at this level, but that started getting too tricky. */ int isp_send_tgt_cmd(ispsoftc_t *isp, void *fqe, void *segp, uint32_t nsegs, uint32_t totalcnt, isp_ddir_t ddir, void *snsptr, uint32_t snslen) { uint8_t storage[QENTRY_LEN]; uint8_t type, nqe; uint32_t seg, curseg, seglim, nxt, nxtnxt; ispds_t *dsp = NULL; ispds64_t *dsp64 = NULL; void *qe0, *qe1; qe0 = isp_getrqentry(isp); if (qe0 == NULL) { return (CMD_EAGAIN); } nxt = ISP_NXT_QENTRY(isp->isp_reqidx, RQUEST_QUEUE_LEN(isp)); type = ((isphdr_t *)fqe)->rqs_entry_type; nqe = 1; seglim = 0; /* * If we have data to transmit, figure out how many segments can fit into the first entry. */ if (ddir != ISP_NOXFR) { /* * First, figure out how many pieces of data to transfer and what kind and how many we can put into the first queue entry. */ switch (type) { case RQSTYPE_CTIO: dsp = ((ct_entry_t *)fqe)->ct_dataseg; seglim = ISP_RQDSEG; break; case RQSTYPE_CTIO2: dsp = ((ct2_entry_t *)fqe)->rsp.m0.u.ct_dataseg; seglim = ISP_RQDSEG_T2; break; case RQSTYPE_CTIO3: dsp64 = ((ct2_entry_t *)fqe)->rsp.m0.u.ct_dataseg64; seglim = ISP_RQDSEG_T3; break; case RQSTYPE_CTIO7: dsp64 = &((ct7_entry_t *)fqe)->rsp.m0.ds; seglim = 1; break; default: return (CMD_COMPLETE); } } /* * First, fill out any of the data transfer stuff that fits * in the first queue entry. */ if (seglim > nsegs) { seglim = nsegs; } for (seg = curseg = 0; curseg < seglim; curseg++) { if (dsp64) { XS_GET_DMA64_SEG(dsp64++, segp, seg++); } else { XS_GET_DMA_SEG(dsp++, segp, seg++); } } /* * Second, start building additional continuation segments as needed. 
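 * Each continuation entry holds up to ISP_CDSEG (or ISP_CDSEG64 when
 * 64 bit DMA descriptors are in use) data segments. If advancing the
 * request ring index (ISP_NXT_QENTRY) would run into the chip's output
 * index even after re-reading it, the ring is full and we give up
 * with CMD_EAGAIN.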
 */
    while (seg < nsegs) {
        nxtnxt = ISP_NXT_QENTRY(nxt, RQUEST_QUEUE_LEN(isp));
        if (nxtnxt == isp->isp_reqodx) {
            isp->isp_reqodx = ISP_READ(isp, isp->isp_rqstoutrp);
            if (nxtnxt == isp->isp_reqodx)
                return (CMD_EAGAIN);
        }
        ISP_MEMZERO(storage, QENTRY_LEN);
        qe1 = ISP_QUEUE_ENTRY(isp->isp_rquest, nxt);
        nxt = nxtnxt;
        if (dsp64) {
            ispcontreq64_t *crq = (ispcontreq64_t *) storage;
            seglim = ISP_CDSEG64;
            crq->req_header.rqs_entry_type = RQSTYPE_A64_CONT;
            crq->req_header.rqs_entry_count = 1;
            dsp64 = crq->req_dataseg;
        } else {
            ispcontreq_t *crq = (ispcontreq_t *) storage;
            seglim = ISP_CDSEG;
            crq->req_header.rqs_entry_type = RQSTYPE_DATASEG;
            crq->req_header.rqs_entry_count = 1;
            dsp = crq->req_dataseg;
        }
        if (seg + seglim > nsegs) {
            seglim = nsegs - seg;
        }
        for (curseg = 0; curseg < seglim; curseg++) {
            if (dsp64) {
                XS_GET_DMA64_SEG(dsp64++, segp, seg++);
            } else {
                XS_GET_DMA_SEG(dsp++, segp, seg++);
            }
        }
        if (dsp64) {
            isp_put_cont64_req(isp, (ispcontreq64_t *)storage, qe1);
        } else {
            isp_put_cont_req(isp, (ispcontreq_t *)storage, qe1);
        }
        if (isp->isp_dblev & ISP_LOGTDEBUG1) {
            isp_print_bytes(isp, "additional queue entry", QENTRY_LEN, storage);
        }
        nqe++;
    }

    /*
     * Third, now patch up the first queue entry with the number of segments
     * we actually are going to be transmitting. At the same time, handle
     * any mode 2 requests.
     */
    ((isphdr_t *)fqe)->rqs_entry_count = nqe;
    switch (type) {
    case RQSTYPE_CTIO:
        ((ct_entry_t *)fqe)->ct_seg_count = nsegs;
        isp_put_ctio(isp, fqe, qe0);
        break;
    case RQSTYPE_CTIO2:
    case RQSTYPE_CTIO3:
        if (((ct2_entry_t *)fqe)->ct_flags & CT2_FLAG_MODE2) {
            ((ct2_entry_t *)fqe)->ct_seg_count = 1;
        } else {
            ((ct2_entry_t *)fqe)->ct_seg_count = nsegs;
        }
        if (ISP_CAP_2KLOGIN(isp)) {
            isp_put_ctio2e(isp, fqe, qe0);
        } else {
            isp_put_ctio2(isp, fqe, qe0);
        }
        break;
    case RQSTYPE_CTIO7:
        if (((ct7_entry_t *)fqe)->ct_flags & CT7_FLAG_MODE2) {
            ((ct7_entry_t *)fqe)->ct_seg_count = 1;
        } else {
            ((ct7_entry_t *)fqe)->ct_seg_count = nsegs;
        }
        isp_put_ctio7(isp, fqe, qe0);
        break;
    default:
        return (CMD_COMPLETE);
    }
    if (isp->isp_dblev & ISP_LOGTDEBUG1) {
        isp_print_bytes(isp, "first queue entry", QENTRY_LEN, fqe);
    }
    ISP_ADD_REQUEST(isp, nxt);
    return (CMD_QUEUED);
}

int
isp_allocate_xs_tgt(ispsoftc_t *isp, void *xs, uint32_t *handlep)
{
    isp_hdl_t *hdp;

    hdp = isp->isp_tgtfree;
    if (hdp == NULL) {
        return (-1);
    }
    isp->isp_tgtfree = hdp->cmd;
    hdp->cmd = xs;
    hdp->handle = (hdp - isp->isp_tgtlist);
    hdp->handle |= (ISP_HANDLE_TARGET << ISP_HANDLE_USAGE_SHIFT);
    /*
     * Target handles for SCSI cards are only 16 bits, so
     * sequence number protection will be omitted.
     */
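    /*
     * For FC cards, a rolling sequence number is folded into the upper
     * bits of the handle instead, so a stale, recycled handle can be
     * told apart from a live one.
     */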
*/ if (IS_FC(isp)) { hdp->handle |= (isp->isp_seqno++ << ISP_HANDLE_SEQ_SHIFT); } *handlep = hdp->handle; return (0); } void * isp_find_xs_tgt(ispsoftc_t *isp, uint32_t handle) { if (!ISP_VALID_TGT_HANDLE(isp, handle)) { isp_prt(isp, ISP_LOGERR, "%s: bad handle 0x%x", __func__, handle); return (NULL); } return (isp->isp_tgtlist[(handle & ISP_HANDLE_CMD_MASK)].cmd); } uint32_t isp_find_tgt_handle(ispsoftc_t *isp, void *xs) { uint32_t i, foundhdl = ISP_HANDLE_FREE; if (xs != NULL) { for (i = 0; i < isp->isp_maxcmds; i++) { if (isp->isp_tgtlist[i].cmd != xs) { continue; } foundhdl = isp->isp_tgtlist[i].handle; break; } } return (foundhdl); } void isp_destroy_tgt_handle(ispsoftc_t *isp, uint32_t handle) { if (!ISP_VALID_TGT_HANDLE(isp, handle)) { isp_prt(isp, ISP_LOGERR, "%s: bad handle 0x%x", __func__, handle); } else { isp->isp_tgtlist[(handle & ISP_HANDLE_CMD_MASK)].handle = ISP_HANDLE_FREE; isp->isp_tgtlist[(handle & ISP_HANDLE_CMD_MASK)].cmd = isp->isp_tgtfree; isp->isp_tgtfree = &isp->isp_tgtlist[(handle & ISP_HANDLE_CMD_MASK)]; } } #endif /* * Find port database entries */ int isp_find_pdb_by_wwn(ispsoftc_t *isp, int chan, uint64_t wwn, fcportdb_t **lptr) { fcparam *fcp; int i; if (chan >= isp->isp_nchan) return (0); fcp = FCPARAM(isp, chan); for (i = 0; i < MAX_FC_TARG; i++) { fcportdb_t *lp = &fcp->portdb[i]; if (lp->state == FC_PORTDB_STATE_NIL) continue; if (lp->port_wwn == wwn) { *lptr = lp; return (1); } } return (0); } #ifdef ISP_TARGET_MODE int isp_find_pdb_by_handle(ispsoftc_t *isp, int chan, uint32_t handle, fcportdb_t **lptr) { fcparam *fcp; int i; if (chan >= isp->isp_nchan) return (0); fcp = FCPARAM(isp, chan); for (i = 0; i < MAX_FC_TARG; i++) { fcportdb_t *lp = &fcp->portdb[i]; if (lp->state == FC_PORTDB_STATE_NIL) continue; if (lp->handle == handle) { *lptr = lp; return (1); } } return (0); } int isp_find_pdb_by_sid(ispsoftc_t *isp, int chan, uint32_t sid, fcportdb_t **lptr) { fcparam *fcp; int i; if (chan >= isp->isp_nchan) return (0); fcp = FCPARAM(isp, chan); for (i = 0; i < MAX_FC_TARG; i++) { fcportdb_t *lp = &fcp->portdb[i]; if (lp->state == FC_PORTDB_STATE_NIL) continue; if (lp->portid == sid) { *lptr = lp; return (1); } } return (0); } void isp_find_chan_by_did(ispsoftc_t *isp, uint32_t did, uint16_t *cp) { uint16_t chan; *cp = ISP_NOCHAN; for (chan = 0; chan < isp->isp_nchan; chan++) { fcparam *fcp = FCPARAM(isp, chan); if ((fcp->role & ISP_ROLE_TARGET) == 0 || fcp->isp_fwstate != FW_READY || fcp->isp_loopstate < LOOP_PDB_RCVD) { continue; } if (fcp->isp_portid == did) { *cp = chan; break; } } } /* * Add an initiator device to the port database */ void isp_add_wwn_entry(ispsoftc_t *isp, int chan, uint64_t wwpn, uint64_t wwnn, uint16_t nphdl, uint32_t s_id, uint16_t prli_params) { char buf[64]; fcparam *fcp; fcportdb_t *lp; int i, change; fcp = FCPARAM(isp, chan); if (nphdl >= MAX_NPORT_HANDLE) { isp_prt(isp, ISP_LOGTINFO|ISP_LOGWARN, "Chan %d WWPN 0x%016llx " "PortID 0x%06x handle 0x%x -- bad handle", chan, (unsigned long long) wwpn, s_id, nphdl); return; } /* * If valid record for requested handle already exists, update it * with new parameters. Some cases of update can be suspicious, * so log them verbosely and dump the whole port database. 
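 * (Suspicious here means things like an existing login silently changing
 * its PortID or WWPN rather than simply logging in again.)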
 */
    if ((VALID_INI(wwpn) && isp_find_pdb_by_wwn(isp, chan, wwpn, &lp)) ||
        (s_id != PORT_NONE && isp_find_pdb_by_sid(isp, chan, s_id, &lp))) {
        change = 0;
        lp->new_portid = lp->portid;
        lp->new_prli_word3 = lp->prli_word3;
        if (s_id != PORT_NONE && lp->portid != s_id) {
            if (lp->portid == PORT_NONE) {
                isp_prt(isp, ISP_LOGTINFO,
                    "Chan %d WWPN 0x%016llx handle 0x%x "
                    "gets PortID 0x%06x",
                    chan, (unsigned long long) lp->port_wwn,
                    nphdl, s_id);
            } else {
                isp_prt(isp, ISP_LOGTINFO|ISP_LOGWARN,
                    "Chan %d WWPN 0x%016llx handle 0x%x "
                    "changes PortID 0x%06x to 0x%06x",
                    chan, (unsigned long long) lp->port_wwn,
                    nphdl, lp->portid, s_id);
                if (isp->isp_dblev & (ISP_LOGTINFO|ISP_LOGWARN))
                    isp_dump_portdb(isp, chan);
            }
            lp->new_portid = s_id;
            change++;
        }
        if (VALID_INI(wwpn) && lp->port_wwn != wwpn) {
            if (!VALID_INI(lp->port_wwn)) {
                isp_prt(isp, ISP_LOGTINFO,
                    "Chan %d PortID 0x%06x handle 0x%x "
                    "gets WWPN 0x%016llx",
                    chan, lp->portid, nphdl,
                    (unsigned long long) wwpn);
            } else if (lp->port_wwn != wwpn) {
                isp_prt(isp, ISP_LOGTINFO|ISP_LOGWARN,
                    "Chan %d PortID 0x%06x handle 0x%x "
                    "changes WWPN 0x%016llx to 0x%016llx",
                    chan, lp->portid, nphdl,
                    (unsigned long long) lp->port_wwn,
                    (unsigned long long) wwpn);
                if (isp->isp_dblev & (ISP_LOGTINFO|ISP_LOGWARN))
                    isp_dump_portdb(isp, chan);
            }
            lp->port_wwn = wwpn;
            change++;
        }
        if (VALID_INI(wwnn) && lp->node_wwn != wwnn) {
            if (!VALID_INI(lp->node_wwn)) {
                isp_prt(isp, ISP_LOGTINFO,
                    "Chan %d PortID 0x%06x handle 0x%x "
                    "gets WWNN 0x%016llx",
                    chan, lp->portid, nphdl,
                    (unsigned long long) wwnn);
            } else if (lp->node_wwn != wwnn) {
                isp_prt(isp, ISP_LOGTINFO,
                    "Chan %d PortID 0x%06x handle 0x%x "
                    "changes WWNN 0x%016llx to 0x%016llx",
                    chan, lp->portid, nphdl,
                    (unsigned long long) lp->node_wwn,
                    (unsigned long long) wwnn);
            }
            lp->node_wwn = wwnn;
            change++;
        }
        if (prli_params != 0 && lp->prli_word3 != prli_params) {
            isp_gen_role_str(buf, sizeof (buf), prli_params);
            isp_prt(isp, ISP_LOGTINFO|ISP_LOGCONFIG,
                "Chan %d WWPN 0x%016llx PortID 0x%06x "
                "handle 0x%x changes PRLI Word 3 %s",
                chan, (unsigned long long) lp->port_wwn,
                lp->portid, lp->handle, buf);
            lp->new_prli_word3 = prli_params;
            change++;
        }
        if (lp->handle != nphdl) {
            isp_prt(isp, ISP_LOGTINFO|ISP_LOGCONFIG,
                "Chan %d WWPN 0x%016llx PortID 0x%06x "
                "changes handle 0x%x to 0x%x",
                chan, (unsigned long long) lp->port_wwn,
                lp->portid, lp->handle, nphdl);
            lp->handle = nphdl;
            change++;
        }
        lp->state = FC_PORTDB_STATE_VALID;
        if (change) {
            isp_async(isp, ISPASYNC_DEV_CHANGED, chan, lp);
            lp->portid = lp->new_portid;
            lp->prli_word3 = lp->new_prli_word3;
            lp->new_prli_word3 = 0;
            lp->new_portid = 0;
        } else {
            isp_prt(isp, ISP_LOGTINFO,
                "Chan %d WWPN 0x%016llx PortID 0x%06x "
                "handle 0x%x reentered",
                chan, (unsigned long long) lp->port_wwn,
                lp->portid, lp->handle);
            isp_async(isp, ISPASYNC_DEV_STAYED, chan, lp);
        }
        return;
    }

    /* Search for room to insert new record. */
    for (i = 0; i < MAX_FC_TARG; i++) {
        if (fcp->portdb[i].state == FC_PORTDB_STATE_NIL)
            break;
    }
    if (i >= MAX_FC_TARG) {
        isp_prt(isp, ISP_LOGTINFO|ISP_LOGWARN,
            "Chan %d WWPN 0x%016llx PortID 0x%06x handle 0x%x "
            "-- no room in port database",
            chan, (unsigned long long) wwpn, s_id, nphdl);
        if (isp->isp_dblev & (ISP_LOGTINFO|ISP_LOGWARN))
            isp_dump_portdb(isp, chan);
        return;
    }

    /* Insert new record and mark it valid. */
    lp = &fcp->portdb[i];
    ISP_MEMZERO(lp, sizeof (fcportdb_t));
    lp->handle = nphdl;
    lp->portid = s_id;
    lp->port_wwn = wwpn;
    lp->node_wwn = wwnn;
    lp->prli_word3 = (prli_params != 0) ?
        prli_params : PRLI_WD3_INITIATOR_FUNCTION;
    lp->state = FC_PORTDB_STATE_VALID;

    isp_gen_role_str(buf, sizeof (buf), lp->prli_word3);
    isp_prt(isp, ISP_LOGTINFO, "Chan %d WWPN 0x%016llx "
        "PortID 0x%06x handle 0x%x vtgt %d %s added", chan,
        (unsigned long long) wwpn, s_id, nphdl, i, buf);

    /* Notify above levels about new port arrival. */
    isp_async(isp, ISPASYNC_DEV_ARRIVED, chan, lp);
}

/*
 * Remove a target device from the port database
 */
void
isp_del_wwn_entry(ispsoftc_t *isp, int chan, uint64_t wwpn, uint16_t nphdl, uint32_t s_id)
{
    fcparam *fcp;
    fcportdb_t *lp;

    if (nphdl >= MAX_NPORT_HANDLE) {
        isp_prt(isp, ISP_LOGWARN, "Chan %d WWPN 0x%016llx PortID 0x%06x bad handle 0x%x",
            chan, (unsigned long long) wwpn, s_id, nphdl);
        return;
    }

    fcp = FCPARAM(isp, chan);
    if (isp_find_pdb_by_handle(isp, chan, nphdl, &lp) == 0) {
        isp_prt(isp, ISP_LOGWARN, "Chan %d WWPN 0x%016llx PortID 0x%06x handle 0x%x cannot be found to be deleted",
            chan, (unsigned long long) wwpn, s_id, nphdl);
        isp_dump_portdb(isp, chan);
        return;
    }
    isp_prt(isp, ISP_LOGTINFO, "Chan %d WWPN 0x%016llx PortID 0x%06x handle 0x%x vtgt %d deleted",
        chan, (unsigned long long) lp->port_wwn, lp->portid, nphdl, FC_PORTDB_TGT(isp, chan, lp));
    lp->state = FC_PORTDB_STATE_NIL;

    /* Notify above levels about gone port. */
    isp_async(isp, ISPASYNC_DEV_GONE, chan, lp);
}

void
isp_del_all_wwn_entries(ispsoftc_t *isp, int chan)
{
    fcparam *fcp;
    int i;

    if (!IS_FC(isp)) {
        return;
    }

    /*
     * Handle iterations over all channels via recursion
     */
    if (chan == ISP_NOCHAN) {
        for (chan = 0; chan < isp->isp_nchan; chan++) {
            isp_del_all_wwn_entries(isp, chan);
        }
        return;
    }

    if (chan >= isp->isp_nchan) {
        return;
    }

    fcp = FCPARAM(isp, chan);
    if (fcp == NULL) {
        return;
    }
    for (i = 0; i < MAX_FC_TARG; i++) {
        fcportdb_t *lp = &fcp->portdb[i];

        if (lp->state != FC_PORTDB_STATE_NIL)
            isp_del_wwn_entry(isp, chan, lp->port_wwn, lp->handle, lp->portid);
    }
}

void
isp_del_wwn_entries(ispsoftc_t *isp, isp_notify_t *mp)
{
    fcportdb_t *lp;

    /*
     * Handle iterations over all channels via recursion
     */
    if (mp->nt_channel == ISP_NOCHAN) {
        for (mp->nt_channel = 0; mp->nt_channel < isp->isp_nchan; mp->nt_channel++) {
            isp_del_wwn_entries(isp, mp);
        }
        mp->nt_channel = ISP_NOCHAN;
        return;
    }

    /*
     * We have an entry which is only partially identified.
     *
     * It's only known by WWN, N-Port handle, or Port ID.
     * We need to find the actual entry so we can delete it.
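 * Try the most specific key first: the N-Port handle, then the WWPN,
 * and finally the Port ID.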
*/ if (mp->nt_nphdl != NIL_HANDLE) { if (isp_find_pdb_by_handle(isp, mp->nt_channel, mp->nt_nphdl, &lp)) { isp_del_wwn_entry(isp, mp->nt_channel, lp->port_wwn, lp->handle, lp->portid); return; } } if (mp->nt_wwn != INI_ANY) { if (isp_find_pdb_by_wwn(isp, mp->nt_channel, mp->nt_wwn, &lp)) { isp_del_wwn_entry(isp, mp->nt_channel, lp->port_wwn, lp->handle, lp->portid); return; } } if (mp->nt_sid != PORT_ANY && mp->nt_sid != PORT_NONE) { if (isp_find_pdb_by_sid(isp, mp->nt_channel, mp->nt_sid, &lp)) { isp_del_wwn_entry(isp, mp->nt_channel, lp->port_wwn, lp->handle, lp->portid); return; } } isp_prt(isp, ISP_LOGWARN, "Chan %d unable to find entry to delete WWPN 0x%016jx PortID 0x%06x handle 0x%x", mp->nt_channel, mp->nt_wwn, mp->nt_sid, mp->nt_nphdl); } void isp_put_atio(ispsoftc_t *isp, at_entry_t *src, at_entry_t *dst) { int i; isp_put_hdr(isp, &src->at_header, &dst->at_header); ISP_IOXPUT_16(isp, src->at_reserved, &dst->at_reserved); ISP_IOXPUT_16(isp, src->at_handle, &dst->at_handle); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->at_lun, &dst->at_iid); ISP_IOXPUT_8(isp, src->at_iid, &dst->at_lun); ISP_IOXPUT_8(isp, src->at_cdblen, &dst->at_tgt); ISP_IOXPUT_8(isp, src->at_tgt, &dst->at_cdblen); ISP_IOXPUT_8(isp, src->at_status, &dst->at_scsi_status); ISP_IOXPUT_8(isp, src->at_scsi_status, &dst->at_status); ISP_IOXPUT_8(isp, src->at_tag_val, &dst->at_tag_type); ISP_IOXPUT_8(isp, src->at_tag_type, &dst->at_tag_val); } else { ISP_IOXPUT_8(isp, src->at_lun, &dst->at_lun); ISP_IOXPUT_8(isp, src->at_iid, &dst->at_iid); ISP_IOXPUT_8(isp, src->at_cdblen, &dst->at_cdblen); ISP_IOXPUT_8(isp, src->at_tgt, &dst->at_tgt); ISP_IOXPUT_8(isp, src->at_status, &dst->at_status); ISP_IOXPUT_8(isp, src->at_scsi_status, &dst->at_scsi_status); ISP_IOXPUT_8(isp, src->at_tag_val, &dst->at_tag_val); ISP_IOXPUT_8(isp, src->at_tag_type, &dst->at_tag_type); } ISP_IOXPUT_32(isp, src->at_flags, &dst->at_flags); for (i = 0; i < ATIO_CDBLEN; i++) { ISP_IOXPUT_8(isp, src->at_cdb[i], &dst->at_cdb[i]); } for (i = 0; i < QLTM_SENSELEN; i++) { ISP_IOXPUT_8(isp, src->at_sense[i], &dst->at_sense[i]); } } void isp_get_atio(ispsoftc_t *isp, at_entry_t *src, at_entry_t *dst) { int i; isp_get_hdr(isp, &src->at_header, &dst->at_header); ISP_IOXGET_16(isp, &src->at_reserved, dst->at_reserved); ISP_IOXGET_16(isp, &src->at_handle, dst->at_handle); if (ISP_IS_SBUS(isp)) { ISP_IOXGET_8(isp, &src->at_lun, dst->at_iid); ISP_IOXGET_8(isp, &src->at_iid, dst->at_lun); ISP_IOXGET_8(isp, &src->at_cdblen, dst->at_tgt); ISP_IOXGET_8(isp, &src->at_tgt, dst->at_cdblen); ISP_IOXGET_8(isp, &src->at_status, dst->at_scsi_status); ISP_IOXGET_8(isp, &src->at_scsi_status, dst->at_status); ISP_IOXGET_8(isp, &src->at_tag_val, dst->at_tag_type); ISP_IOXGET_8(isp, &src->at_tag_type, dst->at_tag_val); } else { ISP_IOXGET_8(isp, &src->at_lun, dst->at_lun); ISP_IOXGET_8(isp, &src->at_iid, dst->at_iid); ISP_IOXGET_8(isp, &src->at_cdblen, dst->at_cdblen); ISP_IOXGET_8(isp, &src->at_tgt, dst->at_tgt); ISP_IOXGET_8(isp, &src->at_status, dst->at_status); ISP_IOXGET_8(isp, &src->at_scsi_status, dst->at_scsi_status); ISP_IOXGET_8(isp, &src->at_tag_val, dst->at_tag_val); ISP_IOXGET_8(isp, &src->at_tag_type, dst->at_tag_type); } ISP_IOXGET_32(isp, &src->at_flags, dst->at_flags); for (i = 0; i < ATIO_CDBLEN; i++) { ISP_IOXGET_8(isp, &src->at_cdb[i], dst->at_cdb[i]); } for (i = 0; i < QLTM_SENSELEN; i++) { ISP_IOXGET_8(isp, &src->at_sense[i], dst->at_sense[i]); } } void isp_put_atio2(ispsoftc_t *isp, at2_entry_t *src, at2_entry_t *dst) { int i; isp_put_hdr(isp, 
&src->at_header, &dst->at_header); ISP_IOXPUT_32(isp, src->at_reserved, &dst->at_reserved); ISP_IOXPUT_8(isp, src->at_lun, &dst->at_lun); ISP_IOXPUT_8(isp, src->at_iid, &dst->at_iid); ISP_IOXPUT_16(isp, src->at_rxid, &dst->at_rxid); ISP_IOXPUT_16(isp, src->at_flags, &dst->at_flags); ISP_IOXPUT_16(isp, src->at_status, &dst->at_status); ISP_IOXPUT_8(isp, src->at_crn, &dst->at_crn); ISP_IOXPUT_8(isp, src->at_taskcodes, &dst->at_taskcodes); ISP_IOXPUT_8(isp, src->at_taskflags, &dst->at_taskflags); ISP_IOXPUT_8(isp, src->at_execodes, &dst->at_execodes); for (i = 0; i < ATIO2_CDBLEN; i++) { ISP_IOXPUT_8(isp, src->at_cdb[i], &dst->at_cdb[i]); } ISP_IOXPUT_32(isp, src->at_datalen, &dst->at_datalen); ISP_IOXPUT_16(isp, src->at_scclun, &dst->at_scclun); for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->at_wwpn[i], &dst->at_wwpn[i]); } for (i = 0; i < 6; i++) { ISP_IOXPUT_16(isp, src->at_reserved2[i], &dst->at_reserved2[i]); } ISP_IOXPUT_16(isp, src->at_oxid, &dst->at_oxid); } void isp_put_atio2e(ispsoftc_t *isp, at2e_entry_t *src, at2e_entry_t *dst) { int i; isp_put_hdr(isp, &src->at_header, &dst->at_header); ISP_IOXPUT_32(isp, src->at_reserved, &dst->at_reserved); ISP_IOXPUT_16(isp, src->at_iid, &dst->at_iid); ISP_IOXPUT_16(isp, src->at_rxid, &dst->at_rxid); ISP_IOXPUT_16(isp, src->at_flags, &dst->at_flags); ISP_IOXPUT_16(isp, src->at_status, &dst->at_status); ISP_IOXPUT_8(isp, src->at_crn, &dst->at_crn); ISP_IOXPUT_8(isp, src->at_taskcodes, &dst->at_taskcodes); ISP_IOXPUT_8(isp, src->at_taskflags, &dst->at_taskflags); ISP_IOXPUT_8(isp, src->at_execodes, &dst->at_execodes); for (i = 0; i < ATIO2_CDBLEN; i++) { ISP_IOXPUT_8(isp, src->at_cdb[i], &dst->at_cdb[i]); } ISP_IOXPUT_32(isp, src->at_datalen, &dst->at_datalen); ISP_IOXPUT_16(isp, src->at_scclun, &dst->at_scclun); for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->at_wwpn[i], &dst->at_wwpn[i]); } for (i = 0; i < 6; i++) { ISP_IOXPUT_16(isp, src->at_reserved2[i], &dst->at_reserved2[i]); } ISP_IOXPUT_16(isp, src->at_oxid, &dst->at_oxid); } void isp_get_atio2(ispsoftc_t *isp, at2_entry_t *src, at2_entry_t *dst) { int i; isp_get_hdr(isp, &src->at_header, &dst->at_header); ISP_IOXGET_32(isp, &src->at_reserved, dst->at_reserved); ISP_IOXGET_8(isp, &src->at_lun, dst->at_lun); ISP_IOXGET_8(isp, &src->at_iid, dst->at_iid); ISP_IOXGET_16(isp, &src->at_rxid, dst->at_rxid); ISP_IOXGET_16(isp, &src->at_flags, dst->at_flags); ISP_IOXGET_16(isp, &src->at_status, dst->at_status); ISP_IOXGET_8(isp, &src->at_crn, dst->at_crn); ISP_IOXGET_8(isp, &src->at_taskcodes, dst->at_taskcodes); ISP_IOXGET_8(isp, &src->at_taskflags, dst->at_taskflags); ISP_IOXGET_8(isp, &src->at_execodes, dst->at_execodes); for (i = 0; i < ATIO2_CDBLEN; i++) { ISP_IOXGET_8(isp, &src->at_cdb[i], dst->at_cdb[i]); } ISP_IOXGET_32(isp, &src->at_datalen, dst->at_datalen); ISP_IOXGET_16(isp, &src->at_scclun, dst->at_scclun); for (i = 0; i < 4; i++) { ISP_IOXGET_16(isp, &src->at_wwpn[i], dst->at_wwpn[i]); } for (i = 0; i < 6; i++) { ISP_IOXGET_16(isp, &src->at_reserved2[i], dst->at_reserved2[i]); } ISP_IOXGET_16(isp, &src->at_oxid, dst->at_oxid); } void isp_get_atio2e(ispsoftc_t *isp, at2e_entry_t *src, at2e_entry_t *dst) { int i; isp_get_hdr(isp, &src->at_header, &dst->at_header); ISP_IOXGET_32(isp, &src->at_reserved, dst->at_reserved); ISP_IOXGET_16(isp, &src->at_iid, dst->at_iid); ISP_IOXGET_16(isp, &src->at_rxid, dst->at_rxid); ISP_IOXGET_16(isp, &src->at_flags, dst->at_flags); ISP_IOXGET_16(isp, &src->at_status, dst->at_status); ISP_IOXGET_8(isp, &src->at_crn, dst->at_crn); ISP_IOXGET_8(isp, 
&src->at_taskcodes, dst->at_taskcodes); ISP_IOXGET_8(isp, &src->at_taskflags, dst->at_taskflags); ISP_IOXGET_8(isp, &src->at_execodes, dst->at_execodes); for (i = 0; i < ATIO2_CDBLEN; i++) { ISP_IOXGET_8(isp, &src->at_cdb[i], dst->at_cdb[i]); } ISP_IOXGET_32(isp, &src->at_datalen, dst->at_datalen); ISP_IOXGET_16(isp, &src->at_scclun, dst->at_scclun); for (i = 0; i < 4; i++) { ISP_IOXGET_16(isp, &src->at_wwpn[i], dst->at_wwpn[i]); } for (i = 0; i < 6; i++) { ISP_IOXGET_16(isp, &src->at_reserved2[i], dst->at_reserved2[i]); } ISP_IOXGET_16(isp, &src->at_oxid, dst->at_oxid); } void isp_get_atio7(ispsoftc_t *isp, at7_entry_t *src, at7_entry_t *dst) { ISP_IOXGET_8(isp, &src->at_type, dst->at_type); ISP_IOXGET_8(isp, &src->at_count, dst->at_count); ISP_IOXGET_16(isp, &src->at_ta_len, dst->at_ta_len); ISP_IOXGET_32(isp, &src->at_rxid, dst->at_rxid); isp_get_fc_hdr(isp, &src->at_hdr, &dst->at_hdr); isp_get_fcp_cmnd_iu(isp, &src->at_cmnd, &dst->at_cmnd); } void isp_put_ctio(ispsoftc_t *isp, ct_entry_t *src, ct_entry_t *dst) { int i; isp_put_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXPUT_16(isp, src->ct_syshandle, &dst->ct_syshandle); ISP_IOXPUT_16(isp, src->ct_fwhandle, &dst->ct_fwhandle); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->ct_iid, &dst->ct_lun); ISP_IOXPUT_8(isp, src->ct_lun, &dst->ct_iid); ISP_IOXPUT_8(isp, src->ct_tgt, &dst->ct_reserved2); ISP_IOXPUT_8(isp, src->ct_reserved2, &dst->ct_tgt); ISP_IOXPUT_8(isp, src->ct_status, &dst->ct_scsi_status); ISP_IOXPUT_8(isp, src->ct_scsi_status, &dst->ct_status); ISP_IOXPUT_8(isp, src->ct_tag_type, &dst->ct_tag_val); ISP_IOXPUT_8(isp, src->ct_tag_val, &dst->ct_tag_type); } else { ISP_IOXPUT_8(isp, src->ct_iid, &dst->ct_iid); ISP_IOXPUT_8(isp, src->ct_lun, &dst->ct_lun); ISP_IOXPUT_8(isp, src->ct_tgt, &dst->ct_tgt); ISP_IOXPUT_8(isp, src->ct_reserved2, &dst->ct_reserved2); ISP_IOXPUT_8(isp, src->ct_scsi_status, &dst->ct_scsi_status); ISP_IOXPUT_8(isp, src->ct_status, &dst->ct_status); ISP_IOXPUT_8(isp, src->ct_tag_type, &dst->ct_tag_type); ISP_IOXPUT_8(isp, src->ct_tag_val, &dst->ct_tag_val); } ISP_IOXPUT_32(isp, src->ct_flags, &dst->ct_flags); ISP_IOXPUT_32(isp, src->ct_xfrlen, &dst->ct_xfrlen); ISP_IOXPUT_32(isp, src->ct_resid, &dst->ct_resid); ISP_IOXPUT_16(isp, src->ct_timeout, &dst->ct_timeout); ISP_IOXPUT_16(isp, src->ct_seg_count, &dst->ct_seg_count); for (i = 0; i < ISP_RQDSEG; i++) { ISP_IOXPUT_32(isp, src->ct_dataseg[i].ds_base, &dst->ct_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->ct_dataseg[i].ds_count, &dst->ct_dataseg[i].ds_count); } } void isp_get_ctio(ispsoftc_t *isp, ct_entry_t *src, ct_entry_t *dst) { int i; isp_get_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXGET_16(isp, &src->ct_syshandle, dst->ct_syshandle); ISP_IOXGET_16(isp, &src->ct_fwhandle, dst->ct_fwhandle); if (ISP_IS_SBUS(isp)) { ISP_IOXGET_8(isp, &src->ct_lun, dst->ct_iid); ISP_IOXGET_8(isp, &src->ct_iid, dst->ct_lun); ISP_IOXGET_8(isp, &src->ct_reserved2, dst->ct_tgt); ISP_IOXGET_8(isp, &src->ct_tgt, dst->ct_reserved2); ISP_IOXGET_8(isp, &src->ct_status, dst->ct_scsi_status); ISP_IOXGET_8(isp, &src->ct_scsi_status, dst->ct_status); ISP_IOXGET_8(isp, &src->ct_tag_val, dst->ct_tag_type); ISP_IOXGET_8(isp, &src->ct_tag_type, dst->ct_tag_val); } else { ISP_IOXGET_8(isp, &src->ct_lun, dst->ct_lun); ISP_IOXGET_8(isp, &src->ct_iid, dst->ct_iid); ISP_IOXGET_8(isp, &src->ct_reserved2, dst->ct_reserved2); ISP_IOXGET_8(isp, &src->ct_tgt, dst->ct_tgt); ISP_IOXGET_8(isp, &src->ct_status, dst->ct_status); ISP_IOXGET_8(isp, &src->ct_scsi_status, dst->ct_scsi_status); 
ISP_IOXGET_8(isp, &src->ct_tag_val, dst->ct_tag_val); ISP_IOXGET_8(isp, &src->ct_tag_type, dst->ct_tag_type); } ISP_IOXGET_32(isp, &src->ct_flags, dst->ct_flags); ISP_IOXGET_32(isp, &src->ct_xfrlen, dst->ct_xfrlen); ISP_IOXGET_32(isp, &src->ct_resid, dst->ct_resid); ISP_IOXGET_16(isp, &src->ct_timeout, dst->ct_timeout); ISP_IOXGET_16(isp, &src->ct_seg_count, dst->ct_seg_count); for (i = 0; i < ISP_RQDSEG; i++) { ISP_IOXGET_32(isp, &src->ct_dataseg[i].ds_base, dst->ct_dataseg[i].ds_base); ISP_IOXGET_32(isp, &src->ct_dataseg[i].ds_count, dst->ct_dataseg[i].ds_count); } } void isp_put_ctio2(ispsoftc_t *isp, ct2_entry_t *src, ct2_entry_t *dst) { int i; isp_put_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXPUT_32(isp, src->ct_syshandle, &dst->ct_syshandle); ISP_IOXPUT_8(isp, src->ct_lun, &dst->ct_lun); ISP_IOXPUT_8(isp, src->ct_iid, &dst->ct_iid); ISP_IOXPUT_16(isp, src->ct_rxid, &dst->ct_rxid); ISP_IOXPUT_16(isp, src->ct_flags, &dst->ct_flags); ISP_IOXPUT_16(isp, src->ct_timeout, &dst->ct_timeout); ISP_IOXPUT_16(isp, src->ct_seg_count, &dst->ct_seg_count); ISP_IOXPUT_32(isp, src->ct_resid, &dst->ct_resid); ISP_IOXPUT_32(isp, src->ct_reloff, &dst->ct_reloff); if ((src->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE0) { ISP_IOXPUT_32(isp, src->rsp.m0._reserved, &dst->rsp.m0._reserved); ISP_IOXPUT_16(isp, src->rsp.m0._reserved2, &dst->rsp.m0._reserved2); ISP_IOXPUT_16(isp, src->rsp.m0.ct_scsi_status, &dst->rsp.m0.ct_scsi_status); ISP_IOXPUT_32(isp, src->rsp.m0.ct_xfrlen, &dst->rsp.m0.ct_xfrlen); if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { for (i = 0; i < ISP_RQDSEG_T2; i++) { ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg[i].ds_base, &dst->rsp.m0.u.ct_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg[i].ds_count, &dst->rsp.m0.u.ct_dataseg[i].ds_count); } } else if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO3) { for (i = 0; i < ISP_RQDSEG_T3; i++) { ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg64[i].ds_base, &dst->rsp.m0.u.ct_dataseg64[i].ds_base); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg64[i].ds_basehi, &dst->rsp.m0.u.ct_dataseg64[i].ds_basehi); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg64[i].ds_count, &dst->rsp.m0.u.ct_dataseg64[i].ds_count); } } else if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO4) { ISP_IOXPUT_16(isp, src->rsp.m0.u.ct_dslist.ds_type, &dst->rsp.m0.u.ct_dslist.ds_type); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dslist.ds_segment, &dst->rsp.m0.u.ct_dslist.ds_segment); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dslist.ds_base, &dst->rsp.m0.u.ct_dslist.ds_base); } } else if ((src->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE1) { ISP_IOXPUT_16(isp, src->rsp.m1._reserved, &dst->rsp.m1._reserved); ISP_IOXPUT_16(isp, src->rsp.m1._reserved2, &dst->rsp.m1._reserved2); ISP_IOXPUT_16(isp, src->rsp.m1.ct_senselen, &dst->rsp.m1.ct_senselen); ISP_IOXPUT_16(isp, src->rsp.m1.ct_scsi_status, &dst->rsp.m1.ct_scsi_status); ISP_IOXPUT_16(isp, src->rsp.m1.ct_resplen, &dst->rsp.m1.ct_resplen); for (i = 0; i < MAXRESPLEN; i++) { ISP_IOXPUT_8(isp, src->rsp.m1.ct_resp[i], &dst->rsp.m1.ct_resp[i]); } } else { ISP_IOXPUT_32(isp, src->rsp.m2._reserved, &dst->rsp.m2._reserved); ISP_IOXPUT_16(isp, src->rsp.m2._reserved2, &dst->rsp.m2._reserved2); ISP_IOXPUT_16(isp, src->rsp.m2._reserved3, &dst->rsp.m2._reserved3); ISP_IOXPUT_32(isp, src->rsp.m2.ct_datalen, &dst->rsp.m2.ct_datalen); if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base, &dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base); ISP_IOXPUT_32(isp, 
src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count, &dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count); } else { ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base, &dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base); ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi, &dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi); ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count, &dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count); } } } void isp_put_ctio2e(ispsoftc_t *isp, ct2e_entry_t *src, ct2e_entry_t *dst) { int i; isp_put_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXPUT_32(isp, src->ct_syshandle, &dst->ct_syshandle); ISP_IOXPUT_16(isp, src->ct_iid, &dst->ct_iid); ISP_IOXPUT_16(isp, src->ct_rxid, &dst->ct_rxid); ISP_IOXPUT_16(isp, src->ct_flags, &dst->ct_flags); ISP_IOXPUT_16(isp, src->ct_timeout, &dst->ct_timeout); ISP_IOXPUT_16(isp, src->ct_seg_count, &dst->ct_seg_count); ISP_IOXPUT_32(isp, src->ct_resid, &dst->ct_resid); ISP_IOXPUT_32(isp, src->ct_reloff, &dst->ct_reloff); if ((src->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE0) { ISP_IOXPUT_32(isp, src->rsp.m0._reserved, &dst->rsp.m0._reserved); ISP_IOXPUT_16(isp, src->rsp.m0._reserved2, &dst->rsp.m0._reserved2); ISP_IOXPUT_16(isp, src->rsp.m0.ct_scsi_status, &dst->rsp.m0.ct_scsi_status); ISP_IOXPUT_32(isp, src->rsp.m0.ct_xfrlen, &dst->rsp.m0.ct_xfrlen); if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { for (i = 0; i < ISP_RQDSEG_T2; i++) { ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg[i].ds_base, &dst->rsp.m0.u.ct_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg[i].ds_count, &dst->rsp.m0.u.ct_dataseg[i].ds_count); } } else if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO3) { for (i = 0; i < ISP_RQDSEG_T3; i++) { ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg64[i].ds_base, &dst->rsp.m0.u.ct_dataseg64[i].ds_base); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg64[i].ds_basehi, &dst->rsp.m0.u.ct_dataseg64[i].ds_basehi); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg64[i].ds_count, &dst->rsp.m0.u.ct_dataseg64[i].ds_count); } } else if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO4) { ISP_IOXPUT_16(isp, src->rsp.m0.u.ct_dslist.ds_type, &dst->rsp.m0.u.ct_dslist.ds_type); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dslist.ds_segment, &dst->rsp.m0.u.ct_dslist.ds_segment); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dslist.ds_base, &dst->rsp.m0.u.ct_dslist.ds_base); } } else if ((src->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE1) { ISP_IOXPUT_16(isp, src->rsp.m1._reserved, &dst->rsp.m1._reserved); ISP_IOXPUT_16(isp, src->rsp.m1._reserved2, &dst->rsp.m1._reserved2); ISP_IOXPUT_16(isp, src->rsp.m1.ct_senselen, &dst->rsp.m1.ct_senselen); ISP_IOXPUT_16(isp, src->rsp.m1.ct_scsi_status, &dst->rsp.m1.ct_scsi_status); ISP_IOXPUT_16(isp, src->rsp.m1.ct_resplen, &dst->rsp.m1.ct_resplen); for (i = 0; i < MAXRESPLEN; i++) { ISP_IOXPUT_8(isp, src->rsp.m1.ct_resp[i], &dst->rsp.m1.ct_resp[i]); } } else { ISP_IOXPUT_32(isp, src->rsp.m2._reserved, &dst->rsp.m2._reserved); ISP_IOXPUT_16(isp, src->rsp.m2._reserved2, &dst->rsp.m2._reserved2); ISP_IOXPUT_16(isp, src->rsp.m2._reserved3, &dst->rsp.m2._reserved3); ISP_IOXPUT_32(isp, src->rsp.m2.ct_datalen, &dst->rsp.m2.ct_datalen); if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base, &dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base); ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count, &dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count); } else { ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base, 
&dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base); ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi, &dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi); ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count, &dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count); } } } void isp_put_ctio7(ispsoftc_t *isp, ct7_entry_t *src, ct7_entry_t *dst) { int i; isp_put_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXPUT_32(isp, src->ct_syshandle, &dst->ct_syshandle); ISP_IOXPUT_16(isp, src->ct_nphdl, &dst->ct_nphdl); ISP_IOXPUT_16(isp, src->ct_timeout, &dst->ct_timeout); ISP_IOXPUT_16(isp, src->ct_seg_count, &dst->ct_seg_count); ISP_IOXPUT_8(isp, src->ct_vpidx, &dst->ct_vpidx); ISP_IOXPUT_8(isp, src->ct_xflags, &dst->ct_xflags); ISP_IOXPUT_16(isp, src->ct_iid_lo, &dst->ct_iid_lo); ISP_IOXPUT_8(isp, src->ct_iid_hi, &dst->ct_iid_hi); ISP_IOXPUT_8(isp, src->ct_reserved, &dst->ct_reserved); ISP_IOXPUT_32(isp, src->ct_rxid, &dst->ct_rxid); ISP_IOXPUT_16(isp, src->ct_senselen, &dst->ct_senselen); ISP_IOXPUT_16(isp, src->ct_flags, &dst->ct_flags); ISP_IOXPUT_32(isp, src->ct_resid, &dst->ct_resid); ISP_IOXPUT_16(isp, src->ct_oxid, &dst->ct_oxid); ISP_IOXPUT_16(isp, src->ct_scsi_status, &dst->ct_scsi_status); if ((dst->ct_flags & CT7_FLAG_MMASK) == CT7_FLAG_MODE0) { ISP_IOXPUT_32(isp, src->rsp.m0.reloff, &dst->rsp.m0.reloff); ISP_IOXPUT_32(isp, src->rsp.m0.reserved0, &dst->rsp.m0.reserved0); ISP_IOXPUT_32(isp, src->rsp.m0.ct_xfrlen, &dst->rsp.m0.ct_xfrlen); ISP_IOXPUT_32(isp, src->rsp.m0.reserved1, &dst->rsp.m0.reserved1); ISP_IOXPUT_32(isp, src->rsp.m0.ds.ds_base, &dst->rsp.m0.ds.ds_base); ISP_IOXPUT_32(isp, src->rsp.m0.ds.ds_basehi, &dst->rsp.m0.ds.ds_basehi); ISP_IOXPUT_32(isp, src->rsp.m0.ds.ds_count, &dst->rsp.m0.ds.ds_count); } else if ((dst->ct_flags & CT7_FLAG_MMASK) == CT7_FLAG_MODE1) { uint32_t *a, *b; ISP_IOXPUT_16(isp, src->rsp.m1.ct_resplen, &dst->rsp.m1.ct_resplen); ISP_IOXPUT_16(isp, src->rsp.m1.reserved, &dst->rsp.m1.reserved); a = (uint32_t *) src->rsp.m1.ct_resp; b = (uint32_t *) dst->rsp.m1.ct_resp; for (i = 0; i < (ASIZE(src->rsp.m1.ct_resp) >> 2); i++) { *b++ = ISP_SWAP32(isp, *a++); } } else { ISP_IOXPUT_32(isp, src->rsp.m2.reserved0, &dst->rsp.m2.reserved0); ISP_IOXPUT_32(isp, src->rsp.m2.reserved1, &dst->rsp.m2.reserved1); ISP_IOXPUT_32(isp, src->rsp.m2.ct_datalen, &dst->rsp.m2.ct_datalen); ISP_IOXPUT_32(isp, src->rsp.m2.reserved2, &dst->rsp.m2.reserved2); ISP_IOXPUT_32(isp, src->rsp.m2.ct_fcp_rsp_iudata.ds_base, &dst->rsp.m2.ct_fcp_rsp_iudata.ds_base); ISP_IOXPUT_32(isp, src->rsp.m2.ct_fcp_rsp_iudata.ds_basehi, &dst->rsp.m2.ct_fcp_rsp_iudata.ds_basehi); ISP_IOXPUT_32(isp, src->rsp.m2.ct_fcp_rsp_iudata.ds_count, &dst->rsp.m2.ct_fcp_rsp_iudata.ds_count); } } void isp_get_ctio2(ispsoftc_t *isp, ct2_entry_t *src, ct2_entry_t *dst) { int i; isp_get_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXGET_32(isp, &src->ct_syshandle, dst->ct_syshandle); ISP_IOXGET_8(isp, &src->ct_lun, dst->ct_lun); ISP_IOXGET_8(isp, &src->ct_iid, dst->ct_iid); ISP_IOXGET_16(isp, &src->ct_rxid, dst->ct_rxid); ISP_IOXGET_16(isp, &src->ct_flags, dst->ct_flags); ISP_IOXGET_16(isp, &src->ct_status, dst->ct_status); ISP_IOXGET_16(isp, &src->ct_timeout, dst->ct_timeout); ISP_IOXGET_16(isp, &src->ct_seg_count, dst->ct_seg_count); ISP_IOXGET_32(isp, &src->ct_reloff, dst->ct_reloff); ISP_IOXGET_32(isp, &src->ct_resid, dst->ct_resid); if ((dst->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE0) { ISP_IOXGET_32(isp, &src->rsp.m0._reserved, dst->rsp.m0._reserved); ISP_IOXGET_16(isp, &src->rsp.m0._reserved2, 
dst->rsp.m0._reserved2); ISP_IOXGET_16(isp, &src->rsp.m0.ct_scsi_status, dst->rsp.m0.ct_scsi_status); ISP_IOXGET_32(isp, &src->rsp.m0.ct_xfrlen, dst->rsp.m0.ct_xfrlen); if (dst->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { for (i = 0; i < ISP_RQDSEG_T2; i++) { ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg[i].ds_base, dst->rsp.m0.u.ct_dataseg[i].ds_base); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg[i].ds_count, dst->rsp.m0.u.ct_dataseg[i].ds_count); } } else if (dst->ct_header.rqs_entry_type == RQSTYPE_CTIO3) { for (i = 0; i < ISP_RQDSEG_T3; i++) { ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg64[i].ds_base, dst->rsp.m0.u.ct_dataseg64[i].ds_base); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg64[i].ds_basehi, dst->rsp.m0.u.ct_dataseg64[i].ds_basehi); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg64[i].ds_count, dst->rsp.m0.u.ct_dataseg64[i].ds_count); } } else if (dst->ct_header.rqs_entry_type == RQSTYPE_CTIO4) { ISP_IOXGET_16(isp, &src->rsp.m0.u.ct_dslist.ds_type, dst->rsp.m0.u.ct_dslist.ds_type); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dslist.ds_segment, dst->rsp.m0.u.ct_dslist.ds_segment); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dslist.ds_base, dst->rsp.m0.u.ct_dslist.ds_base); } } else if ((dst->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE1) { ISP_IOXGET_16(isp, &src->rsp.m1._reserved, dst->rsp.m1._reserved); ISP_IOXGET_16(isp, &src->rsp.m1._reserved2, dst->rsp.m1._reserved2); ISP_IOXGET_16(isp, &src->rsp.m1.ct_senselen, dst->rsp.m1.ct_senselen); ISP_IOXGET_16(isp, &src->rsp.m1.ct_scsi_status, dst->rsp.m1.ct_scsi_status); ISP_IOXGET_16(isp, &src->rsp.m1.ct_resplen, dst->rsp.m1.ct_resplen); for (i = 0; i < MAXRESPLEN; i++) { ISP_IOXGET_8(isp, &src->rsp.m1.ct_resp[i], dst->rsp.m1.ct_resp[i]); } } else { ISP_IOXGET_32(isp, &src->rsp.m2._reserved, dst->rsp.m2._reserved); ISP_IOXGET_16(isp, &src->rsp.m2._reserved2, dst->rsp.m2._reserved2); ISP_IOXGET_16(isp, &src->rsp.m2._reserved3, dst->rsp.m2._reserved3); ISP_IOXGET_32(isp, &src->rsp.m2.ct_datalen, dst->rsp.m2.ct_datalen); if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base, dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base); ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count, dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count); } else { ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base, dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base); ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi, dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi); ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count, dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count); } } } void isp_get_ctio2e(ispsoftc_t *isp, ct2e_entry_t *src, ct2e_entry_t *dst) { int i; isp_get_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXGET_32(isp, &src->ct_syshandle, dst->ct_syshandle); ISP_IOXGET_16(isp, &src->ct_iid, dst->ct_iid); ISP_IOXGET_16(isp, &src->ct_rxid, dst->ct_rxid); ISP_IOXGET_16(isp, &src->ct_flags, dst->ct_flags); ISP_IOXGET_16(isp, &src->ct_status, dst->ct_status); ISP_IOXGET_16(isp, &src->ct_timeout, dst->ct_timeout); ISP_IOXGET_16(isp, &src->ct_seg_count, dst->ct_seg_count); ISP_IOXGET_32(isp, &src->ct_reloff, dst->ct_reloff); ISP_IOXGET_32(isp, &src->ct_resid, dst->ct_resid); if ((dst->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE0) { ISP_IOXGET_32(isp, &src->rsp.m0._reserved, dst->rsp.m0._reserved); ISP_IOXGET_16(isp, &src->rsp.m0._reserved2, dst->rsp.m0._reserved2); ISP_IOXGET_16(isp, &src->rsp.m0.ct_scsi_status, dst->rsp.m0.ct_scsi_status); ISP_IOXGET_32(isp, &src->rsp.m0.ct_xfrlen, 
dst->rsp.m0.ct_xfrlen); if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { for (i = 0; i < ISP_RQDSEG_T2; i++) { ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg[i].ds_base, dst->rsp.m0.u.ct_dataseg[i].ds_base); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg[i].ds_count, dst->rsp.m0.u.ct_dataseg[i].ds_count); } } else if (dst->ct_header.rqs_entry_type == RQSTYPE_CTIO3) { for (i = 0; i < ISP_RQDSEG_T3; i++) { ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg64[i].ds_base, dst->rsp.m0.u.ct_dataseg64[i].ds_base); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg64[i].ds_basehi, dst->rsp.m0.u.ct_dataseg64[i].ds_basehi); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg64[i].ds_count, dst->rsp.m0.u.ct_dataseg64[i].ds_count); } } else if (dst->ct_header.rqs_entry_type == RQSTYPE_CTIO4) { ISP_IOXGET_16(isp, &src->rsp.m0.u.ct_dslist.ds_type, dst->rsp.m0.u.ct_dslist.ds_type); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dslist.ds_segment, dst->rsp.m0.u.ct_dslist.ds_segment); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dslist.ds_base, dst->rsp.m0.u.ct_dslist.ds_base); } } else if ((dst->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE1) { ISP_IOXGET_16(isp, &src->rsp.m1._reserved, dst->rsp.m1._reserved); ISP_IOXGET_16(isp, &src->rsp.m1._reserved2, dst->rsp.m1._reserved2); ISP_IOXGET_16(isp, &src->rsp.m1.ct_senselen, dst->rsp.m1.ct_senselen); ISP_IOXGET_16(isp, &src->rsp.m1.ct_scsi_status, dst->rsp.m1.ct_scsi_status); ISP_IOXGET_16(isp, &src->rsp.m1.ct_resplen, dst->rsp.m1.ct_resplen); for (i = 0; i < MAXRESPLEN; i++) { ISP_IOXGET_8(isp, &src->rsp.m1.ct_resp[i], dst->rsp.m1.ct_resp[i]); } } else { ISP_IOXGET_32(isp, &src->rsp.m2._reserved, dst->rsp.m2._reserved); ISP_IOXGET_16(isp, &src->rsp.m2._reserved2, dst->rsp.m2._reserved2); ISP_IOXGET_16(isp, &src->rsp.m2._reserved3, dst->rsp.m2._reserved3); ISP_IOXGET_32(isp, &src->rsp.m2.ct_datalen, dst->rsp.m2.ct_datalen); if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base, dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base); ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count, dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count); } else { ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base, dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base); ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi, dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi); ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count, dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count); } } } void isp_get_ctio7(ispsoftc_t *isp, ct7_entry_t *src, ct7_entry_t *dst) { int i; isp_get_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXGET_32(isp, &src->ct_syshandle, dst->ct_syshandle); ISP_IOXGET_16(isp, &src->ct_nphdl, dst->ct_nphdl); ISP_IOXGET_16(isp, &src->ct_timeout, dst->ct_timeout); ISP_IOXGET_16(isp, &src->ct_seg_count, dst->ct_seg_count); ISP_IOXGET_8(isp, &src->ct_vpidx, dst->ct_vpidx); ISP_IOXGET_8(isp, &src->ct_xflags, dst->ct_xflags); ISP_IOXGET_16(isp, &src->ct_iid_lo, dst->ct_iid_lo); ISP_IOXGET_8(isp, &src->ct_iid_hi, dst->ct_iid_hi); ISP_IOXGET_8(isp, &src->ct_reserved, dst->ct_reserved); ISP_IOXGET_32(isp, &src->ct_rxid, dst->ct_rxid); ISP_IOXGET_16(isp, &src->ct_senselen, dst->ct_senselen); ISP_IOXGET_16(isp, &src->ct_flags, dst->ct_flags); ISP_IOXGET_32(isp, &src->ct_resid, dst->ct_resid); ISP_IOXGET_16(isp, &src->ct_oxid, dst->ct_oxid); ISP_IOXGET_16(isp, &src->ct_scsi_status, dst->ct_scsi_status); if ((dst->ct_flags & CT7_FLAG_MMASK) == CT7_FLAG_MODE0) { ISP_IOXGET_32(isp, &src->rsp.m0.reloff, dst->rsp.m0.reloff); ISP_IOXGET_32(isp, 
&src->rsp.m0.reserved0, dst->rsp.m0.reserved0); ISP_IOXGET_32(isp, &src->rsp.m0.ct_xfrlen, dst->rsp.m0.ct_xfrlen); ISP_IOXGET_32(isp, &src->rsp.m0.reserved1, dst->rsp.m0.reserved1); ISP_IOXGET_32(isp, &src->rsp.m0.ds.ds_base, dst->rsp.m0.ds.ds_base); ISP_IOXGET_32(isp, &src->rsp.m0.ds.ds_basehi, dst->rsp.m0.ds.ds_basehi); ISP_IOXGET_32(isp, &src->rsp.m0.ds.ds_count, dst->rsp.m0.ds.ds_count); } else if ((dst->ct_flags & CT7_FLAG_MMASK) == CT7_FLAG_MODE1) { uint32_t *a, *b; ISP_IOXGET_16(isp, &src->rsp.m1.ct_resplen, dst->rsp.m1.ct_resplen); ISP_IOXGET_16(isp, &src->rsp.m1.reserved, dst->rsp.m1.reserved); a = (uint32_t *) src->rsp.m1.ct_resp; b = (uint32_t *) dst->rsp.m1.ct_resp; /* Swizzle the response buffer as 32-bit words, mirroring isp_put_ctio7; the extra byte-wise pass over ct_resp was redundant (this loop rewrites every byte) and has been dropped. */ for (i = 0; i < (ASIZE(src->rsp.m1.ct_resp) >> 2); i++) { *b++ = ISP_SWAP32(isp, *a++); } } else { ISP_IOXGET_32(isp, &src->rsp.m2.reserved0, dst->rsp.m2.reserved0); ISP_IOXGET_32(isp, &src->rsp.m2.ct_datalen, dst->rsp.m2.ct_datalen); ISP_IOXGET_32(isp, &src->rsp.m2.reserved1, dst->rsp.m2.reserved1); ISP_IOXGET_32(isp, &src->rsp.m2.ct_fcp_rsp_iudata.ds_base, dst->rsp.m2.ct_fcp_rsp_iudata.ds_base); ISP_IOXGET_32(isp, &src->rsp.m2.ct_fcp_rsp_iudata.ds_basehi, dst->rsp.m2.ct_fcp_rsp_iudata.ds_basehi); ISP_IOXGET_32(isp, &src->rsp.m2.ct_fcp_rsp_iudata.ds_count, dst->rsp.m2.ct_fcp_rsp_iudata.ds_count); } } void isp_put_enable_lun(ispsoftc_t *isp, lun_entry_t *lesrc, lun_entry_t *ledst) { int i; isp_put_hdr(isp, &lesrc->le_header, &ledst->le_header); ISP_IOXPUT_32(isp, lesrc->le_reserved, &ledst->le_reserved); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, lesrc->le_lun, &ledst->le_rsvd); ISP_IOXPUT_8(isp, lesrc->le_rsvd, &ledst->le_lun); ISP_IOXPUT_8(isp, lesrc->le_ops, &ledst->le_tgt); ISP_IOXPUT_8(isp, lesrc->le_tgt, &ledst->le_ops); ISP_IOXPUT_8(isp, lesrc->le_status, &ledst->le_reserved2); ISP_IOXPUT_8(isp, lesrc->le_reserved2, &ledst->le_status); ISP_IOXPUT_8(isp, lesrc->le_cmd_count, &ledst->le_in_count); ISP_IOXPUT_8(isp, lesrc->le_in_count, &ledst->le_cmd_count); ISP_IOXPUT_8(isp, lesrc->le_cdb6len, &ledst->le_cdb7len); ISP_IOXPUT_8(isp, lesrc->le_cdb7len, &ledst->le_cdb6len); } else { ISP_IOXPUT_8(isp, lesrc->le_lun, &ledst->le_lun); ISP_IOXPUT_8(isp, lesrc->le_rsvd, &ledst->le_rsvd); ISP_IOXPUT_8(isp, lesrc->le_ops, &ledst->le_ops); ISP_IOXPUT_8(isp, lesrc->le_tgt, &ledst->le_tgt); ISP_IOXPUT_8(isp, lesrc->le_status, &ledst->le_status); ISP_IOXPUT_8(isp, lesrc->le_reserved2, &ledst->le_reserved2); ISP_IOXPUT_8(isp, lesrc->le_cmd_count, &ledst->le_cmd_count); ISP_IOXPUT_8(isp, lesrc->le_in_count, &ledst->le_in_count); ISP_IOXPUT_8(isp, lesrc->le_cdb6len, &ledst->le_cdb6len); ISP_IOXPUT_8(isp, lesrc->le_cdb7len, &ledst->le_cdb7len); } ISP_IOXPUT_32(isp, lesrc->le_flags, &ledst->le_flags); ISP_IOXPUT_16(isp, lesrc->le_timeout, &ledst->le_timeout); for (i = 0; i < 20; i++) { ISP_IOXPUT_8(isp, lesrc->le_reserved3[i], &ledst->le_reserved3[i]); } } void isp_get_enable_lun(ispsoftc_t *isp, lun_entry_t *lesrc, lun_entry_t *ledst) { int i; isp_get_hdr(isp, &lesrc->le_header, &ledst->le_header); ISP_IOXGET_32(isp, &lesrc->le_reserved, ledst->le_reserved); if (ISP_IS_SBUS(isp)) { ISP_IOXGET_8(isp, &lesrc->le_lun, ledst->le_rsvd); ISP_IOXGET_8(isp, &lesrc->le_rsvd, ledst->le_lun); ISP_IOXGET_8(isp, &lesrc->le_ops, ledst->le_tgt); ISP_IOXGET_8(isp, &lesrc->le_tgt, ledst->le_ops); ISP_IOXGET_8(isp, &lesrc->le_status, ledst->le_reserved2); ISP_IOXGET_8(isp, &lesrc->le_reserved2, ledst->le_status); ISP_IOXGET_8(isp,
&lesrc->le_cmd_count, ledst->le_in_count); ISP_IOXGET_8(isp, &lesrc->le_in_count, ledst->le_cmd_count); ISP_IOXGET_8(isp, &lesrc->le_cdb6len, ledst->le_cdb7len); ISP_IOXGET_8(isp, &lesrc->le_cdb7len, ledst->le_cdb6len); } else { ISP_IOXGET_8(isp, &lesrc->le_lun, ledst->le_lun); ISP_IOXGET_8(isp, &lesrc->le_rsvd, ledst->le_rsvd); ISP_IOXGET_8(isp, &lesrc->le_ops, ledst->le_ops); ISP_IOXGET_8(isp, &lesrc->le_tgt, ledst->le_tgt); ISP_IOXGET_8(isp, &lesrc->le_status, ledst->le_status); ISP_IOXGET_8(isp, &lesrc->le_reserved2, ledst->le_reserved2); ISP_IOXGET_8(isp, &lesrc->le_cmd_count, ledst->le_cmd_count); ISP_IOXGET_8(isp, &lesrc->le_in_count, ledst->le_in_count); ISP_IOXGET_8(isp, &lesrc->le_cdb6len, ledst->le_cdb6len); ISP_IOXGET_8(isp, &lesrc->le_cdb7len, ledst->le_cdb7len); } ISP_IOXGET_32(isp, &lesrc->le_flags, ledst->le_flags); ISP_IOXGET_16(isp, &lesrc->le_timeout, ledst->le_timeout); for (i = 0; i < 20; i++) { ISP_IOXGET_8(isp, &lesrc->le_reserved3[i], ledst->le_reserved3[i]); } } void isp_put_notify(ispsoftc_t *isp, in_entry_t *src, in_entry_t *dst) { int i; isp_put_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXPUT_32(isp, src->in_reserved, &dst->in_reserved); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->in_lun, &dst->in_iid); ISP_IOXPUT_8(isp, src->in_iid, &dst->in_lun); ISP_IOXPUT_8(isp, src->in_reserved2, &dst->in_tgt); ISP_IOXPUT_8(isp, src->in_tgt, &dst->in_reserved2); ISP_IOXPUT_8(isp, src->in_status, &dst->in_rsvd2); ISP_IOXPUT_8(isp, src->in_rsvd2, &dst->in_status); ISP_IOXPUT_8(isp, src->in_tag_val, &dst->in_tag_type); ISP_IOXPUT_8(isp, src->in_tag_type, &dst->in_tag_val); } else { ISP_IOXPUT_8(isp, src->in_lun, &dst->in_lun); ISP_IOXPUT_8(isp, src->in_iid, &dst->in_iid); ISP_IOXPUT_8(isp, src->in_reserved2, &dst->in_reserved2); ISP_IOXPUT_8(isp, src->in_tgt, &dst->in_tgt); ISP_IOXPUT_8(isp, src->in_status, &dst->in_status); ISP_IOXPUT_8(isp, src->in_rsvd2, &dst->in_rsvd2); ISP_IOXPUT_8(isp, src->in_tag_val, &dst->in_tag_val); ISP_IOXPUT_8(isp, src->in_tag_type, &dst->in_tag_type); } ISP_IOXPUT_32(isp, src->in_flags, &dst->in_flags); ISP_IOXPUT_16(isp, src->in_seqid, &dst->in_seqid); for (i = 0; i < IN_MSGLEN; i++) { ISP_IOXPUT_8(isp, src->in_msg[i], &dst->in_msg[i]); } for (i = 0; i < IN_RSVDLEN; i++) { ISP_IOXPUT_8(isp, src->in_reserved3[i], &dst->in_reserved3[i]); } for (i = 0; i < QLTM_SENSELEN; i++) { ISP_IOXPUT_8(isp, src->in_sense[i], &dst->in_sense[i]); } } void isp_get_notify(ispsoftc_t *isp, in_entry_t *src, in_entry_t *dst) { int i; isp_get_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXGET_32(isp, &src->in_reserved, dst->in_reserved); if (ISP_IS_SBUS(isp)) { ISP_IOXGET_8(isp, &src->in_lun, dst->in_iid); ISP_IOXGET_8(isp, &src->in_iid, dst->in_lun); ISP_IOXGET_8(isp, &src->in_reserved2, dst->in_tgt); ISP_IOXGET_8(isp, &src->in_tgt, dst->in_reserved2); ISP_IOXGET_8(isp, &src->in_status, dst->in_rsvd2); ISP_IOXGET_8(isp, &src->in_rsvd2, dst->in_status); ISP_IOXGET_8(isp, &src->in_tag_val, dst->in_tag_type); ISP_IOXGET_8(isp, &src->in_tag_type, dst->in_tag_val); } else { ISP_IOXGET_8(isp, &src->in_lun, dst->in_lun); ISP_IOXGET_8(isp, &src->in_iid, dst->in_iid); ISP_IOXGET_8(isp, &src->in_reserved2, dst->in_reserved2); ISP_IOXGET_8(isp, &src->in_tgt, dst->in_tgt); ISP_IOXGET_8(isp, &src->in_status, dst->in_status); ISP_IOXGET_8(isp, &src->in_rsvd2, dst->in_rsvd2); ISP_IOXGET_8(isp, &src->in_tag_val, dst->in_tag_val); ISP_IOXGET_8(isp, &src->in_tag_type, dst->in_tag_type); } ISP_IOXGET_32(isp, &src->in_flags, dst->in_flags); ISP_IOXGET_16(isp, 
&src->in_seqid, dst->in_seqid); for (i = 0; i < IN_MSGLEN; i++) { ISP_IOXGET_8(isp, &src->in_msg[i], dst->in_msg[i]); } for (i = 0; i < IN_RSVDLEN; i++) { ISP_IOXGET_8(isp, &src->in_reserved3[i], dst->in_reserved3[i]); } for (i = 0; i < QLTM_SENSELEN; i++) { ISP_IOXGET_8(isp, &src->in_sense[i], dst->in_sense[i]); } } void isp_put_notify_fc(ispsoftc_t *isp, in_fcentry_t *src, in_fcentry_t *dst) { isp_put_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXPUT_32(isp, src->in_reserved, &dst->in_reserved); ISP_IOXPUT_8(isp, src->in_lun, &dst->in_lun); ISP_IOXPUT_8(isp, src->in_iid, &dst->in_iid); ISP_IOXPUT_16(isp, src->in_scclun, &dst->in_scclun); ISP_IOXPUT_32(isp, src->in_reserved2, &dst->in_reserved2); ISP_IOXPUT_16(isp, src->in_status, &dst->in_status); ISP_IOXPUT_16(isp, src->in_task_flags, &dst->in_task_flags); ISP_IOXPUT_16(isp, src->in_seqid, &dst->in_seqid); } void isp_put_notify_fc_e(ispsoftc_t *isp, in_fcentry_e_t *src, in_fcentry_e_t *dst) { isp_put_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXPUT_32(isp, src->in_reserved, &dst->in_reserved); ISP_IOXPUT_16(isp, src->in_iid, &dst->in_iid); ISP_IOXPUT_16(isp, src->in_scclun, &dst->in_scclun); ISP_IOXPUT_32(isp, src->in_reserved2, &dst->in_reserved2); ISP_IOXPUT_16(isp, src->in_status, &dst->in_status); ISP_IOXPUT_16(isp, src->in_task_flags, &dst->in_task_flags); ISP_IOXPUT_16(isp, src->in_seqid, &dst->in_seqid); } void isp_put_notify_24xx(ispsoftc_t *isp, in_fcentry_24xx_t *src, in_fcentry_24xx_t *dst) { int i; isp_put_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXPUT_32(isp, src->in_reserved, &dst->in_reserved); ISP_IOXPUT_16(isp, src->in_nphdl, &dst->in_nphdl); ISP_IOXPUT_16(isp, src->in_reserved1, &dst->in_reserved1); ISP_IOXPUT_16(isp, src->in_flags, &dst->in_flags); ISP_IOXPUT_16(isp, src->in_srr_rxid, &dst->in_srr_rxid); ISP_IOXPUT_16(isp, src->in_status, &dst->in_status); ISP_IOXPUT_8(isp, src->in_status_subcode, &dst->in_status_subcode); ISP_IOXPUT_8(isp, src->in_fwhandle, &dst->in_fwhandle); ISP_IOXPUT_32(isp, src->in_rxid, &dst->in_rxid); ISP_IOXPUT_16(isp, src->in_srr_reloff_hi, &dst->in_srr_reloff_hi); ISP_IOXPUT_16(isp, src->in_srr_reloff_lo, &dst->in_srr_reloff_lo); ISP_IOXPUT_16(isp, src->in_srr_iu, &dst->in_srr_iu); ISP_IOXPUT_16(isp, src->in_srr_oxid, &dst->in_srr_oxid); ISP_IOXPUT_16(isp, src->in_nport_id_hi, &dst->in_nport_id_hi); ISP_IOXPUT_8(isp, src->in_nport_id_lo, &dst->in_nport_id_lo); ISP_IOXPUT_8(isp, src->in_reserved3, &dst->in_reserved3); ISP_IOXPUT_16(isp, src->in_np_handle, &dst->in_np_handle); for (i = 0; i < ASIZE(src->in_reserved4); i++) { ISP_IOXPUT_8(isp, src->in_reserved4[i], &dst->in_reserved4[i]); } ISP_IOXPUT_8(isp, src->in_reserved5, &dst->in_reserved5); ISP_IOXPUT_8(isp, src->in_vpidx, &dst->in_vpidx); ISP_IOXPUT_32(isp, src->in_reserved6, &dst->in_reserved6); ISP_IOXPUT_16(isp, src->in_portid_lo, &dst->in_portid_lo); ISP_IOXPUT_8(isp, src->in_portid_hi, &dst->in_portid_hi); ISP_IOXPUT_8(isp, src->in_reserved7, &dst->in_reserved7); ISP_IOXPUT_16(isp, src->in_reserved8, &dst->in_reserved8); ISP_IOXPUT_16(isp, src->in_oxid, &dst->in_oxid); } void isp_get_notify_fc(ispsoftc_t *isp, in_fcentry_t *src, in_fcentry_t *dst) { isp_get_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXGET_32(isp, &src->in_reserved, dst->in_reserved); ISP_IOXGET_8(isp, &src->in_lun, dst->in_lun); ISP_IOXGET_8(isp, &src->in_iid, dst->in_iid); ISP_IOXGET_16(isp, &src->in_scclun, dst->in_scclun); ISP_IOXGET_32(isp, &src->in_reserved2, dst->in_reserved2); ISP_IOXGET_16(isp, &src->in_status, dst->in_status); 
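	/*
	 * Editor's note (a hedged sketch, not driver code): the
	 * ISP_IS_SBUS() branches in the routines above store adjacent
	 * byte-wide fields crossed (lun/iid, tgt/reserved2, ...) because
	 * SBus adapters present each 16-bit word of a queue entry
	 * byte-swapped relative to the PCI layout.  The idea in
	 * isolation, with hypothetical names:
	 *
	 *	static void
	 *	put_byte_pair(uint8_t lo, uint8_t hi, uint8_t *dlo,
	 *	    uint8_t *dhi, int sbus)
	 *	{
	 *		if (sbus) {
	 *			*dlo = hi;	// crossed within the word
	 *			*dhi = lo;
	 *		} else {
	 *			*dlo = lo;	// straight through on PCI
	 *			*dhi = hi;
	 *		}
	 *	}
	 */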
ISP_IOXGET_16(isp, &src->in_task_flags, dst->in_task_flags); ISP_IOXGET_16(isp, &src->in_seqid, dst->in_seqid); } void isp_get_notify_fc_e(ispsoftc_t *isp, in_fcentry_e_t *src, in_fcentry_e_t *dst) { isp_get_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXGET_32(isp, &src->in_reserved, dst->in_reserved); ISP_IOXGET_16(isp, &src->in_iid, dst->in_iid); ISP_IOXGET_16(isp, &src->in_scclun, dst->in_scclun); ISP_IOXGET_32(isp, &src->in_reserved2, dst->in_reserved2); ISP_IOXGET_16(isp, &src->in_status, dst->in_status); ISP_IOXGET_16(isp, &src->in_task_flags, dst->in_task_flags); ISP_IOXGET_16(isp, &src->in_seqid, dst->in_seqid); } void isp_get_notify_24xx(ispsoftc_t *isp, in_fcentry_24xx_t *src, in_fcentry_24xx_t *dst) { int i; isp_get_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXGET_32(isp, &src->in_reserved, dst->in_reserved); ISP_IOXGET_16(isp, &src->in_nphdl, dst->in_nphdl); ISP_IOXGET_16(isp, &src->in_reserved1, dst->in_reserved1); ISP_IOXGET_16(isp, &src->in_flags, dst->in_flags); ISP_IOXGET_16(isp, &src->in_srr_rxid, dst->in_srr_rxid); ISP_IOXGET_16(isp, &src->in_status, dst->in_status); ISP_IOXGET_8(isp, &src->in_status_subcode, dst->in_status_subcode); ISP_IOXGET_8(isp, &src->in_fwhandle, dst->in_fwhandle); ISP_IOXGET_32(isp, &src->in_rxid, dst->in_rxid); ISP_IOXGET_16(isp, &src->in_srr_reloff_hi, dst->in_srr_reloff_hi); ISP_IOXGET_16(isp, &src->in_srr_reloff_lo, dst->in_srr_reloff_lo); ISP_IOXGET_16(isp, &src->in_srr_iu, dst->in_srr_iu); ISP_IOXGET_16(isp, &src->in_srr_oxid, dst->in_srr_oxid); ISP_IOXGET_16(isp, &src->in_nport_id_hi, dst->in_nport_id_hi); ISP_IOXGET_8(isp, &src->in_nport_id_lo, dst->in_nport_id_lo); ISP_IOXGET_8(isp, &src->in_reserved3, dst->in_reserved3); ISP_IOXGET_16(isp, &src->in_np_handle, dst->in_np_handle); for (i = 0; i < ASIZE(src->in_reserved4); i++) { ISP_IOXGET_8(isp, &src->in_reserved4[i], dst->in_reserved4[i]); } ISP_IOXGET_8(isp, &src->in_reserved5, dst->in_reserved5); ISP_IOXGET_8(isp, &src->in_vpidx, dst->in_vpidx); ISP_IOXGET_32(isp, &src->in_reserved6, dst->in_reserved6); ISP_IOXGET_16(isp, &src->in_portid_lo, dst->in_portid_lo); ISP_IOXGET_8(isp, &src->in_portid_hi, dst->in_portid_hi); ISP_IOXGET_8(isp, &src->in_reserved7, dst->in_reserved7); ISP_IOXGET_16(isp, &src->in_reserved8, dst->in_reserved8); ISP_IOXGET_16(isp, &src->in_oxid, dst->in_oxid); } void isp_put_notify_ack(ispsoftc_t *isp, na_entry_t *src, na_entry_t *dst) { int i; isp_put_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXPUT_32(isp, src->na_reserved, &dst->na_reserved); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->na_lun, &dst->na_iid); ISP_IOXPUT_8(isp, src->na_iid, &dst->na_lun); ISP_IOXPUT_8(isp, src->na_status, &dst->na_event); ISP_IOXPUT_8(isp, src->na_event, &dst->na_status); } else { ISP_IOXPUT_8(isp, src->na_lun, &dst->na_lun); ISP_IOXPUT_8(isp, src->na_iid, &dst->na_iid); ISP_IOXPUT_8(isp, src->na_status, &dst->na_status); ISP_IOXPUT_8(isp, src->na_event, &dst->na_event); } ISP_IOXPUT_32(isp, src->na_flags, &dst->na_flags); for (i = 0; i < NA_RSVDLEN; i++) { ISP_IOXPUT_16(isp, src->na_reserved3[i], &dst->na_reserved3[i]); } } void isp_get_notify_ack(ispsoftc_t *isp, na_entry_t *src, na_entry_t *dst) { int i; isp_get_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXGET_32(isp, &src->na_reserved, dst->na_reserved); if (ISP_IS_SBUS(isp)) { ISP_IOXGET_8(isp, &src->na_lun, dst->na_iid); ISP_IOXGET_8(isp, &src->na_iid, dst->na_lun); ISP_IOXGET_8(isp, &src->na_status, dst->na_event); ISP_IOXGET_8(isp, &src->na_event, dst->na_status); } else { ISP_IOXGET_8(isp, 
&src->na_lun, dst->na_lun); ISP_IOXGET_8(isp, &src->na_iid, dst->na_iid); ISP_IOXGET_8(isp, &src->na_status, dst->na_status); ISP_IOXGET_8(isp, &src->na_event, dst->na_event); } ISP_IOXGET_32(isp, &src->na_flags, dst->na_flags); for (i = 0; i < NA_RSVDLEN; i++) { ISP_IOXGET_16(isp, &src->na_reserved3[i], dst->na_reserved3[i]); } } void isp_put_notify_ack_fc(ispsoftc_t *isp, na_fcentry_t *src, na_fcentry_t *dst) { int i; isp_put_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXPUT_32(isp, src->na_reserved, &dst->na_reserved); ISP_IOXPUT_8(isp, src->na_reserved1, &dst->na_reserved1); ISP_IOXPUT_8(isp, src->na_iid, &dst->na_iid); ISP_IOXPUT_16(isp, src->na_response, &dst->na_response); ISP_IOXPUT_16(isp, src->na_flags, &dst->na_flags); ISP_IOXPUT_16(isp, src->na_reserved2, &dst->na_reserved2); ISP_IOXPUT_16(isp, src->na_status, &dst->na_status); ISP_IOXPUT_16(isp, src->na_task_flags, &dst->na_task_flags); ISP_IOXPUT_16(isp, src->na_seqid, &dst->na_seqid); for (i = 0; i < NA2_RSVDLEN; i++) { ISP_IOXPUT_16(isp, src->na_reserved3[i], &dst->na_reserved3[i]); } } void isp_put_notify_ack_fc_e(ispsoftc_t *isp, na_fcentry_e_t *src, na_fcentry_e_t *dst) { int i; isp_put_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXPUT_32(isp, src->na_reserved, &dst->na_reserved); ISP_IOXPUT_16(isp, src->na_iid, &dst->na_iid); ISP_IOXPUT_16(isp, src->na_response, &dst->na_response); ISP_IOXPUT_16(isp, src->na_flags, &dst->na_flags); ISP_IOXPUT_16(isp, src->na_reserved2, &dst->na_reserved2); ISP_IOXPUT_16(isp, src->na_status, &dst->na_status); ISP_IOXPUT_16(isp, src->na_task_flags, &dst->na_task_flags); ISP_IOXPUT_16(isp, src->na_seqid, &dst->na_seqid); for (i = 0; i < NA2_RSVDLEN; i++) { ISP_IOXPUT_16(isp, src->na_reserved3[i], &dst->na_reserved3[i]); } } void isp_put_notify_24xx_ack(ispsoftc_t *isp, na_fcentry_24xx_t *src, na_fcentry_24xx_t *dst) { int i; isp_put_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXPUT_32(isp, src->na_handle, &dst->na_handle); ISP_IOXPUT_16(isp, src->na_nphdl, &dst->na_nphdl); ISP_IOXPUT_16(isp, src->na_reserved1, &dst->na_reserved1); ISP_IOXPUT_16(isp, src->na_flags, &dst->na_flags); ISP_IOXPUT_16(isp, src->na_srr_rxid, &dst->na_srr_rxid); ISP_IOXPUT_16(isp, src->na_status, &dst->na_status); ISP_IOXPUT_8(isp, src->na_status_subcode, &dst->na_status_subcode); ISP_IOXPUT_8(isp, src->na_fwhandle, &dst->na_fwhandle); ISP_IOXPUT_32(isp, src->na_rxid, &dst->na_rxid); ISP_IOXPUT_16(isp, src->na_srr_reloff_hi, &dst->na_srr_reloff_hi); ISP_IOXPUT_16(isp, src->na_srr_reloff_lo, &dst->na_srr_reloff_lo); ISP_IOXPUT_16(isp, src->na_srr_iu, &dst->na_srr_iu); ISP_IOXPUT_16(isp, src->na_srr_flags, &dst->na_srr_flags); for (i = 0; i < 18; i++) { ISP_IOXPUT_8(isp, src->na_reserved3[i], &dst->na_reserved3[i]); } ISP_IOXPUT_8(isp, src->na_reserved4, &dst->na_reserved4); ISP_IOXPUT_8(isp, src->na_vpidx, &dst->na_vpidx); ISP_IOXPUT_8(isp, src->na_srr_reject_vunique, &dst->na_srr_reject_vunique); ISP_IOXPUT_8(isp, src->na_srr_reject_explanation, &dst->na_srr_reject_explanation); ISP_IOXPUT_8(isp, src->na_srr_reject_code, &dst->na_srr_reject_code); ISP_IOXPUT_8(isp, src->na_reserved5, &dst->na_reserved5); for (i = 0; i < 6; i++) { ISP_IOXPUT_8(isp, src->na_reserved6[i], &dst->na_reserved6[i]); } ISP_IOXPUT_16(isp, src->na_oxid, &dst->na_oxid); } void isp_get_notify_ack_fc(ispsoftc_t *isp, na_fcentry_t *src, na_fcentry_t *dst) { int i; isp_get_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXGET_32(isp, &src->na_reserved, dst->na_reserved); ISP_IOXGET_8(isp, &src->na_reserved1, dst->na_reserved1); 
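	/*
	 * A note on the accessor macros (illustrative assumption; the
	 * real definitions are platform-specific and live outside this
	 * file): ISP_IOXPUT_n/ISP_IOXGET_n reduce to plain assignments
	 * on hosts whose byte order matches the RISC's, and to byte
	 * swaps otherwise.  One plausible shape, using bswap16/bswap32
	 * from <sys/endian.h>:
	 *
	 *	#if _BYTE_ORDER == _BIG_ENDIAN
	 *	#define ISP_SWAP16(isp, x)	bswap16(x)
	 *	#define ISP_SWAP32(isp, x)	bswap32(x)
	 *	#else
	 *	#define ISP_SWAP16(isp, x)	(x)
	 *	#define ISP_SWAP32(isp, x)	(x)
	 *	#endif
	 *	#define ISP_IOXPUT_16(isp, s, d)	(*(d) = ISP_SWAP16(isp, (s)))
	 *	#define ISP_IOXGET_16(isp, s, d)	((d) = ISP_SWAP16(isp, *(s)))
	 */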
ISP_IOXGET_8(isp, &src->na_iid, dst->na_iid); ISP_IOXGET_16(isp, &src->na_response, dst->na_response); ISP_IOXGET_16(isp, &src->na_flags, dst->na_flags); ISP_IOXGET_16(isp, &src->na_reserved2, dst->na_reserved2); ISP_IOXGET_16(isp, &src->na_status, dst->na_status); ISP_IOXGET_16(isp, &src->na_task_flags, dst->na_task_flags); ISP_IOXGET_16(isp, &src->na_seqid, dst->na_seqid); for (i = 0; i < NA2_RSVDLEN; i++) { ISP_IOXGET_16(isp, &src->na_reserved3[i], dst->na_reserved3[i]); } } void isp_get_notify_ack_fc_e(ispsoftc_t *isp, na_fcentry_e_t *src, na_fcentry_e_t *dst) { int i; isp_get_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXGET_32(isp, &src->na_reserved, dst->na_reserved); ISP_IOXGET_16(isp, &src->na_iid, dst->na_iid); ISP_IOXGET_16(isp, &src->na_response, dst->na_response); ISP_IOXGET_16(isp, &src->na_flags, dst->na_flags); ISP_IOXGET_16(isp, &src->na_reserved2, dst->na_reserved2); ISP_IOXGET_16(isp, &src->na_status, dst->na_status); ISP_IOXGET_16(isp, &src->na_task_flags, dst->na_task_flags); ISP_IOXGET_16(isp, &src->na_seqid, dst->na_seqid); for (i = 0; i < NA2_RSVDLEN; i++) { ISP_IOXGET_16(isp, &src->na_reserved3[i], dst->na_reserved3[i]); } } void isp_get_notify_ack_24xx(ispsoftc_t *isp, na_fcentry_24xx_t *src, na_fcentry_24xx_t *dst) { int i; isp_get_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXGET_32(isp, &src->na_handle, dst->na_handle); ISP_IOXGET_16(isp, &src->na_nphdl, dst->na_nphdl); ISP_IOXGET_16(isp, &src->na_reserved1, dst->na_reserved1); ISP_IOXGET_16(isp, &src->na_flags, dst->na_flags); ISP_IOXGET_16(isp, &src->na_srr_rxid, dst->na_srr_rxid); ISP_IOXGET_16(isp, &src->na_status, dst->na_status); ISP_IOXGET_8(isp, &src->na_status_subcode, dst->na_status_subcode); ISP_IOXGET_8(isp, &src->na_fwhandle, dst->na_fwhandle); ISP_IOXGET_32(isp, &src->na_rxid, dst->na_rxid); ISP_IOXGET_16(isp, &src->na_srr_reloff_hi, dst->na_srr_reloff_hi); ISP_IOXGET_16(isp, &src->na_srr_reloff_lo, dst->na_srr_reloff_lo); ISP_IOXGET_16(isp, &src->na_srr_iu, dst->na_srr_iu); ISP_IOXGET_16(isp, &src->na_srr_flags, dst->na_srr_flags); for (i = 0; i < 18; i++) { ISP_IOXGET_8(isp, &src->na_reserved3[i], dst->na_reserved3[i]); } ISP_IOXGET_8(isp, &src->na_reserved4, dst->na_reserved4); ISP_IOXGET_8(isp, &src->na_vpidx, dst->na_vpidx); ISP_IOXGET_8(isp, &src->na_srr_reject_vunique, dst->na_srr_reject_vunique); ISP_IOXGET_8(isp, &src->na_srr_reject_explanation, dst->na_srr_reject_explanation); ISP_IOXGET_8(isp, &src->na_srr_reject_code, dst->na_srr_reject_code); ISP_IOXGET_8(isp, &src->na_reserved5, dst->na_reserved5); for (i = 0; i < 6; i++) { ISP_IOXGET_8(isp, &src->na_reserved6[i], dst->na_reserved6[i]); } ISP_IOXGET_16(isp, &src->na_oxid, dst->na_oxid); } void isp_get_abts(ispsoftc_t *isp, abts_t *src, abts_t *dst) { int i; isp_get_hdr(isp, &src->abts_header, &dst->abts_header); for (i = 0; i < 6; i++) { ISP_IOXGET_8(isp, &src->abts_reserved0[i], dst->abts_reserved0[i]); } ISP_IOXGET_16(isp, &src->abts_nphdl, dst->abts_nphdl); ISP_IOXGET_16(isp, &src->abts_reserved1, dst->abts_reserved1); ISP_IOXGET_16(isp, &src->abts_sof, dst->abts_sof); ISP_IOXGET_32(isp, &src->abts_rxid_abts, dst->abts_rxid_abts); ISP_IOXGET_16(isp, &src->abts_did_lo, dst->abts_did_lo); ISP_IOXGET_8(isp, &src->abts_did_hi, dst->abts_did_hi); ISP_IOXGET_8(isp, &src->abts_r_ctl, dst->abts_r_ctl); ISP_IOXGET_16(isp, &src->abts_sid_lo, dst->abts_sid_lo); ISP_IOXGET_8(isp, &src->abts_sid_hi, dst->abts_sid_hi); ISP_IOXGET_8(isp, &src->abts_cs_ctl, dst->abts_cs_ctl); ISP_IOXGET_16(isp, &src->abts_fs_ctl, dst->abts_fs_ctl); 
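	/*
	 * The did_lo/did_hi and sid_lo/sid_hi pairs handled here split a
	 * 24-bit Fibre Channel port id across a 16-bit and an 8-bit
	 * field.  A small helper (hypothetical, for illustration only)
	 * reassembles one:
	 *
	 *	static uint32_t
	 *	fc_portid(uint16_t lo, uint8_t hi)
	 *	{
	 *		return (((uint32_t)hi << 16) | lo);
	 *	}
	 */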
ISP_IOXGET_8(isp, &src->abts_f_ctl, dst->abts_f_ctl); ISP_IOXGET_8(isp, &src->abts_type, dst->abts_type); ISP_IOXGET_16(isp, &src->abts_seq_cnt, dst->abts_seq_cnt); ISP_IOXGET_8(isp, &src->abts_df_ctl, dst->abts_df_ctl); ISP_IOXGET_8(isp, &src->abts_seq_id, dst->abts_seq_id); ISP_IOXGET_16(isp, &src->abts_rx_id, dst->abts_rx_id); ISP_IOXGET_16(isp, &src->abts_ox_id, dst->abts_ox_id); ISP_IOXGET_32(isp, &src->abts_param, dst->abts_param); for (i = 0; i < 16; i++) { ISP_IOXGET_8(isp, &src->abts_reserved2[i], dst->abts_reserved2[i]); } ISP_IOXGET_32(isp, &src->abts_rxid_task, dst->abts_rxid_task); } void isp_put_abts_rsp(ispsoftc_t *isp, abts_rsp_t *src, abts_rsp_t *dst) { int i; isp_put_hdr(isp, &src->abts_rsp_header, &dst->abts_rsp_header); ISP_IOXPUT_32(isp, src->abts_rsp_handle, &dst->abts_rsp_handle); ISP_IOXPUT_16(isp, src->abts_rsp_status, &dst->abts_rsp_status); ISP_IOXPUT_16(isp, src->abts_rsp_nphdl, &dst->abts_rsp_nphdl); ISP_IOXPUT_16(isp, src->abts_rsp_ctl_flags, &dst->abts_rsp_ctl_flags); ISP_IOXPUT_16(isp, src->abts_rsp_sof, &dst->abts_rsp_sof); ISP_IOXPUT_32(isp, src->abts_rsp_rxid_abts, &dst->abts_rsp_rxid_abts); ISP_IOXPUT_16(isp, src->abts_rsp_did_lo, &dst->abts_rsp_did_lo); ISP_IOXPUT_8(isp, src->abts_rsp_did_hi, &dst->abts_rsp_did_hi); ISP_IOXPUT_8(isp, src->abts_rsp_r_ctl, &dst->abts_rsp_r_ctl); ISP_IOXPUT_16(isp, src->abts_rsp_sid_lo, &dst->abts_rsp_sid_lo); ISP_IOXPUT_8(isp, src->abts_rsp_sid_hi, &dst->abts_rsp_sid_hi); ISP_IOXPUT_8(isp, src->abts_rsp_cs_ctl, &dst->abts_rsp_cs_ctl); ISP_IOXPUT_16(isp, src->abts_rsp_f_ctl_lo, &dst->abts_rsp_f_ctl_lo); ISP_IOXPUT_8(isp, src->abts_rsp_f_ctl_hi, &dst->abts_rsp_f_ctl_hi); ISP_IOXPUT_8(isp, src->abts_rsp_type, &dst->abts_rsp_type); ISP_IOXPUT_16(isp, src->abts_rsp_seq_cnt, &dst->abts_rsp_seq_cnt); ISP_IOXPUT_8(isp, src->abts_rsp_df_ctl, &dst->abts_rsp_df_ctl); ISP_IOXPUT_8(isp, src->abts_rsp_seq_id, &dst->abts_rsp_seq_id); ISP_IOXPUT_16(isp, src->abts_rsp_rx_id, &dst->abts_rsp_rx_id); ISP_IOXPUT_16(isp, src->abts_rsp_ox_id, &dst->abts_rsp_ox_id); ISP_IOXPUT_32(isp, src->abts_rsp_param, &dst->abts_rsp_param); if (src->abts_rsp_r_ctl == BA_ACC) { ISP_IOXPUT_16(isp, src->abts_rsp_payload.ba_acc.reserved, &dst->abts_rsp_payload.ba_acc.reserved); ISP_IOXPUT_8(isp, src->abts_rsp_payload.ba_acc.last_seq_id, &dst->abts_rsp_payload.ba_acc.last_seq_id); ISP_IOXPUT_8(isp, src->abts_rsp_payload.ba_acc.seq_id_valid, &dst->abts_rsp_payload.ba_acc.seq_id_valid); ISP_IOXPUT_16(isp, src->abts_rsp_payload.ba_acc.aborted_rx_id, &dst->abts_rsp_payload.ba_acc.aborted_rx_id); ISP_IOXPUT_16(isp, src->abts_rsp_payload.ba_acc.aborted_ox_id, &dst->abts_rsp_payload.ba_acc.aborted_ox_id); ISP_IOXPUT_16(isp, src->abts_rsp_payload.ba_acc.high_seq_cnt, &dst->abts_rsp_payload.ba_acc.high_seq_cnt); ISP_IOXPUT_16(isp, src->abts_rsp_payload.ba_acc.low_seq_cnt, &dst->abts_rsp_payload.ba_acc.low_seq_cnt); for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->abts_rsp_payload.ba_acc.reserved2[i], &dst->abts_rsp_payload.ba_acc.reserved2[i]); } } else if (src->abts_rsp_r_ctl == BA_RJT) { ISP_IOXPUT_8(isp, src->abts_rsp_payload.ba_rjt.vendor_unique, &dst->abts_rsp_payload.ba_rjt.vendor_unique); ISP_IOXPUT_8(isp, src->abts_rsp_payload.ba_rjt.explanation, &dst->abts_rsp_payload.ba_rjt.explanation); ISP_IOXPUT_8(isp, src->abts_rsp_payload.ba_rjt.reason, &dst->abts_rsp_payload.ba_rjt.reason); ISP_IOXPUT_8(isp, src->abts_rsp_payload.ba_rjt.reserved, &dst->abts_rsp_payload.ba_rjt.reserved); for (i = 0; i < 12; i++) { ISP_IOXPUT_16(isp, 
src->abts_rsp_payload.ba_rjt.reserved2[i], &dst->abts_rsp_payload.ba_rjt.reserved2[i]); } } else { for (i = 0; i < 16; i++) { ISP_IOXPUT_8(isp, src->abts_rsp_payload.reserved[i], &dst->abts_rsp_payload.reserved[i]); } } ISP_IOXPUT_32(isp, src->abts_rsp_rxid_task, &dst->abts_rsp_rxid_task); } void isp_get_abts_rsp(ispsoftc_t *isp, abts_rsp_t *src, abts_rsp_t *dst) { int i; isp_get_hdr(isp, &src->abts_rsp_header, &dst->abts_rsp_header); ISP_IOXGET_32(isp, &src->abts_rsp_handle, dst->abts_rsp_handle); ISP_IOXGET_16(isp, &src->abts_rsp_status, dst->abts_rsp_status); ISP_IOXGET_16(isp, &src->abts_rsp_nphdl, dst->abts_rsp_nphdl); ISP_IOXGET_16(isp, &src->abts_rsp_ctl_flags, dst->abts_rsp_ctl_flags); ISP_IOXGET_16(isp, &src->abts_rsp_sof, dst->abts_rsp_sof); ISP_IOXGET_32(isp, &src->abts_rsp_rxid_abts, dst->abts_rsp_rxid_abts); ISP_IOXGET_16(isp, &src->abts_rsp_did_lo, dst->abts_rsp_did_lo); ISP_IOXGET_8(isp, &src->abts_rsp_did_hi, dst->abts_rsp_did_hi); ISP_IOXGET_8(isp, &src->abts_rsp_r_ctl, dst->abts_rsp_r_ctl); ISP_IOXGET_16(isp, &src->abts_rsp_sid_lo, dst->abts_rsp_sid_lo); ISP_IOXGET_8(isp, &src->abts_rsp_sid_hi, dst->abts_rsp_sid_hi); ISP_IOXGET_8(isp, &src->abts_rsp_cs_ctl, dst->abts_rsp_cs_ctl); ISP_IOXGET_16(isp, &src->abts_rsp_f_ctl_lo, dst->abts_rsp_f_ctl_lo); ISP_IOXGET_8(isp, &src->abts_rsp_f_ctl_hi, dst->abts_rsp_f_ctl_hi); ISP_IOXGET_8(isp, &src->abts_rsp_type, dst->abts_rsp_type); ISP_IOXGET_16(isp, &src->abts_rsp_seq_cnt, dst->abts_rsp_seq_cnt); ISP_IOXGET_8(isp, &src->abts_rsp_df_ctl, dst->abts_rsp_df_ctl); ISP_IOXGET_8(isp, &src->abts_rsp_seq_id, dst->abts_rsp_seq_id); ISP_IOXGET_16(isp, &src->abts_rsp_rx_id, dst->abts_rsp_rx_id); ISP_IOXGET_16(isp, &src->abts_rsp_ox_id, dst->abts_rsp_ox_id); ISP_IOXGET_32(isp, &src->abts_rsp_param, dst->abts_rsp_param); for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->abts_rsp_payload.rsp.reserved[i], dst->abts_rsp_payload.rsp.reserved[i]); } ISP_IOXGET_32(isp, &src->abts_rsp_payload.rsp.subcode1, dst->abts_rsp_payload.rsp.subcode1); ISP_IOXGET_32(isp, &src->abts_rsp_payload.rsp.subcode2, dst->abts_rsp_payload.rsp.subcode2); ISP_IOXGET_32(isp, &src->abts_rsp_rxid_task, dst->abts_rsp_rxid_task); } #endif /* ISP_TARGET_MODE */ /* * vim:ts=8:sw=8 */ Index: projects/powernv/dev/isp/ispmbox.h =================================================================== --- projects/powernv/dev/isp/ispmbox.h (revision 290990) +++ projects/powernv/dev/isp/ispmbox.h (revision 290991) @@ -1,2648 +1,2643 @@ /* $FreeBSD$ */ /*- * Copyright (c) 1997-2009 by Matthew Jacob * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Mailbox and Queue Entry Definitions for Qlogic ISP SCSI adapters. */ #ifndef _ISPMBOX_H #define _ISPMBOX_H /* * Mailbox Command Opcodes */ #define MBOX_NO_OP 0x0000 #define MBOX_LOAD_RAM 0x0001 #define MBOX_EXEC_FIRMWARE 0x0002 #define MBOX_DUMP_RAM 0x0003 #define MBOX_WRITE_RAM_WORD 0x0004 #define MBOX_READ_RAM_WORD 0x0005 #define MBOX_MAILBOX_REG_TEST 0x0006 #define MBOX_VERIFY_CHECKSUM 0x0007 #define MBOX_ABOUT_FIRMWARE 0x0008 #define MBOX_LOAD_RISC_RAM_2100 0x0009 /* a */ #define MBOX_LOAD_RISC_RAM 0x000b /* c */ #define MBOX_WRITE_RAM_WORD_EXTENDED 0x000d #define MBOX_CHECK_FIRMWARE 0x000e #define MBOX_READ_RAM_WORD_EXTENDED 0x000f #define MBOX_INIT_REQ_QUEUE 0x0010 #define MBOX_INIT_RES_QUEUE 0x0011 #define MBOX_EXECUTE_IOCB 0x0012 #define MBOX_WAKE_UP 0x0013 #define MBOX_STOP_FIRMWARE 0x0014 #define MBOX_ABORT 0x0015 #define MBOX_ABORT_DEVICE 0x0016 #define MBOX_ABORT_TARGET 0x0017 #define MBOX_BUS_RESET 0x0018 #define MBOX_STOP_QUEUE 0x0019 #define MBOX_START_QUEUE 0x001a #define MBOX_SINGLE_STEP_QUEUE 0x001b #define MBOX_ABORT_QUEUE 0x001c #define MBOX_GET_DEV_QUEUE_STATUS 0x001d /* 1e */ #define MBOX_GET_FIRMWARE_STATUS 0x001f #define MBOX_GET_INIT_SCSI_ID 0x0020 #define MBOX_GET_SELECT_TIMEOUT 0x0021 #define MBOX_GET_RETRY_COUNT 0x0022 #define MBOX_GET_TAG_AGE_LIMIT 0x0023 #define MBOX_GET_CLOCK_RATE 0x0024 #define MBOX_GET_ACT_NEG_STATE 0x0025 #define MBOX_GET_ASYNC_DATA_SETUP_TIME 0x0026 #define MBOX_GET_SBUS_PARAMS 0x0027 #define MBOX_GET_PCI_PARAMS MBOX_GET_SBUS_PARAMS #define MBOX_GET_TARGET_PARAMS 0x0028 #define MBOX_GET_DEV_QUEUE_PARAMS 0x0029 #define MBOX_GET_RESET_DELAY_PARAMS 0x002a /* 2b */ /* 2c */ /* 2d */ /* 2e */ /* 2f */ #define MBOX_SET_INIT_SCSI_ID 0x0030 #define MBOX_SET_SELECT_TIMEOUT 0x0031 #define MBOX_SET_RETRY_COUNT 0x0032 #define MBOX_SET_TAG_AGE_LIMIT 0x0033 #define MBOX_SET_CLOCK_RATE 0x0034 #define MBOX_SET_ACT_NEG_STATE 0x0035 #define MBOX_SET_ASYNC_DATA_SETUP_TIME 0x0036 #define MBOX_SET_SBUS_CONTROL_PARAMS 0x0037 #define MBOX_SET_PCI_PARAMETERS 0x0037 #define MBOX_SET_TARGET_PARAMS 0x0038 #define MBOX_SET_DEV_QUEUE_PARAMS 0x0039 #define MBOX_SET_RESET_DELAY_PARAMS 0x003a /* 3b */ /* 3c */ /* 3d */ /* 3e */ /* 3f */ #define MBOX_RETURN_BIOS_BLOCK_ADDR 0x0040 #define MBOX_WRITE_FOUR_RAM_WORDS 0x0041 #define MBOX_EXEC_BIOS_IOCB 0x0042 #define MBOX_SET_FW_FEATURES 0x004a #define MBOX_GET_FW_FEATURES 0x004b #define FW_FEATURE_FAST_POST 0x1 #define FW_FEATURE_LVD_NOTIFY 0x2 #define FW_FEATURE_RIO_32BIT 0x4 #define FW_FEATURE_RIO_16BIT 0x8 #define MBOX_INIT_REQ_QUEUE_A64 0x0052 #define MBOX_INIT_RES_QUEUE_A64 0x0053 #define MBOX_ENABLE_TARGET_MODE 0x0055 #define ENABLE_TARGET_FLAG 0x8000 #define ENABLE_TQING_FLAG 0x0004 #define ENABLE_MANDATORY_DISC 0x0002 #define MBOX_GET_TARGET_STATUS 0x0056 /* These are for the ISP2X00 FC cards */ #define MBOX_GET_LOOP_ID 0x0020 /* for 24XX cards, outgoing mailbox 7 has these values for F or FL topologies */ #define ISP24XX_INORDER 0x0100 #define ISP24XX_NPIV_SAN 0x0400 #define ISP24XX_VSAN_SAN
0x1000 #define ISP24XX_FC_SP_SAN 0x2000 #define MBOX_GET_FIRMWARE_OPTIONS 0x0028 #define MBOX_SET_FIRMWARE_OPTIONS 0x0038 #define MBOX_GET_RESOURCE_COUNT 0x0042 #define MBOX_REQUEST_OFFLINE_MODE 0x0043 #define MBOX_ENHANCED_GET_PDB 0x0047 #define MBOX_INIT_FIRMWARE_MULTI_ID 0x0048 /* 2400 only */ #define MBOX_GET_VP_DATABASE 0x0049 /* 2400 only */ #define MBOX_GET_VP_DATABASE_ENTRY 0x004a /* 2400 only */ #define MBOX_EXEC_COMMAND_IOCB_A64 0x0054 #define MBOX_INIT_FIRMWARE 0x0060 #define MBOX_GET_INIT_CONTROL_BLOCK 0x0061 #define MBOX_INIT_LIP 0x0062 #define MBOX_GET_FC_AL_POSITION_MAP 0x0063 #define MBOX_GET_PORT_DB 0x0064 #define MBOX_CLEAR_ACA 0x0065 #define MBOX_TARGET_RESET 0x0066 #define MBOX_CLEAR_TASK_SET 0x0067 #define MBOX_ABORT_TASK_SET 0x0068 #define MBOX_GET_FW_STATE 0x0069 #define MBOX_GET_PORT_NAME 0x006A #define MBOX_GET_LINK_STATUS 0x006B #define MBOX_INIT_LIP_RESET 0x006C #define MBOX_SEND_SNS 0x006E #define MBOX_FABRIC_LOGIN 0x006F #define MBOX_SEND_CHANGE_REQUEST 0x0070 #define MBOX_FABRIC_LOGOUT 0x0071 #define MBOX_INIT_LIP_LOGIN 0x0072 #define MBOX_GET_PORT_NODE_NAME_LIST 0x0075 #define MBOX_GET_ID_LIST 0x007C #define MBOX_LUN_RESET 0x007E #define MBOX_DRIVER_HEARTBEAT 0x005B #define MBOX_FW_HEARTBEAT 0x005C #define MBOX_GET_SET_DATA_RATE 0x005D /* 24XX/23XX only */ #define MBGSD_GET_RATE 0 #define MBGSD_SET_RATE 1 #define MBGSD_SET_RATE_NOW 2 /* 24XX only */ #define MBGSD_ONEGB 0 #define MBGSD_TWOGB 1 #define MBGSD_AUTO 2 #define MBGSD_FOURGB 3 /* 24XX only */ #define MBGSD_EIGHTGB 4 /* 25XX only */ #define ISP2100_SET_PCI_PARAM 0x00ff #define MBOX_BUSY 0x04 /* * Mailbox Command Complete Status Codes */ #define MBOX_COMMAND_COMPLETE 0x4000 #define MBOX_INVALID_COMMAND 0x4001 #define MBOX_HOST_INTERFACE_ERROR 0x4002 #define MBOX_TEST_FAILED 0x4003 #define MBOX_COMMAND_ERROR 0x4005 #define MBOX_COMMAND_PARAM_ERROR 0x4006 #define MBOX_PORT_ID_USED 0x4007 #define MBOX_LOOP_ID_USED 0x4008 #define MBOX_ALL_IDS_USED 0x4009 #define MBOX_NOT_LOGGED_IN 0x400A #define MBOX_LINK_DOWN_ERROR 0x400B #define MBOX_LOOPBACK_ERROR 0x400C #define MBOX_CHECKSUM_ERROR 0x4010 #define MBOX_INVALID_PRODUCT_KEY 0x4020 /* pseudo mailbox completion codes */ #define MBOX_REGS_BUSY 0x6000 /* registers in use */ #define MBOX_TIMEOUT 0x6001 /* command timed out */ #define MBLOGALL 0xffffffff #define MBLOGNONE 0x00000000 #define MBLOGMASK(x) (1 << (((x) - 1) & 0x1f)) /* * Asynchronous event status codes */ #define ASYNC_BUS_RESET 0x8001 #define ASYNC_SYSTEM_ERROR 0x8002 #define ASYNC_RQS_XFER_ERR 0x8003 #define ASYNC_RSP_XFER_ERR 0x8004 #define ASYNC_QWAKEUP 0x8005 #define ASYNC_TIMEOUT_RESET 0x8006 #define ASYNC_DEVICE_RESET 0x8007 #define ASYNC_EXTMSG_UNDERRUN 0x800A #define ASYNC_SCAM_INT 0x800B #define ASYNC_HUNG_SCSI 0x800C #define ASYNC_KILLED_BUS 0x800D #define ASYNC_BUS_TRANSIT 0x800E /* LVD -> HVD, eg. 
*/ #define ASYNC_LIP_OCCURRED 0x8010 #define ASYNC_LOOP_UP 0x8011 #define ASYNC_LOOP_DOWN 0x8012 #define ASYNC_LOOP_RESET 0x8013 #define ASYNC_PDB_CHANGED 0x8014 #define ASYNC_CHANGE_NOTIFY 0x8015 #define ASYNC_LIP_F8 0x8016 #define ASYNC_LIP_ERROR 0x8017 #define ASYNC_SECURITY_UPDATE 0x801B #define ASYNC_CMD_CMPLT 0x8020 #define ASYNC_CTIO_DONE 0x8021 #define ASYNC_RIO32_1 0x8021 #define ASYNC_RIO32_2 0x8022 #define ASYNC_IP_XMIT_DONE 0x8022 #define ASYNC_IP_RECV_DONE 0x8023 #define ASYNC_IP_BROADCAST 0x8024 #define ASYNC_IP_RCVQ_LOW 0x8025 #define ASYNC_IP_RCVQ_EMPTY 0x8026 #define ASYNC_IP_RECV_DONE_ALIGNED 0x8027 #define ASYNC_PTPMODE 0x8030 #define ASYNC_RIO16_1 0x8031 #define ASYNC_RIO16_2 0x8032 #define ASYNC_RIO16_3 0x8033 #define ASYNC_RIO16_4 0x8034 #define ASYNC_RIO16_5 0x8035 #define ASYNC_CONNMODE 0x8036 #define ISP_CONN_LOOP 1 #define ISP_CONN_PTP 2 #define ISP_CONN_BADLIP 3 #define ISP_CONN_FATAL 4 #define ISP_CONN_LOOPBACK 5 #define ASYNC_RIOZIO_STALL 0x8040 /* there's a RIO/ZIO entry that hasn't been serviced */ #define ASYNC_RIO32_2_2200 0x8042 /* same as ASYNC_RIO32_2, but for 2100/2200 */ #define ASYNC_RCV_ERR 0x8048 /* * Firmware Options. There are a lot of them. * * IFCOPTN - ISP Fibre Channel Option Word N */ #define IFCOPT1_EQFQASYNC (1 << 13) /* enable QFULL notification */ #define IFCOPT1_EAABSRCVD (1 << 12) #define IFCOPT1_RJTASYNC (1 << 11) /* enable 8018 notification */ #define IFCOPT1_ENAPURE (1 << 10) #define IFCOPT1_ENA8017 (1 << 7) #define IFCOPT1_DISGPIO67 (1 << 6) #define IFCOPT1_LIPLOSSIMM (1 << 5) #define IFCOPT1_DISF7SWTCH (1 << 4) #define IFCOPT1_CTIO_RETRY (1 << 3) #define IFCOPT1_LIPASYNC (1 << 1) #define IFCOPT1_LIPF8 (1 << 0) #define IFCOPT2_LOOPBACK (1 << 1) #define IFCOPT2_ATIO3_ONLY (1 << 0) #define IFCOPT3_NOPRLI (1 << 4) /* disable automatic sending of PRLI on local loops */ #define IFCOPT3_RNDASYNC (1 << 1) /* * 2.01.31 2200 Only. Need Bit 13 in Mailbox 1 for Set Firmware Options * mailbox command to enable this. 
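 *
 * With the driver's mailbox helpers, that enable step would look
 * roughly like the sketch below (hedged: treat the bit 13 mask as
 * illustrative, since option bit assignments are firmware specific):
 *
 *	mbreg_t mbs;
 *	MBSINIT(&mbs, MBOX_SET_FIRMWARE_OPTIONS, MBLOGALL, 0);
 *	mbs.param[1] = (1 << 13);
 *	isp_mboxcmd(isp, &mbs);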
*/ #define ASYNC_QFULL_SENT 0x8049 /* * Needs to be enabled */ #define ASYNC_AUTO_PLOGI_RJT 0x8018 /* * 24XX only */ #define ASYNC_RJT_SENT 0x8049 /* * All IOCB Queue entries are this size */ #define QENTRY_LEN 64 /* * Command Structure Definitions */ typedef struct { uint32_t ds_base; uint32_t ds_count; } ispds_t; typedef struct { uint32_t ds_base; uint32_t ds_basehi; uint32_t ds_count; } ispds64_t; #define DSTYPE_32BIT 0 #define DSTYPE_64BIT 1 typedef struct { uint16_t ds_type; /* 0-> ispds_t, 1-> ispds64_t */ uint32_t ds_segment; /* unused */ uint32_t ds_base; /* 32 bit address of DSD list */ } ispdslist_t; typedef struct { uint8_t rqs_entry_type; uint8_t rqs_entry_count; uint8_t rqs_seqno; uint8_t rqs_flags; } isphdr_t; /* RQS Flag definitions */ #define RQSFLAG_CONTINUATION 0x01 #define RQSFLAG_FULL 0x02 #define RQSFLAG_BADHEADER 0x04 #define RQSFLAG_BADPACKET 0x08 #define RQSFLAG_BADCOUNT 0x10 #define RQSFLAG_BADORDER 0x20 #define RQSFLAG_MASK 0x3f /* RQS entry_type definitions */ #define RQSTYPE_REQUEST 0x01 #define RQSTYPE_DATASEG 0x02 #define RQSTYPE_RESPONSE 0x03 #define RQSTYPE_MARKER 0x04 #define RQSTYPE_CMDONLY 0x05 #define RQSTYPE_ATIO 0x06 /* Target Mode */ #define RQSTYPE_CTIO 0x07 /* Target Mode */ #define RQSTYPE_SCAM 0x08 #define RQSTYPE_A64 0x09 #define RQSTYPE_A64_CONT 0x0a #define RQSTYPE_ENABLE_LUN 0x0b /* Target Mode */ #define RQSTYPE_MODIFY_LUN 0x0c /* Target Mode */ #define RQSTYPE_NOTIFY 0x0d /* Target Mode */ #define RQSTYPE_NOTIFY_ACK 0x0e /* Target Mode */ #define RQSTYPE_CTIO1 0x0f /* Target Mode */ #define RQSTYPE_STATUS_CONT 0x10 #define RQSTYPE_T2RQS 0x11 #define RQSTYPE_CTIO7 0x12 #define RQSTYPE_IP_XMIT 0x13 #define RQSTYPE_TSK_MGMT 0x14 #define RQSTYPE_T4RQS 0x15 #define RQSTYPE_ATIO2 0x16 /* Target Mode */ #define RQSTYPE_CTIO2 0x17 /* Target Mode */ #define RQSTYPE_T7RQS 0x18 #define RQSTYPE_T3RQS 0x19 #define RQSTYPE_IP_XMIT_64 0x1b #define RQSTYPE_CTIO4 0x1e /* Target Mode */ #define RQSTYPE_CTIO3 0x1f /* Target Mode */ #define RQSTYPE_RIO1 0x21 #define RQSTYPE_RIO2 0x22 #define RQSTYPE_IP_RECV 0x23 #define RQSTYPE_IP_RECV_CONT 0x24 #define RQSTYPE_CT_PASSTHRU 0x29 #define RQSTYPE_MS_PASSTHRU 0x29 #define RQSTYPE_VP_CTRL 0x30 /* 24XX only */ #define RQSTYPE_VP_MODIFY 0x31 /* 24XX only */ #define RQSTYPE_RPT_ID_ACQ 0x32 /* 24XX only */ #define RQSTYPE_ABORT_IO 0x33 #define RQSTYPE_T6RQS 0x48 #define RQSTYPE_LOGIN 0x52 #define RQSTYPE_ABTS_RCVD 0x54 /* 24XX only */ #define RQSTYPE_ABTS_RSP 0x55 /* 24XX only */ #define ISP_RQDSEG 4 typedef struct { isphdr_t req_header; uint32_t req_handle; uint8_t req_lun_trn; uint8_t req_target; uint16_t req_cdblen; uint16_t req_flags; uint16_t req_reserved; uint16_t req_time; uint16_t req_seg_count; uint8_t req_cdb[12]; ispds_t req_dataseg[ISP_RQDSEG]; } ispreq_t; #define ISP_RQDSEG_A64 2 typedef struct { isphdr_t mrk_header; uint32_t mrk_handle; uint8_t mrk_reserved0; uint8_t mrk_target; uint16_t mrk_modifier; uint16_t mrk_flags; uint16_t mrk_lun; uint8_t mrk_reserved1[48]; } isp_marker_t; typedef struct { isphdr_t mrk_header; uint32_t mrk_handle; uint16_t mrk_nphdl; uint8_t mrk_modifier; uint8_t mrk_reserved0; uint8_t mrk_reserved1; uint8_t mrk_vphdl; uint16_t mrk_reserved2; uint8_t mrk_lun[8]; uint8_t mrk_reserved3[40]; } isp_marker_24xx_t; #define SYNC_DEVICE 0 #define SYNC_TARGET 1 #define SYNC_ALL 2 #define SYNC_LIP 3 #define ISP_RQDSEG_T2 3 typedef struct { isphdr_t req_header; uint32_t req_handle; uint8_t req_lun_trn; uint8_t req_target; uint16_t req_scclun; uint16_t req_flags; uint8_t req_crn; uint8_t 
req_reserved; uint16_t req_time; uint16_t req_seg_count; uint8_t req_cdb[16]; uint32_t req_totalcnt; ispds_t req_dataseg[ISP_RQDSEG_T2]; } ispreqt2_t; typedef struct { isphdr_t req_header; uint32_t req_handle; uint16_t req_target; uint16_t req_scclun; uint16_t req_flags; uint16_t req_reserved; uint16_t req_time; uint16_t req_seg_count; uint8_t req_cdb[16]; uint32_t req_totalcnt; ispds_t req_dataseg[ISP_RQDSEG_T2]; } ispreqt2e_t; #define ISP_RQDSEG_T3 2 typedef struct { isphdr_t req_header; uint32_t req_handle; uint8_t req_lun_trn; uint8_t req_target; uint16_t req_scclun; uint16_t req_flags; uint8_t req_crn; uint8_t req_reserved; uint16_t req_time; uint16_t req_seg_count; uint8_t req_cdb[16]; uint32_t req_totalcnt; ispds64_t req_dataseg[ISP_RQDSEG_T3]; } ispreqt3_t; #define ispreq64_t ispreqt3_t /* same as.... */ typedef struct { isphdr_t req_header; uint32_t req_handle; uint16_t req_target; uint16_t req_scclun; uint16_t req_flags; uint8_t req_crn; uint8_t req_reserved; uint16_t req_time; uint16_t req_seg_count; uint8_t req_cdb[16]; uint32_t req_totalcnt; ispds64_t req_dataseg[ISP_RQDSEG_T3]; } ispreqt3e_t; /* req_flag values */ #define REQFLAG_NODISCON 0x0001 #define REQFLAG_HTAG 0x0002 #define REQFLAG_OTAG 0x0004 #define REQFLAG_STAG 0x0008 #define REQFLAG_TARGET_RTN 0x0010 #define REQFLAG_NODATA 0x0000 #define REQFLAG_DATA_IN 0x0020 #define REQFLAG_DATA_OUT 0x0040 #define REQFLAG_DATA_UNKNOWN 0x0060 #define REQFLAG_DISARQ 0x0100 #define REQFLAG_FRC_ASYNC 0x0200 #define REQFLAG_FRC_SYNC 0x0400 #define REQFLAG_FRC_WIDE 0x0800 #define REQFLAG_NOPARITY 0x1000 #define REQFLAG_STOPQ 0x2000 #define REQFLAG_XTRASNS 0x4000 #define REQFLAG_PRIORITY 0x8000 typedef struct { isphdr_t req_header; uint32_t req_handle; uint8_t req_lun_trn; uint8_t req_target; uint16_t req_cdblen; uint16_t req_flags; uint16_t req_reserved; uint16_t req_time; uint16_t req_seg_count; uint8_t req_cdb[44]; } ispextreq_t; /* * ISP24XX structures */ typedef struct { isphdr_t req_header; uint32_t req_handle; uint16_t req_nphdl; uint16_t req_time; uint16_t req_seg_count; uint16_t req_reserved; uint8_t req_lun[8]; uint8_t req_alen_datadir; uint8_t req_task_management; uint8_t req_task_attribute; uint8_t req_crn; uint8_t req_cdb[16]; uint32_t req_dl; uint16_t req_tidlo; uint8_t req_tidhi; uint8_t req_vpidx; ispds64_t req_dataseg; } ispreqt7_t; /* Task Management Request Function */ typedef struct { isphdr_t tmf_header; uint32_t tmf_handle; uint16_t tmf_nphdl; uint8_t tmf_reserved0[2]; uint16_t tmf_delay; uint16_t tmf_timeout; uint8_t tmf_lun[8]; uint32_t tmf_flags; uint8_t tmf_reserved1[20]; uint16_t tmf_tidlo; uint8_t tmf_tidhi; uint8_t tmf_vpidx; uint8_t tmf_reserved2[12]; } isp24xx_tmf_t; #define ISP24XX_TMF_NOSEND 0x80000000 #define ISP24XX_TMF_LUN_RESET 0x00000010 #define ISP24XX_TMF_ABORT_TASK_SET 0x00000008 #define ISP24XX_TMF_CLEAR_TASK_SET 0x00000004 #define ISP24XX_TMF_TARGET_RESET 0x00000002 #define ISP24XX_TMF_CLEAR_ACA 0x00000001 /* I/O Abort Structure */ typedef struct { isphdr_t abrt_header; uint32_t abrt_handle; uint16_t abrt_nphdl; uint16_t abrt_options; uint32_t abrt_cmd_handle; uint16_t abrt_queue_number; uint8_t abrt_reserved[30]; uint16_t abrt_tidlo; uint8_t abrt_tidhi; uint8_t abrt_vpidx; uint8_t abrt_reserved1[12]; } isp24xx_abrt_t; #define ISP24XX_ABRT_NOSEND 0x01 /* don't actually send ABTS */ #define ISP24XX_ABRT_OKAY 0x00 /* in nphdl on return */ #define ISP24XX_ABRT_ENXIO 0x31 /* in nphdl on return */ #define ISP_CDSEG 7 typedef struct { isphdr_t req_header; uint32_t req_reserved; ispds_t 
req_dataseg[ISP_CDSEG]; } ispcontreq_t; #define ISP_CDSEG64 5 typedef struct { isphdr_t req_header; ispds64_t req_dataseg[ISP_CDSEG64]; } ispcontreq64_t; typedef struct { isphdr_t req_header; uint32_t req_handle; uint16_t req_scsi_status; uint16_t req_completion_status; uint16_t req_state_flags; uint16_t req_status_flags; uint16_t req_time; #define req_response_len req_time /* FC only */ uint16_t req_sense_len; uint32_t req_resid; uint8_t req_response[8]; /* FC only */ uint8_t req_sense_data[32]; } ispstatusreq_t; /* * Status Continuation */ typedef struct { isphdr_t req_header; uint8_t req_sense_data[60]; } ispstatus_cont_t; /* * 24XX Type 0 status */ typedef struct { isphdr_t req_header; uint32_t req_handle; uint16_t req_completion_status; uint16_t req_oxid; uint32_t req_resid; uint16_t req_reserved0; uint16_t req_state_flags; uint16_t req_retry_delay; /* aka Status Qualifier */ uint16_t req_scsi_status; uint32_t req_fcp_residual; uint32_t req_sense_len; uint32_t req_response_len; uint8_t req_rsp_sense[28]; } isp24xx_statusreq_t; /* * For Qlogic 2X00, the high order byte of SCSI status has * additional meaning. */ #define RQCS_CR 0x1000 /* Confirmation Request */ #define RQCS_RU 0x0800 /* Residual Under */ #define RQCS_RO 0x0400 /* Residual Over */ #define RQCS_RESID (RQCS_RU|RQCS_RO) #define RQCS_SV 0x0200 /* Sense Length Valid */ #define RQCS_RV 0x0100 /* FCP Response Length Valid */ /* * CT Passthru IOCB */ typedef struct { isphdr_t ctp_header; uint32_t ctp_handle; uint16_t ctp_status; uint16_t ctp_nphdl; /* n-port handle */ uint16_t ctp_cmd_cnt; /* Command DSD count */ uint8_t ctp_vpidx; uint8_t ctp_reserved0; uint16_t ctp_time; uint16_t ctp_reserved1; uint16_t ctp_rsp_cnt; /* Response DSD count */ uint16_t ctp_reserved2[5]; uint32_t ctp_rsp_bcnt; /* Response byte count */ uint32_t ctp_cmd_bcnt; /* Command byte count */ ispds64_t ctp_dataseg[2]; } isp_ct_pt_t; /* * MS Passthru IOCB */ typedef struct { isphdr_t ms_header; uint32_t ms_handle; uint16_t ms_nphdl; /* handle in high byte for !2k f/w */ uint16_t ms_status; uint16_t ms_flags; uint16_t ms_reserved1; /* low 8 bits */ uint16_t ms_time; uint16_t ms_cmd_cnt; /* Command DSD count */ uint16_t ms_tot_cnt; /* Total DSD Count */ uint8_t ms_type; /* MS type */ uint8_t ms_r_ctl; /* R_CTL */ uint16_t ms_rxid; /* RX_ID */ uint16_t ms_reserved2; uint32_t ms_handle2; uint32_t ms_rsp_bcnt; /* Response byte count */ uint32_t ms_cmd_bcnt; /* Command byte count */ ispds64_t ms_dataseg[2]; } isp_ms_t; /* * Completion Status Codes. 
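 *
 * These codes come back in the req_completion_status field of the
 * status IOCB.  A hedged consumer-side sketch (hypothetical handler
 * and helpers, not part of this header):
 *
 *	switch (sp->req_completion_status) {
 *	case RQCS_COMPLETE:
 *		break;			(done; now inspect SCSI status)
 *	case RQCS_DATA_UNDERRUN:
 *		handle_resid(sp->req_resid);
 *		break;
 *	case RQCS_TIMEOUT:
 *		retry_or_fail(xs);
 *		break;
 *	}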
*/ #define RQCS_COMPLETE 0x0000 #define RQCS_DMA_ERROR 0x0002 #define RQCS_RESET_OCCURRED 0x0004 #define RQCS_ABORTED 0x0005 #define RQCS_TIMEOUT 0x0006 #define RQCS_DATA_OVERRUN 0x0007 #define RQCS_DATA_UNDERRUN 0x0015 #define RQCS_QUEUE_FULL 0x001C /* 1X00 Only Completion Codes */ #define RQCS_INCOMPLETE 0x0001 #define RQCS_TRANSPORT_ERROR 0x0003 #define RQCS_COMMAND_OVERRUN 0x0008 #define RQCS_STATUS_OVERRUN 0x0009 #define RQCS_BAD_MESSAGE 0x000a #define RQCS_NO_MESSAGE_OUT 0x000b #define RQCS_EXT_ID_FAILED 0x000c #define RQCS_IDE_MSG_FAILED 0x000d #define RQCS_ABORT_MSG_FAILED 0x000e #define RQCS_REJECT_MSG_FAILED 0x000f #define RQCS_NOP_MSG_FAILED 0x0010 #define RQCS_PARITY_ERROR_MSG_FAILED 0x0011 #define RQCS_DEVICE_RESET_MSG_FAILED 0x0012 #define RQCS_ID_MSG_FAILED 0x0013 #define RQCS_UNEXP_BUS_FREE 0x0014 #define RQCS_XACT_ERR1 0x0018 #define RQCS_XACT_ERR2 0x0019 #define RQCS_XACT_ERR3 0x001A #define RQCS_BAD_ENTRY 0x001B #define RQCS_PHASE_SKIPPED 0x001D #define RQCS_ARQS_FAILED 0x001E #define RQCS_WIDE_FAILED 0x001F #define RQCS_SYNCXFER_FAILED 0x0020 #define RQCS_LVD_BUSERR 0x0021 /* 2X00 Only Completion Codes */ #define RQCS_PORT_UNAVAILABLE 0x0028 #define RQCS_PORT_LOGGED_OUT 0x0029 #define RQCS_PORT_CHANGED 0x002A #define RQCS_PORT_BUSY 0x002B /* 24XX Only Completion Codes */ #define RQCS_24XX_DRE 0x0011 /* data reassembly error */ #define RQCS_24XX_TABORT 0x0013 /* aborted by target */ #define RQCS_24XX_ENOMEM 0x002C /* f/w resource unavailable */ #define RQCS_24XX_TMO 0x0030 /* task management overrun */ /* * 1X00 specific State Flags */ #define RQSF_GOT_BUS 0x0100 #define RQSF_GOT_TARGET 0x0200 #define RQSF_SENT_CDB 0x0400 #define RQSF_XFRD_DATA 0x0800 #define RQSF_GOT_STATUS 0x1000 #define RQSF_GOT_SENSE 0x2000 #define RQSF_XFER_COMPLETE 0x4000 /* * 2X00 specific State Flags * (same as 1X00 except RQSF_GOT_BUS/RQSF_GOT_TARGET are not available) */ #define RQSF_DATA_IN 0x0020 #define RQSF_DATA_OUT 0x0040 #define RQSF_STAG 0x0008 #define RQSF_OTAG 0x0004 #define RQSF_HTAG 0x0002 /* * 1X00 Status Flags */ #define RQSTF_DISCONNECT 0x0001 #define RQSTF_SYNCHRONOUS 0x0002 #define RQSTF_PARITY_ERROR 0x0004 #define RQSTF_BUS_RESET 0x0008 #define RQSTF_DEVICE_RESET 0x0010 #define RQSTF_ABORTED 0x0020 #define RQSTF_TIMEOUT 0x0040 #define RQSTF_NEGOTIATION 0x0080 /* * 2X00 specific state flags */ /* RQSF_SENT_CDB */ /* RQSF_XFRD_DATA */ /* RQSF_GOT_STATUS */ /* RQSF_XFER_COMPLETE */ /* * 2X00 specific status flags */ /* RQSTF_ABORTED */ /* RQSTF_TIMEOUT */ #define RQSTF_DMA_ERROR 0x0080 #define RQSTF_LOGOUT 0x2000 /* * Miscellaneous */ #ifndef ISP_EXEC_THROTTLE #define ISP_EXEC_THROTTLE 16 #endif /* * About Firmware returns an 'attribute' word in mailbox 6. * These attributes are for 2200 and 2300. */ #define ISP_FW_ATTR_TMODE 0x0001 #define ISP_FW_ATTR_SCCLUN 0x0002 #define ISP_FW_ATTR_FABRIC 0x0004 #define ISP_FW_ATTR_CLASS2 0x0008 #define ISP_FW_ATTR_FCTAPE 0x0010 #define ISP_FW_ATTR_IP 0x0020 #define ISP_FW_ATTR_VI 0x0040 #define ISP_FW_ATTR_VI_SOLARIS 0x0080 #define ISP_FW_ATTR_2KLOGINS 0x0100 /* just a guess... 
*/ /* and these are for the 2400 */ #define ISP2400_FW_ATTR_CLASS2 0x0001 #define ISP2400_FW_ATTR_IP 0x0002 #define ISP2400_FW_ATTR_MULTIID 0x0004 #define ISP2400_FW_ATTR_SB2 0x0008 #define ISP2400_FW_ATTR_T10CRC 0x0010 #define ISP2400_FW_ATTR_VI 0x0020 #define ISP2400_FW_ATTR_MQ 0x0040 #define ISP2400_FW_ATTR_MSIX 0x0080 #define ISP2400_FW_ATTR_FCOE 0x0800 #define ISP2400_FW_ATTR_VP0 0x1000 #define ISP2400_FW_ATTR_EXPFW 0x2000 #define ISP2400_FW_ATTR_HOTFW 0x4000 #define ISP2400_FW_ATTR_EXTNDED 0x8000 #define ISP2400_FW_ATTR_EXTVP 0x00010000 #define ISP2400_FW_ATTR_VN2VN 0x00040000 #define ISP2400_FW_ATTR_EXMOFF 0x00080000 #define ISP2400_FW_ATTR_NPMOFF 0x00100000 #define ISP2400_FW_ATTR_DIFCHOP 0x00400000 #define ISP2400_FW_ATTR_SRIOV 0x02000000 #define ISP2400_FW_ATTR_ASICTMP 0x0200000000 #define ISP2400_FW_ATTR_ATIOMQ 0x0400000000 /* * These are either manifestly true or are dependent on f/w attributes */ #define ISP_CAP_TMODE(isp) \ (IS_24XX(isp)? 1 : (isp->isp_fwattr & ISP_FW_ATTR_TMODE)) #define ISP_CAP_SCCFW(isp) \ (IS_24XX(isp)? 1 : (isp->isp_fwattr & ISP_FW_ATTR_SCCLUN)) #define ISP_CAP_2KLOGIN(isp) \ (IS_24XX(isp)? 1 : (isp->isp_fwattr & ISP_FW_ATTR_2KLOGINS)) /* * This is only true for 24XX cards with this f/w attribute */ #define ISP_CAP_MULTI_ID(isp) \ (IS_24XX(isp)? (isp->isp_fwattr & ISP2400_FW_ATTR_MULTIID) : 0) #define ISP_GET_VPIDX(isp, tag) \ (ISP_CAP_MULTI_ID(isp) ? tag : 0) #define ISP_CAP_VP0(isp) \ (IS_24XX(isp)? (isp->isp_fwattr & ISP2400_FW_ATTR_VP0) : 0) /* * This is true manifestly or is dependent on a f/w attribute * but may or may not actually be *enabled*. In any case, it * is enabled on a per-channel basis. */ #define ISP_CAP_FCTAPE(isp) \ (IS_24XX(isp)? 1 : (isp->isp_fwattr & ISP_FW_ATTR_FCTAPE)) #define ISP_FCTAPE_ENABLED(isp, chan) \ (IS_24XX(isp)? (FCPARAM(isp, chan)->isp_xfwoptions & ICB2400_OPT2_FCTAPE) != 0 : (FCPARAM(isp, chan)->isp_xfwoptions & ICBXOPT_FCTAPE) != 0) /* * Reduced Interrupt Operation Response Queue Entries */ typedef struct { isphdr_t req_header; uint32_t req_handles[15]; } isp_rio1_t; typedef struct { isphdr_t req_header; uint16_t req_handles[30]; } isp_rio2_t; /* * FC (ISP2100/ISP2200/ISP2300/ISP2400) specific data structures */ /* * Initialization Control Block * * Version One (prime) format. 
*/ typedef struct { uint8_t icb_version; uint8_t icb_reserved0; uint16_t icb_fwoptions; uint16_t icb_maxfrmlen; uint16_t icb_maxalloc; uint16_t icb_execthrottle; uint8_t icb_retry_count; uint8_t icb_retry_delay; uint8_t icb_portname[8]; uint16_t icb_hardaddr; uint8_t icb_iqdevtype; uint8_t icb_logintime; uint8_t icb_nodename[8]; uint16_t icb_rqstout; uint16_t icb_rspnsin; uint16_t icb_rqstqlen; uint16_t icb_rsltqlen; uint16_t icb_rqstaddr[4]; uint16_t icb_respaddr[4]; uint16_t icb_lunenables; uint8_t icb_ccnt; uint8_t icb_icnt; uint16_t icb_lunetimeout; uint16_t icb_reserved1; uint16_t icb_xfwoptions; uint8_t icb_racctimer; uint8_t icb_idelaytimer; uint16_t icb_zfwoptions; uint16_t icb_reserved2[13]; } isp_icb_t; #define ICB_VERSION1 1 #define ICBOPT_EXTENDED 0x8000 #define ICBOPT_BOTH_WWNS 0x4000 #define ICBOPT_FULL_LOGIN 0x2000 #define ICBOPT_STOP_ON_QFULL 0x1000 /* 2200/2100 only */ #define ICBOPT_PREV_ADDRESS 0x0800 #define ICBOPT_SRCHDOWN 0x0400 #define ICBOPT_NOLIP 0x0200 #define ICBOPT_PDBCHANGE_AE 0x0100 #define ICBOPT_TGT_TYPE 0x0080 #define ICBOPT_INI_ADISC 0x0040 #define ICBOPT_INI_DISABLE 0x0020 #define ICBOPT_TGT_ENABLE 0x0010 #define ICBOPT_FAST_POST 0x0008 #define ICBOPT_FULL_DUPLEX 0x0004 #define ICBOPT_FAIRNESS 0x0002 #define ICBOPT_HARD_ADDRESS 0x0001 #define ICBXOPT_NO_LOGOUT 0x8000 /* no logout on link failure */ #define ICBXOPT_FCTAPE_CCQ 0x4000 /* FC-Tape Command Queueing */ #define ICBXOPT_FCTAPE_CONFIRM 0x2000 #define ICBXOPT_FCTAPE 0x1000 #define ICBXOPT_CLASS2_ACK0 0x0200 #define ICBXOPT_CLASS2 0x0100 #define ICBXOPT_NO_PLAY 0x0080 /* don't play if can't get hard addr */ #define ICBXOPT_TOPO_MASK 0x0070 #define ICBXOPT_LOOP_ONLY 0x0000 #define ICBXOPT_PTP_ONLY 0x0010 #define ICBXOPT_LOOP_2_PTP 0x0020 #define ICBXOPT_PTP_2_LOOP 0x0030 /* * The lower 4 bits of the xfwoptions field are the OPERATION MODE bits. 
* RIO is not defined for the 23XX cards (just 2200) */ #define ICBXOPT_RIO_OFF 0 #define ICBXOPT_RIO_16BIT 1 #define ICBXOPT_RIO_32BIT 2 #define ICBXOPT_RIO_16BIT_IOCB 3 #define ICBXOPT_RIO_32BIT_IOCB 4 #define ICBXOPT_ZIO 5 #define ICBXOPT_TIMER_MASK 0x7 #define ICBZOPT_RATE_MASK 0xC000 #define ICBZOPT_RATE_ONEGB 0x0000 #define ICBZOPT_RATE_AUTO 0x8000 #define ICBZOPT_RATE_TWOGB 0x4000 #define ICBZOPT_50_OHM 0x2000 #define ICBZOPT_ENA_OOF 0x0040 /* out of order frame handling */ #define ICBZOPT_RSPSZ_MASK 0x0030 #define ICBZOPT_RSPSZ_24 0x0000 #define ICBZOPT_RSPSZ_12 0x0010 #define ICBZOPT_RSPSZ_24A 0x0020 #define ICBZOPT_RSPSZ_32 0x0030 #define ICBZOPT_SOFTID 0x0002 #define ICBZOPT_ENA_RDXFR_RDY 0x0001 /* 2400 F/W options */ #define ICB2400_OPT1_BOTH_WWNS 0x00004000 #define ICB2400_OPT1_FULL_LOGIN 0x00002000 #define ICB2400_OPT1_PREV_ADDRESS 0x00000800 #define ICB2400_OPT1_SRCHDOWN 0x00000400 #define ICB2400_OPT1_NOLIP 0x00000200 #define ICB2400_OPT1_INI_DISABLE 0x00000020 #define ICB2400_OPT1_TGT_ENABLE 0x00000010 #define ICB2400_OPT1_FULL_DUPLEX 0x00000004 #define ICB2400_OPT1_FAIRNESS 0x00000002 #define ICB2400_OPT1_HARD_ADDRESS 0x00000001 #define ICB2400_OPT2_ENA_ATIOMQ 0x08000000 #define ICB2400_OPT2_ENA_IHA 0x04000000 #define ICB2400_OPT2_QOS 0x02000000 #define ICB2400_OPT2_IOCBS 0x01000000 #define ICB2400_OPT2_ENA_IHR 0x00400000 #define ICB2400_OPT2_ENA_VMS 0x00200000 #define ICB2400_OPT2_ENA_TA 0x00100000 #define ICB2400_OPT2_TPRLIC 0x00004000 #define ICB2400_OPT2_FCTAPE 0x00001000 #define ICB2400_OPT2_FCSP 0x00000800 #define ICB2400_OPT2_CLASS2_ACK0 0x00000200 #define ICB2400_OPT2_CLASS2 0x00000100 #define ICB2400_OPT2_NO_PLAY 0x00000080 #define ICB2400_OPT2_TOPO_MASK 0x00000070 #define ICB2400_OPT2_LOOP_ONLY 0x00000000 #define ICB2400_OPT2_PTP_ONLY 0x00000010 #define ICB2400_OPT2_LOOP_2_PTP 0x00000020 #define ICB2400_OPT2_TIMER_MASK 0x0000000f #define ICB2400_OPT2_ZIO 0x00000005 #define ICB2400_OPT2_ZIO1 0x00000006 #define ICB2400_OPT3_NO_CTXDIS 0x40000000 #define ICB2400_OPT3_ENA_ETH_RESP 0x08000000 #define ICB2400_OPT3_ENA_ETH_ATIO 0x04000000 #define ICB2400_OPT3_ENA_MFCF 0x00020000 #define ICB2400_OPT3_SKIP_FOURGB 0x00010000 #define ICB2400_OPT3_RATE_MASK 0x0000E000 #define ICB2400_OPT3_RATE_ONEGB 0x00000000 #define ICB2400_OPT3_RATE_TWOGB 0x00002000 #define ICB2400_OPT3_RATE_AUTO 0x00004000 #define ICB2400_OPT3_RATE_FOURGB 0x00006000 #define ICB2400_OPT3_RATE_EIGHTGB 0x00008000 #define ICB2400_OPT3_RATE_SIXTEENGB 0x0000A000 #define ICB2400_OPT3_ENA_OOF_XFRDY 0x00000200 #define ICB2400_OPT3_NO_N2N_LOGI 0x00000100 #define ICB2400_OPT3_NO_LOCAL_PLOGI 0x00000080 #define ICB2400_OPT3_ENA_OOF 0x00000040 /* note that a response size flag of zero is reserved! */ #define ICB2400_OPT3_RSPSZ_MASK 0x00000030 #define ICB2400_OPT3_RSPSZ_12 0x00000010 #define ICB2400_OPT3_RSPSZ_24 0x00000020 #define ICB2400_OPT3_RSPSZ_32 0x00000030 #define ICB2400_OPT3_SOFTID 0x00000002 #define ICB_MIN_FRMLEN 256 #define ICB_MAX_FRMLEN 2112 #define ICB_DFLT_FRMLEN 1024 #define ICB_DFLT_ALLOC 256 #define ICB_DFLT_THROTTLE 16 #define ICB_DFLT_RDELAY 5 #define ICB_DFLT_RCOUNT 3 #define ICB_LOGIN_TOV 30 #define ICB_LUN_ENABLE_TOV 15 /* * And somebody at QLogic had a great idea that you could just change * the structure *and* keep the version number the same as the other cards. 
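 *
 * (The practical consequence is that code cannot key on icb_version to
 * tell the two layouts apart. A hedged sketch of the usual guard, using
 * the IS_24XX() predicate referenced earlier in this header:
 *
 *	if (IS_24XX(isp))
 *		build an isp_icb_2400_t (below);
 *	else
 *		build an isp_icb_t (above);
 * )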
*/ typedef struct { uint16_t icb_version; uint16_t icb_reserved0; uint16_t icb_maxfrmlen; uint16_t icb_execthrottle; uint16_t icb_xchgcnt; uint16_t icb_hardaddr; uint8_t icb_portname[8]; uint8_t icb_nodename[8]; uint16_t icb_rspnsin; uint16_t icb_rqstout; uint16_t icb_retry_count; uint16_t icb_priout; uint16_t icb_rsltqlen; uint16_t icb_rqstqlen; uint16_t icb_ldn_nols; uint16_t icb_prqstqlen; uint16_t icb_rqstaddr[4]; uint16_t icb_respaddr[4]; uint16_t icb_priaddr[4]; uint16_t icb_msixresp; uint16_t icb_msixatio; uint16_t icb_reserved1[2]; uint16_t icb_atio_in; uint16_t icb_atioqlen; uint16_t icb_atioqaddr[4]; uint16_t icb_idelaytimer; uint16_t icb_logintime; uint32_t icb_fwoptions1; uint32_t icb_fwoptions2; uint32_t icb_fwoptions3; uint16_t icb_qos; uint16_t icb_reserved2[3]; uint16_t icb_enodemac[3]; uint16_t icb_disctime; uint16_t icb_reserved3[4]; } isp_icb_2400_t; #define RQRSP_ADDR0015 0 #define RQRSP_ADDR1631 1 #define RQRSP_ADDR3247 2 #define RQRSP_ADDR4863 3 #define ICB_NNM0 7 #define ICB_NNM1 6 #define ICB_NNM2 5 #define ICB_NNM3 4 #define ICB_NNM4 3 #define ICB_NNM5 2 #define ICB_NNM6 1 #define ICB_NNM7 0 #define MAKE_NODE_NAME_FROM_WWN(array, wwn) \ array[ICB_NNM0] = (uint8_t) ((wwn >> 0) & 0xff), \ array[ICB_NNM1] = (uint8_t) ((wwn >> 8) & 0xff), \ array[ICB_NNM2] = (uint8_t) ((wwn >> 16) & 0xff), \ array[ICB_NNM3] = (uint8_t) ((wwn >> 24) & 0xff), \ array[ICB_NNM4] = (uint8_t) ((wwn >> 32) & 0xff), \ array[ICB_NNM5] = (uint8_t) ((wwn >> 40) & 0xff), \ array[ICB_NNM6] = (uint8_t) ((wwn >> 48) & 0xff), \ array[ICB_NNM7] = (uint8_t) ((wwn >> 56) & 0xff) #define MAKE_WWN_FROM_NODE_NAME(wwn, array) \ wwn = ((uint64_t) array[ICB_NNM0]) | \ ((uint64_t) array[ICB_NNM1] << 8) | \ ((uint64_t) array[ICB_NNM2] << 16) | \ ((uint64_t) array[ICB_NNM3] << 24) | \ ((uint64_t) array[ICB_NNM4] << 32) | \ ((uint64_t) array[ICB_NNM5] << 40) | \ ((uint64_t) array[ICB_NNM6] << 48) | \ ((uint64_t) array[ICB_NNM7] << 56) /* * For MULTI_ID firmware, this describes a * virtual port entity for getting status. */ typedef struct { uint16_t vp_port_status; uint8_t vp_port_options; uint8_t vp_port_loopid; uint8_t vp_port_portname[8]; uint8_t vp_port_nodename[8]; uint16_t vp_port_portid_lo; /* not present when trailing icb */ uint16_t vp_port_portid_hi; /* not present when trailing icb */ } vp_port_info_t; #define ICB2400_VPOPT_ENA_SNSLOGIN 0x00000040 /* Enable SNS Login and SCR for Virtual Ports */ #define ICB2400_VPOPT_TGT_DISABLE 0x00000020 /* Target Mode Disabled */ #define ICB2400_VPOPT_INI_ENABLE 0x00000010 /* Initiator Mode Enabled */ #define ICB2400_VPOPT_ENABLED 0x00000008 /* VP Enabled */ #define ICB2400_VPOPT_NOPLAY 0x00000004 /* ID Not Acquired */ #define ICB2400_VPOPT_PREV_ADDRESS 0x00000002 /* Previously Assigned ID */ #define ICB2400_VPOPT_HARD_ADDRESS 0x00000001 /* Hard Assigned ID */ #define ICB2400_VPOPT_WRITE_SIZE 20 /* * For MULTI_ID firmware, we append this structure * to the isp_icb_2400_t above, followed by a list * of structures that are *most* of the vp_port_info_t.
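 *
 * A worked example of the layout macros just below: with ICB2400_VPINFO_OFF
 * at 0x80, a 4-byte isp_icb_2400_vpinfo_t, and ICB2400_VPOPT_WRITE_SIZE (20)
 * bytes per entry, the entry for channel 2 starts at
 * ICB2400_VPINFO_PORT_OFF(2) = 0x80 + 4 + 2 * 20 = 0xac.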
*/ typedef struct { uint16_t vp_count; uint16_t vp_global_options; } isp_icb_2400_vpinfo_t; #define ICB2400_VPINFO_OFF 0x80 /* offset from start of ICB */ #define ICB2400_VPINFO_PORT_OFF(chan) \ (ICB2400_VPINFO_OFF + \ sizeof (isp_icb_2400_vpinfo_t) + (chan * ICB2400_VPOPT_WRITE_SIZE)) #define ICB2400_VPGOPT_FCA 0x01 /* Assume Clean Address bit in FLOGI ACC set (works only in static configurations) */ #define ICB2400_VPGOPT_MID_DISABLE 0x02 /* when set, connection mode2 will work with NPIV-capable switches */ #define ICB2400_VPGOPT_VP0_DECOUPLE 0x04 /* Allow VP0 decoupling if firmware supports it */ #define ICB2400_VPGOPT_SUSP_FDISK 0x10 /* Suspend FDISC for Enabled VPs */ #define ICB2400_VPGOPT_GEN_RIDA 0x20 /* Generate RIDA if FLOGI Fails */ typedef struct { isphdr_t vp_ctrl_hdr; uint32_t vp_ctrl_handle; uint16_t vp_ctrl_index_fail; uint16_t vp_ctrl_status; uint16_t vp_ctrl_command; uint16_t vp_ctrl_vp_count; uint16_t vp_ctrl_idmap[16]; uint16_t vp_ctrl_reserved[7]; uint16_t vp_ctrl_fcf_index; } vp_ctrl_info_t; #define VP_CTRL_CMD_ENABLE_VP 0x00 #define VP_CTRL_CMD_DISABLE_VP 0x08 #define VP_CTRL_CMD_DISABLE_VP_REINIT_LINK 0x09 #define VP_CTRL_CMD_DISABLE_VP_LOGO 0x0A #define VP_CTRL_CMD_DISABLE_VP_LOGO_ALL 0x0B /* * We can use this structure for modifying either one or two VP ports after initialization */ typedef struct { isphdr_t vp_mod_hdr; uint32_t vp_mod_hdl; uint16_t vp_mod_reserved0; uint16_t vp_mod_status; uint8_t vp_mod_cmd; uint8_t vp_mod_cnt; uint8_t vp_mod_idx0; uint8_t vp_mod_idx1; struct { uint8_t options; uint8_t loopid; uint16_t reserved1; uint8_t wwpn[8]; uint8_t wwnn[8]; } vp_mod_ports[2]; uint8_t vp_mod_reserved2[8]; } vp_modify_t; #define VP_STS_OK 0x00 #define VP_STS_ERR 0x01 #define VP_CNT_ERR 0x02 #define VP_GEN_ERR 0x03 #define VP_IDX_ERR 0x04 #define VP_STS_BSY 0x05 #define VP_MODIFY 0x00 #define VP_MODIFY_ENA 0x01 #define VP_MODIFY_OPT 0x02 #define VP_RESUME 0x03 /* * Port Data Base Element */ typedef struct { uint16_t pdb_options; uint8_t pdb_mstate; uint8_t pdb_sstate; uint8_t pdb_hardaddr_bits[4]; uint8_t pdb_portid_bits[4]; uint8_t pdb_nodename[8]; uint8_t pdb_portname[8]; uint16_t pdb_execthrottle; uint16_t pdb_exec_count; uint8_t pdb_retry_count; uint8_t pdb_retry_delay; uint16_t pdb_resalloc; uint16_t pdb_curalloc; uint16_t pdb_qhead; uint16_t pdb_qtail; uint16_t pdb_tl_next; uint16_t pdb_tl_last; uint16_t pdb_features; /* PLOGI, Common Service */ uint16_t pdb_pconcurrnt; /* PLOGI, Common Service */ uint16_t pdb_roi; /* PLOGI, Common Service */ uint8_t pdb_target; uint8_t pdb_initiator; /* PLOGI, Class 3 Control Flags */ uint16_t pdb_rdsiz; /* PLOGI, Class 3 */ uint16_t pdb_ncseq; /* PLOGI, Class 3 */ uint16_t pdb_noseq; /* PLOGI, Class 3 */ uint16_t pdb_labrtflg; uint16_t pdb_lstopflg; uint16_t pdb_sqhead; uint16_t pdb_sqtail; uint16_t pdb_ptimer; uint16_t pdb_nxt_seqid; uint16_t pdb_fcount; uint16_t pdb_prli_len; uint16_t pdb_prli_svc0; uint16_t pdb_prli_svc3; uint16_t pdb_loopid; uint16_t pdb_il_ptr; uint16_t pdb_sl_ptr; } isp_pdb_21xx_t; #define PDB_OPTIONS_XMITTING (1<<11) #define PDB_OPTIONS_LNKXMIT (1<<10) #define PDB_OPTIONS_ABORTED (1<<9) #define PDB_OPTIONS_ADISC (1<<1) #define PDB_STATE_DISCOVERY 0 #define PDB_STATE_WDISC_ACK 1 #define PDB_STATE_PLOGI 2 #define PDB_STATE_PLOGI_ACK 3 #define PDB_STATE_PRLI 4 #define PDB_STATE_PRLI_ACK 5 #define PDB_STATE_LOGGED_IN 6 #define PDB_STATE_PORT_UNAVAIL 7 #define PDB_STATE_PRLO 8 #define PDB_STATE_PRLO_ACK 9 #define PDB_STATE_PLOGO 10 #define PDB_STATE_PLOG_ACK 11 #define SVC3_ROLE_MASK 0x30 #define
SVC3_ROLE_SHIFT 4 #define BITS2WORD(x) ((x)[0] << 16 | (x)[3] << 8 | (x)[2]) #define BITS2WORD_24XX(x) ((x)[0] << 16 | (x)[1] << 8 | (x)[2]) /* * Port Data Base Element- 24XX cards */ typedef struct { uint16_t pdb_flags; uint8_t pdb_curstate; uint8_t pdb_laststate; uint8_t pdb_hardaddr_bits[4]; uint8_t pdb_portid_bits[4]; #define pdb_nxt_seqid_2400 pdb_portid_bits[3] uint16_t pdb_retry_timer; uint16_t pdb_handle; uint16_t pdb_rcv_dsize; uint16_t pdb_reserved0; uint16_t pdb_prli_svc0; uint16_t pdb_prli_svc3; uint8_t pdb_portname[8]; uint8_t pdb_nodename[8]; uint8_t pdb_reserved1[24]; } isp_pdb_24xx_t; #define PDB2400_TID_SUPPORTED 0x4000 #define PDB2400_FC_TAPE 0x0080 #define PDB2400_CLASS2_ACK0 0x0040 #define PDB2400_FCP_CONF 0x0020 #define PDB2400_CLASS2 0x0010 #define PDB2400_ADDR_VALID 0x0002 #define PDB2400_STATE_PLOGI_PEND 0x03 #define PDB2400_STATE_PLOGI_DONE 0x04 #define PDB2400_STATE_PRLI_PEND 0x05 #define PDB2400_STATE_LOGGED_IN 0x06 #define PDB2400_STATE_PORT_UNAVAIL 0x07 #define PDB2400_STATE_PRLO_PEND 0x09 #define PDB2400_STATE_LOGO_PEND 0x0B /* * Common elements from the above two structures that are actually useful to us. */ typedef struct { uint16_t handle; uint16_t prli_word3; uint32_t : 8, portid : 24; uint8_t portname[8]; uint8_t nodename[8]; } isp_pdb_t; /* * Port/Node Name List Element */ typedef struct { uint8_t pnnle_name[8]; uint16_t pnnle_handle; uint16_t pnnle_reserved; } isp_pnnle_t; #define PNNL_OPTIONS_NODE_NAMES (1<<0) #define PNNL_OPTIONS_PORT_DATA (1<<2) #define PNNL_OPTIONS_INITIATORS (1<<3) /* * Port and N-Port Handle List Element */ typedef struct { uint16_t pnhle_port_id_lo; uint16_t pnhle_port_id_hi_handle; } isp_pnhle_21xx_t; typedef struct { uint16_t pnhle_port_id_lo; uint16_t pnhle_port_id_hi; uint16_t pnhle_handle; } isp_pnhle_23xx_t; typedef struct { uint16_t pnhle_port_id_lo; uint16_t pnhle_port_id_hi; uint16_t pnhle_handle; uint16_t pnhle_reserved; } isp_pnhle_24xx_t; /* * Port Database Changed Async Event information for 24XX cards */ #define PDB24XX_AE_OK 0x00 #define PDB24XX_AE_IMPL_LOGO_1 0x01 #define PDB24XX_AE_IMPL_LOGO_2 0x02 #define PDB24XX_AE_IMPL_LOGO_3 0x03 #define PDB24XX_AE_PLOGI_RCVD 0x04 #define PDB24XX_AE_PLOGI_RJT 0x05 #define PDB24XX_AE_PRLI_RCVD 0x06 #define PDB24XX_AE_PRLI_RJT 0x07 #define PDB24XX_AE_TPRLO 0x08 #define PDB24XX_AE_TPRLO_RJT 0x09 #define PDB24XX_AE_PRLO_RCVD 0x0a #define PDB24XX_AE_LOGO_RCVD 0x0b #define PDB24XX_AE_TOPO_CHG 0x0c #define PDB24XX_AE_NPORT_CHG 0x0d #define PDB24XX_AE_FLOGI_RJT 0x0e #define PDB24XX_AE_BAD_FANN 0x0f #define PDB24XX_AE_FLOGI_TIMO 0x10 #define PDB24XX_AE_ABX_LOGO 0x11 #define PDB24XX_AE_PLOGI_DONE 0x12 #define PDB24XX_AE_PRLI_DONJE 0x13 #define PDB24XX_AE_OPN_1 0x14 #define PDB24XX_AE_OPN_2 0x15 #define PDB24XX_AE_TXERR 0x16 #define PDB24XX_AE_FORCED_LOGO 0x17 #define PDB24XX_AE_DISC_TIMO 0x18 /* * Genericized Port Login/Logout software structure */ typedef struct { uint16_t handle; uint16_t channel; uint32_t flags : 8, portid : 24; } isp_plcmd_t; /* the flags to use are those for PLOGX_FLG_* below */ /* * ISP24XX- Login/Logout Port IOCB */ typedef struct { isphdr_t plogx_header; uint32_t plogx_handle; uint16_t plogx_status; uint16_t plogx_nphdl; uint16_t plogx_flags; uint16_t plogx_vphdl; /* low 8 bits */ uint16_t plogx_portlo; /* low 16 bits */ uint16_t plogx_rspsz_porthi; struct { uint16_t lo16; uint16_t hi16; } plogx_ioparm[11]; } isp_plogx_t; #define PLOGX_STATUS_OK 0x00 #define PLOGX_STATUS_UNAVAIL 0x28 #define PLOGX_STATUS_LOGOUT 0x29 #define PLOGX_STATUS_IOCBERR 0x31 #define 
PLOGX_IOCBERR_NOLINK 0x01 #define PLOGX_IOCBERR_NOIOCB 0x02 #define PLOGX_IOCBERR_NOXGHG 0x03 #define PLOGX_IOCBERR_FAILED 0x04 /* further info in IOPARM 1 */ #define PLOGX_IOCBERR_NOFABRIC 0x05 #define PLOGX_IOCBERR_NOTREADY 0x07 #define PLOGX_IOCBERR_NOLOGIN 0x09 /* further info in IOPARM 1 */ #define PLOGX_IOCBERR_NOPCB 0x0a #define PLOGX_IOCBERR_REJECT 0x18 /* further info in IOPARM 1 */ #define PLOGX_IOCBERR_EINVAL 0x19 /* further info in IOPARM 1 */ #define PLOGX_IOCBERR_PORTUSED 0x1a /* further info in IOPARM 1 */ #define PLOGX_IOCBERR_HNDLUSED 0x1b /* further info in IOPARM 1 */ #define PLOGX_IOCBERR_NOHANDLE 0x1c #define PLOGX_IOCBERR_NOFLOGI 0x1f /* further info in IOPARM 1 */ #define PLOGX_FLG_CMD_MASK 0xf #define PLOGX_FLG_CMD_PLOGI 0 #define PLOGX_FLG_CMD_PRLI 1 #define PLOGX_FLG_CMD_PDISC 2 #define PLOGX_FLG_CMD_LOGO 8 #define PLOGX_FLG_CMD_PRLO 9 #define PLOGX_FLG_CMD_TPRLO 10 #define PLOGX_FLG_COND_PLOGI 0x10 /* if with PLOGI */ #define PLOGX_FLG_IMPLICIT 0x10 /* if with LOGO, PRLO, TPRLO */ #define PLOGX_FLG_SKIP_PRLI 0x20 /* if with PLOGI */ #define PLOGX_FLG_IMPLICIT_LOGO_ALL 0x20 /* if with LOGO */ #define PLOGX_FLG_EXPLICIT_LOGO 0x40 /* if with LOGO */ #define PLOGX_FLG_COMMON_FEATURES 0x80 /* if with PLOGI */ #define PLOGX_FLG_FREE_NPHDL 0x80 /* if with LOGO */ #define PLOGX_FLG_CLASS2 0x100 /* if with PLOGI */ #define PLOGX_FLG_FCP2_OVERRIDE 0x200 /* if with PRLOG, PRLI */ /* * Report ID Acquisition (24XX multi-id firmware) */ typedef struct { isphdr_t ridacq_hdr; uint32_t ridacq_handle; - union { - struct { - uint8_t ridacq_vp_acquired; - uint8_t ridacq_vp_setup; - uint16_t ridacq_reserved0; - } type0; /* type 0 */ - struct { - uint16_t ridacq_vp_count; - uint8_t ridacq_vp_index; - uint8_t ridacq_vp_status; - } type1; /* type 1 */ - } un; + uint8_t ridacq_vp_acquired; + uint8_t ridacq_vp_setup; + uint8_t ridacq_vp_index; + uint8_t ridacq_vp_status; uint16_t ridacq_vp_port_lo; uint8_t ridacq_vp_port_hi; uint8_t ridacq_format; /* 0 or 1 */ uint16_t ridacq_map[8]; uint8_t ridacq_reserved1[32]; } isp_ridacq_t; #define RIDACQ_STS_COMPLETE 0 #define RIDACQ_STS_UNACQUIRED 1 -#define RIDACQ_STS_CHANGED 20 - +#define RIDACQ_STS_CHANGED 2 +#define RIDACQ_STS_SNS_TIMEOUT 3 +#define RIDACQ_STS_SNS_REJECTED 4 +#define RIDACQ_STS_SCR_TIMEOUT 5 +#define RIDACQ_STS_SCR_REJECTED 6 /* * Simple Name Server Data Structures */ #define SNS_GA_NXT 0x100 #define SNS_GPN_ID 0x112 #define SNS_GNN_ID 0x113 #define SNS_GFF_ID 0x11F #define SNS_GID_FT 0x171 #define SNS_RFT_ID 0x217 typedef struct { uint16_t snscb_rblen; /* response buffer length (words) */ uint16_t snscb_reserved0; uint16_t snscb_addr[4]; /* response buffer address */ uint16_t snscb_sblen; /* subcommand buffer length (words) */ uint16_t snscb_reserved1; uint16_t snscb_data[]; /* variable data */ } sns_screq_t; /* Subcommand Request Structure */ typedef struct { uint16_t snscb_rblen; /* response buffer length (words) */ uint16_t snscb_reserved0; uint16_t snscb_addr[4]; /* response buffer address */ uint16_t snscb_sblen; /* subcommand buffer length (words) */ uint16_t snscb_reserved1; uint16_t snscb_cmd; uint16_t snscb_reserved2; uint32_t snscb_reserved3; uint32_t snscb_port; } sns_ga_nxt_req_t; #define SNS_GA_NXT_REQ_SIZE (sizeof (sns_ga_nxt_req_t)) typedef struct { uint16_t snscb_rblen; /* response buffer length (words) */ uint16_t snscb_reserved0; uint16_t snscb_addr[4]; /* response buffer address */ uint16_t snscb_sblen; /* subcommand buffer length (words) */ uint16_t snscb_reserved1; uint16_t snscb_cmd; uint16_t
snscb_reserved2; uint32_t snscb_reserved3; uint32_t snscb_portid; } sns_gxn_id_req_t; #define SNS_GXN_ID_REQ_SIZE (sizeof (sns_gxn_id_req_t)) typedef struct { uint16_t snscb_rblen; /* response buffer length (words) */ uint16_t snscb_reserved0; uint16_t snscb_addr[4]; /* response buffer address */ uint16_t snscb_sblen; /* subcommand buffer length (words) */ uint16_t snscb_reserved1; uint16_t snscb_cmd; uint16_t snscb_mword_div_2; uint32_t snscb_reserved3; uint32_t snscb_fc4_type; } sns_gid_ft_req_t; #define SNS_GID_FT_REQ_SIZE (sizeof (sns_gid_ft_req_t)) typedef struct { uint16_t snscb_rblen; /* response buffer length (words) */ uint16_t snscb_reserved0; uint16_t snscb_addr[4]; /* response buffer address */ uint16_t snscb_sblen; /* subcommand buffer length (words) */ uint16_t snscb_reserved1; uint16_t snscb_cmd; uint16_t snscb_reserved2; uint32_t snscb_reserved3; uint32_t snscb_port; uint32_t snscb_fc4_types[8]; } sns_rft_id_req_t; #define SNS_RFT_ID_REQ_SIZE (sizeof (sns_rft_id_req_t)) typedef struct { ct_hdr_t snscb_cthdr; uint8_t snscb_port_type; uint8_t snscb_port_id[3]; uint8_t snscb_portname[8]; uint16_t snscb_data[]; /* variable data */ } sns_scrsp_t; /* Subcommand Response Structure */ typedef struct { ct_hdr_t snscb_cthdr; uint8_t snscb_port_type; uint8_t snscb_port_id[3]; uint8_t snscb_portname[8]; uint8_t snscb_pnlen; /* symbolic port name length */ uint8_t snscb_pname[255]; /* symbolic port name */ uint8_t snscb_nodename[8]; uint8_t snscb_nnlen; /* symbolic node name length */ uint8_t snscb_nname[255]; /* symbolic node name */ uint8_t snscb_ipassoc[8]; uint8_t snscb_ipaddr[16]; uint8_t snscb_svc_class[4]; uint8_t snscb_fc4_types[32]; uint8_t snscb_fpname[8]; uint8_t snscb_reserved; uint8_t snscb_hardaddr[3]; } sns_ga_nxt_rsp_t; /* Subcommand Response Structure */ #define SNS_GA_NXT_RESP_SIZE (sizeof (sns_ga_nxt_rsp_t)) typedef struct { ct_hdr_t snscb_cthdr; uint8_t snscb_wwn[8]; } sns_gxn_id_rsp_t; #define SNS_GXN_ID_RESP_SIZE (sizeof (sns_gxn_id_rsp_t)) typedef struct { ct_hdr_t snscb_cthdr; uint32_t snscb_fc4_features[32]; } sns_gff_id_rsp_t; #define SNS_GFF_ID_RESP_SIZE (sizeof (sns_gff_id_rsp_t)) typedef struct { ct_hdr_t snscb_cthdr; struct { uint8_t control; uint8_t portid[3]; } snscb_ports[1]; } sns_gid_ft_rsp_t; #define SNS_GID_FT_RESP_SIZE(x) ((sizeof (sns_gid_ft_rsp_t)) + ((x - 1) << 2)) #define SNS_RFT_ID_RESP_SIZE (sizeof (ct_hdr_t)) /* * Other Misc Structures */ /* ELS Pass Through */ typedef struct { isphdr_t els_hdr; uint32_t els_handle; uint16_t els_status; uint16_t els_nphdl; uint16_t els_xmit_dsd_count; /* outgoing only */ uint8_t els_vphdl; uint8_t els_sof; uint32_t els_rxid; uint16_t els_recv_dsd_count; /* outgoing only */ uint8_t els_opcode; uint8_t els_reserved1; uint8_t els_did_lo; uint8_t els_did_mid; uint8_t els_did_hi; uint8_t els_reserved2; uint16_t els_reserved3; uint16_t els_ctl_flags; union { struct { uint32_t _els_bytecnt; uint32_t _els_subcode1; uint32_t _els_subcode2; uint8_t _els_reserved4[20]; } in; struct { uint32_t _els_recv_bytecnt; uint32_t _els_xmit_bytecnt; uint32_t _els_xmit_dsd_length; uint16_t _els_xmit_dsd_a1500; uint16_t _els_xmit_dsd_a3116; uint16_t _els_xmit_dsd_a4732; uint16_t _els_xmit_dsd_a6348; uint32_t _els_recv_dsd_length; uint16_t _els_recv_dsd_a1500; uint16_t _els_recv_dsd_a3116; uint16_t _els_recv_dsd_a4732; uint16_t _els_recv_dsd_a6348; } out; } inout; #define els_bytecnt inout.in._els_bytecnt #define els_subcode1 inout.in._els_subcode1 #define els_subcode2 inout.in._els_subcode2 #define els_reserved4 
inout.in._els_reserved4 #define els_recv_bytecnt inout.out._els_recv_bytecnt #define els_xmit_bytecnt inout.out._els_xmit_bytecnt #define els_xmit_dsd_length inout.out._els_xmit_dsd_length #define els_xmit_dsd_a1500 inout.out._els_xmit_dsd_a1500 #define els_xmit_dsd_a3116 inout.out._els_xmit_dsd_a3116 #define els_xmit_dsd_a4732 inout.out._els_xmit_dsd_a4732 #define els_xmit_dsd_a6348 inout.out._els_xmit_dsd_a6348 #define els_recv_dsd_length inout.out._els_recv_dsd_length #define els_recv_dsd_a1500 inout.out._els_recv_dsd_a1500 #define els_recv_dsd_a3116 inout.out._els_recv_dsd_a3116 #define els_recv_dsd_a4732 inout.out._els_recv_dsd_a4732 #define els_recv_dsd_a6348 inout.out._els_recv_dsd_a6348 } els_t; /* * A handy package structure for running FC-SCSI commands internally */ typedef struct { uint16_t handle; uint16_t lun; uint32_t channel : 8, portid : 24; uint32_t timeout; union { struct { uint32_t data_length; uint32_t no_wait : 1, do_read : 1; uint8_t cdb[16]; void *data_ptr; } beg; struct { uint32_t data_residual; uint8_t status; uint8_t pad; uint16_t sense_length; uint8_t sense_data[32]; } end; } fcd; } isp_xcmd_t; /* * Target Mode related definitions */ #define QLTM_SENSELEN 18 /* non-FC cards only */ #define QLTM_SVALID 0x80 /* * Structure for Enable Lun and Modify Lun queue entries */ typedef struct { isphdr_t le_header; uint32_t le_reserved; uint8_t le_lun; uint8_t le_rsvd; uint8_t le_ops; /* Modify LUN only */ uint8_t le_tgt; /* Not for FC */ uint32_t le_flags; /* Not for FC */ uint8_t le_status; uint8_t le_reserved2; uint8_t le_cmd_count; uint8_t le_in_count; uint8_t le_cdb6len; /* Not for FC */ uint8_t le_cdb7len; /* Not for FC */ uint16_t le_timeout; uint16_t le_reserved3[20]; } lun_entry_t; /* * le_flags values */ #define LUN_TQAE 0x00000002 /* bit1 Tagged Queue Action Enable */ #define LUN_DSSM 0x01000000 /* bit24 Disable Sending SDP Message */ #define LUN_DISAD 0x02000000 /* bit25 Disable autodisconnect */ #define LUN_DM 0x40000000 /* bit30 Disconnects Mandatory */ /* * le_ops values */ #define LUN_CCINCR 0x01 /* increment command count */ #define LUN_CCDECR 0x02 /* decrement command count */ #define LUN_ININCR 0x40 /* increment immed. notify count */ #define LUN_INDECR 0x80 /* decrement immed. 
notify count */ /* * le_status values */ #define LUN_OK 0x01 /* we be rockin' */ #define LUN_ERR 0x04 /* request completed with error */ #define LUN_INVAL 0x06 /* invalid request */ #define LUN_NOCAP 0x16 /* can't provide requested capability */ #define LUN_ENABLED 0x3E /* LUN already enabled */ /* * Immediate Notify Entry structure */ #define IN_MSGLEN 8 /* 8 bytes */ #define IN_RSVDLEN 8 /* 8 words */ typedef struct { isphdr_t in_header; uint32_t in_reserved; uint8_t in_lun; /* lun */ uint8_t in_iid; /* initiator */ uint8_t in_reserved2; uint8_t in_tgt; /* target */ uint32_t in_flags; uint8_t in_status; uint8_t in_rsvd2; uint8_t in_tag_val; /* tag value */ uint8_t in_tag_type; /* tag type */ uint16_t in_seqid; /* sequence id */ uint8_t in_msg[IN_MSGLEN]; /* SCSI message bytes */ uint16_t in_reserved3[IN_RSVDLEN]; uint8_t in_sense[QLTM_SENSELEN];/* suggested sense data */ } in_entry_t; typedef struct { isphdr_t in_header; uint32_t in_reserved; uint8_t in_lun; /* lun */ uint8_t in_iid; /* initiator */ uint16_t in_scclun; uint32_t in_reserved2; uint16_t in_status; uint16_t in_task_flags; uint16_t in_seqid; /* sequence id */ } in_fcentry_t; typedef struct { isphdr_t in_header; uint32_t in_reserved; uint16_t in_iid; /* initiator */ uint16_t in_scclun; uint32_t in_reserved2; uint16_t in_status; uint16_t in_task_flags; uint16_t in_seqid; /* sequence id */ } in_fcentry_e_t; /* * Values for the in_status field */ #define IN_REJECT 0x0D /* Message Reject message received */ #define IN_RESET 0x0E /* Bus Reset occurred */ #define IN_NO_RCAP 0x16 /* requested capability not available */ #define IN_IDE_RECEIVED 0x33 /* Initiator Detected Error msg received */ #define IN_RSRC_UNAVAIL 0x34 /* resource unavailable */ #define IN_MSG_RECEIVED 0x36 /* SCSI message received */ #define IN_ABORT_TASK 0x20 /* task named in RX_ID is being aborted (FC) */ #define IN_PORT_LOGOUT 0x29 /* port has logged out (FC) */ #define IN_PORT_CHANGED 0x2A /* port changed */ #define IN_GLOBAL_LOGO 0x2E /* all ports logged out */ #define IN_NO_NEXUS 0x3B /* Nexus not established */ #define IN_SRR_RCVD 0x45 /* SRR received */ /* * Values for the in_task_flags field- should only get one at a time! */ #define TASK_FLAGS_RESERVED_MASK (0xe700) #define TASK_FLAGS_CLEAR_ACA (1<<14) #define TASK_FLAGS_TARGET_RESET (1<<13) #define TASK_FLAGS_LUN_RESET (1<<12) #define TASK_FLAGS_CLEAR_TASK_SET (1<<10) #define TASK_FLAGS_ABORT_TASK_SET (1<<9) /* * ISP24XX Immediate Notify */ typedef struct { isphdr_t in_header; uint32_t in_reserved; uint16_t in_nphdl; uint16_t in_reserved1; uint16_t in_flags; uint16_t in_srr_rxid; uint16_t in_status; uint8_t in_status_subcode; uint8_t in_fwhandle; uint32_t in_rxid; uint16_t in_srr_reloff_lo; uint16_t in_srr_reloff_hi; uint16_t in_srr_iu; uint16_t in_srr_oxid; /* * If bit 2 is set in in_flags, the N-Port and * handle tags are valid. If the received ELS is * a LOGO, then these tags contain the N Port ID * from the LOGO payload. If the received ELS * request is TPRLO, these tags contain the * Third Party Originator N Port ID. 
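	 *
	 * (Illustrative check, using the flag names defined just below:
	 * "bit 2" is IN24XX_FLAG_NPHDL_VALID, so a consumer would test
	 *	(inp->in_flags & IN24XX_FLAG_NPHDL_VALID) != 0
	 * before trusting in_np_handle and the N Port ID bytes here.)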
*/ uint16_t in_nport_id_hi; #define in_prli_options in_nport_id_hi uint8_t in_nport_id_lo; uint8_t in_reserved3; uint16_t in_np_handle; uint8_t in_reserved4[12]; uint8_t in_reserved5; uint8_t in_vpidx; uint32_t in_reserved6; uint16_t in_portid_lo; uint8_t in_portid_hi; uint8_t in_reserved7; uint16_t in_reserved8; uint16_t in_oxid; } in_fcentry_24xx_t; #define IN24XX_FLAG_PUREX_IOCB 0x1 #define IN24XX_FLAG_GLOBAL_LOGOUT 0x2 #define IN24XX_FLAG_NPHDL_VALID 0x4 #define IN24XX_FLAG_N2N_PRLI 0x8 #define IN24XX_FLAG_PN_NN_VALID 0x10 #define IN24XX_LIP_RESET 0x0E #define IN24XX_LINK_RESET 0x0F #define IN24XX_PORT_LOGOUT 0x29 #define IN24XX_PORT_CHANGED 0x2A #define IN24XX_LINK_FAILED 0x2E #define IN24XX_SRR_RCVD 0x45 #define IN24XX_ELS_RCVD 0x46 /* * login-affecting ELS received - check * subcode for specific opcode */ /* * For f/w > 4.0.25, these offsets in the Immediate Notify contain * the WWNN/WWPN if the ELS is PLOGI, PDISC or ADISC. The WWN is in * Big Endian format. */ #define IN24XX_PRLI_WWNN_OFF 0x18 #define IN24XX_PRLI_WWPN_OFF 0x28 #define IN24XX_PLOGI_WWNN_OFF 0x20 #define IN24XX_PLOGI_WWPN_OFF 0x28 /* * For f/w > 4.0.25, this offset in the Immediate Notify contains * the WWPN if the ELS is LOGO. The WWN is in Big Endian format. */ #define IN24XX_LOGO_WWPN_OFF 0x28 /* * Immediate Notify Status Subcodes for IN24XX_PORT_LOGOUT */ #define IN24XX_PORT_LOGOUT_PDISC_TMO 0x00 #define IN24XX_PORT_LOGOUT_UXPR_DISC 0x01 #define IN24XX_PORT_LOGOUT_OWN_OPN 0x02 #define IN24XX_PORT_LOGOUT_OWN_OPN_SFT 0x03 #define IN24XX_PORT_LOGOUT_ABTS_TMO 0x04 #define IN24XX_PORT_LOGOUT_DISC_RJT 0x05 #define IN24XX_PORT_LOGOUT_LOGIN_NEEDED 0x06 #define IN24XX_PORT_LOGOUT_BAD_DISC 0x07 #define IN24XX_PORT_LOGOUT_LOST_ALPA 0x08 #define IN24XX_PORT_LOGOUT_XMIT_FAILURE 0x09 /* * Immediate Notify Status Subcodes for IN24XX_PORT_CHANGED */ #define IN24XX_PORT_CHANGED_BADFAN 0x00 #define IN24XX_PORT_CHANGED_TOPO_CHANGE 0x01 #define IN24XX_PORT_CHANGED_FLOGI_ACC 0x02 #define IN24XX_PORT_CHANGED_FLOGI_RJT 0x03 #define IN24XX_PORT_CHANGED_TIMEOUT 0x04 #define IN24XX_PORT_CHANGED_PORT_CHANGE 0x05 /* * Notify Acknowledge Entry structure */ #define NA_RSVDLEN 22 typedef struct { isphdr_t na_header; uint32_t na_reserved; uint8_t na_lun; /* lun */ uint8_t na_iid; /* initiator */ uint8_t na_reserved2; uint8_t na_tgt; /* target */ uint32_t na_flags; uint8_t na_status; uint8_t na_event; uint16_t na_seqid; /* sequence id */ uint16_t na_reserved3[NA_RSVDLEN]; } na_entry_t; /* * Values for the na_event field */ #define NA_RST_CLRD 0x80 /* Clear an async event notification */ #define NA_OK 0x01 /* Notify Acknowledge Succeeded */ #define NA_INVALID 0x06 /* Invalid Notify Acknowledge */ #define NA2_RSVDLEN 21 typedef struct { isphdr_t na_header; uint32_t na_reserved; uint8_t na_reserved1; uint8_t na_iid; /* initiator loop id */ uint16_t na_response; uint16_t na_flags; uint16_t na_reserved2; uint16_t na_status; uint16_t na_task_flags; uint16_t na_seqid; /* sequence id */ uint16_t na_reserved3[NA2_RSVDLEN]; } na_fcentry_t; typedef struct { isphdr_t na_header; uint32_t na_reserved; uint16_t na_iid; /* initiator loop id */ uint16_t na_response; /* response code */ uint16_t na_flags; uint16_t na_reserved2; uint16_t na_status; uint16_t na_task_flags; uint16_t na_seqid; /* sequence id */ uint16_t na_reserved3[NA2_RSVDLEN]; } na_fcentry_e_t; #define NAFC_RCOUNT 0x80 /* increment resource count */ #define NAFC_RST_CLRD 0x20 /* Clear LIP Reset */ #define NAFC_TVALID 0x10 /* task management response code is valid */ /* * ISP24XX Notify Acknowledge */
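/*
 * (Illustrative sketch, not the driver's code: an acknowledge is typically
 * built by echoing the identifying fields of the immediate notify it
 * answers, e.g.:
 *
 *	na->na_nphdl = inp->in_nphdl;
 *	na->na_status = inp->in_status;
 *	na->na_status_subcode = inp->in_status_subcode;
 *	na->na_rxid = inp->in_rxid;
 *	na->na_oxid = inp->in_oxid;
 * )
 */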
typedef struct { isphdr_t na_header; uint32_t na_handle; uint16_t na_nphdl; uint16_t na_reserved1; uint16_t na_flags; uint16_t na_srr_rxid; uint16_t na_status; uint8_t na_status_subcode; uint8_t na_fwhandle; uint32_t na_rxid; uint16_t na_srr_reloff_lo; uint16_t na_srr_reloff_hi; uint16_t na_srr_iu; uint16_t na_srr_flags; uint8_t na_reserved3[18]; uint8_t na_reserved4; uint8_t na_vpidx; uint8_t na_srr_reject_vunique; uint8_t na_srr_reject_explanation; uint8_t na_srr_reject_code; uint8_t na_reserved5; uint8_t na_reserved6[6]; uint16_t na_oxid; } na_fcentry_24xx_t; /* * Accept Target I/O Entry structure */ #define ATIO_CDBLEN 26 typedef struct { isphdr_t at_header; uint16_t at_reserved; uint16_t at_handle; uint8_t at_lun; /* lun */ uint8_t at_iid; /* initiator */ uint8_t at_cdblen; /* cdb length */ uint8_t at_tgt; /* target */ uint32_t at_flags; uint8_t at_status; /* firmware status */ uint8_t at_scsi_status; /* scsi status */ uint8_t at_tag_val; /* tag value */ uint8_t at_tag_type; /* tag type */ uint8_t at_cdb[ATIO_CDBLEN]; /* received CDB */ uint8_t at_sense[QLTM_SENSELEN];/* suggested sense data */ } at_entry_t; /* * at_flags values */ #define AT_NODISC 0x00008000 /* disconnect disabled */ #define AT_TQAE 0x00000002 /* Tagged Queue Action enabled */ /* * at_status values */ #define AT_PATH_INVALID 0x07 /* ATIO sent to firmware for disabled lun */ #define AT_RESET 0x0E /* SCSI Bus Reset Occurred */ #define AT_PHASE_ERROR 0x14 /* Bus phase sequence error */ #define AT_NOCAP 0x16 /* Requested capability not available */ #define AT_BDR_MSG 0x17 /* Bus Device Reset msg received */ #define AT_CDB 0x3D /* CDB received */ /* * Macros to create and fetch and test concatenated handle and tag value macros * (SPI only) */ #define AT_MAKE_TAGID(tid, aep) \ tid = aep->at_handle; \ if (aep->at_flags & AT_TQAE) { \ tid |= (aep->at_tag_val << 16); \ tid |= (1 << 24); \ } #define CT_MAKE_TAGID(tid, ct) \ tid = ct->ct_fwhandle; \ if (ct->ct_flags & CT_TQAE) { \ tid |= (ct->ct_tag_val << 16); \ tid |= (1 << 24); \ } #define AT_HAS_TAG(val) ((val) & (1 << 24)) #define AT_GET_TAG(val) (((val) >> 16) & 0xff) #define AT_GET_HANDLE(val) ((val) & 0xffff) #define IN_MAKE_TAGID(tid, inp) \ tid = inp->in_seqid; \ tid |= (inp->in_tag_val << 16); \ tid |= (1 << 24) /* * Accept Target I/O Entry structure, Type 2 */ #define ATIO2_CDBLEN 16 typedef struct { isphdr_t at_header; uint32_t at_reserved; uint8_t at_lun; /* lun or reserved */ uint8_t at_iid; /* initiator */ uint16_t at_rxid; /* response ID */ uint16_t at_flags; uint16_t at_status; /* firmware status */ uint8_t at_crn; /* command reference number */ uint8_t at_taskcodes; uint8_t at_taskflags; uint8_t at_execodes; uint8_t at_cdb[ATIO2_CDBLEN]; /* received CDB */ uint32_t at_datalen; /* allocated data len */ uint16_t at_scclun; /* SCC Lun or reserved */ uint16_t at_wwpn[4]; /* WWPN of initiator */ uint16_t at_reserved2[6]; uint16_t at_oxid; } at2_entry_t; typedef struct { isphdr_t at_header; uint32_t at_reserved; uint16_t at_iid; /* initiator */ uint16_t at_rxid; /* response ID */ uint16_t at_flags; uint16_t at_status; /* firmware status */ uint8_t at_crn; /* command reference number */ uint8_t at_taskcodes; uint8_t at_taskflags; uint8_t at_execodes; uint8_t at_cdb[ATIO2_CDBLEN]; /* received CDB */ uint32_t at_datalen; /* allocated data len */ uint16_t at_scclun; /* SCC Lun or reserved */ uint16_t at_wwpn[4]; /* WWPN of initiator */ uint16_t at_reserved2[6]; uint16_t at_oxid; } at2e_entry_t; #define ATIO2_WWPN_OFFSET 0x2A #define ATIO2_OXID_OFFSET 0x3E #define 
ATIO2_TC_ATTR_MASK 0x7 #define ATIO2_TC_ATTR_SIMPLEQ 0 #define ATIO2_TC_ATTR_HEADOFQ 1 #define ATIO2_TC_ATTR_ORDERED 2 #define ATIO2_TC_ATTR_ACAQ 4 #define ATIO2_TC_ATTR_UNTAGGED 5 #define ATIO2_EX_WRITE 0x1 #define ATIO2_EX_READ 0x2 /* * Macros to create and fetch and test concatenated handle and tag value macros */ #define AT2_MAKE_TAGID(tid, bus, inst, aep) \ tid = aep->at_rxid; \ tid |= (((uint64_t)inst) << 32); \ tid |= (((uint64_t)bus) << 48) #define CT2_MAKE_TAGID(tid, bus, inst, ct) \ tid = ct->ct_rxid; \ tid |= (((uint64_t)inst) << 32); \ tid |= (((uint64_t)(bus & 0xff)) << 48) #define AT2_HAS_TAG(val) 1 #define AT2_GET_TAG(val) ((val) & 0xffffffff) #define AT2_GET_INST(val) (((val) >> 32) & 0xffff) #define AT2_GET_HANDLE AT2_GET_TAG #define AT2_GET_BUS(val) (((val) >> 48) & 0xff) #define FC_HAS_TAG AT2_HAS_TAG #define FC_GET_TAG AT2_GET_TAG #define FC_GET_INST AT2_GET_INST #define FC_GET_HANDLE AT2_GET_HANDLE #define IN_FC_MAKE_TAGID(tid, bus, inst, seqid) \ tid = seqid; \ tid |= (((uint64_t)inst) << 32); \ tid |= (((uint64_t)(bus & 0xff)) << 48) #define FC_TAG_INSERT_INST(tid, inst) \ tid &= ~0x0000ffff00000000ull; \ tid |= (((uint64_t)inst) << 32) /* * 24XX ATIO Definition * * This is *quite* different from other entry types. * First of all, it has its own queue it comes in on. * * Secondly, it doesn't have a normal header. * * Thirdly, it's just a passthru of the FCP CMND IU * which is recorded in big endian mode. */ typedef struct { uint8_t at_type; uint8_t at_count; /* * Task attribute in high four bits, * the rest is the FCP CMND IU Length. * NB: the command can extend past the * length for a single queue entry. */ uint16_t at_ta_len; uint32_t at_rxid; fc_hdr_t at_hdr; fcp_cmnd_iu_t at_cmnd; } at7_entry_t; #define AT7_NORESRC_RXID 0xffffffff /* * Continue Target I/O Entry structure * Request from driver. The response from the * ISP firmware is the same except that the last 18 * bytes are overwritten by suggested sense data if * the 'autosense valid' bit is set in the status byte. */ typedef struct { isphdr_t ct_header; uint16_t ct_syshandle; uint16_t ct_fwhandle; /* required by f/w */ uint8_t ct_lun; /* lun */ uint8_t ct_iid; /* initiator id */ uint8_t ct_reserved2; uint8_t ct_tgt; /* our target id */ uint32_t ct_flags; uint8_t ct_status; /* isp status */ uint8_t ct_scsi_status; /* scsi status */ uint8_t ct_tag_val; /* tag value */ uint8_t ct_tag_type; /* tag type */ uint32_t ct_xfrlen; /* transfer length */ uint32_t ct_resid; /* residual length */ uint16_t ct_timeout; uint16_t ct_seg_count; ispds_t ct_dataseg[ISP_RQDSEG]; } ct_entry_t; /* * For some of the dual port SCSI adapters, port (bus #) is reported * in the MSbit of ct_iid. Bit fields are a bit too awkward here. * * Note that this does not apply to FC adapters at all which can and * do report IIDs between 0x81 && 0xfe (or 0x7ff) which represent devices * that have logged in across a SCSI fabric. 
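 *
 * A worked example with the macros just below: for x = 0x85,
 * GET_BUS_VAL(x) = (0x85 >> 7) & 0x1 = 1 and
 * GET_IID_VAL(x) = 0x85 & 0x3f = 0x05, i.e. bus 1, initiator id 5.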
*/ #define GET_IID_VAL(x) (x & 0x3f) #define GET_BUS_VAL(x) ((x >> 7) & 0x1) #define SET_IID_VAL(y, x) y = ((y & ~0x3f) | (x & 0x3f)) #define SET_BUS_VAL(y, x) y = ((y & 0x3f) | ((x & 0x1) << 7)) /* * ct_flags values */ #define CT_TQAE 0x00000002 /* bit 1, Tagged Queue Action enable */ #define CT_DATA_IN 0x00000040 /* bits 6&7, Data direction - *to* initiator */ #define CT_DATA_OUT 0x00000080 /* bits 6&7, Data direction - *from* initiator */ #define CT_NO_DATA 0x000000C0 /* bits 6&7, Data direction */ #define CT_CCINCR 0x00000100 /* bit 8, autoincrement atio count */ #define CT_DATAMASK 0x000000C0 /* bits 6&7, Data direction */ #define CT_INISYNCWIDE 0x00004000 /* bit 14, Do Sync/Wide Negotiation */ #define CT_NODISC 0x00008000 /* bit 15, Disconnects disabled */ #define CT_DSDP 0x01000000 /* bit 24, Disable Save Data Pointers */ #define CT_SENDRDP 0x04000000 /* bit 26, Send Restore Pointers msg */ #define CT_SENDSTATUS 0x80000000 /* bit 31, Send SCSI status byte */ /* * ct_status values * - set by the firmware when it returns the CTIO */ #define CT_OK 0x01 /* completed without error */ #define CT_ABORTED 0x02 /* aborted by host */ #define CT_ERR 0x04 /* see sense data for error */ #define CT_INVAL 0x06 /* request for disabled lun */ #define CT_NOPATH 0x07 /* invalid ITL nexus */ #define CT_INVRXID 0x08 /* (FC only) Invalid RX_ID */ #define CT_DATA_OVER 0x09 /* (FC only) Data Overrun */ #define CT_RSELTMO 0x0A /* reselection timeout after 2 tries */ #define CT_TIMEOUT 0x0B /* timed out */ #define CT_RESET 0x0E /* SCSI Bus Reset occurred */ #define CT_PARITY 0x0F /* Uncorrectable Parity Error */ #define CT_BUS_ERROR 0x10 /* (FC Only) DMA PCI Error */ #define CT_PANIC 0x13 /* Unrecoverable Error */ #define CT_PHASE_ERROR 0x14 /* Bus phase sequence error */ #define CT_DATA_UNDER 0x15 /* (FC only) Data Underrun */ #define CT_BDR_MSG 0x17 /* Bus Device Reset msg received */ #define CT_TERMINATED 0x19 /* due to Terminate Transfer mbox cmd */ #define CT_PORTUNAVAIL 0x28 /* port not available */ #define CT_LOGOUT 0x29 /* port logout */ #define CT_PORTCHANGED 0x2A /* port changed */ #define CT_IDE 0x33 /* Initiator Detected Error */ #define CT_NOACK 0x35 /* Outstanding Immed. Notify. entry */ #define CT_SRR 0x45 /* SRR Received */ #define CT_LUN_RESET 0x48 /* Lun Reset Received */ #define CT_HBA_RESET 0xffff /* pseudo error - command destroyed by HBA reset*/ /* * When the firmware returns a CTIO entry, it may overwrite the last * part of the structure with sense data. This starts at offset 0x2E * into the entry, which is in the middle of ct_dataseg[1]. Rather * than define a new struct for this, I'm just using the sense data * offset. */ #define CTIO_SENSE_OFFSET 0x2E /* * Entry length in u_longs. All entries are the same size so * any one will do as the numerator. */ #define UINT32_ENTRY_SIZE (sizeof(at_entry_t)/sizeof(uint32_t)) /* * QLA2100 CTIO (type 2) entry */ #define MAXRESPLEN 26 typedef struct { isphdr_t ct_header; uint32_t ct_syshandle; uint8_t ct_lun; /* lun */ uint8_t ct_iid; /* initiator id */ uint16_t ct_rxid; /* response ID */ uint16_t ct_flags; uint16_t ct_status; /* isp status */ uint16_t ct_timeout; uint16_t ct_seg_count; uint32_t ct_reloff; /* relative offset */ uint32_t ct_resid; /* residual length */ union { /* * The three different modes that the target driver * can set the CTIO{2,3,4} up as. * * The first is for sending FCP_DATA_IUs as well as * (optionally) sending a terminal SCSI status FCP_RSP_IU. * * The second is for sending SCSI sense data in an FCP_RSP_IU. 
* Note that no FCP_DATA_IUs will be sent. * * The third is for sending FCP_RSP_IUs as built specifically * in system memory as located by the isp_dataseg. */ struct { uint32_t _reserved; uint16_t _reserved2; uint16_t ct_scsi_status; uint32_t ct_xfrlen; union { ispds_t ct_dataseg[ISP_RQDSEG_T2]; ispds64_t ct_dataseg64[ISP_RQDSEG_T3]; ispdslist_t ct_dslist; } u; } m0; struct { uint16_t _reserved; uint16_t _reserved2; uint16_t ct_senselen; uint16_t ct_scsi_status; uint16_t ct_resplen; uint8_t ct_resp[MAXRESPLEN]; } m1; struct { uint32_t _reserved; uint16_t _reserved2; uint16_t _reserved3; uint32_t ct_datalen; union { ispds_t ct_fcp_rsp_iudata_32; ispds64_t ct_fcp_rsp_iudata_64; } u; } m2; } rsp; } ct2_entry_t; typedef struct { isphdr_t ct_header; uint32_t ct_syshandle; uint16_t ct_iid; /* initiator id */ uint16_t ct_rxid; /* response ID */ uint16_t ct_flags; uint16_t ct_status; /* isp status */ uint16_t ct_timeout; uint16_t ct_seg_count; uint32_t ct_reloff; /* relative offset */ uint32_t ct_resid; /* residual length */ union { struct { uint32_t _reserved; uint16_t _reserved2; uint16_t ct_scsi_status; uint32_t ct_xfrlen; union { ispds_t ct_dataseg[ISP_RQDSEG_T2]; ispds64_t ct_dataseg64[ISP_RQDSEG_T3]; ispdslist_t ct_dslist; } u; } m0; struct { uint16_t _reserved; uint16_t _reserved2; uint16_t ct_senselen; uint16_t ct_scsi_status; uint16_t ct_resplen; uint8_t ct_resp[MAXRESPLEN]; } m1; struct { uint32_t _reserved; uint16_t _reserved2; uint16_t _reserved3; uint32_t ct_datalen; union { ispds_t ct_fcp_rsp_iudata_32; ispds64_t ct_fcp_rsp_iudata_64; } u; } m2; } rsp; } ct2e_entry_t; /* * ct_flags values for CTIO2 */ #define CT2_FLAG_MODE0 0x0000 #define CT2_FLAG_MODE1 0x0001 #define CT2_FLAG_MODE2 0x0002 #define CT2_FLAG_MMASK 0x0003 #define CT2_DATA_IN 0x0040 /* *to* initiator */ #define CT2_DATA_OUT 0x0080 /* *from* initiator */ #define CT2_NO_DATA 0x00C0 #define CT2_DATAMASK 0x00C0 #define CT2_CCINCR 0x0100 #define CT2_FASTPOST 0x0200 #define CT2_CONFIRM 0x2000 #define CT2_TERMINATE 0x4000 #define CT2_SENDSTATUS 0x8000 /* * ct_status values are (mostly) the same as that for ct_entry. */ /* * ct_scsi_status values- the low 8 bits are the normal SCSI status * we know and love. The upper 8 bits are validity markers for FCP_RSP_IU * fields. 
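 *
 * For example (illustrative only; 0x02 is the SCSI CHECK CONDITION status
 * and the mode-1 field names come from the structures above):
 *
 *	cto->rsp.m1.ct_scsi_status = 0x02 | CT2_SNSLEN_VALID;
 *	cto->rsp.m1.ct_senselen = 18;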
#define CT2_RSPLEN_VALID 0x0100 #define CT2_SNSLEN_VALID 0x0200 #define CT2_DATA_OVER 0x0400 #define CT2_DATA_UNDER 0x0800 /* * ISP24XX CTIO */ #define MAXRESPLEN_24XX 24 typedef struct { isphdr_t ct_header; uint32_t ct_syshandle; uint16_t ct_nphdl; /* status on returned CTIOs */ uint16_t ct_timeout; uint16_t ct_seg_count; uint8_t ct_vpidx; uint8_t ct_xflags; uint16_t ct_iid_lo; /* low 16 bits of portid */ uint8_t ct_iid_hi; /* hi 8 bits of portid */ uint8_t ct_reserved; uint32_t ct_rxid; uint16_t ct_senselen; /* mode 1 only */ uint16_t ct_flags; uint32_t ct_resid; /* residual length */ uint16_t ct_oxid; uint16_t ct_scsi_status; /* modes 0 && 1 only */ union { struct { uint32_t reloff; uint32_t reserved0; uint32_t ct_xfrlen; uint32_t reserved1; ispds64_t ds; } m0; struct { uint16_t ct_resplen; uint16_t reserved; uint8_t ct_resp[MAXRESPLEN_24XX]; } m1; struct { uint32_t reserved0; uint32_t reserved1; uint32_t ct_datalen; uint32_t reserved2; ispds64_t ct_fcp_rsp_iudata; } m2; } rsp; } ct7_entry_t; /* * ct_flags values for CTIO7 */ #define CT7_NO_DATA 0x0000 #define CT7_DATA_OUT 0x0001 /* *from* initiator */ #define CT7_DATA_IN 0x0002 /* *to* initiator */ #define CT7_DATAMASK 0x3 #define CT7_DSD_ENABLE 0x0004 #define CT7_CONF_STSFD 0x0010 #define CT7_EXPLCT_CONF 0x0020 #define CT7_FLAG_MODE0 0x0000 #define CT7_FLAG_MODE1 0x0040 #define CT7_FLAG_MODE2 0x0080 #define CT7_FLAG_MMASK 0x00C0 #define CT7_NOACK 0x0100 #define CT7_TASK_ATTR_SHIFT 9 #define CT7_CONFIRM 0x2000 #define CT7_TERMINATE 0x4000 #define CT7_SENDSTATUS 0x8000 /* * Type 7 CTIO status codes */ #define CT7_OK 0x01 /* completed without error */ #define CT7_ABORTED 0x02 /* aborted by host */ #define CT7_ERR 0x04 /* see sense data for error */ #define CT7_INVAL 0x06 /* request for disabled lun */ #define CT7_INVRXID 0x08 /* Invalid RX_ID */ #define CT7_DATA_OVER 0x09 /* Data Overrun */ #define CT7_TIMEOUT 0x0B /* timed out */ #define CT7_RESET 0x0E /* LIP Reset Received */ #define CT7_BUS_ERROR 0x10 /* DMA PCI Error */ #define CT7_REASSY_ERR 0x11 /* DMA reassembly error */ #define CT7_DATA_UNDER 0x15 /* Data Underrun */ #define CT7_PORTUNAVAIL 0x28 /* port not available */ #define CT7_LOGOUT 0x29 /* port logout */ #define CT7_PORTCHANGED 0x2A /* port changed */ #define CT7_SRR 0x45 /* SRR Received */ /* * Other 24XX related target IOCBs */ /* * ABTS Received */ typedef struct { isphdr_t abts_header; uint8_t abts_reserved0[6]; uint16_t abts_nphdl; uint16_t abts_reserved1; uint16_t abts_sof; uint32_t abts_rxid_abts; uint16_t abts_did_lo; uint8_t abts_did_hi; uint8_t abts_r_ctl; uint16_t abts_sid_lo; uint8_t abts_sid_hi; uint8_t abts_cs_ctl; uint16_t abts_fs_ctl; uint8_t abts_f_ctl; uint8_t abts_type; uint16_t abts_seq_cnt; uint8_t abts_df_ctl; uint8_t abts_seq_id; uint16_t abts_rx_id; uint16_t abts_ox_id; uint32_t abts_param; uint8_t abts_reserved2[16]; uint32_t abts_rxid_task; } abts_t; typedef struct { isphdr_t abts_rsp_header; uint32_t abts_rsp_handle; uint16_t abts_rsp_status; uint16_t abts_rsp_nphdl; uint16_t abts_rsp_ctl_flags; uint16_t abts_rsp_sof; uint32_t abts_rsp_rxid_abts; uint16_t abts_rsp_did_lo; uint8_t abts_rsp_did_hi; uint8_t abts_rsp_r_ctl; uint16_t abts_rsp_sid_lo; uint8_t abts_rsp_sid_hi; uint8_t abts_rsp_cs_ctl; uint16_t abts_rsp_f_ctl_lo; uint8_t abts_rsp_f_ctl_hi; uint8_t abts_rsp_type; uint16_t abts_rsp_seq_cnt; uint8_t abts_rsp_df_ctl; uint8_t abts_rsp_seq_id; uint16_t abts_rsp_rx_id; uint16_t abts_rsp_ox_id; uint32_t abts_rsp_param; union { struct { uint16_t reserved; uint8_t last_seq_id; uint8_t
seq_id_valid; uint16_t aborted_rx_id; uint16_t aborted_ox_id; uint16_t high_seq_cnt; uint16_t low_seq_cnt; uint8_t reserved2[4]; } ba_acc; struct { uint8_t vendor_unique; uint8_t explanation; uint8_t reason; uint8_t reserved; uint8_t reserved2[12]; } ba_rjt; struct { uint8_t reserved[8]; uint32_t subcode1; uint32_t subcode2; } rsp; uint8_t reserved[16]; } abts_rsp_payload; uint32_t abts_rsp_rxid_task; } abts_rsp_t; /* terminate this ABTS exchange */ #define ISP24XX_ABTS_RSP_TERMINATE 0x01 #define ISP24XX_ABTS_RSP_COMPLETE 0x00 #define ISP24XX_ABTS_RSP_RESET 0x04 #define ISP24XX_ABTS_RSP_ABORTED 0x05 #define ISP24XX_ABTS_RSP_TIMEOUT 0x06 #define ISP24XX_ABTS_RSP_INVXID 0x08 #define ISP24XX_ABTS_RSP_LOGOUT 0x29 #define ISP24XX_ABTS_RSP_SUBCODE 0x31 #define ISP24XX_NO_TASK 0xffffffff /* * Miscellaneous * * These are the limits of the number of dma segments we * can deal with based not on the size of the segment counter * (which is 16 bits), but on the size of the number of * queue entries field (which is 8 bits). We assume no * segments in the first queue entry, so we can either * have 7 dma segments per continuation entry or 5 * (for 64 bit dma).. multiplying out by 254.... */ #define ISP_NSEG_MAX 1778 #define ISP_NSEG64_MAX 1270 #endif /* _ISPMBOX_H */ Index: projects/powernv/dev/re/if_re.c =================================================================== --- projects/powernv/dev/re/if_re.c (revision 290990) +++ projects/powernv/dev/re/if_re.c (revision 290991) @@ -1,4068 +1,4074 @@ /*- * Copyright (c) 1997, 1998-2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver * * Written by Bill Paul * Senior Networking Software Engineer * Wind River Systems */ /* * This driver is designed to support RealTek's next generation of * 10/100 and 10/100/1000 PCI ethernet controllers. 
There are currently * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S, * RTL8110S, the RTL8168, the RTL8111 and the RTL8101E. * * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible * with the older 8139 family, however it also supports a special * C+ mode of operation that provides several new performance enhancing * features. These include: * * o Descriptor based DMA mechanism. Each descriptor represents * a single packet fragment. Data buffers may be aligned on * any byte boundary. * * o 64-bit DMA * * o TCP/IP checksum offload for both RX and TX * * o High and normal priority transmit DMA rings * * o VLAN tag insertion and extraction * * o TCP large send (segmentation offload) * * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+ * programming API is fairly straightforward. The RX filtering, EEPROM * access and PHY access is the same as it is on the older 8139 series * chips. * * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the * same programming API and feature set as the 8139C+ with the following * differences and additions: * * o 1000Mbps mode * * o Jumbo frames * * o GMII and TBI ports/registers for interfacing with copper * or fiber PHYs * * o RX and TX DMA rings can have up to 1024 descriptors * (the 8139C+ allows a maximum of 64) * * o Slight differences in register layout from the 8139C+ * * The TX start and timer interrupt registers are at different locations * on the 8169 than they are on the 8139C+. Also, the status word in the * RX descriptor has a slightly different bit layout. The 8169 does not * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska' * copper gigE PHY. * * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs * (the 'S' stands for 'single-chip'). These devices have the same * programming API as the older 8169, but also have some vendor-specific * registers for the on-board PHY. The 8110S is a LAN-on-motherboard * part designed to be pin-compatible with the RealTek 8100 10/100 chip. * * This driver takes advantage of the RX and TX checksum offload and * VLAN tag insertion/extraction features. It also implements TX * interrupt moderation using the timer interrupt registers, which * significantly reduces TX interrupt load. There is also support * for jumbo frames, however the 8169/8169S/8110S can not transmit * jumbo frames larger than 7440, so the max MTU possible with this * driver is 7422 bytes. */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_DEPEND(re, pci, 1, 1, 1); MODULE_DEPEND(re, ether, 1, 1, 1); MODULE_DEPEND(re, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" /* Tunables. */ static int intr_filter = 0; TUNABLE_INT("hw.re.intr_filter", &intr_filter); static int msi_disable = 0; TUNABLE_INT("hw.re.msi_disable", &msi_disable); static int msix_disable = 0; TUNABLE_INT("hw.re.msix_disable", &msix_disable); static int prefer_iomap = 0; TUNABLE_INT("hw.re.prefer_iomap", &prefer_iomap); #define RE_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP) /* * Various supported device vendors/types and their names. 
*/ static const struct rl_type re_devs[] = { { DLINK_VENDORID, DLINK_DEVICEID_528T, 0, "D-Link DGE-528(T) Gigabit Ethernet Adapter" }, { DLINK_VENDORID, DLINK_DEVICEID_530T_REVC, 0, "D-Link DGE-530(T) Gigabit Ethernet Adapter" }, { RT_VENDORID, RT_DEVICEID_8139, 0, "RealTek 8139C+ 10/100BaseTX" }, { RT_VENDORID, RT_DEVICEID_8101E, 0, "RealTek 810xE PCIe 10/100baseTX" }, { RT_VENDORID, RT_DEVICEID_8168, 0, "RealTek 8168/8111 B/C/CP/D/DP/E/F/G PCIe Gigabit Ethernet" }, { RT_VENDORID, RT_DEVICEID_8169, 0, "RealTek 8169/8169S/8169SB(L)/8110S/8110SB(L) Gigabit Ethernet" }, { RT_VENDORID, RT_DEVICEID_8169SC, 0, "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" }, { COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, 0, "Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" }, { LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, 0, "Linksys EG1032 (RTL8169S) Gigabit Ethernet" }, { USR_VENDORID, USR_DEVICEID_997902, 0, "US Robotics 997902 (RTL8169S) Gigabit Ethernet" } }; static const struct rl_hwrev re_hwrevs[] = { { RL_HWREV_8139, RL_8139, "", RL_MTU }, { RL_HWREV_8139A, RL_8139, "A", RL_MTU }, { RL_HWREV_8139AG, RL_8139, "A-G", RL_MTU }, { RL_HWREV_8139B, RL_8139, "B", RL_MTU }, { RL_HWREV_8130, RL_8139, "8130", RL_MTU }, { RL_HWREV_8139C, RL_8139, "C", RL_MTU }, { RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C", RL_MTU }, { RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+", RL_MTU }, { RL_HWREV_8168B_SPIN1, RL_8169, "8168", RL_JUMBO_MTU }, { RL_HWREV_8169, RL_8169, "8169", RL_JUMBO_MTU }, { RL_HWREV_8169S, RL_8169, "8169S", RL_JUMBO_MTU }, { RL_HWREV_8110S, RL_8169, "8110S", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SB, RL_8169, "8169SB/8110SB", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SC, RL_8169, "8169SC/8110SC", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SBL, RL_8169, "8169SBL/8110SBL", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SCE, RL_8169, "8169SC/8110SC", RL_JUMBO_MTU }, { RL_HWREV_8100, RL_8139, "8100", RL_MTU }, { RL_HWREV_8101, RL_8139, "8101", RL_MTU }, { RL_HWREV_8100E, RL_8169, "8100E", RL_MTU }, { RL_HWREV_8101E, RL_8169, "8101E", RL_MTU }, { RL_HWREV_8102E, RL_8169, "8102E", RL_MTU }, { RL_HWREV_8102EL, RL_8169, "8102EL", RL_MTU }, { RL_HWREV_8102EL_SPIN1, RL_8169, "8102EL", RL_MTU }, { RL_HWREV_8103E, RL_8169, "8103E", RL_MTU }, { RL_HWREV_8401E, RL_8169, "8401E", RL_MTU }, { RL_HWREV_8402, RL_8169, "8402", RL_MTU }, { RL_HWREV_8105E, RL_8169, "8105E", RL_MTU }, { RL_HWREV_8105E_SPIN1, RL_8169, "8105E", RL_MTU }, { RL_HWREV_8106E, RL_8169, "8106E", RL_MTU }, { RL_HWREV_8168B_SPIN2, RL_8169, "8168", RL_JUMBO_MTU }, { RL_HWREV_8168B_SPIN3, RL_8169, "8168", RL_JUMBO_MTU }, { RL_HWREV_8168C, RL_8169, "8168C/8111C", RL_JUMBO_MTU_6K }, { RL_HWREV_8168C_SPIN2, RL_8169, "8168C/8111C", RL_JUMBO_MTU_6K }, { RL_HWREV_8168CP, RL_8169, "8168CP/8111CP", RL_JUMBO_MTU_6K }, { RL_HWREV_8168D, RL_8169, "8168D/8111D", RL_JUMBO_MTU_9K }, { RL_HWREV_8168DP, RL_8169, "8168DP/8111DP", RL_JUMBO_MTU_9K }, { RL_HWREV_8168E, RL_8169, "8168E/8111E", RL_JUMBO_MTU_9K}, { RL_HWREV_8168E_VL, RL_8169, "8168E/8111E-VL", RL_JUMBO_MTU_6K}, { RL_HWREV_8168EP, RL_8169, "8168EP/8111EP", RL_JUMBO_MTU_9K}, { RL_HWREV_8168F, RL_8169, "8168F/8111F", RL_JUMBO_MTU_9K}, { RL_HWREV_8168G, RL_8169, "8168G/8111G", RL_JUMBO_MTU_9K}, { RL_HWREV_8168GU, RL_8169, "8168GU/8111GU", RL_JUMBO_MTU_9K}, { RL_HWREV_8168H, RL_8169, "8168H/8111H", RL_JUMBO_MTU_9K}, { RL_HWREV_8411, RL_8169, "8411", RL_JUMBO_MTU_9K}, { RL_HWREV_8411B, RL_8169, "8411B", RL_JUMBO_MTU_9K}, { 0, 0, NULL, 0 } }; static int re_probe (device_t); static int re_attach (device_t); static int re_detach (device_t); static int 
re_encap (struct rl_softc *, struct mbuf **); static void re_dma_map_addr (void *, bus_dma_segment_t *, int, int); static int re_allocmem (device_t, struct rl_softc *); static __inline void re_discard_rxbuf (struct rl_softc *, int); static int re_newbuf (struct rl_softc *, int); static int re_jumbo_newbuf (struct rl_softc *, int); static int re_rx_list_init (struct rl_softc *); static int re_jrx_list_init (struct rl_softc *); static int re_tx_list_init (struct rl_softc *); #ifdef RE_FIXUP_RX static __inline void re_fixup_rx (struct mbuf *); #endif static int re_rxeof (struct rl_softc *, int *); static void re_txeof (struct rl_softc *); #ifdef DEVICE_POLLING static int re_poll (struct ifnet *, enum poll_cmd, int); static int re_poll_locked (struct ifnet *, enum poll_cmd, int); #endif static int re_intr (void *); static void re_intr_msi (void *); static void re_tick (void *); static void re_int_task (void *, int); static void re_start (struct ifnet *); static void re_start_locked (struct ifnet *); static int re_ioctl (struct ifnet *, u_long, caddr_t); static void re_init (void *); static void re_init_locked (struct rl_softc *); static void re_stop (struct rl_softc *); static void re_watchdog (struct rl_softc *); static int re_suspend (device_t); static int re_resume (device_t); static int re_shutdown (device_t); static int re_ifmedia_upd (struct ifnet *); static void re_ifmedia_sts (struct ifnet *, struct ifmediareq *); static void re_eeprom_putbyte (struct rl_softc *, int); static void re_eeprom_getword (struct rl_softc *, int, u_int16_t *); static void re_read_eeprom (struct rl_softc *, caddr_t, int, int); static int re_gmii_readreg (device_t, int, int); static int re_gmii_writereg (device_t, int, int, int); static int re_miibus_readreg (device_t, int, int); static int re_miibus_writereg (device_t, int, int, int); static void re_miibus_statchg (device_t); static void re_set_jumbo (struct rl_softc *, int); static void re_set_rxmode (struct rl_softc *); static void re_reset (struct rl_softc *); static void re_setwol (struct rl_softc *); static void re_clrwol (struct rl_softc *); static void re_set_linkspeed (struct rl_softc *); #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include MODULE_DEPEND(re, netmap, 1, 1, 1); #endif /* !DEV_NETMAP */ #ifdef RE_DIAG static int re_diag (struct rl_softc *); #endif static void re_add_sysctls (struct rl_softc *); static int re_sysctl_stats (SYSCTL_HANDLER_ARGS); static int sysctl_int_range (SYSCTL_HANDLER_ARGS, int, int); static int sysctl_hw_re_int_mod (SYSCTL_HANDLER_ARGS); static device_method_t re_methods[] = { /* Device interface */ DEVMETHOD(device_probe, re_probe), DEVMETHOD(device_attach, re_attach), DEVMETHOD(device_detach, re_detach), DEVMETHOD(device_suspend, re_suspend), DEVMETHOD(device_resume, re_resume), DEVMETHOD(device_shutdown, re_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, re_miibus_readreg), DEVMETHOD(miibus_writereg, re_miibus_writereg), DEVMETHOD(miibus_statchg, re_miibus_statchg), DEVMETHOD_END }; static driver_t re_driver = { "re", re_methods, sizeof(struct rl_softc) }; static devclass_t re_devclass; DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0); DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0); #define EE_SET(x) \ CSR_WRITE_1(sc, RL_EECMD, \ CSR_READ_1(sc, RL_EECMD) | x) #define EE_CLR(x) \ CSR_WRITE_1(sc, RL_EECMD, \ CSR_READ_1(sc, RL_EECMD) & ~x) /* * Send a read command and address to the EEPROM, check for ACK. 
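 *
 * (Sketch of the wire format, assuming a 93C46-style part where
 * sc->rl_eewidth is 6: the read opcode is placed above the address bits,
 * d = addr | (RL_9346_READ << 6), and the loop clocks out bits 9..0 of d
 * MSB-first on RL_EE_DATAIN.)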
*/ static void re_eeprom_putbyte(struct rl_softc *sc, int addr) { int d, i; d = addr | (RL_9346_READ << sc->rl_eewidth); /* * Feed in each bit and strobe the clock. */ for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) { if (d & i) { EE_SET(RL_EE_DATAIN); } else { EE_CLR(RL_EE_DATAIN); } DELAY(100); EE_SET(RL_EE_CLK); DELAY(150); EE_CLR(RL_EE_CLK); DELAY(100); } } /* * Read a word of data stored in the EEPROM at address 'addr.' */ static void re_eeprom_getword(struct rl_softc *sc, int addr, u_int16_t *dest) { int i; u_int16_t word = 0; /* * Send address of word we want to read. */ re_eeprom_putbyte(sc, addr); /* * Start reading bits from EEPROM. */ for (i = 0x8000; i; i >>= 1) { EE_SET(RL_EE_CLK); DELAY(100); if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT) word |= i; EE_CLR(RL_EE_CLK); DELAY(100); } *dest = word; } /* * Read a sequence of words from the EEPROM. */ static void re_read_eeprom(struct rl_softc *sc, caddr_t dest, int off, int cnt) { int i; u_int16_t word = 0, *ptr; CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM); DELAY(100); for (i = 0; i < cnt; i++) { CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL); re_eeprom_getword(sc, off + i, &word); CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL); ptr = (u_int16_t *)(dest + (i * 2)); *ptr = word; } CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM); } static int re_gmii_readreg(device_t dev, int phy, int reg) { struct rl_softc *sc; u_int32_t rval; int i; sc = device_get_softc(dev); /* Let the rgephy driver read the GMEDIASTAT register */ if (reg == RL_GMEDIASTAT) { rval = CSR_READ_1(sc, RL_GMEDIASTAT); return (rval); } CSR_WRITE_4(sc, RL_PHYAR, reg << 16); for (i = 0; i < RL_PHY_TIMEOUT; i++) { rval = CSR_READ_4(sc, RL_PHYAR); if (rval & RL_PHYAR_BUSY) break; DELAY(25); } if (i == RL_PHY_TIMEOUT) { device_printf(sc->rl_dev, "PHY read failed\n"); return (0); } /* * Controller requires a 20us delay to process next MDIO request. */ DELAY(20); return (rval & RL_PHYAR_PHYDATA); } static int re_gmii_writereg(device_t dev, int phy, int reg, int data) { struct rl_softc *sc; u_int32_t rval; int i; sc = device_get_softc(dev); CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) | (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY); for (i = 0; i < RL_PHY_TIMEOUT; i++) { rval = CSR_READ_4(sc, RL_PHYAR); if (!(rval & RL_PHYAR_BUSY)) break; DELAY(25); } if (i == RL_PHY_TIMEOUT) { device_printf(sc->rl_dev, "PHY write failed\n"); return (0); } /* * Controller requires a 20us delay to process next MDIO request. */ DELAY(20); return (0); } static int re_miibus_readreg(device_t dev, int phy, int reg) { struct rl_softc *sc; u_int16_t rval = 0; u_int16_t re8139_reg = 0; sc = device_get_softc(dev); if (sc->rl_type == RL_8169) { rval = re_gmii_readreg(dev, phy, reg); return (rval); } switch (reg) { case MII_BMCR: re8139_reg = RL_BMCR; break; case MII_BMSR: re8139_reg = RL_BMSR; break; case MII_ANAR: re8139_reg = RL_ANAR; break; case MII_ANER: re8139_reg = RL_ANER; break; case MII_ANLPAR: re8139_reg = RL_LPAR; break; case MII_PHYIDR1: case MII_PHYIDR2: return (0); /* * Allow the rlphy driver to read the media status * register. If we have a link partner which does not * support NWAY, this is the register which will tell * us the results of parallel detection. */ case RL_MEDIASTAT: rval = CSR_READ_1(sc, RL_MEDIASTAT); return (rval); default: device_printf(sc->rl_dev, "bad phy register\n"); return (0); } rval = CSR_READ_2(sc, re8139_reg); if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) { /* 8139C+ has different bit layout. 
*/ rval &= ~(BMCR_LOOP | BMCR_ISO); } return (rval); } static int re_miibus_writereg(device_t dev, int phy, int reg, int data) { struct rl_softc *sc; u_int16_t re8139_reg = 0; int rval = 0; sc = device_get_softc(dev); if (sc->rl_type == RL_8169) { rval = re_gmii_writereg(dev, phy, reg, data); return (rval); } switch (reg) { case MII_BMCR: re8139_reg = RL_BMCR; if (sc->rl_type == RL_8139CPLUS) { /* 8139C+ has different bit layout. */ data &= ~(BMCR_LOOP | BMCR_ISO); } break; case MII_BMSR: re8139_reg = RL_BMSR; break; case MII_ANAR: re8139_reg = RL_ANAR; break; case MII_ANER: re8139_reg = RL_ANER; break; case MII_ANLPAR: re8139_reg = RL_LPAR; break; case MII_PHYIDR1: case MII_PHYIDR2: return (0); break; default: device_printf(sc->rl_dev, "bad phy register\n"); return (0); } CSR_WRITE_2(sc, re8139_reg, data); return (0); } static void re_miibus_statchg(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; struct mii_data *mii; sc = device_get_softc(dev); mii = device_get_softc(sc->rl_miibus); ifp = sc->rl_ifp; if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; sc->rl_flags &= ~RL_FLAG_LINK; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: sc->rl_flags |= RL_FLAG_LINK; break; case IFM_1000_T: if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0) break; sc->rl_flags |= RL_FLAG_LINK; break; default: break; } } /* * RealTek controllers do not provide any interface to the RX/TX * MACs for resolved speed, duplex and flow-control parameters. */ } /* * Set the RX configuration and 64-bit multicast hash filter. */ static void re_set_rxmode(struct rl_softc *sc) { struct ifnet *ifp; struct ifmultiaddr *ifma; uint32_t hashes[2] = { 0, 0 }; uint32_t h, rxfilt; RL_LOCK_ASSERT(sc); ifp = sc->rl_ifp; rxfilt = RL_RXCFG_CONFIG | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_BROAD; if ((sc->rl_flags & RL_FLAG_EARLYOFF) != 0) rxfilt |= RL_RXCFG_EARLYOFF; else if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) rxfilt |= RL_RXCFG_EARLYOFFV2; if (ifp->if_flags & (IFF_ALLMULTI | IFF_PROMISC)) { if (ifp->if_flags & IFF_PROMISC) rxfilt |= RL_RXCFG_RX_ALLPHYS; /* * Unlike other hardwares, we have to explicitly set * RL_RXCFG_RX_MULTI to receive multicast frames in * promiscuous mode. */ rxfilt |= RL_RXCFG_RX_MULTI; hashes[0] = hashes[1] = 0xffffffff; goto done; } if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; h = ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN) >> 26; if (h < 32) hashes[0] |= (1 << h); else hashes[1] |= (1 << (h - 32)); } if_maddr_runlock(ifp); if (hashes[0] != 0 || hashes[1] != 0) { /* * For some unfathomable reason, RealTek decided to * reverse the order of the multicast hash registers * in the PCI Express parts. This means we have to * write the hash pattern in reverse order for those * devices. */ if ((sc->rl_flags & RL_FLAG_PCIE) != 0) { h = bswap32(hashes[0]); hashes[0] = bswap32(hashes[1]); hashes[1] = h; } rxfilt |= RL_RXCFG_RX_MULTI; } if (sc->rl_hwrev->rl_rev == RL_HWREV_8168F) { /* Disable multicast filtering due to silicon bug. 
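 *
 * For context on the filter being bypassed here (restating the
 * logic above, nothing new): each multicast address hashes to
 * h = ether_crc32_be(addr, ETHER_ADDR_LEN) >> 26, a value in
 * 0..63; h < 32 sets bit h of RL_MAR0, otherwise bit h - 32 of
 * RL_MAR4. Writing all-ones to both registers, as below and in the
 * promiscuous/allmulti path, therefore accepts every multicast
 * frame.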
*/ hashes[0] = 0xffffffff; hashes[1] = 0xffffffff; } done: CSR_WRITE_4(sc, RL_MAR0, hashes[0]); CSR_WRITE_4(sc, RL_MAR4, hashes[1]); CSR_WRITE_4(sc, RL_RXCFG, rxfilt); } static void re_reset(struct rl_softc *sc) { int i; RL_LOCK_ASSERT(sc); CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET); for (i = 0; i < RL_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET)) break; } if (i == RL_TIMEOUT) device_printf(sc->rl_dev, "reset never completed!\n"); if ((sc->rl_flags & RL_FLAG_MACRESET) != 0) CSR_WRITE_1(sc, 0x82, 1); if (sc->rl_hwrev->rl_rev == RL_HWREV_8169S) re_gmii_writereg(sc->rl_dev, 1, 0x0b, 0); } #ifdef RE_DIAG /* * The following routine is designed to test for a defect on some * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64# * lines connected to the bus, however for a 32-bit only card, they * should be pulled high. The result of this defect is that the * NIC will not work right if you plug it into a 64-bit slot: DMA * operations will be done with 64-bit transfers, which will fail * because the 64-bit data lines aren't connected. * * There's no way to work around this (short of talking a soldering * iron to the board), however we can detect it. The method we use * here is to put the NIC into digital loopback mode, set the receiver * to promiscuous mode, and then try to send a frame. We then compare * the frame data we sent to what was received. If the data matches, * then the NIC is working correctly, otherwise we know the user has * a defective NIC which has been mistakenly plugged into a 64-bit PCI * slot. In the latter case, there's no way the NIC can work correctly, * so we print out a message on the console and abort the device attach. */ static int re_diag(struct rl_softc *sc) { struct ifnet *ifp = sc->rl_ifp; struct mbuf *m0; struct ether_header *eh; struct rl_desc *cur_rx; u_int16_t status; u_int32_t rxstat; int total_len, i, error = 0, phyaddr; u_int8_t dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' }; u_int8_t src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' }; /* Allocate a single mbuf */ MGETHDR(m0, M_NOWAIT, MT_DATA); if (m0 == NULL) return (ENOBUFS); RL_LOCK(sc); /* * Initialize the NIC in test mode. This sets the chip up * so that it can send and receive frames, but performs the * following special functions: * - Puts receiver in promiscuous mode * - Enables digital loopback mode * - Leaves interrupts turned off */ ifp->if_flags |= IFF_PROMISC; sc->rl_testmode = 1; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); sc->rl_flags |= RL_FLAG_LINK; if (sc->rl_type == RL_8169) phyaddr = 1; else phyaddr = 0; re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET); for (i = 0; i < RL_TIMEOUT; i++) { status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR); if (!(status & BMCR_RESET)) break; } re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP); CSR_WRITE_2(sc, RL_ISR, RL_INTRS); DELAY(100000); /* Put some data in the mbuf */ eh = mtod(m0, struct ether_header *); bcopy ((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN); bcopy ((char *)&src, eh->ether_shost, ETHER_ADDR_LEN); eh->ether_type = htons(ETHERTYPE_IP); m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN; /* * Queue the packet, start transmission. * Note: IF_HANDOFF() ultimately calls re_start() for us. 
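 *
 * The ISR store that follows acknowledges stale status first: the
 * interrupt status register is write-one-to-clear, so writing
 * 0xFFFF wipes every pending bit and the completion poll further
 * down sees only events raised by this one test frame.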
*/ CSR_WRITE_2(sc, RL_ISR, 0xFFFF); RL_UNLOCK(sc); /* XXX: re_diag must not be called when in ALTQ mode */ IF_HANDOFF(&ifp->if_snd, m0, ifp); RL_LOCK(sc); m0 = NULL; /* Wait for it to propagate through the chip */ DELAY(100000); for (i = 0; i < RL_TIMEOUT; i++) { status = CSR_READ_2(sc, RL_ISR); CSR_WRITE_2(sc, RL_ISR, status); if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) == (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) break; DELAY(10); } if (i == RL_TIMEOUT) { device_printf(sc->rl_dev, "diagnostic failed, failed to receive packet in" " loopback mode\n"); error = EIO; goto done; } /* * The packet should have been dumped into the first * entry in the RX DMA ring. Grab it from there. */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_POSTREAD); bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_desc[0].rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_desc[0].rx_dmamap); m0 = sc->rl_ldata.rl_rx_desc[0].rx_m; sc->rl_ldata.rl_rx_desc[0].rx_m = NULL; eh = mtod(m0, struct ether_header *); cur_rx = &sc->rl_ldata.rl_rx_list[0]; total_len = RL_RXBYTES(cur_rx); rxstat = le32toh(cur_rx->rl_cmdstat); if (total_len != ETHER_MIN_LEN) { device_printf(sc->rl_dev, "diagnostic failed, received short packet\n"); error = EIO; goto done; } /* Test that the received packet data matches what we sent. */ if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) || bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) || ntohs(eh->ether_type) != ETHERTYPE_IP) { device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n"); device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n", dst, ":", src, ":", ETHERTYPE_IP); device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n", eh->ether_dhost, ":", eh->ether_shost, ":", ntohs(eh->ether_type)); device_printf(sc->rl_dev, "You may have a defective 32-bit " "NIC plugged into a 64-bit PCI slot.\n"); device_printf(sc->rl_dev, "Please re-install the NIC in a " "32-bit slot for proper operation.\n"); device_printf(sc->rl_dev, "Read the re(4) man page for more " "details.\n"); error = EIO; } done: /* Turn interface off, release resources */ sc->rl_testmode = 0; sc->rl_flags &= ~RL_FLAG_LINK; ifp->if_flags &= ~IFF_PROMISC; re_stop(sc); if (m0 != NULL) m_freem(m0); RL_UNLOCK(sc); return (error); } #endif /* * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device * IDs against our list and return a device name if we find a match. */ static int re_probe(device_t dev) { const struct rl_type *t; uint16_t devid, vendor; uint16_t revid, sdevid; int i; vendor = pci_get_vendor(dev); devid = pci_get_device(dev); revid = pci_get_revid(dev); sdevid = pci_get_subdevice(dev); if (vendor == LINKSYS_VENDORID && devid == LINKSYS_DEVICEID_EG1032) { if (sdevid != LINKSYS_SUBDEVICE_EG1032_REV3) { /* * Only attach to rev. 3 of the Linksys EG1032 adapter. * Rev. 2 is supported by sk(4). */ return (ENXIO); } } if (vendor == RT_VENDORID && devid == RT_DEVICEID_8139) { if (revid != 0x20) { /* 8139, let rl(4) take care of this device. */ return (ENXIO); } } t = re_devs; for (i = 0; i < sizeof(re_devs) / sizeof(re_devs[0]); i++, t++) { if (vendor == t->rl_vid && devid == t->rl_did) { device_set_desc(dev, t->rl_name); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } /* * Map a single buffer address. 
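 *
 * Usage sketch (variable names are illustrative; this mirrors the
 * loads performed later in this file):
 *
 *	bus_addr_t paddr = 0;
 *	error = bus_dmamap_load(tag, map, vaddr, size,
 *	    re_dma_map_addr, &paddr, BUS_DMA_NOWAIT);
 *
 * The single-segment KASSERT below holds because every tag paired
 * with this callback is created with nsegments == 1.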
*/ static void re_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *addr; if (error) return; KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg)); addr = arg; *addr = segs->ds_addr; } static int re_allocmem(device_t dev, struct rl_softc *sc) { bus_addr_t lowaddr; bus_size_t rx_list_size, tx_list_size; int error; int i; rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc); tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc); /* * Allocate the parent bus DMA tag appropriate for PCI. * In order to use DAC, RL_CPLUSCMD_PCI_DAC bit of RL_CPLUS_CMD * register should be set. However some RealTek chips are known * to be buggy on DAC handling, therefore disable DAC by limiting * DMA address space to 32bit. PCIe variants of RealTek chips * may not have the limitation. */ lowaddr = BUS_SPACE_MAXADDR; if ((sc->rl_flags & RL_FLAG_PCIE) == 0) lowaddr = BUS_SPACE_MAXADDR_32BIT; error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->rl_parent_tag); if (error) { device_printf(dev, "could not allocate parent DMA tag\n"); return (error); } /* * Allocate map for TX mbufs. */ error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0, NULL, NULL, &sc->rl_ldata.rl_tx_mtag); if (error) { device_printf(dev, "could not allocate TX DMA tag\n"); return (error); } /* * Allocate map for RX mbufs. */ if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUM9BYTES, 1, MJUM9BYTES, 0, NULL, NULL, &sc->rl_ldata.rl_jrx_mtag); if (error) { device_printf(dev, "could not allocate jumbo RX DMA tag\n"); return (error); } } error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag); if (error) { device_printf(dev, "could not allocate RX DMA tag\n"); return (error); } /* * Allocate map for TX descriptor list. */ error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, tx_list_size, 1, tx_list_size, 0, NULL, NULL, &sc->rl_ldata.rl_tx_list_tag); if (error) { device_printf(dev, "could not allocate TX DMA ring tag\n"); return (error); } /* Allocate DMA'able memory for the TX ring */ error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag, (void **)&sc->rl_ldata.rl_tx_list, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->rl_ldata.rl_tx_list_map); if (error) { device_printf(dev, "could not allocate TX DMA ring\n"); return (error); } /* Load the map for the TX ring. */ sc->rl_ldata.rl_tx_list_addr = 0; error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list, tx_list_size, re_dma_map_addr, &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT); if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) { device_printf(dev, "could not load TX DMA ring\n"); return (ENOMEM); } /* Create DMA maps for TX buffers */ for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) { error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0, &sc->rl_ldata.rl_tx_desc[i].tx_dmamap); if (error) { device_printf(dev, "could not create DMA map for TX\n"); return (error); } } /* * Allocate map for RX descriptor list. 
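 *
 * Same recipe as the TX ring above: an RL_RING_ALIGN-aligned,
 * single-segment tag, a coherent zeroed allocation, and a load
 * through re_dma_map_addr so the ring's bus address can later be
 * programmed into the chip.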
*/ error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, rx_list_size, 1, rx_list_size, 0, NULL, NULL, &sc->rl_ldata.rl_rx_list_tag); if (error) { device_printf(dev, "could not create RX DMA ring tag\n"); return (error); } /* Allocate DMA'able memory for the RX ring */ error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag, (void **)&sc->rl_ldata.rl_rx_list, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->rl_ldata.rl_rx_list_map); if (error) { device_printf(dev, "could not allocate RX DMA ring\n"); return (error); } /* Load the map for the RX ring. */ sc->rl_ldata.rl_rx_list_addr = 0; error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list, rx_list_size, re_dma_map_addr, &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT); if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) { device_printf(dev, "could not load RX DMA ring\n"); return (ENOMEM); } /* Create DMA maps for RX buffers */ if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { error = bus_dmamap_create(sc->rl_ldata.rl_jrx_mtag, 0, &sc->rl_ldata.rl_jrx_sparemap); if (error) { device_printf(dev, "could not create spare DMA map for jumbo RX\n"); return (error); } for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { error = bus_dmamap_create(sc->rl_ldata.rl_jrx_mtag, 0, &sc->rl_ldata.rl_jrx_desc[i].rx_dmamap); if (error) { device_printf(dev, "could not create DMA map for jumbo RX\n"); return (error); } } } error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0, &sc->rl_ldata.rl_rx_sparemap); if (error) { device_printf(dev, "could not create spare DMA map for RX\n"); return (error); } for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0, &sc->rl_ldata.rl_rx_desc[i].rx_dmamap); if (error) { device_printf(dev, "could not create DMA map for RX\n"); return (error); } } /* Create DMA map for statistics. */ error = bus_dma_tag_create(sc->rl_parent_tag, RL_DUMP_ALIGN, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct rl_stats), 1, sizeof(struct rl_stats), 0, NULL, NULL, &sc->rl_ldata.rl_stag); if (error) { device_printf(dev, "could not create statistics DMA tag\n"); return (error); } /* Allocate DMA'able memory for statistics. */ error = bus_dmamem_alloc(sc->rl_ldata.rl_stag, (void **)&sc->rl_ldata.rl_stats, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->rl_ldata.rl_smap); if (error) { device_printf(dev, "could not allocate statistics DMA memory\n"); return (error); } /* Load the map for statistics. */ sc->rl_ldata.rl_stats_addr = 0; error = bus_dmamap_load(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap, sc->rl_ldata.rl_stats, sizeof(struct rl_stats), re_dma_map_addr, &sc->rl_ldata.rl_stats_addr, BUS_DMA_NOWAIT); if (error != 0 || sc->rl_ldata.rl_stats_addr == 0) { device_printf(dev, "could not load statistics DMA memory\n"); return (ENOMEM); } return (0); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int re_attach(device_t dev) { u_char eaddr[ETHER_ADDR_LEN]; u_int16_t as[ETHER_ADDR_LEN / 2]; struct rl_softc *sc; struct ifnet *ifp; const struct rl_hwrev *hw_rev; int capmask, error = 0, hwrev, i, msic, msixc, phy, reg, rid; u_int32_t cap, ctl; u_int16_t devid, re_did = 0; uint8_t cfg; sc = device_get_softc(dev); sc->rl_dev = dev; mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0); /* * Map control/status registers. 
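 *
 * The BAR layout varies across chip generations, hence the probing
 * below: memory space is preferred (BAR 1 by default, BAR 2 on
 * RTL8168/8101E), the RTL8169SC is forced to I/O space, and I/O
 * BAR 0 serves as the fallback when the memory mapping cannot be
 * allocated.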
*/ pci_enable_busmaster(dev); devid = pci_get_device(dev); /* * Prefer memory space register mapping over IO space. * Because RTL8169SC does not seem to work when memory mapping * is used always activate io mapping. */ if (devid == RT_DEVICEID_8169SC) prefer_iomap = 1; if (prefer_iomap == 0) { sc->rl_res_id = PCIR_BAR(1); sc->rl_res_type = SYS_RES_MEMORY; /* RTL8168/8101E seems to use different BARs. */ if (devid == RT_DEVICEID_8168 || devid == RT_DEVICEID_8101E) sc->rl_res_id = PCIR_BAR(2); } else { sc->rl_res_id = PCIR_BAR(0); sc->rl_res_type = SYS_RES_IOPORT; } sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type, &sc->rl_res_id, RF_ACTIVE); if (sc->rl_res == NULL && prefer_iomap == 0) { sc->rl_res_id = PCIR_BAR(0); sc->rl_res_type = SYS_RES_IOPORT; sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type, &sc->rl_res_id, RF_ACTIVE); } if (sc->rl_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); error = ENXIO; goto fail; } sc->rl_btag = rman_get_bustag(sc->rl_res); sc->rl_bhandle = rman_get_bushandle(sc->rl_res); msic = pci_msi_count(dev); msixc = pci_msix_count(dev); if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { sc->rl_flags |= RL_FLAG_PCIE; sc->rl_expcap = reg; } if (bootverbose) { device_printf(dev, "MSI count : %d\n", msic); device_printf(dev, "MSI-X count : %d\n", msixc); } if (msix_disable > 0) msixc = 0; if (msi_disable > 0) msic = 0; /* Prefer MSI-X to MSI. */ if (msixc > 0) { msixc = RL_MSI_MESSAGES; rid = PCIR_BAR(4); sc->rl_res_pba = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->rl_res_pba == NULL) { device_printf(sc->rl_dev, "could not allocate MSI-X PBA resource\n"); } if (sc->rl_res_pba != NULL && pci_alloc_msix(dev, &msixc) == 0) { if (msixc == RL_MSI_MESSAGES) { device_printf(dev, "Using %d MSI-X message\n", msixc); sc->rl_flags |= RL_FLAG_MSIX; } else pci_release_msi(dev); } if ((sc->rl_flags & RL_FLAG_MSIX) == 0) { if (sc->rl_res_pba != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->rl_res_pba); sc->rl_res_pba = NULL; msixc = 0; } } /* Prefer MSI to INTx. */ if (msixc == 0 && msic > 0) { msic = RL_MSI_MESSAGES; if (pci_alloc_msi(dev, &msic) == 0) { if (msic == RL_MSI_MESSAGES) { device_printf(dev, "Using %d MSI message\n", msic); sc->rl_flags |= RL_FLAG_MSI; /* Explicitly set MSI enable bit. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); cfg = CSR_READ_1(sc, RL_CFG2); cfg |= RL_CFG2_MSI; CSR_WRITE_1(sc, RL_CFG2, cfg); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); } else pci_release_msi(dev); } if ((sc->rl_flags & RL_FLAG_MSI) == 0) msic = 0; } /* Allocate interrupt */ if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) { rid = 0; sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->rl_irq[0] == NULL) { device_printf(dev, "couldn't allocate IRQ resources\n"); error = ENXIO; goto fail; } } else { for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) { sc->rl_irq[i] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->rl_irq[i] == NULL) { device_printf(dev, "couldn't allocate IRQ resources for " "message %d\n", rid); error = ENXIO; goto fail; } } } if ((sc->rl_flags & RL_FLAG_MSI) == 0) { CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); cfg = CSR_READ_1(sc, RL_CFG2); if ((cfg & RL_CFG2_MSI) != 0) { device_printf(dev, "turning off MSI enable bit.\n"); cfg &= ~RL_CFG2_MSI; CSR_WRITE_1(sc, RL_CFG2, cfg); } CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); } /* Disable ASPM L0S/L1. 
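 *
 * ASPM link-state transitions are reported to stall some of these
 * MACs; when the Express capability advertises ASPM support, the
 * ASPMC field of the link control register is simply cleared.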
*/ if (sc->rl_expcap != 0) { cap = pci_read_config(dev, sc->rl_expcap + PCIER_LINK_CAP, 2); if ((cap & PCIEM_LINK_CAP_ASPM) != 0) { ctl = pci_read_config(dev, sc->rl_expcap + PCIER_LINK_CTL, 2); if ((ctl & PCIEM_LINK_CTL_ASPMC) != 0) { ctl &= ~PCIEM_LINK_CTL_ASPMC; pci_write_config(dev, sc->rl_expcap + PCIER_LINK_CTL, ctl, 2); device_printf(dev, "ASPM disabled\n"); } } else device_printf(dev, "no ASPM capability\n"); } hw_rev = re_hwrevs; hwrev = CSR_READ_4(sc, RL_TXCFG); switch (hwrev & 0x70000000) { case 0x00000000: case 0x10000000: device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0xfc800000); hwrev &= (RL_TXCFG_HWREV | 0x80000000); break; default: device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0x7c800000); sc->rl_macrev = hwrev & 0x00700000; hwrev &= RL_TXCFG_HWREV; break; } device_printf(dev, "MAC rev. 0x%08x\n", sc->rl_macrev); while (hw_rev->rl_desc != NULL) { if (hw_rev->rl_rev == hwrev) { sc->rl_type = hw_rev->rl_type; sc->rl_hwrev = hw_rev; break; } hw_rev++; } if (hw_rev->rl_desc == NULL) { device_printf(dev, "Unknown H/W revision: 0x%08x\n", hwrev); error = ENXIO; goto fail; } switch (hw_rev->rl_rev) { case RL_HWREV_8139CPLUS: sc->rl_flags |= RL_FLAG_FASTETHER | RL_FLAG_AUTOPAD; break; case RL_HWREV_8100E: case RL_HWREV_8101E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_FASTETHER; break; case RL_HWREV_8102E: case RL_HWREV_8102EL: case RL_HWREV_8102EL_SPIN1: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD; break; case RL_HWREV_8103E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_MACSLEEP; break; case RL_HWREV_8401E: case RL_HWREV_8105E: case RL_HWREV_8105E_SPIN1: case RL_HWREV_8106E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD; break; case RL_HWREV_8402: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_CMDSTOP_WAIT_TXQ; break; case RL_HWREV_8168B_SPIN1: case RL_HWREV_8168B_SPIN2: sc->rl_flags |= RL_FLAG_WOLRXENB; /* FALLTHROUGH */ case RL_HWREV_8168B_SPIN3: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_MACSTAT; break; case RL_HWREV_8168C_SPIN2: sc->rl_flags |= RL_FLAG_MACSLEEP; /* FALLTHROUGH */ case RL_HWREV_8168C: if (sc->rl_macrev == 0x00200000) sc->rl_flags |= RL_FLAG_MACSLEEP; /* FALLTHROUGH */ case RL_HWREV_8168CP: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168D: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168DP: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WAIT_TXPOLL | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168E_VL: case RL_HWREV_8168F: sc->rl_flags |= RL_FLAG_EARLYOFF; /* FALLTHROUGH */ case RL_HWREV_8411: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | 
RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168EP: case RL_HWREV_8168G: case RL_HWREV_8411B: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_WOL_MANLINK | RL_FLAG_8168G_PLUS; break; case RL_HWREV_8168GU: case RL_HWREV_8168H: if (pci_get_device(dev) == RT_DEVICEID_8101E) { /* RTL8106E(US), RTL8107E */ sc->rl_flags |= RL_FLAG_FASTETHER; } else sc->rl_flags |= RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_8168G_PLUS; break; case RL_HWREV_8169_8110SB: case RL_HWREV_8169_8110SBL: case RL_HWREV_8169_8110SC: case RL_HWREV_8169_8110SCE: sc->rl_flags |= RL_FLAG_PHYWAKE; /* FALLTHROUGH */ case RL_HWREV_8169: case RL_HWREV_8169S: case RL_HWREV_8110S: sc->rl_flags |= RL_FLAG_MACRESET; break; default: break; } if (sc->rl_hwrev->rl_rev == RL_HWREV_8139CPLUS) { sc->rl_cfg0 = RL_8139_CFG0; sc->rl_cfg1 = RL_8139_CFG1; sc->rl_cfg2 = 0; sc->rl_cfg3 = RL_8139_CFG3; sc->rl_cfg4 = RL_8139_CFG4; sc->rl_cfg5 = RL_8139_CFG5; } else { sc->rl_cfg0 = RL_CFG0; sc->rl_cfg1 = RL_CFG1; sc->rl_cfg2 = RL_CFG2; sc->rl_cfg3 = RL_CFG3; sc->rl_cfg4 = RL_CFG4; sc->rl_cfg5 = RL_CFG5; } /* Reset the adapter. */ RL_LOCK(sc); re_reset(sc); RL_UNLOCK(sc); /* Enable PME. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); cfg = CSR_READ_1(sc, sc->rl_cfg1); cfg |= RL_CFG1_PME; CSR_WRITE_1(sc, sc->rl_cfg1, cfg); cfg = CSR_READ_1(sc, sc->rl_cfg5); cfg &= RL_CFG5_PME_STS; CSR_WRITE_1(sc, sc->rl_cfg5, cfg); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); if ((sc->rl_flags & RL_FLAG_PAR) != 0) { /* * XXX Should have a better way to extract station * address from EEPROM. */ for (i = 0; i < ETHER_ADDR_LEN; i++) eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i); } else { sc->rl_eewidth = RL_9356_ADDR_LEN; re_read_eeprom(sc, (caddr_t)&re_did, 0, 1); if (re_did != 0x8129) sc->rl_eewidth = RL_9346_ADDR_LEN; /* * Get station address from the EEPROM. */ re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3); for (i = 0; i < ETHER_ADDR_LEN / 2; i++) as[i] = le16toh(as[i]); bcopy(as, eaddr, ETHER_ADDR_LEN); } if (sc->rl_type == RL_8169) { /* Set RX length mask and number of descriptors. */ sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN; sc->rl_txstart = RL_GTXSTART; sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT; sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT; } else { /* Set RX length mask and number of descriptors. */ sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN; sc->rl_txstart = RL_TXSTART; sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT; sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT; } error = re_allocmem(dev, sc); if (error) goto fail; re_add_sysctls(sc); ifp = sc->rl_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } /* Take controller out of deep sleep mode. */ if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) { if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80) CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) | 0x01); else CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) & ~0x01); } /* Take PHY out of power down mode. 
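 *
 * On RL_FLAG_PHYWAKE_PM parts the PHY sits behind a power bit in
 * RL_PMCH; the 8401E additionally has an (apparently undocumented)
 * bit at register 0xD1 that is cleared here as well.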
*/ if ((sc->rl_flags & RL_FLAG_PHYWAKE_PM) != 0) { CSR_WRITE_1(sc, RL_PMCH, CSR_READ_1(sc, RL_PMCH) | 0x80); if (hw_rev->rl_rev == RL_HWREV_8401E) CSR_WRITE_1(sc, 0xD1, CSR_READ_1(sc, 0xD1) & ~0x08); } if ((sc->rl_flags & RL_FLAG_PHYWAKE) != 0) { re_gmii_writereg(dev, 1, 0x1f, 0); re_gmii_writereg(dev, 1, 0x0e, 0); } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = re_ioctl; ifp->if_start = re_start; /* * RTL8168/8111C generates wrong IP checksummed frame if the * packet has IP options so disable TX checksum offloading. */ if (sc->rl_hwrev->rl_rev == RL_HWREV_8168C || sc->rl_hwrev->rl_rev == RL_HWREV_8168C_SPIN2 || sc->rl_hwrev->rl_rev == RL_HWREV_8168CP) { ifp->if_hwassist = 0; ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TSO4; } else { ifp->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP; ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_TSO4; } ifp->if_hwassist |= CSUM_TSO; ifp->if_capenable = ifp->if_capabilities; ifp->if_init = re_init; IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN); ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN; IFQ_SET_READY(&ifp->if_snd); TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc); #define RE_PHYAD_INTERNAL 0 /* Do MII setup. */ phy = RE_PHYAD_INTERNAL; if (sc->rl_type == RL_8169) phy = 1; capmask = BMSR_DEFCAPMASK; if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0) capmask &= ~BMSR_EXTSTAT; error = mii_attach(dev, &sc->rl_miibus, ifp, re_ifmedia_upd, re_ifmedia_sts, capmask, phy, MII_OFFSET_ANY, MIIF_DOPAUSE); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); /* VLAN capability setup */ ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; if (ifp->if_capabilities & IFCAP_HWCSUM) ifp->if_capabilities |= IFCAP_VLAN_HWCSUM; /* Enable WOL if PM is supported. */ if (pci_find_cap(sc->rl_dev, PCIY_PMG, ®) == 0) ifp->if_capabilities |= IFCAP_WOL; ifp->if_capenable = ifp->if_capabilities; ifp->if_capenable &= ~(IFCAP_WOL_UCAST | IFCAP_WOL_MCAST); /* * Don't enable TSO by default. It is known to generate * corrupted TCP segments(bad TCP options) under certain * circumstances. */ ifp->if_hwassist &= ~CSUM_TSO; ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO); #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* * Tell the upper layer(s) we support long frames. * Must appear after the call to ether_ifattach() because * ether_ifattach() sets ifi_hdrlen to the default value. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); #ifdef DEV_NETMAP re_netmap_attach(sc); #endif /* DEV_NETMAP */ + #ifdef RE_DIAG /* * Perform hardware diagnostic on the original RTL8169. * Some 32-bit cards were incorrectly wired and would * malfunction if plugged into a 64-bit slot. 
*/ - if (hwrev == RL_HWREV_8169) { error = re_diag(sc); if (error) { device_printf(dev, "attach aborted due to hardware diag failure\n"); ether_ifdetach(ifp); goto fail; } } #endif #ifdef RE_TX_MODERATION intr_filter = 1; #endif /* Hook interrupt last to avoid having to lock softc */ if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0 && intr_filter == 0) { error = bus_setup_intr(dev, sc->rl_irq[0], INTR_TYPE_NET | INTR_MPSAFE, NULL, re_intr_msi, sc, &sc->rl_intrhand[0]); } else { error = bus_setup_intr(dev, sc->rl_irq[0], INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc, &sc->rl_intrhand[0]); } if (error) { device_printf(dev, "couldn't set up irq\n"); ether_ifdetach(ifp); } fail: if (error) re_detach(dev); return (error); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ static int re_detach(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; int i, rid; sc = device_get_softc(dev); ifp = sc->rl_ifp; KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized")); /* These should only be active if attach succeeded */ if (device_is_attached(dev)) { #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif RL_LOCK(sc); #if 0 sc->suspended = 1; #endif re_stop(sc); RL_UNLOCK(sc); callout_drain(&sc->rl_stat_callout); taskqueue_drain(taskqueue_fast, &sc->rl_inttask); /* * Force off the IFF_UP flag here, in case someone * still had a BPF descriptor attached to this * interface. If they do, ether_ifdetach() will cause * the BPF code to try and clear the promisc mode * flag, which will bubble down to re_ioctl(), * which will try to call re_init() again. This will * turn the NIC back on and restart the MII ticker, * which will panic the system when the kernel tries * to invoke the re_tick() function that isn't there * anymore. */ ifp->if_flags &= ~IFF_UP; ether_ifdetach(ifp); } if (sc->rl_miibus) device_delete_child(dev, sc->rl_miibus); bus_generic_detach(dev); /* * The rest is resource deallocation, so we should already be * stopped here. 
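 *
 * The teardown below mirrors attach in reverse: interrupt handler,
 * ifnet, IRQ and MSI resources, the register BAR, then each DMA
 * object as an unload/free/destroy triple, and finally the parent
 * tag and the mutex.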
*/ if (sc->rl_intrhand[0] != NULL) { bus_teardown_intr(dev, sc->rl_irq[0], sc->rl_intrhand[0]); sc->rl_intrhand[0] = NULL; } if (ifp != NULL) { #ifdef DEV_NETMAP netmap_detach(ifp); #endif /* DEV_NETMAP */ if_free(ifp); } if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) rid = 0; else rid = 1; if (sc->rl_irq[0] != NULL) { bus_release_resource(dev, SYS_RES_IRQ, rid, sc->rl_irq[0]); sc->rl_irq[0] = NULL; } if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0) pci_release_msi(dev); if (sc->rl_res_pba) { rid = PCIR_BAR(4); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->rl_res_pba); } if (sc->rl_res) bus_release_resource(dev, sc->rl_res_type, sc->rl_res_id, sc->rl_res); /* Unload and free the RX DMA ring memory and map */ if (sc->rl_ldata.rl_rx_list_tag) { if (sc->rl_ldata.rl_rx_list_addr) bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map); if (sc->rl_ldata.rl_rx_list) bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list, sc->rl_ldata.rl_rx_list_map); bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag); } /* Unload and free the TX DMA ring memory and map */ if (sc->rl_ldata.rl_tx_list_tag) { if (sc->rl_ldata.rl_tx_list_addr) bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map); if (sc->rl_ldata.rl_tx_list) bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list, sc->rl_ldata.rl_tx_list_map); bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag); } /* Destroy all the RX and TX buffer maps */ if (sc->rl_ldata.rl_tx_mtag) { for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) { if (sc->rl_ldata.rl_tx_desc[i].tx_dmamap) bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag, sc->rl_ldata.rl_tx_desc[i].tx_dmamap); } bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag); } if (sc->rl_ldata.rl_rx_mtag) { for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { if (sc->rl_ldata.rl_rx_desc[i].rx_dmamap) bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_desc[i].rx_dmamap); } if (sc->rl_ldata.rl_rx_sparemap) bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_sparemap); bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag); } if (sc->rl_ldata.rl_jrx_mtag) { for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { if (sc->rl_ldata.rl_jrx_desc[i].rx_dmamap) bus_dmamap_destroy(sc->rl_ldata.rl_jrx_mtag, sc->rl_ldata.rl_jrx_desc[i].rx_dmamap); } if (sc->rl_ldata.rl_jrx_sparemap) bus_dmamap_destroy(sc->rl_ldata.rl_jrx_mtag, sc->rl_ldata.rl_jrx_sparemap); bus_dma_tag_destroy(sc->rl_ldata.rl_jrx_mtag); } /* Unload and free the stats buffer and map */ if (sc->rl_ldata.rl_stag) { if (sc->rl_ldata.rl_stats_addr) bus_dmamap_unload(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap); if (sc->rl_ldata.rl_stats) bus_dmamem_free(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_stats, sc->rl_ldata.rl_smap); bus_dma_tag_destroy(sc->rl_ldata.rl_stag); } if (sc->rl_parent_tag) bus_dma_tag_destroy(sc->rl_parent_tag); mtx_destroy(&sc->rl_mtx); return (0); } static __inline void re_discard_rxbuf(struct rl_softc *sc, int idx) { struct rl_desc *desc; struct rl_rxdesc *rxd; uint32_t cmdstat; if (sc->rl_ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0) rxd = &sc->rl_ldata.rl_jrx_desc[idx]; else rxd = &sc->rl_ldata.rl_rx_desc[idx]; desc = &sc->rl_ldata.rl_rx_list[idx]; desc->rl_vlanctl = 0; cmdstat = rxd->rx_size; if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1) cmdstat |= RL_RDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN); } static int re_newbuf(struct rl_softc *sc, int idx) { struct mbuf *m; struct rl_rxdesc *rxd; bus_dma_segment_t segs[1]; bus_dmamap_t map; 
struct rl_desc *desc; uint32_t cmdstat; int error, nsegs; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; #ifdef RE_FIXUP_RX /* * This is part of an evil trick to deal with non-x86 platforms. * The RealTek chip requires RX buffers to be aligned on 64-bit * boundaries, but that will hose non-x86 machines. To get around * this, we leave some empty space at the start of each buffer * and for non-x86 hosts, we copy the buffer back six bytes * to achieve word alignment. This is slightly more efficient * than allocating a new buffer, copying the contents, and * discarding the old buffer. */ m_adj(m, RE_ETHER_ALIGN); #endif error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs)); rxd = &sc->rl_ldata.rl_rx_desc[idx]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap); } rxd->rx_m = m; map = rxd->rx_dmamap; rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap; rxd->rx_size = segs[0].ds_len; sc->rl_ldata.rl_rx_sparemap = map; bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); desc = &sc->rl_ldata.rl_rx_list[idx]; desc->rl_vlanctl = 0; desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr)); desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr)); cmdstat = segs[0].ds_len; if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1) cmdstat |= RL_RDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN); return (0); } static int re_jumbo_newbuf(struct rl_softc *sc, int idx) { struct mbuf *m; struct rl_rxdesc *rxd; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct rl_desc *desc; uint32_t cmdstat; int error, nsegs; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MJUM9BYTES; #ifdef RE_FIXUP_RX m_adj(m, RE_ETHER_ALIGN); #endif error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_jrx_mtag, sc->rl_ldata.rl_jrx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs)); rxd = &sc->rl_ldata.rl_jrx_desc[idx]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap); } rxd->rx_m = m; map = rxd->rx_dmamap; rxd->rx_dmamap = sc->rl_ldata.rl_jrx_sparemap; rxd->rx_size = segs[0].ds_len; sc->rl_ldata.rl_jrx_sparemap = map; bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); desc = &sc->rl_ldata.rl_rx_list[idx]; desc->rl_vlanctl = 0; desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr)); desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr)); cmdstat = segs[0].ds_len; if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1) cmdstat |= RL_RDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN); return (0); } #ifdef RE_FIXUP_RX static __inline void re_fixup_rx(struct mbuf *m) { int i; uint16_t *src, *dst; src = mtod(m, uint16_t *); dst = src - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *src; for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++) *dst++ = *src++; m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN; } #endif static int re_tx_list_init(struct rl_softc *sc) { struct rl_desc *desc; int i; RL_LOCK_ASSERT(sc); 
bzero(sc->rl_ldata.rl_tx_list, sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc)); for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) sc->rl_ldata.rl_tx_desc[i].tx_m = NULL; #ifdef DEV_NETMAP re_netmap_tx_init(sc); #endif /* DEV_NETMAP */ /* Set EOR. */ desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1]; desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR); bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); sc->rl_ldata.rl_tx_prodidx = 0; sc->rl_ldata.rl_tx_considx = 0; sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt; return (0); } static int re_rx_list_init(struct rl_softc *sc) { int error, i; bzero(sc->rl_ldata.rl_rx_list, sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc)); for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { sc->rl_ldata.rl_rx_desc[i].rx_m = NULL; if ((error = re_newbuf(sc, i)) != 0) return (error); } #ifdef DEV_NETMAP re_netmap_rx_init(sc); #endif /* DEV_NETMAP */ /* Flush the RX descriptors */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); sc->rl_ldata.rl_rx_prodidx = 0; sc->rl_head = sc->rl_tail = NULL; sc->rl_int_rx_act = 0; return (0); } static int re_jrx_list_init(struct rl_softc *sc) { int error, i; bzero(sc->rl_ldata.rl_rx_list, sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc)); for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { sc->rl_ldata.rl_jrx_desc[i].rx_m = NULL; if ((error = re_jumbo_newbuf(sc, i)) != 0) return (error); } bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); sc->rl_ldata.rl_rx_prodidx = 0; sc->rl_head = sc->rl_tail = NULL; sc->rl_int_rx_act = 0; return (0); } /* * RX handler for C+ and 8169. For the gigE chips, we support * the reception of jumbo frames that have been fragmented * across multiple 2K mbuf cluster buffers. */ static int re_rxeof(struct rl_softc *sc, int *rx_npktsp) { struct mbuf *m; struct ifnet *ifp; int i, rxerr, total_len; struct rl_desc *cur_rx; u_int32_t rxstat, rxvlan; int jumbo, maxpkt = 16, rx_npkts = 0; RL_LOCK_ASSERT(sc); ifp = sc->rl_ifp; #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, 0, &rx_npkts)) return 0; #endif /* DEV_NETMAP */ if (ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0) jumbo = 1; else jumbo = 0; /* Invalidate the descriptor memory */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0; i = RL_RX_DESC_NXT(sc, i)) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; cur_rx = &sc->rl_ldata.rl_rx_list[i]; rxstat = le32toh(cur_rx->rl_cmdstat); if ((rxstat & RL_RDESC_STAT_OWN) != 0) break; total_len = rxstat & sc->rl_rxlenmask; rxvlan = le32toh(cur_rx->rl_vlanctl); if (jumbo != 0) m = sc->rl_ldata.rl_jrx_desc[i].rx_m; else m = sc->rl_ldata.rl_rx_desc[i].rx_m; if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 && (rxstat & (RL_RDESC_STAT_SOF | RL_RDESC_STAT_EOF)) != (RL_RDESC_STAT_SOF | RL_RDESC_STAT_EOF)) { /* * RTL8168C or later controllers do not * support multi-fragment packet. */ re_discard_rxbuf(sc, i); continue; } else if ((rxstat & RL_RDESC_STAT_EOF) == 0) { if (re_newbuf(sc, i) != 0) { /* * If this is part of a multi-fragment packet, * discard all the pieces. 
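 *
 * sc->rl_head and sc->rl_tail carry the partially reassembled
 * fragment chain across ring slots, so an allocation failure
 * mid-frame must free the whole chain; the descriptor itself is
 * recycled via re_discard_rxbuf() so the ring stays populated.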
*/ if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } re_discard_rxbuf(sc, i); continue; } m->m_len = RE_RX_DESC_BUFLEN; if (sc->rl_head == NULL) sc->rl_head = sc->rl_tail = m; else { m->m_flags &= ~M_PKTHDR; sc->rl_tail->m_next = m; sc->rl_tail = m; } continue; } /* * NOTE: for the 8139C+, the frame length field * is always 12 bits in size, but for the gigE chips, * it is 13 bits (since the max RX frame length is 16K). * Unfortunately, all 32 bits in the status word * were already used, so to make room for the extra * length bit, RealTek took out the 'frame alignment * error' bit and shifted the other status bits * over one slot. The OWN, EOR, FS and LS bits are * still in the same places. We have already extracted * the frame length and checked the OWN bit, so rather * than using an alternate bit mapping, we shift the * status bits one space to the right so we can evaluate * them using the 8169 status as though it was in the * same format as that of the 8139C+. */ if (sc->rl_type == RL_8169) rxstat >>= 1; /* * if total_len > 2^13-1, both _RXERRSUM and _GIANT will be * set, but if CRC is clear, it will still be a valid frame. */ if ((rxstat & RL_RDESC_STAT_RXERRSUM) != 0) { rxerr = 1; if ((sc->rl_flags & RL_FLAG_JUMBOV2) == 0 && total_len > 8191 && (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT) rxerr = 0; if (rxerr != 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); /* * If this is part of a multi-fragment packet, * discard all the pieces. */ if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } re_discard_rxbuf(sc, i); continue; } } /* * If allocating a replacement mbuf fails, * reload the current one. */ if (jumbo != 0) rxerr = re_jumbo_newbuf(sc, i); else rxerr = re_newbuf(sc, i); if (rxerr != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } re_discard_rxbuf(sc, i); continue; } if (sc->rl_head != NULL) { if (jumbo != 0) m->m_len = total_len; else { m->m_len = total_len % RE_RX_DESC_BUFLEN; if (m->m_len == 0) m->m_len = RE_RX_DESC_BUFLEN; } /* * Special case: if there's 4 bytes or less * in this buffer, the mbuf can be discarded: * the last 4 bytes is the CRC, which we don't * care about anyway. 
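 *
 * Worked example: if the final fragment of a chained frame holds
 * just one byte, that byte belongs to the 4-byte CRC; the mbuf is
 * freed and the remaining 4 - 1 = 3 CRC bytes are trimmed from the
 * previous tail, so exactly ETHER_CRC_LEN bytes are removed either
 * way.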
*/ if (m->m_len <= ETHER_CRC_LEN) { sc->rl_tail->m_len -= (ETHER_CRC_LEN - m->m_len); m_freem(m); } else { m->m_len -= ETHER_CRC_LEN; m->m_flags &= ~M_PKTHDR; sc->rl_tail->m_next = m; } m = sc->rl_head; sc->rl_head = sc->rl_tail = NULL; m->m_pkthdr.len = total_len - ETHER_CRC_LEN; } else m->m_pkthdr.len = m->m_len = (total_len - ETHER_CRC_LEN); #ifdef RE_FIXUP_RX re_fixup_rx(m); #endif if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); m->m_pkthdr.rcvif = ifp; /* Do RX checksumming if enabled */ if (ifp->if_capenable & IFCAP_RXCSUM) { if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) { /* Check IP header checksum */ if (rxstat & RL_RDESC_STAT_PROTOID) m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (!(rxstat & RL_RDESC_STAT_IPSUMBAD)) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; /* Check TCP/UDP checksum */ if ((RL_TCPPKT(rxstat) && !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) || (RL_UDPPKT(rxstat) && !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } } else { /* * RTL8168C/RTL816CP/RTL8111C/RTL8111CP */ if ((rxstat & RL_RDESC_STAT_PROTOID) && (rxvlan & RL_RDESC_IPV4)) m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (!(rxstat & RL_RDESC_STAT_IPSUMBAD) && (rxvlan & RL_RDESC_IPV4)) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; if (((rxstat & RL_RDESC_STAT_TCP) && !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) || ((rxstat & RL_RDESC_STAT_UDP) && !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } } } maxpkt--; if (rxvlan & RL_RDESC_VLANCTL_TAG) { m->m_pkthdr.ether_vtag = bswap16((rxvlan & RL_RDESC_VLANCTL_DATA)); m->m_flags |= M_VLANTAG; } RL_UNLOCK(sc); (*ifp->if_input)(ifp, m); RL_LOCK(sc); rx_npkts++; } /* Flush the RX DMA ring */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); sc->rl_ldata.rl_rx_prodidx = i; if (rx_npktsp != NULL) *rx_npktsp = rx_npkts; if (maxpkt) return (EAGAIN); return (0); } static void re_txeof(struct rl_softc *sc) { struct ifnet *ifp; struct rl_txdesc *txd; u_int32_t txstat; int cons; cons = sc->rl_ldata.rl_tx_considx; if (cons == sc->rl_ldata.rl_tx_prodidx) return; ifp = sc->rl_ifp; #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, 0)) return; #endif /* DEV_NETMAP */ /* Invalidate the TX descriptor list */ bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (; cons != sc->rl_ldata.rl_tx_prodidx; cons = RL_TX_DESC_NXT(sc, cons)) { txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat); if (txstat & RL_TDESC_STAT_OWN) break; /* * We only stash mbufs in the last descriptor * in a fragment chain, which also happens to * be the only place where the TX status bits * are valid. 
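 *
 * Hence the RL_TDESC_CMD_EOF test below: only the last descriptor
 * of a chain owns an mbuf to free and carries meaningful error and
 * collision status; intermediate descriptors merely return their
 * slot to rl_tx_free.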
*/ if (txstat & RL_TDESC_CMD_EOF) { txd = &sc->rl_ldata.rl_tx_desc[cons]; bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap); KASSERT(txd->tx_m != NULL, ("%s: freeing NULL mbufs!", __func__)); m_freem(txd->tx_m); txd->tx_m = NULL; if (txstat & (RL_TDESC_STAT_EXCESSCOL| RL_TDESC_STAT_COLCNT)) if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1); if (txstat & RL_TDESC_STAT_TXERRSUM) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); else if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); } sc->rl_ldata.rl_tx_free++; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->rl_ldata.rl_tx_considx = cons; /* No changes made to the TX ring, so no flush needed */ if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) { #ifdef RE_TX_MODERATION /* * If not all descriptors have been reaped yet, reload * the timer so that we will eventually get another * interrupt that will cause us to re-enter this routine. * This is done in case the transmitter has gone idle. */ CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif } else sc->rl_watchdog_timer = 0; } static void re_tick(void *xsc) { struct rl_softc *sc; struct mii_data *mii; sc = xsc; RL_LOCK_ASSERT(sc); mii = device_get_softc(sc->rl_miibus); mii_tick(mii); if ((sc->rl_flags & RL_FLAG_LINK) == 0) re_miibus_statchg(sc->rl_dev); /* * Reclaim transmitted frames here. Technically it is not * necessary to do here but it ensures periodic reclamation * regardless of Tx completion interrupt which seems to be * lost on PCIe based controllers under certain situations. */ re_txeof(sc); re_watchdog(sc); callout_reset(&sc->rl_stat_callout, hz, re_tick, sc); } #ifdef DEVICE_POLLING static int re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct rl_softc *sc = ifp->if_softc; int rx_npkts = 0; RL_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rx_npkts = re_poll_locked(ifp, cmd, count); RL_UNLOCK(sc); return (rx_npkts); } static int re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct rl_softc *sc = ifp->if_softc; int rx_npkts; RL_LOCK_ASSERT(sc); sc->rxcycles = count; re_rxeof(sc, &rx_npkts); re_txeof(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */ u_int16_t status; status = CSR_READ_2(sc, RL_ISR); if (status == 0xffff) return (rx_npkts); if (status) CSR_WRITE_2(sc, RL_ISR, status); if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) && (sc->rl_flags & RL_FLAG_PCIE)) CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); /* * XXX check behaviour on receiver stalls. 
*/ if (status & RL_ISR_SYSTEM_ERR) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } } return (rx_npkts); } #endif /* DEVICE_POLLING */ static int re_intr(void *arg) { struct rl_softc *sc; uint16_t status; sc = arg; status = CSR_READ_2(sc, RL_ISR); if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0) return (FILTER_STRAY); CSR_WRITE_2(sc, RL_IMR, 0); taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask); return (FILTER_HANDLED); } static void re_int_task(void *arg, int npending) { struct rl_softc *sc; struct ifnet *ifp; u_int16_t status; int rval = 0; sc = arg; ifp = sc->rl_ifp; RL_LOCK(sc); status = CSR_READ_2(sc, RL_ISR); CSR_WRITE_2(sc, RL_ISR, status); if (sc->suspended || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { RL_UNLOCK(sc); return; } #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) { RL_UNLOCK(sc); return; } #endif if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW)) rval = re_rxeof(sc, NULL); /* * Some chips will ignore a second TX request issued * while an existing transmission is in progress. If * the transmitter goes idle but there are still * packets waiting to be sent, we need to restart the * channel here to flush them out. This only seems to * be required with the PCIe devices. */ if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) && (sc->rl_flags & RL_FLAG_PCIE)) CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); if (status & ( #ifdef RE_TX_MODERATION RL_ISR_TIMEOUT_EXPIRED| #else RL_ISR_TX_OK| #endif RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL)) re_txeof(sc); if (status & RL_ISR_SYSTEM_ERR) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); RL_UNLOCK(sc); if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) { taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask); return; } CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS); } static void re_intr_msi(void *xsc) { struct rl_softc *sc; struct ifnet *ifp; uint16_t intrs, status; sc = xsc; RL_LOCK(sc); ifp = sc->rl_ifp; #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) { RL_UNLOCK(sc); return; } #endif /* Disable interrupts. */ CSR_WRITE_2(sc, RL_IMR, 0); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { RL_UNLOCK(sc); return; } intrs = RL_INTRS_CPLUS; status = CSR_READ_2(sc, RL_ISR); CSR_WRITE_2(sc, RL_ISR, status); if (sc->rl_int_rx_act > 0) { intrs &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN); status &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN); } if (status & (RL_ISR_TIMEOUT_EXPIRED | RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN)) { re_rxeof(sc, NULL); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if (sc->rl_int_rx_mod != 0 && (status & (RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN)) != 0) { /* Rearm one-shot timer. */ CSR_WRITE_4(sc, RL_TIMERCNT, 1); intrs &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN); sc->rl_int_rx_act = 1; } else { intrs |= RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN; sc->rl_int_rx_act = 0; } } } /* * Some chips will ignore a second TX request issued * while an existing transmission is in progress. If * the transmitter goes idle but there are still * packets waiting to be sent, we need to restart the * channel here to flush them out. This only seems to * be required with the PCIe devices. 
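 *
 * RL_TXSTART_START is the same doorbell re_start_locked() rings
 * when queueing new work, so poking it again after a TX-OK or
 * descriptor-unavailable event restarts an idle queue and appears
 * harmless when the queue is already running.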
*/ if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) && (sc->rl_flags & RL_FLAG_PCIE)) CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); if (status & (RL_ISR_TX_OK | RL_ISR_TX_ERR | RL_ISR_TX_DESC_UNAVAIL)) re_txeof(sc); if (status & RL_ISR_SYSTEM_ERR) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); CSR_WRITE_2(sc, RL_IMR, intrs); } RL_UNLOCK(sc); } static int re_encap(struct rl_softc *sc, struct mbuf **m_head) { struct rl_txdesc *txd, *txd_last; bus_dma_segment_t segs[RL_NTXSEGS]; bus_dmamap_t map; struct mbuf *m_new; struct rl_desc *desc; int nsegs, prod; int i, error, ei, si; int padlen; uint32_t cmdstat, csum_flags, vlanctl; RL_LOCK_ASSERT(sc); M_ASSERTPKTHDR((*m_head)); /* * With some of the RealTek chips, using the checksum offload * support in conjunction with the autopadding feature results * in the transmission of corrupt frames. For example, if we * need to send a really small IP fragment that's less than 60 * bytes in size, and IP header checksumming is enabled, the * resulting ethernet frame that appears on the wire will * have garbled payload. To work around this, if TX IP checksum * offload is enabled, we always manually pad short frames out * to the minimum ethernet frame size. */ if ((sc->rl_flags & RL_FLAG_AUTOPAD) == 0 && (*m_head)->m_pkthdr.len < RL_IP4CSUMTX_PADLEN && ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) != 0) { padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len; if (M_WRITABLE(*m_head) == 0) { /* Get a writable copy. */ m_new = m_dup(*m_head, M_NOWAIT); m_freem(*m_head); if (m_new == NULL) { *m_head = NULL; return (ENOBUFS); } *m_head = m_new; } if ((*m_head)->m_next != NULL || M_TRAILINGSPACE(*m_head) < padlen) { m_new = m_defrag(*m_head, M_NOWAIT); if (m_new == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } } else m_new = *m_head; /* * Manually pad short frames, and zero the pad space * to avoid leaking data. */ bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen); m_new->m_pkthdr.len += padlen; m_new->m_len = m_new->m_pkthdr.len; *m_head = m_new; } prod = sc->rl_ldata.rl_tx_prodidx; txd = &sc->rl_ldata.rl_tx_desc[prod]; error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { m_new = m_collapse(*m_head, M_NOWAIT, RL_NTXSEGS); if (m_new == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m_new; error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); if (nsegs == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } /* Check for number of available descriptors. */ if (sc->rl_ldata.rl_tx_free - nsegs <= 1) { bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap); return (ENOBUFS); } bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, BUS_DMASYNC_PREWRITE); /* * Set up checksum offload. Note: checksum offload bits must * appear in all descriptors of a multi-descriptor transmit * attempt. This is according to testing done with an 8169 * chip. This is a requirement. 
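 * For example, with illustrative segment lengths, a three-segment
 * frame ends up with csum_flags replicated in every rl_cmdstat:
 *
 *	desc[0]: SOF | len0 | csum_flags  (OWN set last, handing off the chain)
 *	desc[1]: OWN | len1 | csum_flags
 *	desc[2]: OWN | EOF | len2 | csum_flags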
*/ vlanctl = 0; csum_flags = 0; if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0) { if ((sc->rl_flags & RL_FLAG_DESCV2) != 0) { csum_flags |= RL_TDESC_CMD_LGSEND; vlanctl |= ((uint32_t)(*m_head)->m_pkthdr.tso_segsz << RL_TDESC_CMD_MSSVALV2_SHIFT); } else { csum_flags |= RL_TDESC_CMD_LGSEND | ((uint32_t)(*m_head)->m_pkthdr.tso_segsz << RL_TDESC_CMD_MSSVAL_SHIFT); } } else { /* * Unconditionally enable IP checksum if TCP or UDP * checksum is required. Otherwise, the TCP/UDP checksum * has no effect. */ if (((*m_head)->m_pkthdr.csum_flags & RE_CSUM_FEATURES) != 0) { if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) { csum_flags |= RL_TDESC_CMD_IPCSUM; if (((*m_head)->m_pkthdr.csum_flags & CSUM_TCP) != 0) csum_flags |= RL_TDESC_CMD_TCPCSUM; if (((*m_head)->m_pkthdr.csum_flags & CSUM_UDP) != 0) csum_flags |= RL_TDESC_CMD_UDPCSUM; } else { vlanctl |= RL_TDESC_CMD_IPCSUMV2; if (((*m_head)->m_pkthdr.csum_flags & CSUM_TCP) != 0) vlanctl |= RL_TDESC_CMD_TCPCSUMV2; if (((*m_head)->m_pkthdr.csum_flags & CSUM_UDP) != 0) vlanctl |= RL_TDESC_CMD_UDPCSUMV2; } } } /* * Set up hardware VLAN tagging. Note: vlan tag info must * appear in all descriptors of a multi-descriptor * transmission attempt. */ if ((*m_head)->m_flags & M_VLANTAG) vlanctl |= bswap16((*m_head)->m_pkthdr.ether_vtag) | RL_TDESC_VLANCTL_TAG; si = prod; for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) { desc = &sc->rl_ldata.rl_tx_list[prod]; desc->rl_vlanctl = htole32(vlanctl); desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr)); desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr)); cmdstat = segs[i].ds_len; if (i != 0) cmdstat |= RL_TDESC_CMD_OWN; if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1) cmdstat |= RL_TDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | csum_flags); sc->rl_ldata.rl_tx_free--; } /* Update producer index. */ sc->rl_ldata.rl_tx_prodidx = prod; /* Set EOF on the last descriptor. */ ei = RL_TX_DESC_PRV(sc, prod); desc = &sc->rl_ldata.rl_tx_list[ei]; desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF); desc = &sc->rl_ldata.rl_tx_list[si]; /* Set SOF and transfer ownership of packet to the chip. */ desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF); /* * Ensure that the map for this transmission * is placed at the array index of the last descriptor * in this chain. (Swap last and first dmamaps.) */ txd_last = &sc->rl_ldata.rl_tx_desc[ei]; map = txd->tx_dmamap; txd->tx_dmamap = txd_last->tx_dmamap; txd_last->tx_dmamap = map; txd_last->tx_m = *m_head; return (0); } static void re_start(struct ifnet *ifp) { struct rl_softc *sc; sc = ifp->if_softc; RL_LOCK(sc); re_start_locked(ifp); RL_UNLOCK(sc); } /* * Main transmit routine for C+ and gigE NICs. */ static void re_start_locked(struct ifnet *ifp) { struct rl_softc *sc; struct mbuf *m_head; int queued; sc = ifp->if_softc; #ifdef DEV_NETMAP /* XXX is this necessary?
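 * (When netmap owns the ring, the branch below just kicks the chip
 * and returns, bypassing the normal dequeue loop.)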
*/ if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_kring *kring = &NA(ifp)->tx_rings[0]; if (sc->rl_ldata.rl_tx_prodidx != kring->nr_hwcur) { /* kick the tx unit */ CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); #ifdef RE_TX_MODERATION CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif sc->rl_watchdog_timer = 5; } return; } #endif /* DEV_NETMAP */ + if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) return; for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->rl_ldata.rl_tx_free > 1;) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (re_encap(sc, &m_head) != 0) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } /* * If there's a BPF listener, bounce a copy of this frame * to him. */ ETHER_BPF_MTAP(ifp, m_head); queued++; } if (queued == 0) { #ifdef RE_TX_MODERATION if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif return; } /* Flush the TX descriptors */ bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); #ifdef RE_TX_MODERATION /* * Use the countdown timer for interrupt moderation. * 'TX done' interrupts are disabled. Instead, we reset the * countdown timer, which will begin counting until it hits * the value in the TIMERINT register, and then trigger an * interrupt. Each time we write to the TIMERCNT register, * the timer count is reset to 0. */ CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif /* * Set a timeout in case the chip goes out to lunch. */ sc->rl_watchdog_timer = 5; } static void re_set_jumbo(struct rl_softc *sc, int jumbo) { if (sc->rl_hwrev->rl_rev == RL_HWREV_8168E_VL) { pci_set_max_read_req(sc->rl_dev, 4096); return; } CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG); if (jumbo != 0) { CSR_WRITE_1(sc, sc->rl_cfg3, CSR_READ_1(sc, sc->rl_cfg3) | RL_CFG3_JUMBO_EN0); switch (sc->rl_hwrev->rl_rev) { case RL_HWREV_8168DP: break; case RL_HWREV_8168E: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) | 0x01); break; default: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) | RL_CFG4_JUMBO_EN1); } } else { CSR_WRITE_1(sc, sc->rl_cfg3, CSR_READ_1(sc, sc->rl_cfg3) & ~RL_CFG3_JUMBO_EN0); switch (sc->rl_hwrev->rl_rev) { case RL_HWREV_8168DP: break; case RL_HWREV_8168E: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) & ~0x01); break; default: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) & ~RL_CFG4_JUMBO_EN1); } } CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); switch (sc->rl_hwrev->rl_rev) { case RL_HWREV_8168DP: pci_set_max_read_req(sc->rl_dev, 4096); break; default: if (jumbo != 0) pci_set_max_read_req(sc->rl_dev, 512); else pci_set_max_read_req(sc->rl_dev, 4096); } } static void re_init(void *xsc) { struct rl_softc *sc = xsc; RL_LOCK(sc); re_init_locked(sc); RL_UNLOCK(sc); } static void re_init_locked(struct rl_softc *sc) { struct ifnet *ifp = sc->rl_ifp; struct mii_data *mii; uint32_t reg; uint16_t cfg; union { uint32_t align_dummy; u_char eaddr[ETHER_ADDR_LEN]; } eaddr; RL_LOCK_ASSERT(sc); mii = device_get_softc(sc->rl_miibus); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) return; /* * Cancel pending I/O and free all RX/TX buffers. */ re_stop(sc); /* Put controller into known state. */ re_reset(sc); /* * For C+ mode, initialize the RX descriptors and mbufs. 
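 * Jumbo-capable chips (RL_FLAG_JUMBOV2) use the separate jumbo RX
 * ring, set up by re_jrx_list_init(), whenever the MTU exceeds RL_MTU.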
*/ if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { if (ifp->if_mtu > RL_MTU) { if (re_jrx_list_init(sc) != 0) { device_printf(sc->rl_dev, "no memory for jumbo RX buffers\n"); re_stop(sc); return; } /* Disable checksum offloading for jumbo frames. */ ifp->if_capenable &= ~(IFCAP_HWCSUM | IFCAP_TSO4); ifp->if_hwassist &= ~(RE_CSUM_FEATURES | CSUM_TSO); } else { if (re_rx_list_init(sc) != 0) { device_printf(sc->rl_dev, "no memory for RX buffers\n"); re_stop(sc); return; } } re_set_jumbo(sc, ifp->if_mtu > RL_MTU); } else { if (re_rx_list_init(sc) != 0) { device_printf(sc->rl_dev, "no memory for RX buffers\n"); re_stop(sc); return; } if ((sc->rl_flags & RL_FLAG_PCIE) != 0 && pci_get_device(sc->rl_dev) != RT_DEVICEID_8101E) { if (ifp->if_mtu > RL_MTU) pci_set_max_read_req(sc->rl_dev, 512); else pci_set_max_read_req(sc->rl_dev, 4096); } } re_tx_list_init(sc); /* * Enable C+ RX and TX mode, as well as VLAN stripping and * RX checksum offload. We must configure the C+ register * before all others. */ cfg = RL_CPLUSCMD_PCI_MRW; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) cfg |= RL_CPLUSCMD_RXCSUM_ENB; if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) cfg |= RL_CPLUSCMD_VLANSTRIP; if ((sc->rl_flags & RL_FLAG_MACSTAT) != 0) { cfg |= RL_CPLUSCMD_MACSTAT_DIS; /* XXX magic. */ cfg |= 0x0001; } else cfg |= RL_CPLUSCMD_RXENB | RL_CPLUSCMD_TXENB; CSR_WRITE_2(sc, RL_CPLUS_CMD, cfg); if (sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SC || sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SCE) { reg = 0x000fff00; if ((CSR_READ_1(sc, sc->rl_cfg2) & RL_CFG2_PCI66MHZ) != 0) reg |= 0x000000ff; if (sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SCE) reg |= 0x00f00000; CSR_WRITE_4(sc, 0x7c, reg); /* Disable interrupt mitigation. */ CSR_WRITE_2(sc, 0xe2, 0); } /* * Disable TSO if interface MTU size is greater than MSS * allowed in controller. */ if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } /* * Init our MAC address. Even though the chipset * documentation doesn't mention it, we need to enter "Config * register write enable" mode to modify the ID registers. */ /* Copy MAC address on stack to align. */ bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG); CSR_WRITE_4(sc, RL_IDR0, htole32(*(u_int32_t *)(&eaddr.eaddr[0]))); CSR_WRITE_4(sc, RL_IDR4, htole32(*(u_int32_t *)(&eaddr.eaddr[4]))); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); /* * Load the addresses of the RX and TX lists into the chip. */ CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI, RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr)); CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO, RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr)); CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI, RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr)); CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO, RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr)); if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) { /* Disable RXDV gate. */ CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) & ~0x00080000); } /* * Enable transmit and receive for pre-RTL8168G controllers. * RX/TX MACs should be enabled before RX/TX configuration. */ if ((sc->rl_flags & RL_FLAG_8168G_PLUS) == 0) CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB | RL_CMD_RX_ENB); /* * Set the initial TX configuration. 
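 * In test mode the transmitter is put into loopback (RL_LOOPTEST_ON
 * or RL_LOOPTEST_ON_CPLUS), so test frames never reach the wire.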
*/ if (sc->rl_testmode) { if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG|RL_LOOPTEST_ON); else CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS); } else CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG); CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16); /* * Set the initial RX configuration. */ re_set_rxmode(sc); /* Configure interrupt moderation. */ if (sc->rl_type == RL_8169) { /* Magic from vendor. */ CSR_WRITE_2(sc, RL_INTRMOD, 0x5100); } /* * Enable transmit and receive for RTL8168G and later controllers. * RX/TX MACs should be enabled after RX/TX configuration. */ if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB | RL_CMD_RX_ENB); #ifdef DEVICE_POLLING /* * Disable interrupts if we are polling. */ if (ifp->if_capenable & IFCAP_POLLING) CSR_WRITE_2(sc, RL_IMR, 0); else /* otherwise ... */ #endif /* * Enable interrupts. */ if (sc->rl_testmode) CSR_WRITE_2(sc, RL_IMR, 0); else CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS); CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS); /* Set initial TX threshold */ sc->rl_txthresh = RL_TX_THRESH_INIT; /* Start RX/TX process. */ CSR_WRITE_4(sc, RL_MISSEDPKT, 0); /* * Initialize the timer interrupt register so that * a timer interrupt will be generated once the timer * reaches a certain number of ticks. The timer is * reloaded on each transmit. */ #ifdef RE_TX_MODERATION /* * Use the timer interrupt register for TX interrupt * moderation, which dramatically improves the TX frame rate. */ if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800); else CSR_WRITE_4(sc, RL_TIMERINT, 0x400); #else /* * Use the timer interrupt register for RX interrupt * moderation. */ if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0 && intr_filter == 0) { if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TIMERINT_8169, RL_USECS(sc->rl_int_rx_mod)); } else { if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TIMERINT_8169, RL_USECS(0)); } #endif /* * For 8169 gigE NICs, set the max allowed RX packet * size so we can receive jumbo frames. */ if (sc->rl_type == RL_8169) { if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { /* * For controllers that use the new jumbo frame scheme, * set the maximum jumbo frame size according to the * controller revision. */ if (ifp->if_mtu > RL_MTU) CSR_WRITE_2(sc, RL_MAXRXPKTLEN, sc->rl_hwrev->rl_max_mtu + ETHER_VLAN_ENCAP_LEN + ETHER_HDR_LEN + ETHER_CRC_LEN); else CSR_WRITE_2(sc, RL_MAXRXPKTLEN, RE_RX_DESC_BUFLEN); } else if ((sc->rl_flags & RL_FLAG_PCIE) != 0 && sc->rl_hwrev->rl_max_mtu == RL_MTU) { /* RTL810x has no jumbo frame support. */ CSR_WRITE_2(sc, RL_MAXRXPKTLEN, RE_RX_DESC_BUFLEN); } else CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383); } if (sc->rl_testmode) return; CSR_WRITE_1(sc, sc->rl_cfg1, CSR_READ_1(sc, sc->rl_cfg1) | RL_CFG1_DRVLOAD); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->rl_flags &= ~RL_FLAG_LINK; mii_mediachg(mii); sc->rl_watchdog_timer = 0; callout_reset(&sc->rl_stat_callout, hz, re_tick, sc); } /* * Set media options. */ static int re_ifmedia_upd(struct ifnet *ifp) { struct rl_softc *sc; struct mii_data *mii; int error; sc = ifp->if_softc; mii = device_get_softc(sc->rl_miibus); RL_LOCK(sc); error = mii_mediachg(mii); RL_UNLOCK(sc); return (error); } /* * Report current media status.
*/ static void re_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct rl_softc *sc; struct mii_data *mii; sc = ifp->if_softc; mii = device_get_softc(sc->rl_miibus); RL_LOCK(sc); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; RL_UNLOCK(sc); } static int re_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct rl_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; struct mii_data *mii; int error = 0; switch (command) { case SIOCSIFMTU: if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > sc->rl_hwrev->rl_max_mtu || ((sc->rl_flags & RL_FLAG_FASTETHER) != 0 && ifr->ifr_mtu > RL_MTU)) { error = EINVAL; break; } RL_LOCK(sc); if (ifp->if_mtu != ifr->ifr_mtu) { ifp->if_mtu = ifr->ifr_mtu; if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 && (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) { ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO); ifp->if_hwassist &= ~CSUM_TSO; } VLAN_CAPABILITIES(ifp); } RL_UNLOCK(sc); break; case SIOCSIFFLAGS: RL_LOCK(sc); if ((ifp->if_flags & IFF_UP) != 0) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if (((ifp->if_flags ^ sc->rl_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) re_set_rxmode(sc); } else re_init_locked(sc); } else { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) re_stop(sc); } sc->rl_if_flags = ifp->if_flags; RL_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: RL_LOCK(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) re_set_rxmode(sc); RL_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: mii = device_get_softc(sc->rl_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; case SIOCSIFCAP: { int mask, reinit; mask = ifr->ifr_reqcap ^ ifp->if_capenable; reinit = 0; #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(re_poll, ifp); if (error) return (error); RL_LOCK(sc); /* Disable interrupts */ CSR_WRITE_2(sc, RL_IMR, 0x0000); ifp->if_capenable |= IFCAP_POLLING; RL_UNLOCK(sc); } else { error = ether_poll_deregister(ifp); /* Enable interrupts. */ RL_LOCK(sc); CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS); ifp->if_capenable &= ~IFCAP_POLLING; RL_UNLOCK(sc); } } #endif /* DEVICE_POLLING */ RL_LOCK(sc); if ((mask & IFCAP_TXCSUM) != 0 && (ifp->if_capabilities & IFCAP_TXCSUM) != 0) { ifp->if_capenable ^= IFCAP_TXCSUM; if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist |= RE_CSUM_FEATURES; else ifp->if_hwassist &= ~RE_CSUM_FEATURES; reinit = 1; } if ((mask & IFCAP_RXCSUM) != 0 && (ifp->if_capabilities & IFCAP_RXCSUM) != 0) { ifp->if_capenable ^= IFCAP_RXCSUM; reinit = 1; } if ((mask & IFCAP_TSO4) != 0 && (ifp->if_capabilities & IFCAP_TSO4) != 0) { ifp->if_capenable ^= IFCAP_TSO4; if ((IFCAP_TSO4 & ifp->if_capenable) != 0) ifp->if_hwassist |= CSUM_TSO; else ifp->if_hwassist &= ~CSUM_TSO; if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } } if ((mask & IFCAP_VLAN_HWTSO) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; /* TSO over VLAN requires VLAN hardware tagging. 
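 * Clearing IFCAP_VLAN_HWTAGGING therefore also clears
 * IFCAP_VLAN_HWTSO below.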
*/ if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; reinit = 1; } if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 && (mask & (IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_VLAN_HWTSO)) != 0) reinit = 1; if ((mask & IFCAP_WOL) != 0 && (ifp->if_capabilities & IFCAP_WOL) != 0) { if ((mask & IFCAP_WOL_UCAST) != 0) ifp->if_capenable ^= IFCAP_WOL_UCAST; if ((mask & IFCAP_WOL_MCAST) != 0) ifp->if_capenable ^= IFCAP_WOL_MCAST; if ((mask & IFCAP_WOL_MAGIC) != 0) ifp->if_capenable ^= IFCAP_WOL_MAGIC; } if (reinit && ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } RL_UNLOCK(sc); VLAN_CAPABILITIES(ifp); } break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static void re_watchdog(struct rl_softc *sc) { struct ifnet *ifp; RL_LOCK_ASSERT(sc); if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0) return; ifp = sc->rl_ifp; re_txeof(sc); if (sc->rl_ldata.rl_tx_free == sc->rl_ldata.rl_tx_desc_cnt) { if_printf(ifp, "watchdog timeout (missed Tx interrupts) " "-- recovering\n"); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); return; } if_printf(ifp, "watchdog timeout\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); re_rxeof(sc, NULL); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void re_stop(struct rl_softc *sc) { int i; struct ifnet *ifp; struct rl_txdesc *txd; struct rl_rxdesc *rxd; RL_LOCK_ASSERT(sc); ifp = sc->rl_ifp; sc->rl_watchdog_timer = 0; callout_stop(&sc->rl_stat_callout); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /* * Disable accepting frames to put RX MAC into idle state. * Otherwise it's possible to get frames while stop command * execution is in progress and controller can DMA the frame * to already freed RX buffer during that period. */ CSR_WRITE_4(sc, RL_RXCFG, CSR_READ_4(sc, RL_RXCFG) & ~(RL_RXCFG_RX_ALLPHYS | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_MULTI | RL_RXCFG_RX_BROAD)); if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) { /* Enable RXDV gate. */ CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) | 0x00080000); } if ((sc->rl_flags & RL_FLAG_WAIT_TXPOLL) != 0) { for (i = RL_TIMEOUT; i > 0; i--) { if ((CSR_READ_1(sc, sc->rl_txstart) & RL_TXSTART_START) == 0) break; DELAY(20); } if (i == 0) device_printf(sc->rl_dev, "stopping TX poll timed out!\n"); CSR_WRITE_1(sc, RL_COMMAND, 0x00); } else if ((sc->rl_flags & RL_FLAG_CMDSTOP) != 0) { CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_STOPREQ | RL_CMD_TX_ENB | RL_CMD_RX_ENB); if ((sc->rl_flags & RL_FLAG_CMDSTOP_WAIT_TXQ) != 0) { for (i = RL_TIMEOUT; i > 0; i--) { if ((CSR_READ_4(sc, RL_TXCFG) & RL_TXCFG_QUEUE_EMPTY) != 0) break; DELAY(100); } if (i == 0) device_printf(sc->rl_dev, "stopping TXQ timed out!\n"); } } else CSR_WRITE_1(sc, RL_COMMAND, 0x00); DELAY(1000); CSR_WRITE_2(sc, RL_IMR, 0x0000); CSR_WRITE_2(sc, RL_ISR, 0xFFFF); if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } /* Free the TX list buffers. */ for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) { txd = &sc->rl_ldata.rl_tx_desc[i]; if (txd->tx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap); m_freem(txd->tx_m); txd->tx_m = NULL; } } /* Free the RX list buffers. 
*/ for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { rxd = &sc->rl_ldata.rl_rx_desc[i]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap); m_freem(rxd->rx_m); rxd->rx_m = NULL; } } if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { rxd = &sc->rl_ldata.rl_jrx_desc[i]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap); m_freem(rxd->rx_m); rxd->rx_m = NULL; } } } } /* * Device suspend routine. Stop the interface and save some PCI * settings in case the BIOS doesn't restore them properly on * resume. */ static int re_suspend(device_t dev) { struct rl_softc *sc; sc = device_get_softc(dev); RL_LOCK(sc); re_stop(sc); re_setwol(sc); sc->suspended = 1; RL_UNLOCK(sc); return (0); } /* * Device resume routine. Restore some PCI settings in case the BIOS * doesn't, re-enable busmastering, and restart the interface if * appropriate. */ static int re_resume(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); RL_LOCK(sc); ifp = sc->rl_ifp; /* Take controller out of sleep mode. */ if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) { if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80) CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) | 0x01); } /* * Clear WOL matching such that normal Rx filtering * wouldn't interfere with WOL patterns. */ re_clrwol(sc); /* reinitialize interface if necessary */ if (ifp->if_flags & IFF_UP) re_init_locked(sc); sc->suspended = 0; RL_UNLOCK(sc); return (0); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int re_shutdown(device_t dev) { struct rl_softc *sc; sc = device_get_softc(dev); RL_LOCK(sc); re_stop(sc); /* * Mark interface as down since otherwise we will panic if * interrupt comes in later on, which can happen in some * cases. */ sc->rl_ifp->if_flags &= ~IFF_UP; re_setwol(sc); RL_UNLOCK(sc); return (0); } static void re_set_linkspeed(struct rl_softc *sc) { struct mii_softc *miisc; struct mii_data *mii; int aneg, i, phyno; RL_LOCK_ASSERT(sc); mii = device_get_softc(sc->rl_miibus); mii_pollstat(mii); aneg = 0; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: return; case IFM_1000_T: aneg++; break; default: break; } } miisc = LIST_FIRST(&mii->mii_phys); phyno = miisc->mii_phy; LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); re_miibus_writereg(sc->rl_dev, phyno, MII_100T2CR, 0); re_miibus_writereg(sc->rl_dev, phyno, MII_ANAR, ANAR_TX_FD | ANAR_TX | ANAR_10_FD | ANAR_10 | ANAR_CSMA); re_miibus_writereg(sc->rl_dev, phyno, MII_BMCR, BMCR_AUTOEN | BMCR_STARTNEG); DELAY(1000); if (aneg != 0) { /* * Poll link state until re(4) gets a 10/100Mbps link. */ for (i = 0; i < MII_ANEGTICKS_GIGE; i++) { mii_pollstat(mii); if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: return; default: break; } } RL_UNLOCK(sc); pause("relnk", hz); RL_LOCK(sc); } if (i == MII_ANEGTICKS_GIGE) device_printf(sc->rl_dev, "establishing a link failed, WOL may not work!\n"); } /* * No link, force MAC to have 100Mbps, full-duplex link.
* MAC does not require reprogramming on resolved speed/duplex, * so this is just for completeness. */ mii->mii_media_status = IFM_AVALID | IFM_ACTIVE; mii->mii_media_active = IFM_ETHER | IFM_100_TX | IFM_FDX; } static void re_setwol(struct rl_softc *sc) { struct ifnet *ifp; int pmc; uint16_t pmstat; uint8_t v; RL_LOCK_ASSERT(sc); if (pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) != 0) return; ifp = sc->rl_ifp; /* Put controller into sleep mode. */ if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) { if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80) CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) & ~0x01); } if ((ifp->if_capenable & IFCAP_WOL) != 0) { + if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) { + /* Disable RXDV gate. */ + CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) & + ~0x00080000); + } re_set_rxmode(sc); if ((sc->rl_flags & RL_FLAG_WOL_MANLINK) != 0) re_set_linkspeed(sc); if ((sc->rl_flags & RL_FLAG_WOLRXENB) != 0) CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RX_ENB); } /* Enable config register write. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); /* Enable PME. */ v = CSR_READ_1(sc, sc->rl_cfg1); v &= ~RL_CFG1_PME; if ((ifp->if_capenable & IFCAP_WOL) != 0) v |= RL_CFG1_PME; CSR_WRITE_1(sc, sc->rl_cfg1, v); v = CSR_READ_1(sc, sc->rl_cfg3); v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC); if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) v |= RL_CFG3_WOL_MAGIC; CSR_WRITE_1(sc, sc->rl_cfg3, v); v = CSR_READ_1(sc, sc->rl_cfg5); v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST | RL_CFG5_WOL_LANWAKE); if ((ifp->if_capenable & IFCAP_WOL_UCAST) != 0) v |= RL_CFG5_WOL_UCAST; if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0) v |= RL_CFG5_WOL_MCAST | RL_CFG5_WOL_BCAST; if ((ifp->if_capenable & IFCAP_WOL) != 0) v |= RL_CFG5_WOL_LANWAKE; CSR_WRITE_1(sc, sc->rl_cfg5, v); /* Config register write done. */ CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); if ((ifp->if_capenable & IFCAP_WOL) == 0 && (sc->rl_flags & RL_FLAG_PHYWAKE_PM) != 0) CSR_WRITE_1(sc, RL_PMCH, CSR_READ_1(sc, RL_PMCH) & ~0x80); /* * It seems that hardware resets its link speed to 100Mbps in * power down mode so switching to 100Mbps in driver is not * needed. */ /* Request PME if WOL is requested. */ pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2); pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if ((ifp->if_capenable & IFCAP_WOL) != 0) pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2); } static void re_clrwol(struct rl_softc *sc) { int pmc; uint8_t v; RL_LOCK_ASSERT(sc); if (pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) != 0) return; /* Enable config register write. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); v = CSR_READ_1(sc, sc->rl_cfg3); v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC); CSR_WRITE_1(sc, sc->rl_cfg3, v); /* Config register write done. 
*/ CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); v = CSR_READ_1(sc, sc->rl_cfg5); v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST); v &= ~RL_CFG5_WOL_LANWAKE; CSR_WRITE_1(sc, sc->rl_cfg5, v); } static void re_add_sysctls(struct rl_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; int error; ctx = device_get_sysctl_ctx(sc->rl_dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->rl_dev)); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "stats", CTLTYPE_INT | CTLFLAG_RW, sc, 0, re_sysctl_stats, "I", "Statistics Information"); if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) return; SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "int_rx_mod", CTLTYPE_INT | CTLFLAG_RW, &sc->rl_int_rx_mod, 0, sysctl_hw_re_int_mod, "I", "re RX interrupt moderation"); /* Pull in device tunables. */ sc->rl_int_rx_mod = RL_TIMER_DEFAULT; error = resource_int_value(device_get_name(sc->rl_dev), device_get_unit(sc->rl_dev), "int_rx_mod", &sc->rl_int_rx_mod); if (error == 0) { if (sc->rl_int_rx_mod < RL_TIMER_MIN || sc->rl_int_rx_mod > RL_TIMER_MAX) { device_printf(sc->rl_dev, "int_rx_mod value out of " "range; using default: %d\n", RL_TIMER_DEFAULT); sc->rl_int_rx_mod = RL_TIMER_DEFAULT; } } } static int re_sysctl_stats(SYSCTL_HANDLER_ARGS) { struct rl_softc *sc; struct rl_stats *stats; int error, i, result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || req->newptr == NULL) return (error); if (result == 1) { sc = (struct rl_softc *)arg1; RL_LOCK(sc); if ((sc->rl_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { RL_UNLOCK(sc); goto done; } bus_dmamap_sync(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap, BUS_DMASYNC_PREREAD); CSR_WRITE_4(sc, RL_DUMPSTATS_HI, RL_ADDR_HI(sc->rl_ldata.rl_stats_addr)); CSR_WRITE_4(sc, RL_DUMPSTATS_LO, RL_ADDR_LO(sc->rl_ldata.rl_stats_addr)); CSR_WRITE_4(sc, RL_DUMPSTATS_LO, RL_ADDR_LO(sc->rl_ldata.rl_stats_addr | RL_DUMPSTATS_START)); for (i = RL_TIMEOUT; i > 0; i--) { if ((CSR_READ_4(sc, RL_DUMPSTATS_LO) & RL_DUMPSTATS_START) == 0) break; DELAY(1000); } bus_dmamap_sync(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap, BUS_DMASYNC_POSTREAD); RL_UNLOCK(sc); if (i == 0) { device_printf(sc->rl_dev, "DUMP statistics request timed out\n"); return (ETIMEDOUT); } done: stats = sc->rl_ldata.rl_stats; printf("%s statistics:\n", device_get_nameunit(sc->rl_dev)); printf("Tx frames : %ju\n", (uintmax_t)le64toh(stats->rl_tx_pkts)); printf("Rx frames : %ju\n", (uintmax_t)le64toh(stats->rl_rx_pkts)); printf("Tx errors : %ju\n", (uintmax_t)le64toh(stats->rl_tx_errs)); printf("Rx errors : %u\n", le32toh(stats->rl_rx_errs)); printf("Rx missed frames : %u\n", (uint32_t)le16toh(stats->rl_missed_pkts)); printf("Rx frame alignment errs : %u\n", (uint32_t)le16toh(stats->rl_rx_framealign_errs)); printf("Tx single collisions : %u\n", le32toh(stats->rl_tx_onecoll)); printf("Tx multiple collisions : %u\n", le32toh(stats->rl_tx_multicolls)); printf("Rx unicast frames : %ju\n", (uintmax_t)le64toh(stats->rl_rx_ucasts)); printf("Rx broadcast frames : %ju\n", (uintmax_t)le64toh(stats->rl_rx_bcasts)); printf("Rx multicast frames : %u\n", le32toh(stats->rl_rx_mcasts)); printf("Tx aborts : %u\n", (uint32_t)le16toh(stats->rl_tx_aborts)); printf("Tx underruns : %u\n", (uint32_t)le16toh(stats->rl_rx_underruns)); } return (error); } static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high) { int error, value; if (arg1 == NULL) return (EINVAL); value = *(int *)arg1; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); if 
(value < low || value > high) return (EINVAL); *(int *)arg1 = value; return (0); } static int sysctl_hw_re_int_mod(SYSCTL_HANDLER_ARGS) { return (sysctl_int_range(oidp, arg1, arg2, req, RL_TIMER_MIN, RL_TIMER_MAX)); } Index: projects/powernv/dev/vmware/vmxnet3/if_vmx.c =================================================================== --- projects/powernv/dev/vmware/vmxnet3/if_vmx.c (revision 290990) +++ projects/powernv/dev/vmware/vmxnet3/if_vmx.c (revision 290991) @@ -1,3915 +1,3922 @@ /*- * Copyright (c) 2013 Tsubai Masanari * Copyright (c) 2013 Bryan Venteicher * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $ */ /* Driver for VMware vmxnet3 virtual ethernet devices. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "if_vmxreg.h" #include "if_vmxvar.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef VMXNET3_FAILPOINTS #include static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0, "vmxnet3 fail points"); #define VMXNET3_FP _debug_fail_point_vmxnet3 #endif static int vmxnet3_probe(device_t); static int vmxnet3_attach(device_t); static int vmxnet3_detach(device_t); static int vmxnet3_shutdown(device_t); static int vmxnet3_alloc_resources(struct vmxnet3_softc *); static void vmxnet3_free_resources(struct vmxnet3_softc *); static int vmxnet3_check_version(struct vmxnet3_softc *); static void vmxnet3_initial_config(struct vmxnet3_softc *); static void vmxnet3_check_multiqueue(struct vmxnet3_softc *); static int vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *); static int vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *); static int vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *); static int vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int, struct vmxnet3_interrupt *); static int vmxnet3_alloc_intr_resources(struct vmxnet3_softc *); static int vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *); static int vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *); static int vmxnet3_setup_interrupts(struct vmxnet3_softc *); static int vmxnet3_alloc_interrupts(struct vmxnet3_softc *); static void vmxnet3_free_interrupt(struct vmxnet3_softc *, struct vmxnet3_interrupt *); static void vmxnet3_free_interrupts(struct vmxnet3_softc *); #ifndef VMXNET3_LEGACY_TX static int vmxnet3_alloc_taskqueue(struct vmxnet3_softc *); static void vmxnet3_start_taskqueue(struct vmxnet3_softc *); static void vmxnet3_drain_taskqueue(struct vmxnet3_softc *); static void vmxnet3_free_taskqueue(struct vmxnet3_softc *); 
#endif static int vmxnet3_init_rxq(struct vmxnet3_softc *, int); static int vmxnet3_init_txq(struct vmxnet3_softc *, int); static int vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *); static void vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *); static void vmxnet3_destroy_txq(struct vmxnet3_txqueue *); static void vmxnet3_free_rxtx_queues(struct vmxnet3_softc *); static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *); static void vmxnet3_free_shared_data(struct vmxnet3_softc *); static int vmxnet3_alloc_txq_data(struct vmxnet3_softc *); static void vmxnet3_free_txq_data(struct vmxnet3_softc *); static int vmxnet3_alloc_rxq_data(struct vmxnet3_softc *); static void vmxnet3_free_rxq_data(struct vmxnet3_softc *); static int vmxnet3_alloc_queue_data(struct vmxnet3_softc *); static void vmxnet3_free_queue_data(struct vmxnet3_softc *); static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *); static void vmxnet3_init_shared_data(struct vmxnet3_softc *); static void vmxnet3_reinit_interface(struct vmxnet3_softc *); static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *); static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *); static int vmxnet3_alloc_data(struct vmxnet3_softc *); static void vmxnet3_free_data(struct vmxnet3_softc *); static int vmxnet3_setup_interface(struct vmxnet3_softc *); static void vmxnet3_evintr(struct vmxnet3_softc *); static void vmxnet3_txq_eof(struct vmxnet3_txqueue *); static void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *); static int vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *); static void vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *, struct vmxnet3_rxring *, int); static void vmxnet3_rxq_eof(struct vmxnet3_rxqueue *); static void vmxnet3_legacy_intr(void *); static void vmxnet3_txq_intr(void *); static void vmxnet3_rxq_intr(void *); static void vmxnet3_event_intr(void *); static void vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *); static void vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *); static void vmxnet3_stop(struct vmxnet3_softc *); static void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *); static int vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *); static int vmxnet3_reinit_queues(struct vmxnet3_softc *); static int vmxnet3_enable_device(struct vmxnet3_softc *); static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *); static int vmxnet3_reinit(struct vmxnet3_softc *); static void vmxnet3_init_locked(struct vmxnet3_softc *); static void vmxnet3_init(void *); static int vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *,struct mbuf *, int *, int *, int *); static int vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **, bus_dmamap_t, bus_dma_segment_t [], int *); static void vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t); static int vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **); #ifdef VMXNET3_LEGACY_TX static void vmxnet3_start_locked(struct ifnet *); static void vmxnet3_start(struct ifnet *); #else static int vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *, struct mbuf *); static int vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *); static void vmxnet3_txq_tq_deferred(void *, int); #endif static void vmxnet3_txq_start(struct vmxnet3_txqueue *); static void vmxnet3_tx_start_all(struct vmxnet3_softc *); static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int, uint16_t); static void vmxnet3_register_vlan(void *, struct ifnet *, uint16_t); static void 
vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t); static void vmxnet3_set_rxfilter(struct vmxnet3_softc *); static int vmxnet3_change_mtu(struct vmxnet3_softc *, int); static int vmxnet3_ioctl(struct ifnet *, u_long, caddr_t); static uint64_t vmxnet3_get_counter(struct ifnet *, ift_counter); #ifndef VMXNET3_LEGACY_TX static void vmxnet3_qflush(struct ifnet *); #endif static int vmxnet3_watchdog(struct vmxnet3_txqueue *); static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *); static void vmxnet3_tick(void *); static void vmxnet3_link_status(struct vmxnet3_softc *); static void vmxnet3_media_status(struct ifnet *, struct ifmediareq *); static int vmxnet3_media_change(struct ifnet *); static void vmxnet3_set_lladdr(struct vmxnet3_softc *); static void vmxnet3_get_lladdr(struct vmxnet3_softc *); static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_sysctl(struct vmxnet3_softc *); static void vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t, uint32_t); static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t); static void vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t, uint32_t); static void vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t); static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t); static void vmxnet3_enable_intr(struct vmxnet3_softc *, int); static void vmxnet3_disable_intr(struct vmxnet3_softc *, int); static void vmxnet3_enable_all_intrs(struct vmxnet3_softc *); static void vmxnet3_disable_all_intrs(struct vmxnet3_softc *); static int vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t, bus_size_t, struct vmxnet3_dma_alloc *); static void vmxnet3_dma_free(struct vmxnet3_softc *, struct vmxnet3_dma_alloc *); static int vmxnet3_tunable_int(struct vmxnet3_softc *, const char *, int); typedef enum { VMXNET3_BARRIER_RD, VMXNET3_BARRIER_WR, VMXNET3_BARRIER_RDWR, } vmxnet3_barrier_t; static void vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t); /* Tunables. */ static int vmxnet3_mq_disable = 0; TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable); static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES; TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue); static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES; TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue); static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC; TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc); static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC; TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc); static device_method_t vmxnet3_methods[] = { /* Device interface. 
*/ DEVMETHOD(device_probe, vmxnet3_probe), DEVMETHOD(device_attach, vmxnet3_attach), DEVMETHOD(device_detach, vmxnet3_detach), DEVMETHOD(device_shutdown, vmxnet3_shutdown), DEVMETHOD_END }; static driver_t vmxnet3_driver = { "vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc) }; static devclass_t vmxnet3_devclass; DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0); MODULE_DEPEND(vmx, pci, 1, 1, 1); MODULE_DEPEND(vmx, ether, 1, 1, 1); #define VMXNET3_VMWARE_VENDOR_ID 0x15AD #define VMXNET3_VMWARE_DEVICE_ID 0x07B0 static int vmxnet3_probe(device_t dev) { if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID && pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) { device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter"); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static int vmxnet3_attach(device_t dev) { struct vmxnet3_softc *sc; int error; sc = device_get_softc(dev); sc->vmx_dev = dev; pci_enable_busmaster(dev); VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev)); callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0); vmxnet3_initial_config(sc); error = vmxnet3_alloc_resources(sc); if (error) goto fail; error = vmxnet3_check_version(sc); if (error) goto fail; error = vmxnet3_alloc_rxtx_queues(sc); if (error) goto fail; #ifndef VMXNET3_LEGACY_TX error = vmxnet3_alloc_taskqueue(sc); if (error) goto fail; #endif error = vmxnet3_alloc_interrupts(sc); if (error) goto fail; vmxnet3_check_multiqueue(sc); error = vmxnet3_alloc_data(sc); if (error) goto fail; error = vmxnet3_setup_interface(sc); if (error) goto fail; error = vmxnet3_setup_interrupts(sc); if (error) { ether_ifdetach(sc->vmx_ifp); device_printf(dev, "could not set up interrupt\n"); goto fail; } vmxnet3_setup_sysctl(sc); #ifndef VMXNET3_LEGACY_TX vmxnet3_start_taskqueue(sc); #endif fail: if (error) vmxnet3_detach(dev); return (error); } static int vmxnet3_detach(device_t dev) { struct vmxnet3_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->vmx_ifp; if (device_is_attached(dev)) { VMXNET3_CORE_LOCK(sc); vmxnet3_stop(sc); VMXNET3_CORE_UNLOCK(sc); callout_drain(&sc->vmx_tick); #ifndef VMXNET3_LEGACY_TX vmxnet3_drain_taskqueue(sc); #endif ether_ifdetach(ifp); } if (sc->vmx_vlan_attach != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach); sc->vmx_vlan_attach = NULL; } if (sc->vmx_vlan_detach != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach); sc->vmx_vlan_detach = NULL; } #ifndef VMXNET3_LEGACY_TX vmxnet3_free_taskqueue(sc); #endif vmxnet3_free_interrupts(sc); if (ifp != NULL) { if_free(ifp); sc->vmx_ifp = NULL; } ifmedia_removeall(&sc->vmx_media); vmxnet3_free_data(sc); vmxnet3_free_resources(sc); vmxnet3_free_rxtx_queues(sc); VMXNET3_CORE_LOCK_DESTROY(sc); return (0); } static int vmxnet3_shutdown(device_t dev) { return (0); } static int vmxnet3_alloc_resources(struct vmxnet3_softc *sc) { device_t dev; int rid; dev = sc->vmx_dev; rid = PCIR_BAR(0); sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->vmx_res0 == NULL) { device_printf(dev, "could not map BAR0 memory\n"); return (ENXIO); } sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0); sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0); rid = PCIR_BAR(1); sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->vmx_res1 == NULL) { device_printf(dev, "could not map BAR1 memory\n"); return (ENXIO); } sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1); sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1); if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) { rid = PCIR_BAR(2); sc->vmx_msix_res = 
bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); } if (sc->vmx_msix_res == NULL) sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX; return (0); } static void vmxnet3_free_resources(struct vmxnet3_softc *sc) { device_t dev; int rid; dev = sc->vmx_dev; if (sc->vmx_res0 != NULL) { rid = PCIR_BAR(0); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0); sc->vmx_res0 = NULL; } if (sc->vmx_res1 != NULL) { rid = PCIR_BAR(1); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1); sc->vmx_res1 = NULL; } if (sc->vmx_msix_res != NULL) { rid = PCIR_BAR(2); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_msix_res); sc->vmx_msix_res = NULL; } } static int vmxnet3_check_version(struct vmxnet3_softc *sc) { device_t dev; uint32_t version; dev = sc->vmx_dev; version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS); if ((version & 0x01) == 0) { device_printf(dev, "unsupported hardware version %#x\n", version); return (ENOTSUP); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1); version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS); if ((version & 0x01) == 0) { device_printf(dev, "unsupported UPT version %#x\n", version); return (ENOTSUP); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1); return (0); } +static int +trunc_powerof2(int val) +{ + + return (1U << (fls(val) - 1)); +} + static void vmxnet3_initial_config(struct vmxnet3_softc *sc) { int nqueue, ndesc; nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue); if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1) nqueue = VMXNET3_DEF_TX_QUEUES; if (nqueue > mp_ncpus) nqueue = mp_ncpus; - sc->vmx_max_ntxqueues = nqueue; + sc->vmx_max_ntxqueues = trunc_powerof2(nqueue); nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue); if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1) nqueue = VMXNET3_DEF_RX_QUEUES; if (nqueue > mp_ncpus) nqueue = mp_ncpus; - sc->vmx_max_nrxqueues = nqueue; + sc->vmx_max_nrxqueues = trunc_powerof2(nqueue); if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) { sc->vmx_max_nrxqueues = 1; sc->vmx_max_ntxqueues = 1; } ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc); if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC) ndesc = VMXNET3_DEF_TX_NDESC; if (ndesc & VMXNET3_MASK_TX_NDESC) ndesc &= ~VMXNET3_MASK_TX_NDESC; sc->vmx_ntxdescs = ndesc; ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc); if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC) ndesc = VMXNET3_DEF_RX_NDESC; if (ndesc & VMXNET3_MASK_RX_NDESC) ndesc &= ~VMXNET3_MASK_RX_NDESC; sc->vmx_nrxdescs = ndesc; sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS; } static void vmxnet3_check_multiqueue(struct vmxnet3_softc *sc) { if (sc->vmx_intr_type != VMXNET3_IT_MSIX) goto out; /* BMV: Just use the maximum configured for now. */ sc->vmx_nrxqueues = sc->vmx_max_nrxqueues; sc->vmx_ntxqueues = sc->vmx_max_ntxqueues; if (sc->vmx_nrxqueues > 1) sc->vmx_flags |= VMXNET3_FLAG_RSS; return; out: sc->vmx_ntxqueues = 1; sc->vmx_nrxqueues = 1; } static int vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc) { device_t dev; int nmsix, cnt, required; dev = sc->vmx_dev; if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) return (1); /* Allocate an additional vector for the events interrupt. */ required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1; nmsix = pci_msix_count(dev); if (nmsix < required) return (1); cnt = required; if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) { sc->vmx_nintrs = required; return (0); } else pci_release_msi(dev); /* BMV TODO Fallback to sharing MSIX vectors if possible. 
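 * For example, 4 TX and 4 RX queues currently need 4 + 4 + 1 = 9
 * vectors; sharing one vector per TX/RX queue pair would cut that
 * to 4 + 1 = 5.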
*/ return (1); } static int vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc) { device_t dev; int nmsi, cnt, required; dev = sc->vmx_dev; required = 1; nmsi = pci_msi_count(dev); if (nmsi < required) return (1); cnt = required; if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) { sc->vmx_nintrs = 1; return (0); } else pci_release_msi(dev); return (1); } static int vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc) { sc->vmx_nintrs = 1; return (0); } static int vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags, struct vmxnet3_interrupt *intr) { struct resource *irq; irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags); if (irq == NULL) return (ENXIO); intr->vmxi_irq = irq; intr->vmxi_rid = rid; return (0); } static int vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc) { int i, rid, flags, error; rid = 0; flags = RF_ACTIVE; if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) flags |= RF_SHAREABLE; else rid = 1; for (i = 0; i < sc->vmx_nintrs; i++, rid++) { error = vmxnet3_alloc_interrupt(sc, rid, flags, &sc->vmx_intrs[i]); if (error) return (error); } return (0); } static int vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_txqueue *txq; struct vmxnet3_rxqueue *rxq; struct vmxnet3_interrupt *intr; enum intr_type type; int i, error; dev = sc->vmx_dev; intr = &sc->vmx_intrs[0]; type = INTR_TYPE_NET | INTR_MPSAFE; for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) { txq = &sc->vmx_txq[i]; error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL, vmxnet3_txq_intr, txq, &intr->vmxi_handler); if (error) return (error); bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "tq%d", i); txq->vxtxq_intr_idx = intr->vmxi_rid - 1; } for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) { rxq = &sc->vmx_rxq[i]; error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL, vmxnet3_rxq_intr, rxq, &intr->vmxi_handler); if (error) return (error); bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "rq%d", i); rxq->vxrxq_intr_idx = intr->vmxi_rid - 1; } error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL, vmxnet3_event_intr, sc, &intr->vmxi_handler); if (error) return (error); bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "event"); sc->vmx_event_intr_idx = intr->vmxi_rid - 1; return (0); } static int vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc) { struct vmxnet3_interrupt *intr; int i, error; intr = &sc->vmx_intrs[0]; error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc, &intr->vmxi_handler); for (i = 0; i < sc->vmx_ntxqueues; i++) sc->vmx_txq[i].vxtxq_intr_idx = 0; for (i = 0; i < sc->vmx_nrxqueues; i++) sc->vmx_rxq[i].vxrxq_intr_idx = 0; sc->vmx_event_intr_idx = 0; return (error); } static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc) { struct vmxnet3_txqueue *txq; struct vmxnet3_txq_shared *txs; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxq_shared *rxs; int i; sc->vmx_ds->evintr = sc->vmx_event_intr_idx; for (i = 0; i < sc->vmx_ntxqueues; i++) { txq = &sc->vmx_txq[i]; txs = txq->vxtxq_ts; txs->intr_idx = txq->vxtxq_intr_idx; } for (i = 0; i < sc->vmx_nrxqueues; i++) { rxq = &sc->vmx_rxq[i]; rxs = rxq->vxrxq_rs; rxs->intr_idx = rxq->vxrxq_intr_idx; } } static int vmxnet3_setup_interrupts(struct vmxnet3_softc *sc) { int error; error = vmxnet3_alloc_intr_resources(sc); if (error) return (error); switch (sc->vmx_intr_type) { case VMXNET3_IT_MSIX: error = vmxnet3_setup_msix_interrupts(sc); break; case VMXNET3_IT_MSI: case 
VMXNET3_IT_LEGACY: error = vmxnet3_setup_legacy_interrupt(sc); break; default: panic("%s: invalid interrupt type %d", __func__, sc->vmx_intr_type); } if (error == 0) vmxnet3_set_interrupt_idx(sc); return (error); } static int vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc) { device_t dev; uint32_t config; int error; dev = sc->vmx_dev; config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG); sc->vmx_intr_type = config & 0x03; sc->vmx_intr_mask_mode = (config >> 2) & 0x03; switch (sc->vmx_intr_type) { case VMXNET3_IT_AUTO: sc->vmx_intr_type = VMXNET3_IT_MSIX; /* FALLTHROUGH */ case VMXNET3_IT_MSIX: error = vmxnet3_alloc_msix_interrupts(sc); if (error == 0) break; sc->vmx_intr_type = VMXNET3_IT_MSI; /* FALLTHROUGH */ case VMXNET3_IT_MSI: error = vmxnet3_alloc_msi_interrupts(sc); if (error == 0) break; sc->vmx_intr_type = VMXNET3_IT_LEGACY; /* FALLTHROUGH */ case VMXNET3_IT_LEGACY: error = vmxnet3_alloc_legacy_interrupts(sc); if (error == 0) break; /* FALLTHROUGH */ default: sc->vmx_intr_type = -1; device_printf(dev, "cannot allocate any interrupt resources\n"); return (ENXIO); } return (error); } static void vmxnet3_free_interrupt(struct vmxnet3_softc *sc, struct vmxnet3_interrupt *intr) { device_t dev; dev = sc->vmx_dev; if (intr->vmxi_handler != NULL) { bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler); intr->vmxi_handler = NULL; } if (intr->vmxi_irq != NULL) { bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid, intr->vmxi_irq); intr->vmxi_irq = NULL; intr->vmxi_rid = -1; } } static void vmxnet3_free_interrupts(struct vmxnet3_softc *sc) { int i; for (i = 0; i < sc->vmx_nintrs; i++) vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]); if (sc->vmx_intr_type == VMXNET3_IT_MSI || sc->vmx_intr_type == VMXNET3_IT_MSIX) pci_release_msi(sc->vmx_dev); } #ifndef VMXNET3_LEGACY_TX static int vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc) { device_t dev; dev = sc->vmx_dev; sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT, taskqueue_thread_enqueue, &sc->vmx_tq); if (sc->vmx_tq == NULL) return (ENOMEM); return (0); } static void vmxnet3_start_taskqueue(struct vmxnet3_softc *sc) { device_t dev; int nthreads, error; dev = sc->vmx_dev; /* * The taskqueue is typically not frequently used, so a dedicated * thread for each queue is unnecessary. */ nthreads = MAX(1, sc->vmx_ntxqueues / 2); /* * Most drivers just ignore the return value - it only fails * with ENOMEM so an error is not likely. It is hard for us * to recover from an error here. 
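 * If thread creation does fail we only log it below and carry on;
 * deferred tasks would then simply never run.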
*/ error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET, "%s taskq", device_get_nameunit(dev)); if (error) device_printf(dev, "failed to start taskqueue: %d\n", error); } static void vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc) { struct vmxnet3_txqueue *txq; int i; if (sc->vmx_tq != NULL) { for (i = 0; i < sc->vmx_max_ntxqueues; i++) { txq = &sc->vmx_txq[i]; taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask); } } } static void vmxnet3_free_taskqueue(struct vmxnet3_softc *sc) { if (sc->vmx_tq != NULL) { taskqueue_free(sc->vmx_tq); sc->vmx_tq = NULL; } } #endif static int vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q) { struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; int i; rxq = &sc->vmx_rxq[q]; snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d", device_get_nameunit(sc->vmx_dev), q); mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF); rxq->vxrxq_sc = sc; rxq->vxrxq_id = q; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_rid = i; rxr->vxrxr_ndesc = sc->vmx_nrxdescs; rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO); if (rxr->vxrxr_rxbuf == NULL) return (ENOMEM); rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs; } return (0); } static int vmxnet3_init_txq(struct vmxnet3_softc *sc, int q) { struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; txq = &sc->vmx_txq[q]; txr = &txq->vxtxq_cmd_ring; snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d", device_get_nameunit(sc->vmx_dev), q); mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF); txq->vxtxq_sc = sc; txq->vxtxq_id = q; txr->vxtxr_ndesc = sc->vmx_ntxdescs; txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc * sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO); if (txr->vxtxr_txbuf == NULL) return (ENOMEM); txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs; #ifndef VMXNET3_LEGACY_TX TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq); txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF, M_NOWAIT, &txq->vxtxq_mtx); if (txq->vxtxq_br == NULL) return (ENOMEM); #endif return (0); } static int vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc) { int i, error; /* * Only attempt to create multiple queues if MSIX is available. MSIX is * disabled by default because it's apparently broken for devices passed * through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist tunable * must be set to zero for MSIX. This check prevents us from allocating * queue structures that we will not use.
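 * With MSI or a legacy interrupt everything is serviced by a single
 * handler anyway, so extra queue structures would only waste memory.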
*/ if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) { sc->vmx_max_nrxqueues = 1; sc->vmx_max_ntxqueues = 1; } sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) * sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO); sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) * sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL) return (ENOMEM); for (i = 0; i < sc->vmx_max_nrxqueues; i++) { error = vmxnet3_init_rxq(sc, i); if (error) return (error); } for (i = 0; i < sc->vmx_max_ntxqueues; i++) { error = vmxnet3_init_txq(sc, i); if (error) return (error); } return (0); } static void vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq) { struct vmxnet3_rxring *rxr; int i; rxq->vxrxq_sc = NULL; rxq->vxrxq_id = -1; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; if (rxr->vxrxr_rxbuf != NULL) { free(rxr->vxrxr_rxbuf, M_DEVBUF); rxr->vxrxr_rxbuf = NULL; } } if (mtx_initialized(&rxq->vxrxq_mtx) != 0) mtx_destroy(&rxq->vxrxq_mtx); } static void vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq) { struct vmxnet3_txring *txr; txr = &txq->vxtxq_cmd_ring; txq->vxtxq_sc = NULL; txq->vxtxq_id = -1; #ifndef VMXNET3_LEGACY_TX if (txq->vxtxq_br != NULL) { buf_ring_free(txq->vxtxq_br, M_DEVBUF); txq->vxtxq_br = NULL; } #endif if (txr->vxtxr_txbuf != NULL) { free(txr->vxtxr_txbuf, M_DEVBUF); txr->vxtxr_txbuf = NULL; } if (mtx_initialized(&txq->vxtxq_mtx) != 0) mtx_destroy(&txq->vxtxq_mtx); } static void vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc) { int i; if (sc->vmx_rxq != NULL) { for (i = 0; i < sc->vmx_max_nrxqueues; i++) vmxnet3_destroy_rxq(&sc->vmx_rxq[i]); free(sc->vmx_rxq, M_DEVBUF); sc->vmx_rxq = NULL; } if (sc->vmx_txq != NULL) { for (i = 0; i < sc->vmx_max_ntxqueues; i++) vmxnet3_destroy_txq(&sc->vmx_txq[i]); free(sc->vmx_txq, M_DEVBUF); sc->vmx_txq = NULL; } } static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc) { device_t dev; uint8_t *kva; size_t size; int i, error; dev = sc->vmx_dev; size = sizeof(struct vmxnet3_driver_shared); error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma); if (error) { device_printf(dev, "cannot alloc shared memory\n"); return (error); } sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr; size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) + sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared); error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma); if (error) { device_printf(dev, "cannot alloc queue shared memory\n"); return (error); } sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr; kva = sc->vmx_qs; for (i = 0; i < sc->vmx_ntxqueues; i++) { sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva; kva += sizeof(struct vmxnet3_txq_shared); } for (i = 0; i < sc->vmx_nrxqueues; i++) { sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva; kva += sizeof(struct vmxnet3_rxq_shared); } if (sc->vmx_flags & VMXNET3_FLAG_RSS) { size = sizeof(struct vmxnet3_rss_shared); error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma); if (error) { device_printf(dev, "cannot alloc rss shared memory\n"); return (error); } sc->vmx_rss = (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr; } return (0); } static void vmxnet3_free_shared_data(struct vmxnet3_softc *sc) { if (sc->vmx_rss != NULL) { vmxnet3_dma_free(sc, &sc->vmx_rss_dma); sc->vmx_rss = NULL; } if (sc->vmx_qs != NULL) { vmxnet3_dma_free(sc, &sc->vmx_qs_dma); sc->vmx_qs = NULL; } if (sc->vmx_ds != NULL) { vmxnet3_dma_free(sc, &sc->vmx_ds_dma); sc->vmx_ds = NULL; } } static int 
vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; size_t descsz, compsz; int i, q, error; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_ntxqueues; q++) { txq = &sc->vmx_txq[q]; txr = &txq->vxtxq_cmd_ring; txc = &txq->vxtxq_comp_ring; descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc); compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc); error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ VMXNET3_TX_MAXSIZE, /* maxsize */ VMXNET3_TX_MAXSEGS, /* nsegments */ VMXNET3_TX_MAXSEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &txr->vxtxr_txtag); if (error) { device_printf(dev, "unable to create Tx buffer tag for queue %d\n", q); return (error); } error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma); if (error) { device_printf(dev, "cannot alloc Tx descriptors for " "queue %d error %d\n", q, error); return (error); } txr->vxtxr_txd = (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr; error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma); if (error) { device_printf(dev, "cannot alloc Tx comp descriptors " "for queue %d error %d\n", q, error); return (error); } txc->vxcr_u.txcd = (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr; for (i = 0; i < txr->vxtxr_ndesc; i++) { error = bus_dmamap_create(txr->vxtxr_txtag, 0, &txr->vxtxr_txbuf[i].vtxb_dmamap); if (error) { device_printf(dev, "unable to create Tx buf " "dmamap for queue %d idx %d\n", q, i); return (error); } } } return (0); } static void vmxnet3_free_txq_data(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; struct vmxnet3_txbuf *txb; int i, q; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_ntxqueues; q++) { txq = &sc->vmx_txq[q]; txr = &txq->vxtxq_cmd_ring; txc = &txq->vxtxq_comp_ring; for (i = 0; i < txr->vxtxr_ndesc; i++) { txb = &txr->vxtxr_txbuf[i]; if (txb->vtxb_dmamap != NULL) { bus_dmamap_destroy(txr->vxtxr_txtag, txb->vtxb_dmamap); txb->vtxb_dmamap = NULL; } } if (txc->vxcr_u.txcd != NULL) { vmxnet3_dma_free(sc, &txc->vxcr_dma); txc->vxcr_u.txcd = NULL; } if (txr->vxtxr_txd != NULL) { vmxnet3_dma_free(sc, &txr->vxtxr_dma); txr->vxtxr_txd = NULL; } if (txr->vxtxr_txtag != NULL) { bus_dma_tag_destroy(txr->vxtxr_txtag); txr->vxtxr_txtag = NULL; } } } static int vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; int descsz, compsz; int i, j, q, error; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_nrxqueues; q++) { rxq = &sc->vmx_rxq[q]; rxc = &rxq->vxrxq_comp_ring; compsz = 0; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; descsz = rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc); compsz += rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxcompdesc); error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUMPAGESIZE, /* maxsize */ 1, /* nsegments */ MJUMPAGESIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &rxr->vxrxr_rxtag); if (error) { device_printf(dev, "unable to create Rx buffer tag for " "queue %d\n", q); return (error); } error = vmxnet3_dma_malloc(sc, descsz, 512, &rxr->vxrxr_dma); if (error) { 
device_printf(dev, "cannot allocate Rx " "descriptors for queue %d/%d error %d\n", i, q, error); return (error); } rxr->vxrxr_rxd = (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr; } error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma); if (error) { device_printf(dev, "cannot alloc Rx comp descriptors " "for queue %d error %d\n", q, error); return (error); } rxc->vxcr_u.rxcd = (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; error = bus_dmamap_create(rxr->vxrxr_rxtag, 0, &rxr->vxrxr_spare_dmap); if (error) { device_printf(dev, "unable to create spare " "dmamap for queue %d/%d error %d\n", q, i, error); return (error); } for (j = 0; j < rxr->vxrxr_ndesc; j++) { error = bus_dmamap_create(rxr->vxrxr_rxtag, 0, &rxr->vxrxr_rxbuf[j].vrxb_dmamap); if (error) { device_printf(dev, "unable to create " "dmamap for queue %d/%d slot %d " "error %d\n", q, i, j, error); return (error); } } } } return (0); } static void vmxnet3_free_rxq_data(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxbuf *rxb; int i, j, q; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_nrxqueues; q++) { rxq = &sc->vmx_rxq[q]; rxc = &rxq->vxrxq_comp_ring; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; if (rxr->vxrxr_spare_dmap != NULL) { bus_dmamap_destroy(rxr->vxrxr_rxtag, rxr->vxrxr_spare_dmap); rxr->vxrxr_spare_dmap = NULL; } for (j = 0; j < rxr->vxrxr_ndesc; j++) { rxb = &rxr->vxrxr_rxbuf[j]; if (rxb->vrxb_dmamap != NULL) { bus_dmamap_destroy(rxr->vxrxr_rxtag, rxb->vrxb_dmamap); rxb->vrxb_dmamap = NULL; } } } if (rxc->vxcr_u.rxcd != NULL) { vmxnet3_dma_free(sc, &rxc->vxcr_dma); rxc->vxcr_u.rxcd = NULL; } for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; if (rxr->vxrxr_rxd != NULL) { vmxnet3_dma_free(sc, &rxr->vxrxr_dma); rxr->vxrxr_rxd = NULL; } if (rxr->vxrxr_rxtag != NULL) { bus_dma_tag_destroy(rxr->vxrxr_rxtag); rxr->vxrxr_rxtag = NULL; } } } } static int vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc) { int error; error = vmxnet3_alloc_txq_data(sc); if (error) return (error); error = vmxnet3_alloc_rxq_data(sc); if (error) return (error); return (0); } static void vmxnet3_free_queue_data(struct vmxnet3_softc *sc) { if (sc->vmx_rxq != NULL) vmxnet3_free_rxq_data(sc); if (sc->vmx_txq != NULL) vmxnet3_free_txq_data(sc); } static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc) { int error; error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma); if (error) device_printf(sc->vmx_dev, "unable to alloc multicast table\n"); else sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr; return (error); } static void vmxnet3_free_mcast_table(struct vmxnet3_softc *sc) { if (sc->vmx_mcast != NULL) { vmxnet3_dma_free(sc, &sc->vmx_mcast_dma); sc->vmx_mcast = NULL; } } static void vmxnet3_init_shared_data(struct vmxnet3_softc *sc) { struct vmxnet3_driver_shared *ds; struct vmxnet3_txqueue *txq; struct vmxnet3_txq_shared *txs; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxq_shared *rxs; int i; ds = sc->vmx_ds; /* * Initialize fields of the shared data that remains the same across * reinits. Note the shared data is zero'd when allocated. 
*/ ds->magic = VMXNET3_REV1_MAGIC; /* DriverInfo */ ds->version = VMXNET3_DRIVER_VERSION; ds->guest = VMXNET3_GOS_FREEBSD | #ifdef __LP64__ VMXNET3_GOS_64BIT; #else VMXNET3_GOS_32BIT; #endif ds->vmxnet3_revision = 1; ds->upt_version = 1; /* Misc. conf */ ds->driver_data = vtophys(sc); ds->driver_data_len = sizeof(struct vmxnet3_softc); ds->queue_shared = sc->vmx_qs_dma.dma_paddr; ds->queue_shared_len = sc->vmx_qs_dma.dma_size; ds->nrxsg_max = sc->vmx_max_rxsegs; /* RSS conf */ if (sc->vmx_flags & VMXNET3_FLAG_RSS) { ds->rss.version = 1; ds->rss.paddr = sc->vmx_rss_dma.dma_paddr; ds->rss.len = sc->vmx_rss_dma.dma_size; } /* Interrupt control. */ ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO; ds->nintr = sc->vmx_nintrs; ds->evintr = sc->vmx_event_intr_idx; ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < sc->vmx_nintrs; i++) ds->modlevel[i] = UPT1_IMOD_ADAPTIVE; /* Receive filter. */ ds->mcast_table = sc->vmx_mcast_dma.dma_paddr; ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size; /* Tx queues */ for (i = 0; i < sc->vmx_ntxqueues; i++) { txq = &sc->vmx_txq[i]; txs = txq->vxtxq_ts; txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr; txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc; txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr; txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc; txs->driver_data = vtophys(txq); txs->driver_data_len = sizeof(struct vmxnet3_txqueue); } /* Rx queues */ for (i = 0; i < sc->vmx_nrxqueues; i++) { rxq = &sc->vmx_rxq[i]; rxs = rxq->vxrxq_rs; rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr; rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc; rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr; rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc; rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr; rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc; rxs->driver_data = vtophys(rxq); rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue); } } static void vmxnet3_reinit_interface(struct vmxnet3_softc *sc) { struct ifnet *ifp; ifp = sc->vmx_ifp; /* Use the current MAC address. */ bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN); vmxnet3_set_lladdr(sc); ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD; if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; } static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc) { /* * Use the same key as the Linux driver until FreeBSD can do * RSS (presumably Toeplitz) in software. 
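 *
 * For reference, Toeplitz walks the input bits MSB-first and XORs a
 * sliding 32-bit window of the key into the hash whenever the input
 * bit is set.  A self-contained sketch (example_toeplitz is
 * illustrative; it needs a key of at least datalen + 4 bytes, which
 * the 40-byte key below provides for the usual hash inputs):
 *
 *	static uint32_t
 *	example_toeplitz(const uint8_t *key, const uint8_t *data,
 *	    int datalen)
 *	{
 *		uint32_t hash = 0, window;
 *		int i, b;
 *
 *		window = (key[0] << 24) | (key[1] << 16) |
 *		    (key[2] << 8) | key[3];
 *		for (i = 0; i < datalen; i++) {
 *			for (b = 7; b >= 0; b--) {
 *				if (data[i] & (1 << b))
 *					hash ^= window;
 *				window <<= 1;
 *				if (key[i + 4] & (1 << b))
 *					window |= 1;
 *			}
 *		}
 *		return (hash);
 *	}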
*/ static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = { 0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac, 0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28, 0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70, 0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3, 0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9, }; struct vmxnet3_driver_shared *ds; struct vmxnet3_rss_shared *rss; int i; ds = sc->vmx_ds; rss = sc->vmx_rss; rss->hash_type = UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 | UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6; rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ; rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE; rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE; memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE); for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) rss->ind_table[i] = i % sc->vmx_nrxqueues; } static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc) { struct ifnet *ifp; struct vmxnet3_driver_shared *ds; ifp = sc->vmx_ifp; ds = sc->vmx_ds; ds->mtu = ifp->if_mtu; ds->ntxqueue = sc->vmx_ntxqueues; ds->nrxqueue = sc->vmx_nrxqueues; ds->upt_features = 0; if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) ds->upt_features |= UPT1_F_CSUM; if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) ds->upt_features |= UPT1_F_VLAN; if (ifp->if_capenable & IFCAP_LRO) ds->upt_features |= UPT1_F_LRO; if (sc->vmx_flags & VMXNET3_FLAG_RSS) { ds->upt_features |= UPT1_F_RSS; vmxnet3_reinit_rss_shared_data(sc); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr); vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH, (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32); } static int vmxnet3_alloc_data(struct vmxnet3_softc *sc) { int error; error = vmxnet3_alloc_shared_data(sc); if (error) return (error); error = vmxnet3_alloc_queue_data(sc); if (error) return (error); error = vmxnet3_alloc_mcast_table(sc); if (error) return (error); vmxnet3_init_shared_data(sc); return (0); } static void vmxnet3_free_data(struct vmxnet3_softc *sc) { vmxnet3_free_mcast_table(sc); vmxnet3_free_queue_data(sc); vmxnet3_free_shared_data(sc); } static int vmxnet3_setup_interface(struct vmxnet3_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vmx_dev; ifp = sc->vmx_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "cannot allocate ifnet structure\n"); return (ENOSPC); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); #if __FreeBSD_version < 1000025 ifp->if_baudrate = 1000000000; #elif __FreeBSD_version < 1100011 if_initbaudrate(ifp, IF_Gbps(10)); #else ifp->if_baudrate = IF_Gbps(10); #endif ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = vmxnet3_init; ifp->if_ioctl = vmxnet3_ioctl; ifp->if_get_counter = vmxnet3_get_counter; ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = VMXNET3_TX_MAXSEGS; ifp->if_hw_tsomaxsegsize = VMXNET3_TX_MAXSEGSIZE; #ifdef VMXNET3_LEGACY_TX ifp->if_start = vmxnet3_start; ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1; IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1); IFQ_SET_READY(&ifp->if_snd); #else ifp->if_transmit = vmxnet3_txq_mq_start; ifp->if_qflush = vmxnet3_qflush; #endif vmxnet3_get_lladdr(sc); ether_ifattach(ifp, sc->vmx_lladdr); ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM; ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6; ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6; ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; ifp->if_capenable = ifp->if_capabilities; /* These capabilities are 
not enabled by default. */ ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER; sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config, vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change, vmxnet3_media_status); ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO); return (0); } static void vmxnet3_evintr(struct vmxnet3_softc *sc) { device_t dev; struct ifnet *ifp; struct vmxnet3_txq_shared *ts; struct vmxnet3_rxq_shared *rs; uint32_t event; int reset; dev = sc->vmx_dev; ifp = sc->vmx_ifp; reset = 0; VMXNET3_CORE_LOCK(sc); /* Clear events. */ event = sc->vmx_ds->event; vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event); if (event & VMXNET3_EVENT_LINK) { vmxnet3_link_status(sc); if (sc->vmx_link_active != 0) vmxnet3_tx_start_all(sc); } if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) { reset = 1; vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS); ts = sc->vmx_txq[0].vxtxq_ts; if (ts->stopped != 0) device_printf(dev, "Tx queue error %#x\n", ts->error); rs = sc->vmx_rxq[0].vxrxq_rs; if (rs->stopped != 0) device_printf(dev, "Rx queue error %#x\n", rs->error); device_printf(dev, "Rx/Tx queue error event ... resetting\n"); } if (event & VMXNET3_EVENT_DIC) device_printf(dev, "device implementation change event\n"); if (event & VMXNET3_EVENT_DEBUG) device_printf(dev, "debug event\n"); if (reset != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vmxnet3_init_locked(sc); } VMXNET3_CORE_UNLOCK(sc); } static void vmxnet3_txq_eof(struct vmxnet3_txqueue *txq) { struct vmxnet3_softc *sc; struct ifnet *ifp; struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; struct vmxnet3_txcompdesc *txcd; struct vmxnet3_txbuf *txb; struct mbuf *m; u_int sop; sc = txq->vxtxq_sc; ifp = sc->vmx_ifp; txr = &txq->vxtxq_cmd_ring; txc = &txq->vxtxq_comp_ring; VMXNET3_TXQ_LOCK_ASSERT(txq); for (;;) { txcd = &txc->vxcr_u.txcd[txc->vxcr_next]; if (txcd->gen != txc->vxcr_gen) break; vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); if (++txc->vxcr_next == txc->vxcr_ndesc) { txc->vxcr_next = 0; txc->vxcr_gen ^= 1; } sop = txr->vxtxr_next; txb = &txr->vxtxr_txbuf[sop]; if ((m = txb->vtxb_m) != NULL) { bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap); txq->vxtxq_stats.vmtxs_opackets++; txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len; if (m->m_flags & M_MCAST) txq->vxtxq_stats.vmtxs_omcasts++; m_freem(m); txb->vtxb_m = NULL; } txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc; } if (txr->vxtxr_head == txr->vxtxr_next) txq->vxtxq_watchdog = 0; } static int vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr) { struct ifnet *ifp; struct mbuf *m; struct vmxnet3_rxdesc *rxd; struct vmxnet3_rxbuf *rxb; bus_dma_tag_t tag; bus_dmamap_t dmap; bus_dma_segment_t segs[1]; int idx, clsize, btype, flags, nsegs, error; ifp = sc->vmx_ifp; tag = rxr->vxrxr_rxtag; dmap = rxr->vxrxr_spare_dmap; idx = rxr->vxrxr_fill; rxd = &rxr->vxrxr_rxd[idx]; rxb = &rxr->vxrxr_rxbuf[idx]; #ifdef VMXNET3_FAILPOINTS KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS); if (rxr->vxrxr_rid != 0) KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS); #endif if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) { flags = M_PKTHDR; clsize = MCLBYTES; btype = VMXNET3_BTYPE_HEAD; } else { #if __FreeBSD_version < 902001 /* * These 
mbufs will never be used for the start of a frame. * Roughly prior to branching releng/9.2, the load_mbuf_sg() * required the mbuf to always be a packet header. Avoid * unnecessary mbuf initialization in newer versions where * that is not the case. */ flags = M_PKTHDR; #else flags = 0; #endif clsize = MJUMPAGESIZE; btype = VMXNET3_BTYPE_BODY; } m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize); if (m == NULL) { sc->vmx_stats.vmst_mgetcl_failed++; return (ENOBUFS); } if (btype == VMXNET3_BTYPE_HEAD) { m->m_len = m->m_pkthdr.len = clsize; m_adj(m, ETHER_ALIGN); } else m->m_len = clsize; error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs, BUS_DMA_NOWAIT); if (error) { m_freem(m); sc->vmx_stats.vmst_mbuf_load_failed++; return (error); } KASSERT(nsegs == 1, ("%s: mbuf %p with too many segments %d", __func__, m, nsegs)); #if __FreeBSD_version < 902001 if (btype == VMXNET3_BTYPE_BODY) m->m_flags &= ~M_PKTHDR; #endif if (rxb->vrxb_m != NULL) { bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(tag, rxb->vrxb_dmamap); } rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap; rxb->vrxb_dmamap = dmap; rxb->vrxb_m = m; rxd->addr = segs[0].ds_addr; rxd->len = segs[0].ds_len; rxd->btype = btype; rxd->gen = rxr->vxrxr_gen; vmxnet3_rxr_increment_fill(rxr); return (0); } static void vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq, struct vmxnet3_rxring *rxr, int idx) { struct vmxnet3_rxdesc *rxd; rxd = &rxr->vxrxr_rxd[idx]; rxd->gen = rxr->vxrxr_gen; vmxnet3_rxr_increment_fill(rxr); } static void vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq) { struct vmxnet3_softc *sc; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxcompdesc *rxcd; int idx, eof; sc = rxq->vxrxq_sc; rxc = &rxq->vxrxq_comp_ring; do { rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next]; if (rxcd->gen != rxc->vxcr_gen) break; /* Not expected. 
*/ vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); if (++rxc->vxcr_next == rxc->vxcr_ndesc) { rxc->vxcr_next = 0; rxc->vxcr_gen ^= 1; } idx = rxcd->rxd_idx; eof = rxcd->eop; if (rxcd->qid < sc->vmx_nrxqueues) rxr = &rxq->vxrxq_cmd_ring[0]; else rxr = &rxq->vxrxq_cmd_ring[1]; vmxnet3_rxq_eof_discard(rxq, rxr, idx); } while (!eof); } static void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m) { if (rxcd->ipv4) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (rxcd->ipcsum_ok) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; } if (!rxcd->fragment) { if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; } } } static void vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq, struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m) { struct vmxnet3_softc *sc; struct ifnet *ifp; sc = rxq->vxrxq_sc; ifp = sc->vmx_ifp; if (rxcd->error) { rxq->vxrxq_stats.vmrxs_ierrors++; m_freem(m); return; } #ifdef notyet switch (rxcd->rss_type) { case VMXNET3_RCD_RSS_TYPE_IPV4: m->m_pkthdr.flowid = rxcd->rss_hash; M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4); break; case VMXNET3_RCD_RSS_TYPE_TCPIPV4: m->m_pkthdr.flowid = rxcd->rss_hash; M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4); break; case VMXNET3_RCD_RSS_TYPE_IPV6: m->m_pkthdr.flowid = rxcd->rss_hash; M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6); break; case VMXNET3_RCD_RSS_TYPE_TCPIPV6: m->m_pkthdr.flowid = rxcd->rss_hash; M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6); break; default: /* VMXNET3_RCD_RSS_TYPE_NONE */ m->m_pkthdr.flowid = rxq->vxrxq_id; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); break; } #else m->m_pkthdr.flowid = rxq->vxrxq_id; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); #endif if (!rxcd->no_csum) vmxnet3_rx_csum(rxcd, m); if (rxcd->vlan) { m->m_flags |= M_VLANTAG; m->m_pkthdr.ether_vtag = rxcd->vtag; } rxq->vxrxq_stats.vmrxs_ipackets++; rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len; VMXNET3_RXQ_UNLOCK(rxq); (*ifp->if_input)(ifp, m); VMXNET3_RXQ_LOCK(rxq); } static void vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq) { struct vmxnet3_softc *sc; struct ifnet *ifp; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxdesc *rxd; struct vmxnet3_rxcompdesc *rxcd; struct mbuf *m, *m_head, *m_tail; int idx, length; sc = rxq->vxrxq_sc; ifp = sc->vmx_ifp; rxc = &rxq->vxrxq_comp_ring; VMXNET3_RXQ_LOCK_ASSERT(rxq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; m_head = rxq->vxrxq_mhead; rxq->vxrxq_mhead = NULL; m_tail = rxq->vxrxq_mtail; rxq->vxrxq_mtail = NULL; MPASS(m_head == NULL || m_tail != NULL); for (;;) { rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next]; if (rxcd->gen != rxc->vxcr_gen) { rxq->vxrxq_mhead = m_head; rxq->vxrxq_mtail = m_tail; break; } vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); if (++rxc->vxcr_next == rxc->vxcr_ndesc) { rxc->vxcr_next = 0; rxc->vxcr_gen ^= 1; } idx = rxcd->rxd_idx; length = rxcd->len; if (rxcd->qid < sc->vmx_nrxqueues) rxr = &rxq->vxrxq_cmd_ring[0]; else rxr = &rxq->vxrxq_cmd_ring[1]; rxd = &rxr->vxrxr_rxd[idx]; m = rxr->vxrxr_rxbuf[idx].vrxb_m; KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf", __func__, rxcd->qid, idx)); /* * The host may skip descriptors. We detect this when this * descriptor does not match the previous fill index. Catch * up with the host now. 
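 *
 * vmxnet3_rxr_increment_fill() (defined in the driver's header, not
 * shown in this file) advances the fill index and flips the ring's
 * generation bit on wrap-around; it is presumably equivalent to:
 *
 *	static __inline void
 *	example_rxr_increment_fill(struct vmxnet3_rxring *rxr)
 *	{
 *		if (++rxr->vxrxr_fill == rxr->vxrxr_ndesc) {
 *			rxr->vxrxr_fill = 0;
 *			rxr->vxrxr_gen ^= 1;
 *		}
 *	}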
*/ if (__predict_false(rxr->vxrxr_fill != idx)) { while (rxr->vxrxr_fill != idx) { rxr->vxrxr_rxd[rxr->vxrxr_fill].gen = rxr->vxrxr_gen; vmxnet3_rxr_increment_fill(rxr); } } if (rxcd->sop) { KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD, ("%s: start of frame w/o head buffer", __func__)); KASSERT(rxr == &rxq->vxrxq_cmd_ring[0], ("%s: start of frame not in ring 0", __func__)); KASSERT((idx % sc->vmx_rx_max_chain) == 0, ("%s: start of frame at unexpected index %d (%d)", __func__, idx, sc->vmx_rx_max_chain)); KASSERT(m_head == NULL, ("%s: duplicate start of frame?", __func__)); if (length == 0) { /* Just ignore this descriptor. */ vmxnet3_rxq_eof_discard(rxq, rxr, idx); goto nextp; } if (vmxnet3_newbuf(sc, rxr) != 0) { rxq->vxrxq_stats.vmrxs_iqdrops++; vmxnet3_rxq_eof_discard(rxq, rxr, idx); if (!rxcd->eop) vmxnet3_rxq_discard_chain(rxq); goto nextp; } m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = length; m->m_pkthdr.csum_flags = 0; m_head = m_tail = m; } else { KASSERT(rxd->btype == VMXNET3_BTYPE_BODY, ("%s: non start of frame w/o body buffer", __func__)); KASSERT(m_head != NULL, ("%s: frame not started?", __func__)); if (vmxnet3_newbuf(sc, rxr) != 0) { rxq->vxrxq_stats.vmrxs_iqdrops++; vmxnet3_rxq_eof_discard(rxq, rxr, idx); if (!rxcd->eop) vmxnet3_rxq_discard_chain(rxq); m_freem(m_head); m_head = m_tail = NULL; goto nextp; } m->m_len = length; m_head->m_pkthdr.len += length; m_tail->m_next = m; m_tail = m; } if (rxcd->eop) { vmxnet3_rxq_input(rxq, rxcd, m_head); m_head = m_tail = NULL; /* Must recheck after dropping the Rx lock. */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } nextp: if (__predict_false(rxq->vxrxq_rs->update_rxhead)) { int qid = rxcd->qid; bus_size_t r; idx = (idx + 1) % rxr->vxrxr_ndesc; if (qid >= sc->vmx_nrxqueues) { qid -= sc->vmx_nrxqueues; r = VMXNET3_BAR0_RXH2(qid); } else r = VMXNET3_BAR0_RXH1(qid); vmxnet3_write_bar0(sc, r, idx); } } } static void vmxnet3_legacy_intr(void *xsc) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; struct vmxnet3_txqueue *txq; sc = xsc; rxq = &sc->vmx_rxq[0]; txq = &sc->vmx_txq[0]; if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) { if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0) return; } if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_all_intrs(sc); if (sc->vmx_ds->event != 0) vmxnet3_evintr(sc); VMXNET3_RXQ_LOCK(rxq); vmxnet3_rxq_eof(rxq); VMXNET3_RXQ_UNLOCK(rxq); VMXNET3_TXQ_LOCK(txq); vmxnet3_txq_eof(txq); vmxnet3_txq_start(txq); VMXNET3_TXQ_UNLOCK(txq); vmxnet3_enable_all_intrs(sc); } static void vmxnet3_txq_intr(void *xtxq) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; txq = xtxq; sc = txq->vxtxq_sc; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx); VMXNET3_TXQ_LOCK(txq); vmxnet3_txq_eof(txq); vmxnet3_txq_start(txq); VMXNET3_TXQ_UNLOCK(txq); vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx); } static void vmxnet3_rxq_intr(void *xrxq) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; rxq = xrxq; sc = rxq->vxrxq_sc; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx); VMXNET3_RXQ_LOCK(rxq); vmxnet3_rxq_eof(rxq); VMXNET3_RXQ_UNLOCK(rxq); vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx); } static void vmxnet3_event_intr(void *xsc) { struct vmxnet3_softc *sc; sc = xsc; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx); if (sc->vmx_ds->event != 0) vmxnet3_evintr(sc); vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx); } static void vmxnet3_txstop(struct
vmxnet3_softc *sc, struct vmxnet3_txqueue *txq) { struct vmxnet3_txring *txr; struct vmxnet3_txbuf *txb; int i; txr = &txq->vxtxq_cmd_ring; for (i = 0; i < txr->vxtxr_ndesc; i++) { txb = &txr->vxtxr_txbuf[i]; if (txb->vtxb_m == NULL) continue; bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap); m_freem(txb->vtxb_m); txb->vtxb_m = NULL; } } static void vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq) { struct vmxnet3_rxring *rxr; struct vmxnet3_rxbuf *rxb; int i, j; if (rxq->vxrxq_mhead != NULL) { m_freem(rxq->vxrxq_mhead); rxq->vxrxq_mhead = NULL; rxq->vxrxq_mtail = NULL; } for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; for (j = 0; j < rxr->vxrxr_ndesc; j++) { rxb = &rxr->vxrxr_rxbuf[j]; if (rxb->vrxb_m == NULL) continue; bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap); m_freem(rxb->vrxb_m); rxb->vrxb_m = NULL; } } } static void vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc) { struct vmxnet3_rxqueue *rxq; struct vmxnet3_txqueue *txq; int i; for (i = 0; i < sc->vmx_nrxqueues; i++) { rxq = &sc->vmx_rxq[i]; VMXNET3_RXQ_LOCK(rxq); VMXNET3_RXQ_UNLOCK(rxq); } for (i = 0; i < sc->vmx_ntxqueues; i++) { txq = &sc->vmx_txq[i]; VMXNET3_TXQ_LOCK(txq); VMXNET3_TXQ_UNLOCK(txq); } } static void vmxnet3_stop(struct vmxnet3_softc *sc) { struct ifnet *ifp; int q; ifp = sc->vmx_ifp; VMXNET3_CORE_LOCK_ASSERT(sc); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sc->vmx_link_active = 0; callout_stop(&sc->vmx_tick); /* Disable interrupts. */ vmxnet3_disable_all_intrs(sc); vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE); vmxnet3_stop_rendezvous(sc); for (q = 0; q < sc->vmx_ntxqueues; q++) vmxnet3_txstop(sc, &sc->vmx_txq[q]); for (q = 0; q < sc->vmx_nrxqueues; q++) vmxnet3_rxstop(sc, &sc->vmx_rxq[q]); vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET); } static void vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq) { struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; txr = &txq->vxtxq_cmd_ring; txr->vxtxr_head = 0; txr->vxtxr_next = 0; txr->vxtxr_gen = VMXNET3_INIT_GEN; bzero(txr->vxtxr_txd, txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc)); txc = &txq->vxtxq_comp_ring; txc->vxcr_next = 0; txc->vxcr_gen = VMXNET3_INIT_GEN; bzero(txc->vxcr_u.txcd, txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc)); } static int vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq) { struct ifnet *ifp; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; int i, populate, idx, frame_size, error; ifp = sc->vmx_ifp; frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) + ifp->if_mtu; /* * If the MTU causes us to exceed what a regular sized cluster can * handle, we allocate a second MJUMPAGESIZE cluster after it in * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters. * * Keep rx_max_chain a divisor of the maximum Rx ring size to make * our life easier. We do not support changing the ring size after * the attach. */ if (frame_size <= MCLBYTES) sc->vmx_rx_max_chain = 1; else sc->vmx_rx_max_chain = 2; /* * Only populate ring 1 if the configuration will take advantage * of it. That is either when LRO is enabled or the frame size * exceeds what ring 0 can contain. 
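 *
 * A worked example of both rules (numbers only, not driver code):
 *
 *	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) + MTU
 *	           = 2 + 18 + 1500 = 1520 for the default MTU, which fits
 *	             one MCLBYTES (2048) head buffer, so ring 1 stays
 *	             empty unless LRO is on;
 *	           = 2 + 18 + 9000 = 9020 for a jumbo MTU, which exceeds
 *	             MCLBYTES + MJUMPAGESIZE (2048 + 4096 = 6144), so
 *	             ring 1 must be populated as well.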
*/ if ((ifp->if_capenable & IFCAP_LRO) == 0 && frame_size <= MCLBYTES + MJUMPAGESIZE) populate = 1; else populate = VMXNET3_RXRINGS_PERQ; for (i = 0; i < populate; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_fill = 0; rxr->vxrxr_gen = VMXNET3_INIT_GEN; bzero(rxr->vxrxr_rxd, rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc)); for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) { error = vmxnet3_newbuf(sc, rxr); if (error) return (error); } } for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_fill = 0; rxr->vxrxr_gen = 0; bzero(rxr->vxrxr_rxd, rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc)); } rxc = &rxq->vxrxq_comp_ring; rxc->vxcr_next = 0; rxc->vxcr_gen = VMXNET3_INIT_GEN; bzero(rxc->vxcr_u.rxcd, rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc)); return (0); } static int vmxnet3_reinit_queues(struct vmxnet3_softc *sc) { device_t dev; int q, error; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_ntxqueues; q++) vmxnet3_txinit(sc, &sc->vmx_txq[q]); for (q = 0; q < sc->vmx_nrxqueues; q++) { error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]); if (error) { device_printf(dev, "cannot populate Rx queue %d\n", q); return (error); } } return (0); } static int vmxnet3_enable_device(struct vmxnet3_softc *sc) { int q; if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) { device_printf(sc->vmx_dev, "device enable command failed!\n"); return (1); } /* Reset the Rx queue heads. */ for (q = 0; q < sc->vmx_nrxqueues; q++) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0); vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0); } return (0); } static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc) { struct ifnet *ifp; ifp = sc->vmx_ifp; vmxnet3_set_rxfilter(sc); if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter, sizeof(sc->vmx_ds->vlan_filter)); else bzero(sc->vmx_ds->vlan_filter, sizeof(sc->vmx_ds->vlan_filter)); vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER); } static int vmxnet3_reinit(struct vmxnet3_softc *sc) { vmxnet3_reinit_interface(sc); vmxnet3_reinit_shared_data(sc); if (vmxnet3_reinit_queues(sc) != 0) return (ENXIO); if (vmxnet3_enable_device(sc) != 0) return (ENXIO); vmxnet3_reinit_rxfilters(sc); return (0); } static void vmxnet3_init_locked(struct vmxnet3_softc *sc) { struct ifnet *ifp; ifp = sc->vmx_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; vmxnet3_stop(sc); if (vmxnet3_reinit(sc) != 0) { vmxnet3_stop(sc); return; } ifp->if_drv_flags |= IFF_DRV_RUNNING; vmxnet3_link_status(sc); vmxnet3_enable_all_intrs(sc); callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc); } static void vmxnet3_init(void *xsc) { struct vmxnet3_softc *sc; sc = xsc; VMXNET3_CORE_LOCK(sc); vmxnet3_init_locked(sc); VMXNET3_CORE_UNLOCK(sc); } /* * BMV: Much of this can go away once we finally have offsets in * the mbuf packet header. Bug andre@. */ static int vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m, int *etype, int *proto, int *start) { struct ether_vlan_header *evh; int offset; #if defined(INET) struct ip *ip = NULL; struct ip iphdr; #endif #if defined(INET6) struct ip6_hdr *ip6 = NULL; struct ip6_hdr ip6hdr; #endif evh = mtod(m, struct ether_vlan_header *); if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { /* BMV: We should handle nested VLAN tags too. 
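 *
 * The nested-tag handling this TODO asks for could look roughly like
 * the sketch below (example_skip_vlan_tags is made up and assumes the
 * headers sit contiguously in the first mbuf):
 *
 *	static int
 *	example_skip_vlan_tags(struct mbuf *m, int *etype)
 *	{
 *		struct ether_vlan_header *evh;
 *		int offset;
 *
 *		evh = mtod(m, struct ether_vlan_header *);
 *		*etype = ntohs(evh->evl_encap_proto);
 *		offset = sizeof(struct ether_header);
 *		while (*etype == ETHERTYPE_VLAN) {
 *			*etype = ntohs(*(uint16_t *)(mtod(m, caddr_t) +
 *			    offset + 2));
 *			offset += ETHER_VLAN_ENCAP_LEN;
 *		}
 *		return (offset);	// start of the L3 header
 *	}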
*/ *etype = ntohs(evh->evl_proto); offset = sizeof(struct ether_vlan_header); } else { *etype = ntohs(evh->evl_encap_proto); offset = sizeof(struct ether_header); } switch (*etype) { #if defined(INET) case ETHERTYPE_IP: if (__predict_false(m->m_len < offset + sizeof(struct ip))) { m_copydata(m, offset, sizeof(struct ip), (caddr_t) &iphdr); ip = &iphdr; } else ip = mtodo(m, offset); *proto = ip->ip_p; *start = offset + (ip->ip_hl << 2); break; #endif #if defined(INET6) case ETHERTYPE_IPV6: if (__predict_false(m->m_len < offset + sizeof(struct ip6_hdr))) { m_copydata(m, offset, sizeof(struct ip6_hdr), (caddr_t) &ip6hdr); ip6 = &ip6hdr; } else ip6 = mtodo(m, offset); *proto = -1; *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); /* Assert the network stack sent us a valid packet. */ KASSERT(*start > offset, ("%s: mbuf %p start %d offset %d proto %d", __func__, m, *start, offset, *proto)); break; #endif default: return (EINVAL); } if (m->m_pkthdr.csum_flags & CSUM_TSO) { struct tcphdr *tcp, tcphdr; uint16_t sum; if (__predict_false(*proto != IPPROTO_TCP)) { /* Likely failed to correctly parse the mbuf. */ return (EINVAL); } txq->vxtxq_stats.vmtxs_tso++; switch (*etype) { #if defined(INET) case ETHERTYPE_IP: sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); break; #endif #if defined(INET6) case ETHERTYPE_IPV6: sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); break; #endif default: sum = 0; break; } if (m->m_len < *start + sizeof(struct tcphdr)) { m_copyback(m, *start + offsetof(struct tcphdr, th_sum), sizeof(uint16_t), (caddr_t) &sum); m_copydata(m, *start, sizeof(struct tcphdr), (caddr_t) &tcphdr); tcp = &tcphdr; } else { tcp = mtodo(m, *start); tcp->th_sum = sum; } /* * For TSO, the size of the protocol header is also * included in the descriptor header size. 
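 *
 * A worked example of that rule: for an untagged IPv4 TSO frame with
 * no IP or TCP options, *start is 14 (Ethernet) + 20 (IPv4) = 34 at
 * this point, and adding the TCP header below yields the value the
 * device expects:
 *
 *	hlen = 34 + (tcp->th_off << 2) = 34 + 20 = 54 bytes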
*/ *start += (tcp->th_off << 2); } else txq->vxtxq_stats.vmtxs_csum++; return (0); } static int vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0, bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs) { struct vmxnet3_txring *txr; struct mbuf *m; bus_dma_tag_t tag; int error; txr = &txq->vxtxq_cmd_ring; m = *m0; tag = txr->vxtxr_txtag; error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0); if (error == 0 || error != EFBIG) return (error); m = m_defrag(m, M_NOWAIT); if (m != NULL) { *m0 = m; error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0); } else error = ENOBUFS; if (error) { m_freem(*m0); *m0 = NULL; txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++; } else txq->vxtxq_sc->vmx_stats.vmst_defragged++; return (error); } static void vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap) { struct vmxnet3_txring *txr; txr = &txq->vxtxq_cmd_ring; bus_dmamap_unload(txr->vxtxr_txtag, dmap); } static int vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0) { struct vmxnet3_softc *sc; struct vmxnet3_txring *txr; struct vmxnet3_txdesc *txd, *sop; struct mbuf *m; bus_dmamap_t dmap; bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS]; int i, gen, nsegs, etype, proto, start, error; sc = txq->vxtxq_sc; start = 0; txd = NULL; txr = &txq->vxtxq_cmd_ring; dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap; error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs); if (error) return (error); m = *m0; M_ASSERTPKTHDR(m); KASSERT(nsegs <= VMXNET3_TX_MAXSEGS, ("%s: mbuf %p with too many segments %d", __func__, m, nsegs)); if (VMXNET3_TXRING_AVAIL(txr) < nsegs) { txq->vxtxq_stats.vmtxs_full++; vmxnet3_txq_unload_mbuf(txq, dmap); return (ENOSPC); } else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) { error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start); if (error) { txq->vxtxq_stats.vmtxs_offload_failed++; vmxnet3_txq_unload_mbuf(txq, dmap); m_freem(m); *m0 = NULL; return (error); } } txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m; sop = &txr->vxtxr_txd[txr->vxtxr_head]; gen = txr->vxtxr_gen ^ 1; /* Owned by cpu (yet) */ for (i = 0; i < nsegs; i++) { txd = &txr->vxtxr_txd[txr->vxtxr_head]; txd->addr = segs[i].ds_addr; txd->len = segs[i].ds_len; txd->gen = gen; txd->dtype = 0; txd->offload_mode = VMXNET3_OM_NONE; txd->offload_pos = 0; txd->hlen = 0; txd->eop = 0; txd->compreq = 0; txd->vtag_mode = 0; txd->vtag = 0; if (++txr->vxtxr_head == txr->vxtxr_ndesc) { txr->vxtxr_head = 0; txr->vxtxr_gen ^= 1; } gen = txr->vxtxr_gen; } txd->eop = 1; txd->compreq = 1; if (m->m_flags & M_VLANTAG) { sop->vtag_mode = 1; sop->vtag = m->m_pkthdr.ether_vtag; } if (m->m_pkthdr.csum_flags & CSUM_TSO) { sop->offload_mode = VMXNET3_OM_TSO; sop->hlen = start; sop->offload_pos = m->m_pkthdr.tso_segsz; } else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD | VMXNET3_CSUM_OFFLOAD_IPV6)) { sop->offload_mode = VMXNET3_OM_CSUM; sop->hlen = start; sop->offload_pos = start + m->m_pkthdr.csum_data; } /* Finally, change the ownership. 
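 *
 * Reduced to its essentials, the hand-off below is (illustrative
 * helper, not driver code): write every field of the start-of-packet
 * descriptor first, order the stores with a write barrier, and only
 * then flip the generation bit that passes ownership to the device.
 *
 *	static void
 *	example_publish(struct vmxnet3_softc *sc,
 *	    struct vmxnet3_txdesc *sop)
 *	{
 *		vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
 *		sop->gen ^= 1;		// device now owns the descriptor
 *	}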
*/ vmxnet3_barrier(sc, VMXNET3_BARRIER_WR); sop->gen ^= 1; txq->vxtxq_ts->npending += nsegs; if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) { txq->vxtxq_ts->npending = 0; vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), txr->vxtxr_head); } return (0); } #ifdef VMXNET3_LEGACY_TX static void vmxnet3_start_locked(struct ifnet *ifp) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct mbuf *m_head; int tx, avail; sc = ifp->if_softc; txq = &sc->vmx_txq[0]; txr = &txq->vxtxq_cmd_ring; tx = 0; VMXNET3_TXQ_LOCK_ASSERT(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->vmx_link_active == 0) return; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2) break; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* Assume worst case if this mbuf is the head of a chain. */ if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) { IFQ_DRV_PREPEND(&ifp->if_snd, m_head); break; } if (vmxnet3_txq_encap(txq, &m_head) != 0) { if (m_head != NULL) IFQ_DRV_PREPEND(&ifp->if_snd, m_head); break; } tx++; ETHER_BPF_MTAP(ifp, m_head); } if (tx > 0) txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT; } static void vmxnet3_start(struct ifnet *ifp) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; sc = ifp->if_softc; txq = &sc->vmx_txq[0]; VMXNET3_TXQ_LOCK(txq); vmxnet3_start_locked(ifp); VMXNET3_TXQ_UNLOCK(txq); } #else /* !VMXNET3_LEGACY_TX */ static int vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m) { struct vmxnet3_softc *sc; struct vmxnet3_txring *txr; struct buf_ring *br; struct ifnet *ifp; int tx, avail, error; sc = txq->vxtxq_sc; br = txq->vxtxq_br; ifp = sc->vmx_ifp; txr = &txq->vxtxq_cmd_ring; tx = 0; error = 0; VMXNET3_TXQ_LOCK_ASSERT(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->vmx_link_active == 0) { if (m != NULL) error = drbr_enqueue(ifp, br, m); return (error); } if (m != NULL) { error = drbr_enqueue(ifp, br, m); if (error) return (error); } while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) { m = drbr_peek(ifp, br); if (m == NULL) break; /* Assume worst case if this mbuf is the head of a chain.
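 *
 * "Worst case" means the full VMXNET3_TX_MAXSEGS descriptors: a
 * multi-buffer chain may map to up to that many DMA segments, and the
 * exact count is unknown until bus_dmamap_load_mbuf_sg() runs, so the
 * ring must have room for the maximum before the mbuf is dequeued.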
*/ if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) { drbr_putback(ifp, br, m); break; } if (vmxnet3_txq_encap(txq, &m) != 0) { if (m != NULL) drbr_putback(ifp, br, m); else drbr_advance(ifp, br); break; } drbr_advance(ifp, br); tx++; ETHER_BPF_MTAP(ifp, m); } if (tx > 0) txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT; return (0); } static int vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; int i, ntxq, error; sc = ifp->if_softc; ntxq = sc->vmx_ntxqueues; /* check if flowid is set */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % ntxq; else i = curcpu % ntxq; txq = &sc->vmx_txq[i]; if (VMXNET3_TXQ_TRYLOCK(txq) != 0) { error = vmxnet3_txq_mq_start_locked(txq, m); VMXNET3_TXQ_UNLOCK(txq); } else { error = drbr_enqueue(ifp, txq->vxtxq_br, m); taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask); } return (error); } static void vmxnet3_txq_tq_deferred(void *xtxq, int pending) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; txq = xtxq; sc = txq->vxtxq_sc; VMXNET3_TXQ_LOCK(txq); if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br)) vmxnet3_txq_mq_start_locked(txq, NULL); VMXNET3_TXQ_UNLOCK(txq); } #endif /* VMXNET3_LEGACY_TX */ static void vmxnet3_txq_start(struct vmxnet3_txqueue *txq) { struct vmxnet3_softc *sc; struct ifnet *ifp; sc = txq->vxtxq_sc; ifp = sc->vmx_ifp; #ifdef VMXNET3_LEGACY_TX if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) vmxnet3_start_locked(ifp); #else if (!drbr_empty(ifp, txq->vxtxq_br)) vmxnet3_txq_mq_start_locked(txq, NULL); #endif } static void vmxnet3_tx_start_all(struct vmxnet3_softc *sc) { struct vmxnet3_txqueue *txq; int i; VMXNET3_CORE_LOCK_ASSERT(sc); for (i = 0; i < sc->vmx_ntxqueues; i++) { txq = &sc->vmx_txq[i]; VMXNET3_TXQ_LOCK(txq); vmxnet3_txq_start(txq); VMXNET3_TXQ_UNLOCK(txq); } } static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag) { struct ifnet *ifp; int idx, bit; ifp = sc->vmx_ifp; idx = (tag >> 5) & 0x7F; bit = tag & 0x1F; if (tag == 0 || tag > 4095) return; VMXNET3_CORE_LOCK(sc); /* Update our private VLAN bitvector. 
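 *
 * The bitvector covers all 4096 VLAN IDs as 128 32-bit words, so the
 * arithmetic above is a word/bit split (illustrative helper):
 *
 *	static void
 *	example_vlan_bit(uint16_t tag, int *idx, int *bit)
 *	{
 *		*idx = (tag >> 5) & 0x7F;	// tag / 32: which word
 *		*bit = tag & 0x1F;		// tag % 32: which bit
 *	}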
*/ if (add) sc->vmx_vlan_filter[idx] |= (1 << bit); else sc->vmx_vlan_filter[idx] &= ~(1 << bit); if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { if (add) sc->vmx_ds->vlan_filter[idx] |= (1 << bit); else sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit); vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER); } VMXNET3_CORE_UNLOCK(sc); } static void vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag) { if (ifp->if_softc == arg) vmxnet3_update_vlan_filter(arg, 1, tag); } static void vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag) { if (ifp->if_softc == arg) vmxnet3_update_vlan_filter(arg, 0, tag); } static void vmxnet3_set_rxfilter(struct vmxnet3_softc *sc) { struct ifnet *ifp; struct vmxnet3_driver_shared *ds; struct ifmultiaddr *ifma; u_int mode; ifp = sc->vmx_ifp; ds = sc->vmx_ds; mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST; if (ifp->if_flags & IFF_PROMISC) mode |= VMXNET3_RXMODE_PROMISC; if (ifp->if_flags & IFF_ALLMULTI) mode |= VMXNET3_RXMODE_ALLMULTI; else { int cnt = 0, overflow = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; else if (cnt == VMXNET3_MULTICAST_MAX) { overflow = 1; break; } bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &sc->vmx_mcast[cnt*ETHER_ADDR_LEN], ETHER_ADDR_LEN); cnt++; } if_maddr_runlock(ifp); if (overflow != 0) { cnt = 0; mode |= VMXNET3_RXMODE_ALLMULTI; } else if (cnt > 0) mode |= VMXNET3_RXMODE_MCAST; ds->mcast_tablelen = cnt * ETHER_ADDR_LEN; } ds->rxmode = mode; vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER); vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE); } static int vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu) { struct ifnet *ifp; ifp = sc->vmx_ifp; if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU) return (EINVAL); ifp->if_mtu = mtu; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vmxnet3_init_locked(sc); } return (0); } static int vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct vmxnet3_softc *sc; struct ifreq *ifr; int reinit, mask, error; sc = ifp->if_softc; ifr = (struct ifreq *) data; error = 0; switch (cmd) { case SIOCSIFMTU: if (ifp->if_mtu != ifr->ifr_mtu) { VMXNET3_CORE_LOCK(sc); error = vmxnet3_change_mtu(sc, ifr->ifr_mtu); VMXNET3_CORE_UNLOCK(sc); } break; case SIOCSIFFLAGS: VMXNET3_CORE_LOCK(sc); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ sc->vmx_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { vmxnet3_set_rxfilter(sc); } } else vmxnet3_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vmxnet3_stop(sc); } sc->vmx_if_flags = ifp->if_flags; VMXNET3_CORE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: VMXNET3_CORE_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) vmxnet3_set_rxfilter(sc); VMXNET3_CORE_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd); break; case SIOCSIFCAP: VMXNET3_CORE_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) ifp->if_capenable ^= IFCAP_TXCSUM; if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) { /* Changing these features requires us to reinit. 
*/ reinit = 1; if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; } else reinit = 0; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vmxnet3_init_locked(sc); } VMXNET3_CORE_UNLOCK(sc); VLAN_CAPABILITIES(ifp); break; default: error = ether_ioctl(ifp, cmd, data); break; } VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc); return (error); } #ifndef VMXNET3_LEGACY_TX static void vmxnet3_qflush(struct ifnet *ifp) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; struct mbuf *m; int i; sc = ifp->if_softc; for (i = 0; i < sc->vmx_ntxqueues; i++) { txq = &sc->vmx_txq[i]; VMXNET3_TXQ_LOCK(txq); while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL) m_freem(m); VMXNET3_TXQ_UNLOCK(txq); } if_qflush(ifp); } #endif static int vmxnet3_watchdog(struct vmxnet3_txqueue *txq) { struct vmxnet3_softc *sc; sc = txq->vxtxq_sc; VMXNET3_TXQ_LOCK(txq); if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) { VMXNET3_TXQ_UNLOCK(txq); return (0); } VMXNET3_TXQ_UNLOCK(txq); if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n", txq->vxtxq_id); return (1); } static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc) { vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS); } static uint64_t vmxnet3_get_counter(struct ifnet *ifp, ift_counter cnt) { struct vmxnet3_softc *sc; uint64_t rv; sc = if_getsoftc(ifp); rv = 0; /* * With the exception of if_ierrors, these ifnet statistics are * only updated in the driver, so just set them to our accumulated * values. if_ierrors is updated in ether_input() for malformed * frames that we should have already discarded. */ switch (cnt) { case IFCOUNTER_IPACKETS: for (int i = 0; i < sc->vmx_nrxqueues; i++) rv += sc->vmx_rxq[i].vxrxq_stats.vmrxs_ipackets; return (rv); case IFCOUNTER_IQDROPS: for (int i = 0; i < sc->vmx_nrxqueues; i++) rv += sc->vmx_rxq[i].vxrxq_stats.vmrxs_iqdrops; return (rv); case IFCOUNTER_IERRORS: for (int i = 0; i < sc->vmx_nrxqueues; i++) rv += sc->vmx_rxq[i].vxrxq_stats.vmrxs_ierrors; return (rv); case IFCOUNTER_OPACKETS: for (int i = 0; i < sc->vmx_ntxqueues; i++) rv += sc->vmx_txq[i].vxtxq_stats.vmtxs_opackets; return (rv); #ifndef VMXNET3_LEGACY_TX case IFCOUNTER_OBYTES: for (int i = 0; i < sc->vmx_ntxqueues; i++) rv += sc->vmx_txq[i].vxtxq_stats.vmtxs_obytes; return (rv); case IFCOUNTER_OMCASTS: for (int i = 0; i < sc->vmx_ntxqueues; i++) rv += sc->vmx_txq[i].vxtxq_stats.vmtxs_omcasts; return (rv); #endif default: return (if_get_counter_default(ifp, cnt)); } } static void vmxnet3_tick(void *xsc) { struct vmxnet3_softc *sc; struct ifnet *ifp; int i, timedout; sc = xsc; ifp = sc->vmx_ifp; timedout = 0; VMXNET3_CORE_LOCK_ASSERT(sc); vmxnet3_refresh_host_stats(sc); for (i = 0; i < sc->vmx_ntxqueues; i++) timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]); if (timedout != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vmxnet3_init_locked(sc); } else callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc); } static int vmxnet3_link_is_up(struct vmxnet3_softc *sc) { uint32_t status; /* Also update the link speed while here. 
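 *
 * The GET_LINK result packs both answers into one 32-bit word, as the
 * code below assumes: bit 0 is the link-up flag and the upper 16 bits
 * carry the reported speed.  Sketch (example_decode_link is
 * illustrative):
 *
 *	static int
 *	example_decode_link(uint32_t status, uint32_t *speed)
 *	{
 *		*speed = status >> 16;
 *		return ((status & 0x1) != 0);
 *	}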
*/ status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK); sc->vmx_link_speed = status >> 16; return !!(status & 0x1); } static void vmxnet3_link_status(struct vmxnet3_softc *sc) { struct ifnet *ifp; int link; ifp = sc->vmx_ifp; link = vmxnet3_link_is_up(sc); if (link != 0 && sc->vmx_link_active == 0) { sc->vmx_link_active = 1; if_link_state_change(ifp, LINK_STATE_UP); } else if (link == 0 && sc->vmx_link_active != 0) { sc->vmx_link_active = 0; if_link_state_change(ifp, LINK_STATE_DOWN); } } static void vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vmxnet3_softc *sc; sc = ifp->if_softc; ifmr->ifm_active = IFM_ETHER | IFM_AUTO; ifmr->ifm_status = IFM_AVALID; VMXNET3_CORE_LOCK(sc); if (vmxnet3_link_is_up(sc) != 0) ifmr->ifm_status |= IFM_ACTIVE; else ifmr->ifm_status |= IFM_NONE; VMXNET3_CORE_UNLOCK(sc); } static int vmxnet3_media_change(struct ifnet *ifp) { /* Ignore. */ return (0); } static void vmxnet3_set_lladdr(struct vmxnet3_softc *sc) { uint32_t ml, mh; ml = sc->vmx_lladdr[0]; ml |= sc->vmx_lladdr[1] << 8; ml |= sc->vmx_lladdr[2] << 16; ml |= sc->vmx_lladdr[3] << 24; vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml); mh = sc->vmx_lladdr[4]; mh |= sc->vmx_lladdr[5] << 8; vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh); } static void vmxnet3_get_lladdr(struct vmxnet3_softc *sc) { uint32_t ml, mh; ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL); mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH); sc->vmx_lladdr[0] = ml; sc->vmx_lladdr[1] = ml >> 8; sc->vmx_lladdr[2] = ml >> 16; sc->vmx_lladdr[3] = ml >> 24; sc->vmx_lladdr[4] = mh; sc->vmx_lladdr[5] = mh >> 8; } static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { struct sysctl_oid *node, *txsnode; struct sysctl_oid_list *list, *txslist; struct vmxnet3_txq_stats *stats; struct UPT1_TxStats *txstats; char namebuf[16]; stats = &txq->vxtxq_stats; txstats = &txq->vxtxq_ts->stats; snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Transmit Queue"); txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, &stats->vmtxs_opackets, "Transmit packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, &stats->vmtxs_obytes, "Transmit bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, &stats->vmtxs_omcasts, "Transmit multicasts"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, &stats->vmtxs_csum, "Transmit checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, &stats->vmtxs_tso, "Transmit TCP segmentation offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD, &stats->vmtxs_full, "Transmit ring full"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD, &stats->vmtxs_offload_failed, "Transmit checksum offload failed"); /* * Add statistics reported by the host. These are updated once * per second. 
*/ txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD, NULL, "Host Statistics"); txslist = SYSCTL_CHILDREN(txsnode); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD, &txstats->TSO_packets, "TSO packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD, &txstats->TSO_bytes, "TSO bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD, &txstats->ucast_packets, "Unicast packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD, &txstats->ucast_bytes, "Unicast bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD, &txstats->mcast_packets, "Multicast packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD, &txstats->mcast_bytes, "Multicast bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD, &txstats->error, "Errors"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD, &txstats->discard, "Discards"); } static void vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { struct sysctl_oid *node, *rxsnode; struct sysctl_oid_list *list, *rxslist; struct vmxnet3_rxq_stats *stats; struct UPT1_RxStats *rxstats; char namebuf[16]; stats = &rxq->vxrxq_stats; rxstats = &rxq->vxrxq_rs->stats; snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Receive Queue"); rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, &stats->vmrxs_ipackets, "Receive packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, &stats->vmrxs_ibytes, "Receive bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, &stats->vmrxs_iqdrops, "Receive drops"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, &stats->vmrxs_ierrors, "Receive errors"); /* * Add statistics reported by the host. These are updated once * per second. 
*/ rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD, NULL, "Host Statistics"); rxslist = SYSCTL_CHILDREN(rxsnode); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD, &rxstats->LRO_packets, "LRO packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD, &rxstats->LRO_bytes, "LRO bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD, &rxstats->ucast_packets, "Unicast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD, &rxstats->ucast_bytes, "Unicast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD, &rxstats->mcast_packets, "Multicast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD, &rxstats->mcast_bytes, "Multicast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD, &rxstats->bcast_packets, "Broadcast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD, &rxstats->bcast_bytes, "Broadcast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD, &rxstats->nobuffer, "No buffer"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD, &rxstats->error, "Errors"); } static void vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { struct sysctl_oid *node; struct sysctl_oid_list *list; int i; for (i = 0; i < sc->vmx_ntxqueues; i++) { struct vmxnet3_txqueue *txq = &sc->vmx_txq[i]; node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO, "debug", CTLFLAG_RD, NULL, ""); list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_head, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_next, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_gen, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_next, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_ndesc, 0,""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_gen, 0, ""); } for (i = 0; i < sc->vmx_nrxqueues; i++) { struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i]; node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO, "debug", CTLFLAG_RD, NULL, ""); list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_next, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_gen, 0, ""); } } static void vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc, struct 
sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { int i; for (i = 0; i < sc->vmx_ntxqueues; i++) vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child); for (i = 0; i < sc->vmx_nrxqueues; i++) vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child); vmxnet3_setup_debug_sysctl(sc, ctx, child); } static void vmxnet3_setup_sysctl(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_statistics *stats; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; dev = sc->vmx_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD, &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD, &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD, &sc->vmx_ntxqueues, 0, "Number of Tx queues"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD, &sc->vmx_nrxqueues, 0, "Number of Rx queues"); stats = &sc->vmx_stats; SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD, &stats->vmst_defragged, 0, "Tx mbuf chains defragged"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD, &stats->vmst_defrag_failed, 0, "Tx mbuf dropped because defrag failed"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD, &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD, &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed"); vmxnet3_setup_queue_sysctl(sc, ctx, child); } static void vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v) { bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v); } static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r) { return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r)); } static void vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v) { bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v); } static void vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd) { vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd); } static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd) { vmxnet3_write_cmd(sc, cmd); bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD)); } static void vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0); } static void vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1); } static void vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc) { int i; sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < sc->vmx_nintrs; i++) vmxnet3_enable_intr(sc, i); } static void vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc) { int i; sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < sc->vmx_nintrs; i++) vmxnet3_disable_intr(sc, i); } static void vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *baddr = arg; if (error == 0) *baddr = segs->ds_addr; } static int vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align, struct vmxnet3_dma_alloc *dma) { device_t dev; int error; dev = sc->vmx_dev; bzero(dma, sizeof(struct vmxnet3_dma_alloc)); error = bus_dma_tag_create(bus_get_dma_tag(dev), align, 0, /* alignment, bounds */ 
BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &dma->dma_tag); if (error) { device_printf(dev, "bus_dma_tag_create failed: %d\n", error); goto fail; } error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr, BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map); if (error) { device_printf(dev, "bus_dmamem_alloc failed: %d\n", error); goto fail; } error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "bus_dmamap_load failed: %d\n", error); goto fail; } dma->dma_size = size; fail: if (error) vmxnet3_dma_free(sc, dma); return (error); } static void vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma) { if (dma->dma_tag != NULL) { if (dma->dma_paddr != 0) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); } if (dma->dma_vaddr != NULL) { bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); } bus_dma_tag_destroy(dma->dma_tag); } bzero(dma, sizeof(struct vmxnet3_dma_alloc)); } static int vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def) { char path[64]; snprintf(path, sizeof(path), "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob); TUNABLE_INT_FETCH(path, &def); return (def); } /* * Since this is a purely paravirtualized device, we do not have * to worry about DMA coherency. But at times, we must make sure * both the compiler and CPU do not reorder memory operations. */ static inline void vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type) { switch (type) { case VMXNET3_BARRIER_RD: rmb(); break; case VMXNET3_BARRIER_WR: wmb(); break; case VMXNET3_BARRIER_RDWR: mb(); break; default: panic("%s: bad barrier type %d", __func__, type); } } Index: projects/powernv/dev/wpi/if_wpi.c =================================================================== --- projects/powernv/dev/wpi/if_wpi.c (revision 290990) +++ projects/powernv/dev/wpi/if_wpi.c (revision 290991) @@ -1,5644 +1,5644 @@ /*- * Copyright (c) 2006,2007 * Damien Bergamini * Benjamin Close * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); /* * Driver for Intel PRO/Wireless 3945ABG 802.11 network adapters. * * The 3945ABG network adapter doesn't use traditional hardware as * many other adaptors do. Instead at run time the eeprom is set into a known * state and told to load boot firmware. The boot firmware loads an init and a * main binary firmware image into SRAM on the card via DMA. * Once the firmware is loaded, the driver/hw then * communicate by way of circular dma rings via the SRAM to the firmware. 
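 *
 * As a rough sketch of that producer/consumer handshake (illustrative code
 * only, not taken from this driver -- every name below is invented):
 *
 *	#define EX_RING_COUNT	32
 *
 *	struct ex_ring {
 *		uint32_t	cur;	// last slot the driver processed
 *	};
 *
 *	static void
 *	ex_ring_drain(struct ex_ring *r, uint32_t hw)
 *	{
 *		// "hw" is the index most recently written by the firmware.
 *		while (r->cur != hw) {
 *			r->cur = (r->cur + 1) % EX_RING_COUNT;
 *			// consume descriptor r->cur and refill its buffer,
 *			// then tell the firmware how far we got.
 *		}
 *	}
 *
 * The details for this device are as follows.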
 *
 * There are six memory rings: one command ring, one rx data ring and four
 * tx data rings.  The four tx data rings allow for QoS prioritization.
 *
 * The rx data ring consists of 32 dma buffers.  Two registers are used to
 * indicate where in the ring the driver and the firmware are up to.  The
 * driver sets the initial read index (reg1) and the initial write index
 * (reg2); the firmware updates the read index (reg1) on rx of a packet and
 * fires an interrupt.  The driver then processes the buffers starting at
 * reg1, indicating to the firmware which buffers have been accessed by
 * updating reg2, and at the same time allocates new memory for each
 * processed buffer.
 *
 * A similar thing happens with the tx rings.  The difference is that the
 * firmware stops processing buffers once the queue is full, and resumes
 * only once confirmation of a successful transmission (tx_done) has
 * occurred.
 *
 * The command ring operates in the same manner as the tx queues.
 *
 * All communication direct to the card (i.e. eeprom) is classed as Stage1
 * communication.
 *
 * All communication via the firmware to the card is classed as Stage2.
 * The firmware consists of 2 parts: a bootstrap firmware and a runtime
 * firmware.  The bootstrap firmware and runtime firmware are loaded from
 * host memory via dma to the card and then told to execute.  From this
 * point on the majority of communication between the driver and the card
 * goes via the firmware.
 */
#include "opt_wlan.h" #include "opt_wpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include
struct wpi_ident { uint16_t vendor; uint16_t device; uint16_t subdevice; const char *name; };
static const struct wpi_ident wpi_ident_table[] = { /* The below entries support ABG regardless of the subid */ { 0x8086, 0x4222, 0x0, "Intel(R) PRO/Wireless 3945ABG" }, { 0x8086, 0x4227, 0x0, "Intel(R) PRO/Wireless 3945ABG" }, /* The below entries only support BG */ { 0x8086, 0x4222, 0x1005, "Intel(R) PRO/Wireless 3945BG" }, { 0x8086, 0x4222, 0x1034, "Intel(R) PRO/Wireless 3945BG" }, { 0x8086, 0x4227, 0x1014, "Intel(R) PRO/Wireless 3945BG" }, { 0x8086, 0x4222, 0x1044, "Intel(R) PRO/Wireless 3945BG" }, { 0, 0, 0, NULL } };
static int wpi_probe(device_t); static int wpi_attach(device_t); static void wpi_radiotap_attach(struct wpi_softc *); static void wpi_sysctlattach(struct wpi_softc *); static void wpi_init_beacon(struct wpi_vap *); static struct ieee80211vap *wpi_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void wpi_vap_delete(struct ieee80211vap *); static int wpi_detach(device_t); static int wpi_shutdown(device_t); static int wpi_suspend(device_t); static int wpi_resume(device_t); static int wpi_nic_lock(struct wpi_softc *); static int wpi_read_prom_data(struct wpi_softc *, uint32_t, void *, int); static void wpi_dma_map_addr(void *, bus_dma_segment_t *, int, int); static int wpi_dma_contig_alloc(struct wpi_softc *, struct wpi_dma_info *, void **, bus_size_t, bus_size_t); static void wpi_dma_contig_free(struct wpi_dma_info *); static int wpi_alloc_shared(struct wpi_softc *); static void wpi_free_shared(struct wpi_softc *); static int wpi_alloc_fwmem(struct wpi_softc *); static void wpi_free_fwmem(struct
wpi_softc *); static int wpi_alloc_rx_ring(struct wpi_softc *); static void wpi_update_rx_ring(struct wpi_softc *); static void wpi_update_rx_ring_ps(struct wpi_softc *); static void wpi_reset_rx_ring(struct wpi_softc *); static void wpi_free_rx_ring(struct wpi_softc *); static int wpi_alloc_tx_ring(struct wpi_softc *, struct wpi_tx_ring *, uint8_t); static void wpi_update_tx_ring(struct wpi_softc *, struct wpi_tx_ring *); static void wpi_update_tx_ring_ps(struct wpi_softc *, struct wpi_tx_ring *); static void wpi_reset_tx_ring(struct wpi_softc *, struct wpi_tx_ring *); static void wpi_free_tx_ring(struct wpi_softc *, struct wpi_tx_ring *); static int wpi_read_eeprom(struct wpi_softc *, uint8_t macaddr[IEEE80211_ADDR_LEN]); static uint32_t wpi_eeprom_channel_flags(struct wpi_eeprom_chan *); static void wpi_read_eeprom_band(struct wpi_softc *, uint8_t); static int wpi_read_eeprom_channels(struct wpi_softc *, uint8_t); static struct wpi_eeprom_chan *wpi_find_eeprom_channel(struct wpi_softc *, struct ieee80211_channel *); static int wpi_setregdomain(struct ieee80211com *, struct ieee80211_regdomain *, int, struct ieee80211_channel[]); static int wpi_read_eeprom_group(struct wpi_softc *, uint8_t); static struct ieee80211_node *wpi_node_alloc(struct ieee80211vap *, const uint8_t mac[IEEE80211_ADDR_LEN]); static void wpi_node_free(struct ieee80211_node *); static void wpi_ibss_recv_mgmt(struct ieee80211_node *, struct mbuf *, int, const struct ieee80211_rx_stats *, int, int); static void wpi_restore_node(void *, struct ieee80211_node *); static void wpi_restore_node_table(struct wpi_softc *, struct wpi_vap *); static int wpi_newstate(struct ieee80211vap *, enum ieee80211_state, int); static void wpi_calib_timeout(void *); static void wpi_rx_done(struct wpi_softc *, struct wpi_rx_desc *, struct wpi_rx_data *); static void wpi_rx_statistics(struct wpi_softc *, struct wpi_rx_desc *, struct wpi_rx_data *); static void wpi_tx_done(struct wpi_softc *, struct wpi_rx_desc *); static void wpi_cmd_done(struct wpi_softc *, struct wpi_rx_desc *); static void wpi_notif_intr(struct wpi_softc *); static void wpi_wakeup_intr(struct wpi_softc *); #ifdef WPI_DEBUG static void wpi_debug_registers(struct wpi_softc *); #endif static void wpi_fatal_intr(struct wpi_softc *); static void wpi_intr(void *); static void wpi_free_txfrags(struct wpi_softc *, uint16_t); static int wpi_cmd2(struct wpi_softc *, struct wpi_buf *); static int wpi_tx_data(struct wpi_softc *, struct mbuf *, struct ieee80211_node *); static int wpi_tx_data_raw(struct wpi_softc *, struct mbuf *, struct ieee80211_node *, const struct ieee80211_bpf_params *); static int wpi_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static int wpi_transmit(struct ieee80211com *, struct mbuf *); static void wpi_watchdog_rfkill(void *); static void wpi_scan_timeout(void *); static void wpi_tx_timeout(void *); static void wpi_parent(struct ieee80211com *); static int wpi_cmd(struct wpi_softc *, uint8_t, const void *, uint16_t, int); static int wpi_mrr_setup(struct wpi_softc *); static int wpi_add_node(struct wpi_softc *, struct ieee80211_node *); static int wpi_add_broadcast_node(struct wpi_softc *, int); static int wpi_add_ibss_node(struct wpi_softc *, struct ieee80211_node *); static void wpi_del_node(struct wpi_softc *, struct ieee80211_node *); static int wpi_updateedca(struct ieee80211com *); static void wpi_set_promisc(struct wpi_softc *); static void wpi_update_promisc(struct ieee80211com *); static void 
wpi_update_mcast(struct ieee80211com *); static void wpi_set_led(struct wpi_softc *, uint8_t, uint8_t, uint8_t); static int wpi_set_timing(struct wpi_softc *, struct ieee80211_node *); static void wpi_power_calibration(struct wpi_softc *); static int wpi_set_txpower(struct wpi_softc *, int); static int wpi_get_power_index(struct wpi_softc *, struct wpi_power_group *, uint8_t, int, int); static int wpi_set_pslevel(struct wpi_softc *, uint8_t, int, int); static int wpi_send_btcoex(struct wpi_softc *); static int wpi_send_rxon(struct wpi_softc *, int, int); static int wpi_config(struct wpi_softc *); static uint16_t wpi_get_active_dwell_time(struct wpi_softc *, struct ieee80211_channel *, uint8_t); static uint16_t wpi_limit_dwell(struct wpi_softc *, uint16_t); static uint16_t wpi_get_passive_dwell_time(struct wpi_softc *, struct ieee80211_channel *); static uint32_t wpi_get_scan_pause_time(uint32_t, uint16_t); static int wpi_scan(struct wpi_softc *, struct ieee80211_channel *); static int wpi_auth(struct wpi_softc *, struct ieee80211vap *); static int wpi_config_beacon(struct wpi_vap *); static int wpi_setup_beacon(struct wpi_softc *, struct ieee80211_node *); static void wpi_update_beacon(struct ieee80211vap *, int); static void wpi_newassoc(struct ieee80211_node *, int); static int wpi_run(struct wpi_softc *, struct ieee80211vap *); static int wpi_load_key(struct ieee80211_node *, const struct ieee80211_key *); static void wpi_load_key_cb(void *, struct ieee80211_node *); static int wpi_set_global_keys(struct ieee80211_node *); static int wpi_del_key(struct ieee80211_node *, const struct ieee80211_key *); static void wpi_del_key_cb(void *, struct ieee80211_node *); static int wpi_process_key(struct ieee80211vap *, const struct ieee80211_key *, int); static int wpi_key_set(struct ieee80211vap *, const struct ieee80211_key *); static int wpi_key_delete(struct ieee80211vap *, const struct ieee80211_key *); static int wpi_post_alive(struct wpi_softc *); static int wpi_load_bootcode(struct wpi_softc *, const uint8_t *, uint32_t); static int wpi_load_firmware(struct wpi_softc *); static int wpi_read_firmware(struct wpi_softc *); static void wpi_unload_firmware(struct wpi_softc *); static int wpi_clock_wait(struct wpi_softc *); static int wpi_apm_init(struct wpi_softc *); static void wpi_apm_stop_master(struct wpi_softc *); static void wpi_apm_stop(struct wpi_softc *); static void wpi_nic_config(struct wpi_softc *); static int wpi_hw_init(struct wpi_softc *); static void wpi_hw_stop(struct wpi_softc *); static void wpi_radio_on(void *, int); static void wpi_radio_off(void *, int); static int wpi_init(struct wpi_softc *); static void wpi_stop_locked(struct wpi_softc *); static void wpi_stop(struct wpi_softc *); static void wpi_scan_start(struct ieee80211com *); static void wpi_scan_end(struct ieee80211com *); static void wpi_set_channel(struct ieee80211com *); static void wpi_scan_curchan(struct ieee80211_scan_state *, unsigned long); static void wpi_scan_mindwell(struct ieee80211_scan_state *); static void wpi_hw_reset(void *, int); static device_method_t wpi_methods[] = { /* Device interface */ DEVMETHOD(device_probe, wpi_probe), DEVMETHOD(device_attach, wpi_attach), DEVMETHOD(device_detach, wpi_detach), DEVMETHOD(device_shutdown, wpi_shutdown), DEVMETHOD(device_suspend, wpi_suspend), DEVMETHOD(device_resume, wpi_resume), DEVMETHOD_END }; static driver_t wpi_driver = { "wpi", wpi_methods, sizeof (struct wpi_softc) }; static devclass_t wpi_devclass; DRIVER_MODULE(wpi, pci, wpi_driver, wpi_devclass, 
NULL, NULL); MODULE_VERSION(wpi, 1); MODULE_DEPEND(wpi, pci, 1, 1, 1); MODULE_DEPEND(wpi, wlan, 1, 1, 1); MODULE_DEPEND(wpi, firmware, 1, 1, 1);
static int wpi_probe(device_t dev) { const struct wpi_ident *ident; for (ident = wpi_ident_table; ident->name != NULL; ident++) { if (pci_get_vendor(dev) == ident->vendor && pci_get_device(dev) == ident->device) { device_set_desc(dev, ident->name); return (BUS_PROBE_DEFAULT); } } return ENXIO; }
static int wpi_attach(device_t dev) { struct wpi_softc *sc = (struct wpi_softc *)device_get_softc(dev); struct ieee80211com *ic; uint8_t i; int error, rid; #ifdef WPI_DEBUG int supportsa = 1; const struct wpi_ident *ident; #endif sc->sc_dev = dev; #ifdef WPI_DEBUG error = resource_int_value(device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev), "debug", &(sc->sc_debug)); if (error != 0) sc->sc_debug = 0; #else sc->sc_debug = 0; #endif DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__);
/* * Get the offset of the PCI Express Capability Structure in PCI * Configuration Space. */ error = pci_find_cap(dev, PCIY_EXPRESS, &sc->sc_cap_off); if (error != 0) { device_printf(dev, "PCIe capability structure not found!\n"); return error; }
/* * Some cards only support 802.11b/g, not a; check to see if * this is one such card. A 0x0 in the subdevice table indicates * the entire subdevice range is to be ignored. */ #ifdef WPI_DEBUG for (ident = wpi_ident_table; ident->name != NULL; ident++) { if (ident->subdevice && pci_get_subdevice(dev) == ident->subdevice) { supportsa = 0; break; } } #endif
/* Clear device-specific "PCI retry timeout" register (41h). */ pci_write_config(dev, 0x41, 0, 1); /* Enable bus-mastering. */ pci_enable_busmaster(dev); rid = PCIR_BAR(0); sc->mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem == NULL) { device_printf(dev, "can't map mem space\n"); return ENOMEM; } sc->sc_st = rman_get_bustag(sc->mem); sc->sc_sh = rman_get_bushandle(sc->mem); rid = 1; if (pci_alloc_msi(dev, &rid) == 0) rid = 1; else rid = 0;
/* Install interrupt handler. */ sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | (rid != 0 ? 0 : RF_SHAREABLE)); if (sc->irq == NULL) { device_printf(dev, "can't map interrupt\n"); error = ENOMEM; goto fail; } WPI_LOCK_INIT(sc); WPI_TX_LOCK_INIT(sc); WPI_RXON_LOCK_INIT(sc); WPI_NT_LOCK_INIT(sc); WPI_TXQ_LOCK_INIT(sc); WPI_TXQ_STATE_LOCK_INIT(sc);
/* Allocate DMA memory for firmware transfers. */ if ((error = wpi_alloc_fwmem(sc)) != 0) { device_printf(dev, "could not allocate memory for firmware, error %d\n", error); goto fail; } /* Allocate shared page. */ if ((error = wpi_alloc_shared(sc)) != 0) { device_printf(dev, "could not allocate shared page\n"); goto fail; } /* Allocate TX rings - 4 for QoS purposes, 1 for commands. */ for (i = 0; i < WPI_DRV_NTXQUEUES; i++) { if ((error = wpi_alloc_tx_ring(sc, &sc->txq[i], i)) != 0) { device_printf(dev, "could not allocate TX ring %d, error %d\n", i, error); goto fail; } } /* Allocate RX ring. */ if ((error = wpi_alloc_rx_ring(sc)) != 0) { device_printf(dev, "could not allocate RX ring, error %d\n", error); goto fail; }
/* Clear pending interrupts. */ WPI_WRITE(sc, WPI_INT, 0xffffffff); ic = &sc->sc_ic; ic->ic_softc = sc; ic->ic_name = device_get_nameunit(dev); ic->ic_phytype = IEEE80211_T_OFDM; /* not only, but not used */ ic->ic_opmode = IEEE80211_M_STA; /* default to BSS mode */ /* Set device capabilities.
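 *
 * A minimal sketch of how such a capability bit is consumed later on
 * (illustrative, not from this file):
 *
 *	if (ic->ic_caps & IEEE80211_C_WME) {
 *		// the WME/802.11e machinery may be engaged
 *	}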
*/ ic->ic_caps = IEEE80211_C_STA /* station mode supported */ | IEEE80211_C_IBSS /* IBSS mode supported */ | IEEE80211_C_HOSTAP /* Host access point mode */ | IEEE80211_C_MONITOR /* monitor mode supported */ | IEEE80211_C_AHDEMO /* adhoc demo mode */ | IEEE80211_C_BGSCAN /* capable of bg scanning */ | IEEE80211_C_TXFRAG /* handle tx frags */ | IEEE80211_C_TXPMGT /* tx power management */ | IEEE80211_C_SHSLOT /* short slot time supported */ | IEEE80211_C_WPA /* 802.11i */ | IEEE80211_C_SHPREAMBLE /* short preamble supported */ | IEEE80211_C_WME /* 802.11e */ | IEEE80211_C_PMGT /* Station-side power mgmt */ ; ic->ic_cryptocaps = IEEE80211_CRYPTO_AES_CCM; /* * Read in the eeprom and also setup the channels for * net80211. We don't set the rates as net80211 does this for us */ if ((error = wpi_read_eeprom(sc, ic->ic_macaddr)) != 0) { device_printf(dev, "could not read EEPROM, error %d\n", error); goto fail; } #ifdef WPI_DEBUG if (bootverbose) { device_printf(sc->sc_dev, "Regulatory Domain: %.4s\n", sc->domain); device_printf(sc->sc_dev, "Hardware Type: %c\n", sc->type > 1 ? 'B': '?'); device_printf(sc->sc_dev, "Hardware Revision: %c\n", ((sc->rev & 0xf0) == 0xd0) ? 'D': '?'); device_printf(sc->sc_dev, "SKU %s support 802.11a\n", supportsa ? "does" : "does not"); /* XXX hw_config uses the PCIDEV for the Hardware rev. Must check what sc->rev really represents - benjsc 20070615 */ } #endif ieee80211_ifattach(ic); ic->ic_vap_create = wpi_vap_create; ic->ic_vap_delete = wpi_vap_delete; ic->ic_parent = wpi_parent; ic->ic_raw_xmit = wpi_raw_xmit; ic->ic_transmit = wpi_transmit; ic->ic_node_alloc = wpi_node_alloc; sc->sc_node_free = ic->ic_node_free; ic->ic_node_free = wpi_node_free; ic->ic_wme.wme_update = wpi_updateedca; ic->ic_update_promisc = wpi_update_promisc; ic->ic_update_mcast = wpi_update_mcast; ic->ic_newassoc = wpi_newassoc; ic->ic_scan_start = wpi_scan_start; ic->ic_scan_end = wpi_scan_end; ic->ic_set_channel = wpi_set_channel; ic->ic_scan_curchan = wpi_scan_curchan; ic->ic_scan_mindwell = wpi_scan_mindwell; ic->ic_setregdomain = wpi_setregdomain; sc->sc_update_rx_ring = wpi_update_rx_ring; sc->sc_update_tx_ring = wpi_update_tx_ring; wpi_radiotap_attach(sc); callout_init_mtx(&sc->calib_to, &sc->rxon_mtx, 0); callout_init_mtx(&sc->scan_timeout, &sc->rxon_mtx, 0); callout_init_mtx(&sc->tx_timeout, &sc->txq_state_mtx, 0); callout_init_mtx(&sc->watchdog_rfkill, &sc->sc_mtx, 0); TASK_INIT(&sc->sc_reinittask, 0, wpi_hw_reset, sc); TASK_INIT(&sc->sc_radiooff_task, 0, wpi_radio_off, sc); TASK_INIT(&sc->sc_radioon_task, 0, wpi_radio_on, sc); sc->sc_tq = taskqueue_create("wpi_taskq", M_WAITOK, taskqueue_thread_enqueue, &sc->sc_tq); error = taskqueue_start_threads(&sc->sc_tq, 1, 0, "wpi_taskq"); if (error != 0) { device_printf(dev, "can't start threads, error %d\n", error); goto fail; } wpi_sysctlattach(sc); /* * Hook our interrupt after all initialization is complete. */ error = bus_setup_intr(dev, sc->irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, wpi_intr, sc, &sc->sc_ih); if (error != 0) { device_printf(dev, "can't establish interrupt, error %d\n", error); goto fail; } if (bootverbose) ieee80211_announce(ic); #ifdef WPI_DEBUG if (sc->sc_debug & WPI_DEBUG_HW) ieee80211_announce_channels(ic); #endif DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; fail: wpi_detach(dev); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } /* * Attach the interface to 802.11 radiotap. 
*/ static void wpi_radiotap_attach(struct wpi_softc *sc) { struct wpi_rx_radiotap_header *rxtap = &sc->sc_rxtap; struct wpi_tx_radiotap_header *txtap = &sc->sc_txtap; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); ieee80211_radiotap_attach(&sc->sc_ic, &txtap->wt_ihdr, sizeof(*txtap), WPI_TX_RADIOTAP_PRESENT, &rxtap->wr_ihdr, sizeof(*rxtap), WPI_RX_RADIOTAP_PRESENT); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); } static void wpi_sysctlattach(struct wpi_softc *sc) { #ifdef WPI_DEBUG struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev); struct sysctl_oid *tree = device_get_sysctl_tree(sc->sc_dev); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "debug", CTLFLAG_RW, &sc->sc_debug, sc->sc_debug, "control debugging printfs"); #endif } static void wpi_init_beacon(struct wpi_vap *wvp) { struct wpi_buf *bcn = &wvp->wv_bcbuf; struct wpi_cmd_beacon *cmd = (struct wpi_cmd_beacon *)&bcn->data; cmd->id = WPI_ID_BROADCAST; cmd->ofdm_mask = 0xff; cmd->cck_mask = 0x0f; cmd->lifetime = htole32(WPI_LIFETIME_INFINITE); /* * XXX WPI_TX_AUTO_SEQ seems to be ignored - workaround this issue * XXX by using WPI_TX_NEED_ACK instead (with some side effects). */ cmd->flags = htole32(WPI_TX_NEED_ACK | WPI_TX_INSERT_TSTAMP); bcn->code = WPI_CMD_SET_BEACON; bcn->ac = WPI_CMD_QUEUE_NUM; bcn->size = sizeof(struct wpi_cmd_beacon); } static struct ieee80211vap * wpi_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct wpi_vap *wvp; struct ieee80211vap *vap; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return NULL; wvp = malloc(sizeof(struct wpi_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &wvp->wv_vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); if (opmode == IEEE80211_M_IBSS || opmode == IEEE80211_M_HOSTAP) { WPI_VAP_LOCK_INIT(wvp); wpi_init_beacon(wvp); } /* Override with driver methods. */ vap->iv_key_set = wpi_key_set; vap->iv_key_delete = wpi_key_delete; if (opmode == IEEE80211_M_IBSS) { wvp->wv_recv_mgmt = vap->iv_recv_mgmt; vap->iv_recv_mgmt = wpi_ibss_recv_mgmt; } wvp->wv_newstate = vap->iv_newstate; vap->iv_newstate = wpi_newstate; vap->iv_update_beacon = wpi_update_beacon; vap->iv_max_aid = WPI_ID_IBSS_MAX - WPI_ID_IBSS_MIN + 1; ieee80211_ratectl_init(vap); /* Complete setup. */ ieee80211_vap_attach(vap, ieee80211_media_change, ieee80211_media_status, mac); ic->ic_opmode = opmode; return vap; } static void wpi_vap_delete(struct ieee80211vap *vap) { struct wpi_vap *wvp = WPI_VAP(vap); struct wpi_buf *bcn = &wvp->wv_bcbuf; enum ieee80211_opmode opmode = vap->iv_opmode; ieee80211_ratectl_deinit(vap); ieee80211_vap_detach(vap); if (opmode == IEEE80211_M_IBSS || opmode == IEEE80211_M_HOSTAP) { if (bcn->m != NULL) m_freem(bcn->m); WPI_VAP_LOCK_DESTROY(wvp); } free(wvp, M_80211_VAP); } static int wpi_detach(device_t dev) { struct wpi_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; uint8_t qid; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); if (ic->ic_vap_create == wpi_vap_create) { ieee80211_draintask(ic, &sc->sc_radioon_task); wpi_stop(sc); if (sc->sc_tq != NULL) { taskqueue_drain_all(sc->sc_tq); taskqueue_free(sc->sc_tq); } callout_drain(&sc->watchdog_rfkill); callout_drain(&sc->tx_timeout); callout_drain(&sc->scan_timeout); callout_drain(&sc->calib_to); ieee80211_ifdetach(ic); } /* Uninstall interrupt handler. 
*/ if (sc->irq != NULL) { bus_teardown_intr(dev, sc->irq, sc->sc_ih); bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq), sc->irq); pci_release_msi(dev); } if (sc->txq[0].data_dmat) { /* Free DMA resources. */ for (qid = 0; qid < WPI_DRV_NTXQUEUES; qid++) wpi_free_tx_ring(sc, &sc->txq[qid]); wpi_free_rx_ring(sc); wpi_free_shared(sc); } if (sc->fw_dma.tag) wpi_free_fwmem(sc); if (sc->mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->mem), sc->mem); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); WPI_TXQ_STATE_LOCK_DESTROY(sc); WPI_TXQ_LOCK_DESTROY(sc); WPI_NT_LOCK_DESTROY(sc); WPI_RXON_LOCK_DESTROY(sc); WPI_TX_LOCK_DESTROY(sc); WPI_LOCK_DESTROY(sc); return 0; } static int wpi_shutdown(device_t dev) { struct wpi_softc *sc = device_get_softc(dev); wpi_stop(sc); return 0; } static int wpi_suspend(device_t dev) { struct wpi_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; ieee80211_suspend_all(ic); return 0; } static int wpi_resume(device_t dev) { struct wpi_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; /* Clear device-specific "PCI retry timeout" register (41h). */ pci_write_config(dev, 0x41, 0, 1); ieee80211_resume_all(ic); return 0; } /* * Grab exclusive access to NIC memory. */ static int wpi_nic_lock(struct wpi_softc *sc) { int ntries; /* Request exclusive access to NIC. */ WPI_SETBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_MAC_ACCESS_REQ); /* Spin until we actually get the lock. */ for (ntries = 0; ntries < 1000; ntries++) { if ((WPI_READ(sc, WPI_GP_CNTRL) & (WPI_GP_CNTRL_MAC_ACCESS_ENA | WPI_GP_CNTRL_SLEEP)) == WPI_GP_CNTRL_MAC_ACCESS_ENA) return 0; DELAY(10); } device_printf(sc->sc_dev, "could not lock memory\n"); return ETIMEDOUT; } /* * Release lock on NIC memory. 
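 *
 * wpi_nic_lock() and wpi_nic_unlock() always travel in pairs around direct
 * NIC memory access; the canonical caller shape (used by e.g.
 * wpi_read_prom_data() below) is:
 *
 *	if ((error = wpi_nic_lock(sc)) != 0)
 *		return error;
 *	// ... touch NIC memory, e.g. via wpi_prph_read()/wpi_prph_write() ...
 *	wpi_nic_unlock(sc);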
*/ static __inline void wpi_nic_unlock(struct wpi_softc *sc) { WPI_CLRBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_MAC_ACCESS_REQ); } static __inline uint32_t wpi_prph_read(struct wpi_softc *sc, uint32_t addr) { WPI_WRITE(sc, WPI_PRPH_RADDR, WPI_PRPH_DWORD | addr); WPI_BARRIER_READ_WRITE(sc); return WPI_READ(sc, WPI_PRPH_RDATA); } static __inline void wpi_prph_write(struct wpi_softc *sc, uint32_t addr, uint32_t data) { WPI_WRITE(sc, WPI_PRPH_WADDR, WPI_PRPH_DWORD | addr); WPI_BARRIER_WRITE(sc); WPI_WRITE(sc, WPI_PRPH_WDATA, data); } static __inline void wpi_prph_setbits(struct wpi_softc *sc, uint32_t addr, uint32_t mask) { wpi_prph_write(sc, addr, wpi_prph_read(sc, addr) | mask); } static __inline void wpi_prph_clrbits(struct wpi_softc *sc, uint32_t addr, uint32_t mask) { wpi_prph_write(sc, addr, wpi_prph_read(sc, addr) & ~mask); } static __inline void wpi_prph_write_region_4(struct wpi_softc *sc, uint32_t addr, const uint32_t *data, uint32_t count) { for (; count != 0; count--, data++, addr += 4) wpi_prph_write(sc, addr, *data); } static __inline uint32_t wpi_mem_read(struct wpi_softc *sc, uint32_t addr) { WPI_WRITE(sc, WPI_MEM_RADDR, addr); WPI_BARRIER_READ_WRITE(sc); return WPI_READ(sc, WPI_MEM_RDATA); } static __inline void wpi_mem_read_region_4(struct wpi_softc *sc, uint32_t addr, uint32_t *data, int count) { for (; count > 0; count--, addr += 4) *data++ = wpi_mem_read(sc, addr); } static int wpi_read_prom_data(struct wpi_softc *sc, uint32_t addr, void *data, int count) { uint8_t *out = data; uint32_t val; int error, ntries; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); if ((error = wpi_nic_lock(sc)) != 0) return error; for (; count > 0; count -= 2, addr++) { WPI_WRITE(sc, WPI_EEPROM, addr << 2); for (ntries = 0; ntries < 10; ntries++) { val = WPI_READ(sc, WPI_EEPROM); if (val & WPI_EEPROM_READ_VALID) break; DELAY(5); } if (ntries == 10) { device_printf(sc->sc_dev, "timeout reading ROM at 0x%x\n", addr); return ETIMEDOUT; } *out++= val >> 16; if (count > 1) *out ++= val >> 24; } wpi_nic_unlock(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; } static void wpi_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (error != 0) return; KASSERT(nsegs == 1, ("too many DMA segments, %d should be 1", nsegs)); *(bus_addr_t *)arg = segs[0].ds_addr; } /* * Allocates a contiguous block of dma memory of the requested size and * alignment. 
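 *
 * Callers that only need the bus address may pass a NULL "kvap" (as
 * wpi_alloc_fwmem() does); callers that also want the kernel virtual
 * address pass a pointer to receive it, as in this call from
 * wpi_alloc_shared() below:
 *
 *	return wpi_dma_contig_alloc(sc, &sc->shared_dma,
 *	    (void **)&sc->shared, sizeof (struct wpi_shared), 4096);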
*/ static int wpi_dma_contig_alloc(struct wpi_softc *sc, struct wpi_dma_info *dma, void **kvap, bus_size_t size, bus_size_t alignment) { int error; dma->tag = NULL; dma->size = size; error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), alignment, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size, 0, NULL, NULL, &dma->tag); if (error != 0) goto fail; error = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr, BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, &dma->map); if (error != 0) goto fail; error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size, wpi_dma_map_addr, &dma->paddr, BUS_DMA_NOWAIT); if (error != 0) goto fail; bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); if (kvap != NULL) *kvap = dma->vaddr; return 0; fail: wpi_dma_contig_free(dma); return error; } static void wpi_dma_contig_free(struct wpi_dma_info *dma) { if (dma->vaddr != NULL) { bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->tag, dma->map); bus_dmamem_free(dma->tag, dma->vaddr, dma->map); dma->vaddr = NULL; } if (dma->tag != NULL) { bus_dma_tag_destroy(dma->tag); dma->tag = NULL; } } /* * Allocate a shared page between host and NIC. */ static int wpi_alloc_shared(struct wpi_softc *sc) { /* Shared buffer must be aligned on a 4KB boundary. */ return wpi_dma_contig_alloc(sc, &sc->shared_dma, (void **)&sc->shared, sizeof (struct wpi_shared), 4096); } static void wpi_free_shared(struct wpi_softc *sc) { wpi_dma_contig_free(&sc->shared_dma); } /* * Allocate DMA-safe memory for firmware transfer. */ static int wpi_alloc_fwmem(struct wpi_softc *sc) { /* Must be aligned on a 16-byte boundary. */ return wpi_dma_contig_alloc(sc, &sc->fw_dma, NULL, WPI_FW_TEXT_MAXSZ + WPI_FW_DATA_MAXSZ, 16); } static void wpi_free_fwmem(struct wpi_softc *sc) { wpi_dma_contig_free(&sc->fw_dma); } static int wpi_alloc_rx_ring(struct wpi_softc *sc) { struct wpi_rx_ring *ring = &sc->rxq; bus_size_t size; int i, error; ring->cur = 0; ring->update = 0; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); /* Allocate RX descriptors (16KB aligned.) */ size = WPI_RX_RING_COUNT * sizeof (uint32_t); error = wpi_dma_contig_alloc(sc, &ring->desc_dma, (void **)&ring->desc, size, WPI_RING_DMA_ALIGN); if (error != 0) { device_printf(sc->sc_dev, "%s: could not allocate RX ring DMA memory, error %d\n", __func__, error); goto fail; } /* Create RX buffer DMA tag. */ error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create RX buf DMA tag, error %d\n", __func__, error); goto fail; } /* * Allocate and map RX buffers. */ for (i = 0; i < WPI_RX_RING_COUNT; i++) { struct wpi_rx_data *data = &ring->data[i]; bus_addr_t paddr; error = bus_dmamap_create(ring->data_dmat, 0, &data->map); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create RX buf DMA map, error %d\n", __func__, error); goto fail; } data->m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (data->m == NULL) { device_printf(sc->sc_dev, "%s: could not allocate RX mbuf\n", __func__); error = ENOBUFS; goto fail; } error = bus_dmamap_load(ring->data_dmat, data->map, mtod(data->m, void *), MJUMPAGESIZE, wpi_dma_map_addr, &paddr, BUS_DMA_NOWAIT); if (error != 0 && error != EFBIG) { device_printf(sc->sc_dev, "%s: can't map mbuf (error %d)\n", __func__, error); goto fail; } /* Set physical address of RX buffer. 
*/ ring->desc[i] = htole32(paddr); } bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; fail: wpi_free_rx_ring(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } static void wpi_update_rx_ring(struct wpi_softc *sc) { WPI_WRITE(sc, WPI_FH_RX_WPTR, sc->rxq.cur & ~7); } static void wpi_update_rx_ring_ps(struct wpi_softc *sc) { struct wpi_rx_ring *ring = &sc->rxq; if (ring->update != 0) { /* Wait for INT_WAKEUP event. */ return; } WPI_TXQ_LOCK(sc); WPI_SETBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_MAC_ACCESS_REQ); if (WPI_READ(sc, WPI_GP_CNTRL) & WPI_GP_CNTRL_SLEEP) { DPRINTF(sc, WPI_DEBUG_PWRSAVE, "%s: wakeup request\n", __func__); ring->update = 1; } else { wpi_update_rx_ring(sc); WPI_CLRBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_MAC_ACCESS_REQ); } WPI_TXQ_UNLOCK(sc); } static void wpi_reset_rx_ring(struct wpi_softc *sc) { struct wpi_rx_ring *ring = &sc->rxq; int ntries; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (wpi_nic_lock(sc) == 0) { WPI_WRITE(sc, WPI_FH_RX_CONFIG, 0); for (ntries = 0; ntries < 1000; ntries++) { if (WPI_READ(sc, WPI_FH_RX_STATUS) & WPI_FH_RX_STATUS_IDLE) break; DELAY(10); } wpi_nic_unlock(sc); } ring->cur = 0; ring->update = 0; } static void wpi_free_rx_ring(struct wpi_softc *sc) { struct wpi_rx_ring *ring = &sc->rxq; int i; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); wpi_dma_contig_free(&ring->desc_dma); for (i = 0; i < WPI_RX_RING_COUNT; i++) { struct wpi_rx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } if (data->map != NULL) bus_dmamap_destroy(ring->data_dmat, data->map); } if (ring->data_dmat != NULL) { bus_dma_tag_destroy(ring->data_dmat); ring->data_dmat = NULL; } } static int wpi_alloc_tx_ring(struct wpi_softc *sc, struct wpi_tx_ring *ring, uint8_t qid) { bus_addr_t paddr; bus_size_t size; int i, error; ring->qid = qid; ring->queued = 0; ring->cur = 0; ring->pending = 0; ring->update = 0; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); /* Allocate TX descriptors (16KB aligned.) */ size = WPI_TX_RING_COUNT * sizeof (struct wpi_tx_desc); error = wpi_dma_contig_alloc(sc, &ring->desc_dma, (void **)&ring->desc, size, WPI_RING_DMA_ALIGN); if (error != 0) { device_printf(sc->sc_dev, "%s: could not allocate TX ring DMA memory, error %d\n", __func__, error); goto fail; } /* Update shared area with ring physical address. 
*/ sc->shared->txbase[qid] = htole32(ring->desc_dma.paddr); bus_dmamap_sync(sc->shared_dma.tag, sc->shared_dma.map, BUS_DMASYNC_PREWRITE); size = WPI_TX_RING_COUNT * sizeof (struct wpi_tx_cmd); error = wpi_dma_contig_alloc(sc, &ring->cmd_dma, (void **)&ring->cmd, size, 4); if (error != 0) { device_printf(sc->sc_dev, "%s: could not allocate TX cmd DMA memory, error %d\n", __func__, error); goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, WPI_MAX_SCATTER - 1, MCLBYTES, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create TX buf DMA tag, error %d\n", __func__, error); goto fail; } paddr = ring->cmd_dma.paddr; for (i = 0; i < WPI_TX_RING_COUNT; i++) { struct wpi_tx_data *data = &ring->data[i]; data->cmd_paddr = paddr; paddr += sizeof (struct wpi_tx_cmd); error = bus_dmamap_create(ring->data_dmat, 0, &data->map); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create TX buf DMA map, error %d\n", __func__, error); goto fail; } } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; fail: wpi_free_tx_ring(sc, ring); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } static void wpi_update_tx_ring(struct wpi_softc *sc, struct wpi_tx_ring *ring) { WPI_WRITE(sc, WPI_HBUS_TARG_WRPTR, ring->qid << 8 | ring->cur); } static void wpi_update_tx_ring_ps(struct wpi_softc *sc, struct wpi_tx_ring *ring) { if (ring->update != 0) { /* Wait for INT_WAKEUP event. */ return; } WPI_SETBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_MAC_ACCESS_REQ); if (WPI_READ(sc, WPI_GP_CNTRL) & WPI_GP_CNTRL_SLEEP) { DPRINTF(sc, WPI_DEBUG_PWRSAVE, "%s (%d): requesting wakeup\n", __func__, ring->qid); ring->update = 1; } else { wpi_update_tx_ring(sc, ring); WPI_CLRBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_MAC_ACCESS_REQ); } } static void wpi_reset_tx_ring(struct wpi_softc *sc, struct wpi_tx_ring *ring) { int i; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); for (i = 0; i < WPI_TX_RING_COUNT; i++) { struct wpi_tx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } if (data->ni != NULL) { ieee80211_free_node(data->ni); data->ni = NULL; } } /* Clear TX descriptors. */ memset(ring->desc, 0, ring->desc_dma.size); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); ring->queued = 0; ring->cur = 0; ring->pending = 0; ring->update = 0; } static void wpi_free_tx_ring(struct wpi_softc *sc, struct wpi_tx_ring *ring) { int i; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); wpi_dma_contig_free(&ring->desc_dma); wpi_dma_contig_free(&ring->cmd_dma); for (i = 0; i < WPI_TX_RING_COUNT; i++) { struct wpi_tx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); } if (data->map != NULL) bus_dmamap_destroy(ring->data_dmat, data->map); } if (ring->data_dmat != NULL) { bus_dma_tag_destroy(ring->data_dmat); ring->data_dmat = NULL; } } /* * Extract various information from EEPROM. 
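 *
 * Each field is pulled in with wpi_read_prom_data(); the function-local
 * WPI_CHK() macro defined at the top of the function bails out to the
 * "fail" label on the first error, so the individual reads take the shape
 * (taken from this function):
 *
 *	WPI_CHK(wpi_read_prom_data(sc, WPI_EEPROM_MAC, macaddr,
 *	    IEEE80211_ADDR_LEN));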
*/ static int wpi_read_eeprom(struct wpi_softc *sc, uint8_t macaddr[IEEE80211_ADDR_LEN]) { #define WPI_CHK(res) do { \ if ((error = res) != 0) \ goto fail; \ } while (0) uint8_t i; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); /* Adapter has to be powered on for EEPROM access to work. */ if ((error = wpi_apm_init(sc)) != 0) { device_printf(sc->sc_dev, "%s: could not power ON adapter, error %d\n", __func__, error); return error; } if ((WPI_READ(sc, WPI_EEPROM_GP) & 0x6) == 0) { device_printf(sc->sc_dev, "bad EEPROM signature\n"); error = EIO; goto fail; } /* Clear HW ownership of EEPROM. */ WPI_CLRBITS(sc, WPI_EEPROM_GP, WPI_EEPROM_GP_IF_OWNER); /* Read the hardware capabilities, revision and SKU type. */ WPI_CHK(wpi_read_prom_data(sc, WPI_EEPROM_SKU_CAP, &sc->cap, sizeof(sc->cap))); WPI_CHK(wpi_read_prom_data(sc, WPI_EEPROM_REVISION, &sc->rev, sizeof(sc->rev))); WPI_CHK(wpi_read_prom_data(sc, WPI_EEPROM_TYPE, &sc->type, sizeof(sc->type))); sc->rev = le16toh(sc->rev); DPRINTF(sc, WPI_DEBUG_EEPROM, "cap=%x rev=%x type=%x\n", sc->cap, sc->rev, sc->type); /* Read the regulatory domain (4 ASCII characters.) */ WPI_CHK(wpi_read_prom_data(sc, WPI_EEPROM_DOMAIN, sc->domain, sizeof(sc->domain))); /* Read MAC address. */ WPI_CHK(wpi_read_prom_data(sc, WPI_EEPROM_MAC, macaddr, IEEE80211_ADDR_LEN)); /* Read the list of authorized channels. */ for (i = 0; i < WPI_CHAN_BANDS_COUNT; i++) WPI_CHK(wpi_read_eeprom_channels(sc, i)); /* Read the list of TX power groups. */ for (i = 0; i < WPI_POWER_GROUPS_COUNT; i++) WPI_CHK(wpi_read_eeprom_group(sc, i)); fail: wpi_apm_stop(sc); /* Power OFF adapter. */ DPRINTF(sc, WPI_DEBUG_TRACE, error ? TRACE_STR_END_ERR : TRACE_STR_END, __func__); return error; #undef WPI_CHK } /* * Translate EEPROM flags to net80211. */ static uint32_t wpi_eeprom_channel_flags(struct wpi_eeprom_chan *channel) { uint32_t nflags; nflags = 0; if ((channel->flags & WPI_EEPROM_CHAN_ACTIVE) == 0) nflags |= IEEE80211_CHAN_PASSIVE; if ((channel->flags & WPI_EEPROM_CHAN_IBSS) == 0) nflags |= IEEE80211_CHAN_NOADHOC; if (channel->flags & WPI_EEPROM_CHAN_RADAR) { nflags |= IEEE80211_CHAN_DFS; /* XXX apparently IBSS may still be marked */ nflags |= IEEE80211_CHAN_NOADHOC; } /* XXX HOSTAP uses WPI_MODE_IBSS */ if (nflags & IEEE80211_CHAN_NOADHOC) nflags |= IEEE80211_CHAN_NOHOSTAP; return nflags; } static void wpi_read_eeprom_band(struct wpi_softc *sc, uint8_t n) { struct ieee80211com *ic = &sc->sc_ic; struct wpi_eeprom_chan *channels = sc->eeprom_channels[n]; const struct wpi_chan_band *band = &wpi_bands[n]; struct ieee80211_channel *c; uint32_t nflags; uint8_t chan, i; for (i = 0; i < band->nchan; i++) { if (!(channels[i].flags & WPI_EEPROM_CHAN_VALID)) { DPRINTF(sc, WPI_DEBUG_EEPROM, "Channel Not Valid: %d, band %d\n", band->chan[i],n); continue; } chan = band->chan[i]; nflags = wpi_eeprom_channel_flags(&channels[i]); c = &ic->ic_channels[ic->ic_nchans++]; c->ic_ieee = chan; c->ic_maxregpower = channels[i].maxpwr; c->ic_maxpower = 2*c->ic_maxregpower; if (n == 0) { /* 2GHz band */ c->ic_freq = ieee80211_ieee2mhz(chan, IEEE80211_CHAN_G); /* G =>'s B is supported */ c->ic_flags = IEEE80211_CHAN_B | nflags; c = &ic->ic_channels[ic->ic_nchans++]; c[0] = c[-1]; c->ic_flags = IEEE80211_CHAN_G | nflags; } else { /* 5GHz band */ c->ic_freq = ieee80211_ieee2mhz(chan, IEEE80211_CHAN_A); c->ic_flags = IEEE80211_CHAN_A | nflags; } /* Save maximum allowed TX power for this channel. 
*/ sc->maxpwr[chan] = channels[i].maxpwr; DPRINTF(sc, WPI_DEBUG_EEPROM, "adding chan %d (%dMHz) flags=0x%x maxpwr=%d passive=%d," " offset %d\n", chan, c->ic_freq, channels[i].flags, sc->maxpwr[chan], IEEE80211_IS_CHAN_PASSIVE(c), ic->ic_nchans); } } /** * Read the eeprom to find out what channels are valid for the given * band and update net80211 with what we find. */ static int wpi_read_eeprom_channels(struct wpi_softc *sc, uint8_t n) { struct ieee80211com *ic = &sc->sc_ic; const struct wpi_chan_band *band = &wpi_bands[n]; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); error = wpi_read_prom_data(sc, band->addr, &sc->eeprom_channels[n], band->nchan * sizeof (struct wpi_eeprom_chan)); if (error != 0) { DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } wpi_read_eeprom_band(sc, n); ieee80211_sort_channels(ic->ic_channels, ic->ic_nchans); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; } static struct wpi_eeprom_chan * wpi_find_eeprom_channel(struct wpi_softc *sc, struct ieee80211_channel *c) { int i, j; for (j = 0; j < WPI_CHAN_BANDS_COUNT; j++) for (i = 0; i < wpi_bands[j].nchan; i++) if (wpi_bands[j].chan[i] == c->ic_ieee) return &sc->eeprom_channels[j][i]; return NULL; } /* * Enforce flags read from EEPROM. */ static int wpi_setregdomain(struct ieee80211com *ic, struct ieee80211_regdomain *rd, int nchan, struct ieee80211_channel chans[]) { struct wpi_softc *sc = ic->ic_softc; int i; for (i = 0; i < nchan; i++) { struct ieee80211_channel *c = &chans[i]; struct wpi_eeprom_chan *channel; channel = wpi_find_eeprom_channel(sc, c); if (channel == NULL) { ic_printf(ic, "%s: invalid channel %u freq %u/0x%x\n", __func__, c->ic_ieee, c->ic_freq, c->ic_flags); return EINVAL; } c->ic_flags |= wpi_eeprom_channel_flags(channel); } return 0; } static int wpi_read_eeprom_group(struct wpi_softc *sc, uint8_t n) { struct wpi_power_group *group = &sc->groups[n]; struct wpi_eeprom_group rgroup; int i, error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); if ((error = wpi_read_prom_data(sc, WPI_EEPROM_POWER_GRP + n * 32, &rgroup, sizeof rgroup)) != 0) { DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } /* Save TX power group information. */ group->chan = rgroup.chan; group->maxpwr = rgroup.maxpwr; /* Retrieve temperature at which the samples were taken. 
*/ group->temp = (int16_t)le16toh(rgroup.temp); DPRINTF(sc, WPI_DEBUG_EEPROM, "power group %d: chan=%d maxpwr=%d temp=%d\n", n, group->chan, group->maxpwr, group->temp); for (i = 0; i < WPI_SAMPLES_COUNT; i++) { group->samples[i].index = rgroup.samples[i].index; group->samples[i].power = rgroup.samples[i].power; DPRINTF(sc, WPI_DEBUG_EEPROM, "\tsample %d: index=%d power=%d\n", i, group->samples[i].index, group->samples[i].power); } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; } static __inline uint8_t wpi_add_node_entry_adhoc(struct wpi_softc *sc) { uint8_t newid = WPI_ID_IBSS_MIN; for (; newid <= WPI_ID_IBSS_MAX; newid++) { if ((sc->nodesmsk & (1 << newid)) == 0) { sc->nodesmsk |= 1 << newid; return newid; } } return WPI_ID_UNDEFINED; } static __inline uint8_t wpi_add_node_entry_sta(struct wpi_softc *sc) { sc->nodesmsk |= 1 << WPI_ID_BSS; return WPI_ID_BSS; } static __inline int wpi_check_node_entry(struct wpi_softc *sc, uint8_t id) { if (id == WPI_ID_UNDEFINED) return 0; return (sc->nodesmsk >> id) & 1; } static __inline void wpi_clear_node_table(struct wpi_softc *sc) { sc->nodesmsk = 0; } static __inline void wpi_del_node_entry(struct wpi_softc *sc, uint8_t id) { sc->nodesmsk &= ~(1 << id); } static struct ieee80211_node * wpi_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { struct wpi_node *wn; wn = malloc(sizeof (struct wpi_node), M_80211_NODE, M_NOWAIT | M_ZERO); if (wn == NULL) return NULL; wn->id = WPI_ID_UNDEFINED; return &wn->ni; } static void wpi_node_free(struct ieee80211_node *ni) { struct wpi_softc *sc = ni->ni_ic->ic_softc; struct wpi_node *wn = WPI_NODE(ni); if (wn->id != WPI_ID_UNDEFINED) { WPI_NT_LOCK(sc); if (wpi_check_node_entry(sc, wn->id)) { wpi_del_node_entry(sc, wn->id); wpi_del_node(sc, ni); } WPI_NT_UNLOCK(sc); } sc->sc_node_free(ni); } static __inline int wpi_check_bss_filter(struct wpi_softc *sc) { return (sc->rxon.filter & htole32(WPI_FILTER_BSS)) != 0; } static void wpi_ibss_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m, int subtype, const struct ieee80211_rx_stats *rxs, int rssi, int nf) { struct ieee80211vap *vap = ni->ni_vap; struct wpi_softc *sc = vap->iv_ic->ic_softc; struct wpi_vap *wvp = WPI_VAP(vap); uint64_t ni_tstamp, rx_tstamp; wvp->wv_recv_mgmt(ni, m, subtype, rxs, rssi, nf); if (vap->iv_state == IEEE80211_S_RUN && (subtype == IEEE80211_FC0_SUBTYPE_BEACON || subtype == IEEE80211_FC0_SUBTYPE_PROBE_RESP)) { ni_tstamp = le64toh(ni->ni_tstamp.tsf); rx_tstamp = le64toh(sc->rx_tstamp); if (ni_tstamp >= rx_tstamp) { DPRINTF(sc, WPI_DEBUG_STATE, "ibss merge, tsf %ju tstamp %ju\n", (uintmax_t)rx_tstamp, (uintmax_t)ni_tstamp); (void) ieee80211_ibss_merge(ni); } } } static void wpi_restore_node(void *arg, struct ieee80211_node *ni) { struct wpi_softc *sc = arg; struct wpi_node *wn = WPI_NODE(ni); int error; WPI_NT_LOCK(sc); if (wn->id != WPI_ID_UNDEFINED) { wn->id = WPI_ID_UNDEFINED; if ((error = wpi_add_ibss_node(sc, ni)) != 0) { device_printf(sc->sc_dev, "%s: could not add IBSS node, error %d\n", __func__, error); } } WPI_NT_UNLOCK(sc); } static void wpi_restore_node_table(struct wpi_softc *sc, struct wpi_vap *wvp) { struct ieee80211com *ic = &sc->sc_ic; /* Set group keys once. 
*/ WPI_NT_LOCK(sc); wvp->wv_gtk = 0; WPI_NT_UNLOCK(sc); ieee80211_iterate_nodes(&ic->ic_sta, wpi_restore_node, sc); ieee80211_crypto_reload_keys(ic); }
/** * Called by net80211 whenever there is a change to the 802.11 state machine. */ static int wpi_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct wpi_vap *wvp = WPI_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct wpi_softc *sc = ic->ic_softc; int error = 0; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); WPI_TXQ_LOCK(sc); if (nstate > IEEE80211_S_INIT && sc->sc_running == 0) { DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); WPI_TXQ_UNLOCK(sc); return ENXIO; } WPI_TXQ_UNLOCK(sc); DPRINTF(sc, WPI_DEBUG_STATE, "%s: %s -> %s\n", __func__, ieee80211_state_name[vap->iv_state], ieee80211_state_name[nstate]);
if (vap->iv_state == IEEE80211_S_RUN && nstate < IEEE80211_S_RUN) { if ((error = wpi_set_pslevel(sc, 0, 0, 1)) != 0) { device_printf(sc->sc_dev, "%s: could not set power saving level\n", __func__); return error; } wpi_set_led(sc, WPI_LED_LINK, 1, 0); }
switch (nstate) { case IEEE80211_S_SCAN: WPI_RXON_LOCK(sc); if (wpi_check_bss_filter(sc) != 0) { sc->rxon.filter &= ~htole32(WPI_FILTER_BSS); if ((error = wpi_send_rxon(sc, 0, 1)) != 0) { device_printf(sc->sc_dev, "%s: could not send RXON\n", __func__); } } WPI_RXON_UNLOCK(sc); break;
case IEEE80211_S_ASSOC: if (vap->iv_state != IEEE80211_S_RUN) break; /* FALLTHROUGH */ case IEEE80211_S_AUTH: /* * NB: do not optimize AUTH -> AUTH state transition - * this will break powersave with non-QoS AP! */ /* * The node must be registered in the firmware before auth. * Also the associd must be cleared on RUN -> ASSOC * transitions. */ if ((error = wpi_auth(sc, vap)) != 0) { device_printf(sc->sc_dev, "%s: could not move to AUTH state, error %d\n", __func__, error); } break;
case IEEE80211_S_RUN: /* * RUN -> RUN transition: * STA mode: Just restart the timers. * IBSS mode: Process IBSS merge. */ if (vap->iv_state == IEEE80211_S_RUN) { if (vap->iv_opmode != IEEE80211_M_IBSS) { WPI_RXON_LOCK(sc); wpi_calib_timeout(sc); WPI_RXON_UNLOCK(sc); break; } else { /* * Drop the BSS_FILTER bit * (there is no other way to change the bssid). */ WPI_RXON_LOCK(sc); sc->rxon.filter &= ~htole32(WPI_FILTER_BSS); if ((error = wpi_send_rxon(sc, 0, 1)) != 0) { device_printf(sc->sc_dev, "%s: could not send RXON\n", __func__); } WPI_RXON_UNLOCK(sc); /* Restore everything that was lost. */ wpi_restore_node_table(sc, wvp); /* XXX set conditionally? */ wpi_updateedca(ic); } }
/* * !RUN -> RUN requires setting the association id * which is done with a firmware cmd. We also defer * starting the timers until that work is done.
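 *
 * Whatever the transition, the handler finishes by chaining to the
 * net80211 handler that was saved in wpi_vap_create(), i.e. (see the end
 * of this function):
 *
 *	return wvp->wv_newstate(vap, nstate, arg);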
*/ if ((error = wpi_run(sc, vap)) != 0) { device_printf(sc->sc_dev, "%s: could not move to RUN state\n", __func__); } break; default: break; } if (error != 0) { DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return wvp->wv_newstate(vap, nstate, arg); } static void wpi_calib_timeout(void *arg) { struct wpi_softc *sc = arg; if (wpi_check_bss_filter(sc) == 0) return; wpi_power_calibration(sc); callout_reset(&sc->calib_to, 60*hz, wpi_calib_timeout, sc); } static __inline uint8_t rate2plcp(const uint8_t rate) { switch (rate) { case 12: return 0xd; case 18: return 0xf; case 24: return 0x5; case 36: return 0x7; case 48: return 0x9; case 72: return 0xb; case 96: return 0x1; case 108: return 0x3; case 2: return 10; case 4: return 20; case 11: return 55; case 22: return 110; default: return 0; } } static __inline uint8_t plcp2rate(const uint8_t plcp) { switch (plcp) { case 0xd: return 12; case 0xf: return 18; case 0x5: return 24; case 0x7: return 36; case 0x9: return 48; case 0xb: return 72; case 0x1: return 96; case 0x3: return 108; case 10: return 2; case 20: return 4; case 55: return 11; case 110: return 22; default: return 0; } } /* Quickly determine if a given rate is CCK or OFDM. */ #define WPI_RATE_IS_OFDM(rate) ((rate) >= 12 && (rate) != 22) static void wpi_rx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc, struct wpi_rx_data *data) { struct ieee80211com *ic = &sc->sc_ic; struct wpi_rx_ring *ring = &sc->rxq; struct wpi_rx_stat *stat; struct wpi_rx_head *head; struct wpi_rx_tail *tail; struct ieee80211_frame *wh; struct ieee80211_node *ni; struct mbuf *m, *m1; bus_addr_t paddr; uint32_t flags; uint16_t len; int error; stat = (struct wpi_rx_stat *)(desc + 1); if (__predict_false(stat->len > WPI_STAT_MAXLEN)) { device_printf(sc->sc_dev, "invalid RX statistic header\n"); goto fail1; } bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); head = (struct wpi_rx_head *)((caddr_t)(stat + 1) + stat->len); len = le16toh(head->len); tail = (struct wpi_rx_tail *)((caddr_t)(head + 1) + len); flags = le32toh(tail->flags); DPRINTF(sc, WPI_DEBUG_RECV, "%s: idx %d len %d stat len %u rssi %d" " rate %x chan %d tstamp %ju\n", __func__, ring->cur, le32toh(desc->len), len, (int8_t)stat->rssi, head->plcp, head->chan, (uintmax_t)le64toh(tail->tstamp)); /* Discard frames with a bad FCS early. */ if ((flags & WPI_RX_NOERROR) != WPI_RX_NOERROR) { DPRINTF(sc, WPI_DEBUG_RECV, "%s: RX flags error %x\n", __func__, flags); goto fail1; } /* Discard frames that are too short. */ if (len < sizeof (struct ieee80211_frame_ack)) { DPRINTF(sc, WPI_DEBUG_RECV, "%s: frame too short: %d\n", __func__, len); goto fail1; } m1 = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (__predict_false(m1 == NULL)) { DPRINTF(sc, WPI_DEBUG_ANY, "%s: no mbuf to restock ring\n", __func__); goto fail1; } bus_dmamap_unload(ring->data_dmat, data->map); error = bus_dmamap_load(ring->data_dmat, data->map, mtod(m1, void *), MJUMPAGESIZE, wpi_dma_map_addr, &paddr, BUS_DMA_NOWAIT); if (__predict_false(error != 0 && error != EFBIG)) { device_printf(sc->sc_dev, "%s: bus_dmamap_load failed, error %d\n", __func__, error); m_freem(m1); /* Try to reload the old mbuf. */ error = bus_dmamap_load(ring->data_dmat, data->map, mtod(data->m, void *), MJUMPAGESIZE, wpi_dma_map_addr, &paddr, BUS_DMA_NOWAIT); if (error != 0 && error != EFBIG) { panic("%s: could not load old RX mbuf", __func__); } /* Physical address may have changed. 
*/ ring->desc[ring->cur] = htole32(paddr); bus_dmamap_sync(ring->data_dmat, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); goto fail1; } m = data->m; data->m = m1; /* Update RX descriptor. */ ring->desc[ring->cur] = htole32(paddr); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); /* Finalize mbuf. */ m->m_data = (caddr_t)(head + 1); m->m_pkthdr.len = m->m_len = len; /* Grab a reference to the source node. */ wh = mtod(m, struct ieee80211_frame *); if ((wh->i_fc[1] & IEEE80211_FC1_PROTECTED) && (flags & WPI_RX_CIPHER_MASK) == WPI_RX_CIPHER_CCMP) { /* Check whether decryption was successful or not. */ if ((flags & WPI_RX_DECRYPT_MASK) != WPI_RX_DECRYPT_OK) { DPRINTF(sc, WPI_DEBUG_RECV, "CCMP decryption failed 0x%x\n", flags); goto fail2; } m->m_flags |= M_WEP; } if (len >= sizeof(struct ieee80211_frame_min)) ni = ieee80211_find_rxnode(ic, (struct ieee80211_frame_min *)wh); else ni = NULL; sc->rx_tstamp = tail->tstamp; if (ieee80211_radiotap_active(ic)) { struct wpi_rx_radiotap_header *tap = &sc->sc_rxtap; tap->wr_flags = 0; if (head->flags & htole16(WPI_STAT_FLAG_SHPREAMBLE)) tap->wr_flags |= IEEE80211_RADIOTAP_F_SHORTPRE; tap->wr_dbm_antsignal = (int8_t)(stat->rssi + WPI_RSSI_OFFSET); tap->wr_dbm_antnoise = WPI_RSSI_OFFSET; tap->wr_tsft = tail->tstamp; tap->wr_antenna = (le16toh(head->flags) >> 4) & 0xf; tap->wr_rate = plcp2rate(head->plcp); } WPI_UNLOCK(sc); /* Send the frame to the 802.11 layer. */ if (ni != NULL) { (void)ieee80211_input(ni, m, stat->rssi, WPI_RSSI_OFFSET); /* Node is no longer needed. */ ieee80211_free_node(ni); } else (void)ieee80211_input_all(ic, m, stat->rssi, WPI_RSSI_OFFSET); WPI_LOCK(sc); return; fail2: m_freem(m); fail1: counter_u64_add(ic->ic_ierrors, 1); } static void wpi_rx_statistics(struct wpi_softc *sc, struct wpi_rx_desc *desc, struct wpi_rx_data *data) { /* Ignore */ } static void wpi_tx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc) { struct wpi_tx_ring *ring = &sc->txq[desc->qid & 0x3]; struct wpi_tx_data *data = &ring->data[desc->idx]; struct wpi_tx_stat *stat = (struct wpi_tx_stat *)(desc + 1); struct mbuf *m; struct ieee80211_node *ni; struct ieee80211vap *vap; struct ieee80211com *ic; uint32_t status = le32toh(stat->status); int ackfailcnt = stat->ackfailcnt / WPI_NTRIES_DEFAULT; KASSERT(data->ni != NULL, ("no node")); KASSERT(data->m != NULL, ("no mbuf")); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); DPRINTF(sc, WPI_DEBUG_XMIT, "%s: " "qid %d idx %d retries %d btkillcnt %d rate %x duration %d " "status %x\n", __func__, desc->qid, desc->idx, stat->ackfailcnt, stat->btkillcnt, stat->rate, le32toh(stat->duration), status); /* Unmap and free mbuf. */ bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m = data->m, data->m = NULL; ni = data->ni, data->ni = NULL; vap = ni->ni_vap; ic = vap->iv_ic; /* * Update rate control statistics for the node. */ if (status & WPI_TX_STATUS_FAIL) { ieee80211_ratectl_tx_complete(vap, ni, IEEE80211_RATECTL_TX_FAILURE, &ackfailcnt, NULL); } else ieee80211_ratectl_tx_complete(vap, ni, IEEE80211_RATECTL_TX_SUCCESS, &ackfailcnt, NULL); ieee80211_tx_complete(ni, m, (status & WPI_TX_STATUS_FAIL) != 0); WPI_TXQ_STATE_LOCK(sc); if (--ring->queued > 0) callout_reset(&sc->tx_timeout, 5*hz, wpi_tx_timeout, sc); else callout_stop(&sc->tx_timeout); WPI_TXQ_STATE_UNLOCK(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); } /* * Process a "command done" firmware notification. 
This is where we wakeup * processes waiting for a synchronous command completion. */ static void wpi_cmd_done(struct wpi_softc *sc, struct wpi_rx_desc *desc) { struct wpi_tx_ring *ring = &sc->txq[WPI_CMD_QUEUE_NUM]; struct wpi_tx_data *data; struct wpi_tx_cmd *cmd; DPRINTF(sc, WPI_DEBUG_CMD, "cmd notification qid %x idx %d flags %x " "type %s len %d\n", desc->qid, desc->idx, desc->flags, wpi_cmd_str(desc->type), le32toh(desc->len)); if ((desc->qid & WPI_RX_DESC_QID_MSK) != WPI_CMD_QUEUE_NUM) return; /* Not a command ack. */ KASSERT(ring->queued == 0, ("ring->queued must be 0")); data = &ring->data[desc->idx]; cmd = &ring->cmd[desc->idx]; /* If the command was mapped in an mbuf, free it. */ if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } wakeup(cmd); if (desc->type == WPI_CMD_SET_POWER_MODE) { struct wpi_pmgt_cmd *pcmd = (struct wpi_pmgt_cmd *)cmd->data; bus_dmamap_sync(ring->data_dmat, ring->cmd_dma.map, BUS_DMASYNC_POSTREAD); WPI_TXQ_LOCK(sc); if (le16toh(pcmd->flags) & WPI_PS_ALLOW_SLEEP) { sc->sc_update_rx_ring = wpi_update_rx_ring_ps; sc->sc_update_tx_ring = wpi_update_tx_ring_ps; } else { sc->sc_update_rx_ring = wpi_update_rx_ring; sc->sc_update_tx_ring = wpi_update_tx_ring; } WPI_TXQ_UNLOCK(sc); } } static void wpi_notif_intr(struct wpi_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); uint32_t hw; bus_dmamap_sync(sc->shared_dma.tag, sc->shared_dma.map, BUS_DMASYNC_POSTREAD); hw = le32toh(sc->shared->next) & 0xfff; hw = (hw == 0) ? WPI_RX_RING_COUNT - 1 : hw - 1; while (sc->rxq.cur != hw) { sc->rxq.cur = (sc->rxq.cur + 1) % WPI_RX_RING_COUNT; struct wpi_rx_data *data = &sc->rxq.data[sc->rxq.cur]; struct wpi_rx_desc *desc; bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); desc = mtod(data->m, struct wpi_rx_desc *); DPRINTF(sc, WPI_DEBUG_NOTIFY, "%s: cur=%d; qid %x idx %d flags %x type %d(%s) len %d\n", __func__, sc->rxq.cur, desc->qid, desc->idx, desc->flags, desc->type, wpi_cmd_str(desc->type), le32toh(desc->len)); if (!(desc->qid & WPI_UNSOLICITED_RX_NOTIF)) { /* Reply to a command. */ wpi_cmd_done(sc, desc); } switch (desc->type) { case WPI_RX_DONE: /* An 802.11 frame has been received. */ wpi_rx_done(sc, desc, data); if (__predict_false(sc->sc_running == 0)) { /* wpi_stop() was called. */ return; } break; case WPI_TX_DONE: /* An 802.11 frame has been transmitted. 
*/ wpi_tx_done(sc, desc); break; case WPI_RX_STATISTICS: case WPI_BEACON_STATISTICS: wpi_rx_statistics(sc, desc, data); break; case WPI_BEACON_MISSED: { struct wpi_beacon_missed *miss = (struct wpi_beacon_missed *)(desc + 1); uint32_t expected, misses, received, threshold; bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); misses = le32toh(miss->consecutive); expected = le32toh(miss->expected); received = le32toh(miss->received); threshold = MAX(2, vap->iv_bmissthreshold); DPRINTF(sc, WPI_DEBUG_BMISS, "%s: beacons missed %u(%u) (received %u/%u)\n", __func__, misses, le32toh(miss->total), received, expected); if (misses >= threshold || (received == 0 && expected >= threshold)) { WPI_RXON_LOCK(sc); if (callout_pending(&sc->scan_timeout)) { wpi_cmd(sc, WPI_CMD_SCAN_ABORT, NULL, 0, 1); } WPI_RXON_UNLOCK(sc); if (vap->iv_state == IEEE80211_S_RUN && (ic->ic_flags & IEEE80211_F_SCAN) == 0) ieee80211_beacon_miss(ic); } break; } #ifdef WPI_DEBUG case WPI_BEACON_SENT: { struct wpi_tx_stat *stat = (struct wpi_tx_stat *)(desc + 1); uint64_t *tsf = (uint64_t *)(stat + 1); uint32_t *mode = (uint32_t *)(tsf + 1); bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); DPRINTF(sc, WPI_DEBUG_BEACON, "beacon sent: rts %u, ack %u, btkill %u, rate %u, " "duration %u, status %x, tsf %ju, mode %x\n", stat->rtsfailcnt, stat->ackfailcnt, stat->btkillcnt, stat->rate, le32toh(stat->duration), le32toh(stat->status), le64toh(*tsf), le32toh(*mode)); break; } #endif case WPI_UC_READY: { struct wpi_ucode_info *uc = (struct wpi_ucode_info *)(desc + 1); /* The microcontroller is ready. */ bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); DPRINTF(sc, WPI_DEBUG_RESET, "microcode alive notification version=%d.%d " "subtype=%x alive=%x\n", uc->major, uc->minor, uc->subtype, le32toh(uc->valid)); if (le32toh(uc->valid) != 1) { device_printf(sc->sc_dev, "microcontroller initialization failed\n"); wpi_stop_locked(sc); return; } /* Save the address of the error log in SRAM. */ sc->errptr = le32toh(uc->errptr); break; } case WPI_STATE_CHANGED: { bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); uint32_t *status = (uint32_t *)(desc + 1); DPRINTF(sc, WPI_DEBUG_STATE, "state changed to %x\n", le32toh(*status)); if (le32toh(*status) & 1) { WPI_NT_LOCK(sc); wpi_clear_node_table(sc); WPI_NT_UNLOCK(sc); taskqueue_enqueue(sc->sc_tq, &sc->sc_radiooff_task); return; } break; } #ifdef WPI_DEBUG case WPI_START_SCAN: { bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); struct wpi_start_scan *scan = (struct wpi_start_scan *)(desc + 1); DPRINTF(sc, WPI_DEBUG_SCAN, "%s: scanning channel %d status %x\n", __func__, scan->chan, le32toh(scan->status)); break; } #endif case WPI_STOP_SCAN: { bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); struct wpi_stop_scan *scan = (struct wpi_stop_scan *)(desc + 1); DPRINTF(sc, WPI_DEBUG_SCAN, "scan finished nchan=%d status=%d chan=%d\n", scan->nchan, scan->status, scan->chan); WPI_RXON_LOCK(sc); callout_stop(&sc->scan_timeout); WPI_RXON_UNLOCK(sc); if (scan->status == WPI_SCAN_ABORTED) ieee80211_cancel_scan(vap); else ieee80211_scan_next(vap); break; } } if (sc->rxq.cur % 8 == 0) { /* Tell the firmware what we have processed. */ sc->sc_update_rx_ring(sc); } } } /* * Process an INT_WAKEUP interrupt raised when the microcontroller wakes up * from power-down sleep mode. 
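* While the NIC sleeps, ring pointer updates are deferred through the * rxq.update/ring->update flags; this handler flushes them and then * releases the MAC access request bit in WPI_GP_CNTRL.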
*/ static void wpi_wakeup_intr(struct wpi_softc *sc) { int qid; DPRINTF(sc, WPI_DEBUG_PWRSAVE, "%s: ucode wakeup from power-down sleep\n", __func__); /* Wakeup RX and TX rings. */ if (sc->rxq.update) { sc->rxq.update = 0; wpi_update_rx_ring(sc); } WPI_TXQ_LOCK(sc); for (qid = 0; qid < WPI_DRV_NTXQUEUES; qid++) { struct wpi_tx_ring *ring = &sc->txq[qid]; if (ring->update) { ring->update = 0; wpi_update_tx_ring(sc, ring); } } WPI_CLRBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_MAC_ACCESS_REQ); WPI_TXQ_UNLOCK(sc); } /* * This function prints firmware registers */ #ifdef WPI_DEBUG static void wpi_debug_registers(struct wpi_softc *sc) { size_t i; static const uint32_t csr_tbl[] = { WPI_HW_IF_CONFIG, WPI_INT, WPI_INT_MASK, WPI_FH_INT, WPI_GPIO_IN, WPI_RESET, WPI_GP_CNTRL, WPI_EEPROM, WPI_EEPROM_GP, WPI_GIO, WPI_UCODE_GP1, WPI_UCODE_GP2, WPI_GIO_CHICKEN, WPI_ANA_PLL, WPI_DBG_HPET_MEM, }; static const uint32_t prph_tbl[] = { WPI_APMG_CLK_CTRL, WPI_APMG_PS, WPI_APMG_PCI_STT, WPI_APMG_RFKILL, }; DPRINTF(sc, WPI_DEBUG_REGISTER,"%s","\n"); for (i = 0; i < nitems(csr_tbl); i++) { DPRINTF(sc, WPI_DEBUG_REGISTER, " %-18s: 0x%08x ", wpi_get_csr_string(csr_tbl[i]), WPI_READ(sc, csr_tbl[i])); if ((i + 1) % 2 == 0) DPRINTF(sc, WPI_DEBUG_REGISTER, "\n"); } DPRINTF(sc, WPI_DEBUG_REGISTER, "\n\n"); if (wpi_nic_lock(sc) == 0) { for (i = 0; i < nitems(prph_tbl); i++) { DPRINTF(sc, WPI_DEBUG_REGISTER, " %-18s: 0x%08x ", wpi_get_prph_string(prph_tbl[i]), wpi_prph_read(sc, prph_tbl[i])); if ((i + 1) % 2 == 0) DPRINTF(sc, WPI_DEBUG_REGISTER, "\n"); } DPRINTF(sc, WPI_DEBUG_REGISTER, "\n"); wpi_nic_unlock(sc); } else { DPRINTF(sc, WPI_DEBUG_REGISTER, "Cannot access internal registers.\n"); } } #endif /* * Dump the error log of the firmware when a firmware panic occurs. Although * we can't debug the firmware because it is neither open source nor free, it * can help us to identify certain classes of problems. */ static void wpi_fatal_intr(struct wpi_softc *sc) { struct wpi_fw_dump dump; uint32_t i, offset, count; /* Check that the error log address is valid. */ if (sc->errptr < WPI_FW_DATA_BASE || sc->errptr + sizeof (dump) > WPI_FW_DATA_BASE + WPI_FW_DATA_MAXSZ) { printf("%s: bad firmware error log address 0x%08x\n", __func__, sc->errptr); return; } if (wpi_nic_lock(sc) != 0) { printf("%s: could not read firmware error log\n", __func__); return; } /* Read number of entries in the log. */ count = wpi_mem_read(sc, sc->errptr); if (count == 0 || count * sizeof (dump) > WPI_FW_DATA_MAXSZ) { printf("%s: invalid count field (count = %u)\n", __func__, count); wpi_nic_unlock(sc); return; } /* Skip "count" field. */ offset = sc->errptr + sizeof (uint32_t); printf("firmware error log (count = %u):\n", count); for (i = 0; i < count; i++) { wpi_mem_read_region_4(sc, offset, (uint32_t *)&dump, sizeof (dump) / sizeof (uint32_t)); printf(" error type = \"%s\" (0x%08X)\n", (dump.desc < nitems(wpi_fw_errmsg)) ? wpi_fw_errmsg[dump.desc] : "UNKNOWN", dump.desc); printf(" error data = 0x%08X\n", dump.data); printf(" branch link = 0x%08X%08X\n", dump.blink[0], dump.blink[1]); printf(" interrupt link = 0x%08X%08X\n", dump.ilink[0], dump.ilink[1]); printf(" time = %u\n", dump.time); offset += sizeof (dump); } wpi_nic_unlock(sc); /* Dump driver status (TX and RX rings) while we're here. 
*/ printf("driver status:\n"); WPI_TXQ_LOCK(sc); for (i = 0; i < WPI_DRV_NTXQUEUES; i++) { struct wpi_tx_ring *ring = &sc->txq[i]; printf(" tx ring %2d: qid=%-2d cur=%-3d queued=%-3d\n", i, ring->qid, ring->cur, ring->queued); } WPI_TXQ_UNLOCK(sc); printf(" rx ring: cur=%d\n", sc->rxq.cur); } static void wpi_intr(void *arg) { struct wpi_softc *sc = arg; uint32_t r1, r2; WPI_LOCK(sc); /* Disable interrupts. */ WPI_WRITE(sc, WPI_INT_MASK, 0); r1 = WPI_READ(sc, WPI_INT); if (__predict_false(r1 == 0xffffffff || (r1 & 0xfffffff0) == 0xa5a5a5a0)) goto end; /* Hardware gone! */ r2 = WPI_READ(sc, WPI_FH_INT); DPRINTF(sc, WPI_DEBUG_INTR, "%s: reg1=0x%08x reg2=0x%08x\n", __func__, r1, r2); if (r1 == 0 && r2 == 0) goto done; /* Interrupt not for us. */ /* Acknowledge interrupts. */ WPI_WRITE(sc, WPI_INT, r1); WPI_WRITE(sc, WPI_FH_INT, r2); if (__predict_false(r1 & (WPI_INT_SW_ERR | WPI_INT_HW_ERR))) { device_printf(sc->sc_dev, "fatal firmware error\n"); #ifdef WPI_DEBUG wpi_debug_registers(sc); #endif wpi_fatal_intr(sc); DPRINTF(sc, WPI_DEBUG_HW, "(%s)\n", (r1 & WPI_INT_SW_ERR) ? "(Software Error)" : "(Hardware Error)"); taskqueue_enqueue(sc->sc_tq, &sc->sc_reinittask); goto end; } if ((r1 & (WPI_INT_FH_RX | WPI_INT_SW_RX)) || (r2 & WPI_FH_INT_RX)) wpi_notif_intr(sc); if (r1 & WPI_INT_ALIVE) wakeup(sc); /* Firmware is alive. */ if (r1 & WPI_INT_WAKEUP) wpi_wakeup_intr(sc); done: /* Re-enable interrupts. */ if (__predict_true(sc->sc_running)) WPI_WRITE(sc, WPI_INT_MASK, WPI_INT_MASK_DEF); end: WPI_UNLOCK(sc); } static void wpi_free_txfrags(struct wpi_softc *sc, uint16_t ac) { struct wpi_tx_ring *ring; struct wpi_tx_data *data; uint8_t cur; WPI_TXQ_LOCK(sc); ring = &sc->txq[ac]; while (ring->pending != 0) { ring->pending--; cur = (ring->cur + ring->pending) % WPI_TX_RING_COUNT; data = &ring->data[cur]; bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; ieee80211_node_decref(data->ni); data->ni = NULL; } WPI_TXQ_UNLOCK(sc); } static int wpi_cmd2(struct wpi_softc *sc, struct wpi_buf *buf) { struct ieee80211_frame *wh; struct wpi_tx_cmd *cmd; struct wpi_tx_data *data; struct wpi_tx_desc *desc; struct wpi_tx_ring *ring; struct mbuf *m1; bus_dma_segment_t *seg, segs[WPI_MAX_SCATTER]; uint8_t cur, pad; uint16_t hdrlen; int error, i, nsegs, totlen, frag; WPI_TXQ_LOCK(sc); KASSERT(buf->size <= sizeof(buf->data), ("buffer overflow")); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); if (__predict_false(sc->sc_running == 0)) { /* wpi_stop() was called */ error = ENETDOWN; goto end; } wh = mtod(buf->m, struct ieee80211_frame *); hdrlen = ieee80211_anyhdrsize(wh); totlen = buf->m->m_pkthdr.len; frag = ((buf->m->m_flags & (M_FRAG | M_LASTFRAG)) == M_FRAG); if (__predict_false(totlen < sizeof(struct ieee80211_frame_min))) { error = EINVAL; goto end; } if (hdrlen & 3) { /* First segment length must be a multiple of 4. */ pad = 4 - (hdrlen & 3); } else pad = 0; ring = &sc->txq[buf->ac]; cur = (ring->cur + ring->pending) % WPI_TX_RING_COUNT; desc = &ring->desc[cur]; data = &ring->data[cur]; /* Prepare TX firmware command. */ cmd = &ring->cmd[cur]; cmd->code = buf->code; cmd->flags = 0; cmd->qid = ring->qid; cmd->idx = cur; memcpy(cmd->data, buf->data, buf->size); /* Save and trim IEEE802.11 header. 
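* The saved header travels in the first DMA segment together with the * firmware TX command (see the segs[0] setup below); only the payload * remains in the mbuf.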
*/ memcpy((uint8_t *)(cmd->data + buf->size), wh, hdrlen); m_adj(buf->m, hdrlen); error = bus_dmamap_load_mbuf_sg(ring->data_dmat, data->map, buf->m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0 && error != EFBIG) { device_printf(sc->sc_dev, "%s: can't map mbuf (error %d)\n", __func__, error); goto end; } if (error != 0) { /* Too many DMA segments, linearize mbuf. */ m1 = m_collapse(buf->m, M_NOWAIT, WPI_MAX_SCATTER - 1); if (m1 == NULL) { device_printf(sc->sc_dev, "%s: could not defrag mbuf\n", __func__); error = ENOBUFS; goto end; } buf->m = m1; error = bus_dmamap_load_mbuf_sg(ring->data_dmat, data->map, buf->m, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(error != 0)) { /* XXX fix this (applicable to the iwn(4) too) */ /* * NB: Do not return error; * original mbuf does not exist anymore. */ device_printf(sc->sc_dev, "%s: can't map mbuf (error %d)\n", __func__, error); if (ring->qid < WPI_CMD_QUEUE_NUM) { if_inc_counter(buf->ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); if (!frag) ieee80211_free_node(buf->ni); } m_freem(buf->m); error = 0; goto end; } } KASSERT(nsegs < WPI_MAX_SCATTER, ("too many DMA segments, nsegs (%d) should be less than %d", nsegs, WPI_MAX_SCATTER)); data->m = buf->m; data->ni = buf->ni; DPRINTF(sc, WPI_DEBUG_XMIT, "%s: qid %d idx %d len %d nsegs %d\n", __func__, ring->qid, cur, totlen, nsegs); /* Fill TX descriptor. */ desc->nsegs = WPI_PAD32(totlen + pad) << 4 | (1 + nsegs); /* First DMA segment is used by the TX command. */ desc->segs[0].addr = htole32(data->cmd_paddr); desc->segs[0].len = htole32(4 + buf->size + hdrlen + pad); /* Other DMA segments are for data payload. */ seg = &segs[0]; for (i = 1; i <= nsegs; i++) { desc->segs[i].addr = htole32(seg->ds_addr); desc->segs[i].len = htole32(seg->ds_len); seg++; } bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->data_dmat, ring->cmd_dma.map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); ring->pending += 1; if (!frag) { if (ring->qid < WPI_CMD_QUEUE_NUM) { WPI_TXQ_STATE_LOCK(sc); ring->queued += ring->pending; callout_reset(&sc->tx_timeout, 5*hz, wpi_tx_timeout, sc); WPI_TXQ_STATE_UNLOCK(sc); } /* Kick TX ring. */ ring->cur = (ring->cur + ring->pending) % WPI_TX_RING_COUNT; ring->pending = 0; sc->sc_update_tx_ring(sc, ring); } else ieee80211_node_incref(data->ni); end: DPRINTF(sc, WPI_DEBUG_TRACE, error ? TRACE_STR_END_ERR : TRACE_STR_END, __func__); WPI_TXQ_UNLOCK(sc); return (error); } /* * Construct the data packet for a transmit buffer. */ static int wpi_tx_data(struct wpi_softc *sc, struct mbuf *m, struct ieee80211_node *ni) { const struct ieee80211_txparam *tp; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct wpi_node *wn = WPI_NODE(ni); struct ieee80211_channel *chan; struct ieee80211_frame *wh; struct ieee80211_key *k = NULL; struct wpi_buf tx_data; struct wpi_cmd_data *tx = (struct wpi_cmd_data *)&tx_data.data; uint32_t flags; uint16_t ac, qos; uint8_t tid, type, rate; int swcrypt, ismcast, totlen; wh = mtod(m, struct ieee80211_frame *); type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1); swcrypt = 1; /* Select EDCA Access Category and TX ring for this frame. */ if (IEEE80211_QOS_HAS_SEQ(wh)) { qos = ((const struct ieee80211_qosframe *)wh)->i_qos[0]; tid = qos & IEEE80211_QOS_TID; } else { qos = 0; tid = 0; } ac = M_WME_GETAC(m); chan = (ni->ni_chan != IEEE80211_CHAN_ANYC) ? 
ni->ni_chan : ic->ic_curchan; tp = &vap->iv_txparms[ieee80211_chan2mode(chan)]; /* Choose a TX rate index. */ if (type == IEEE80211_FC0_TYPE_MGT) rate = tp->mgmtrate; else if (ismcast) rate = tp->mcastrate; else if (tp->ucastrate != IEEE80211_FIXED_RATE_NONE) rate = tp->ucastrate; else if (m->m_flags & M_EAPOL) rate = tp->mgmtrate; else { /* XXX pass pktlen */ (void) ieee80211_ratectl_rate(ni, NULL, 0); rate = ni->ni_txrate; } /* Encrypt the frame if need be. */ if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { /* Retrieve key for TX. */ k = ieee80211_crypto_encap(ni, m); if (k == NULL) return (ENOBUFS); swcrypt = k->wk_flags & IEEE80211_KEY_SWCRYPT; /* 802.11 header may have moved. */ wh = mtod(m, struct ieee80211_frame *); } totlen = m->m_pkthdr.len; if (ieee80211_radiotap_active_vap(vap)) { struct wpi_tx_radiotap_header *tap = &sc->sc_txtap; tap->wt_flags = 0; tap->wt_rate = rate; if (k != NULL) tap->wt_flags |= IEEE80211_RADIOTAP_F_WEP; if (wh->i_fc[1] & IEEE80211_FC1_MORE_FRAG) tap->wt_flags |= IEEE80211_RADIOTAP_F_FRAG; ieee80211_radiotap_tx(vap, m); } flags = 0; if (!ismcast) { /* Unicast frame, check if an ACK is expected. */ if (!qos || (qos & IEEE80211_QOS_ACKPOLICY) != IEEE80211_QOS_ACKPOLICY_NOACK) flags |= WPI_TX_NEED_ACK; } if (!IEEE80211_QOS_HAS_SEQ(wh)) flags |= WPI_TX_AUTO_SEQ; if (wh->i_fc[1] & IEEE80211_FC1_MORE_FRAG) flags |= WPI_TX_MORE_FRAG; /* Check if frame must be protected using RTS/CTS or CTS-to-self. */ if (!ismcast) { /* NB: Group frames are sent using CCK in 802.11b/g. */ if (totlen + IEEE80211_CRC_LEN > vap->iv_rtsthreshold) { flags |= WPI_TX_NEED_RTS; } else if ((ic->ic_flags & IEEE80211_F_USEPROT) && WPI_RATE_IS_OFDM(rate)) { if (ic->ic_protmode == IEEE80211_PROT_CTSONLY) flags |= WPI_TX_NEED_CTS; else if (ic->ic_protmode == IEEE80211_PROT_RTSCTS) flags |= WPI_TX_NEED_RTS; } if (flags & (WPI_TX_NEED_RTS | WPI_TX_NEED_CTS)) flags |= WPI_TX_FULL_TXOP; } memset(tx, 0, sizeof (struct wpi_cmd_data)); if (type == IEEE80211_FC0_TYPE_MGT) { uint8_t subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK; /* Tell HW to set timestamp in probe responses. 
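* NB: (re)association requests additionally get a timeout of 3 below * instead of the usual 2 (units as defined by the firmware interface).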
*/ if (subtype == IEEE80211_FC0_SUBTYPE_PROBE_RESP) flags |= WPI_TX_INSERT_TSTAMP; if (subtype == IEEE80211_FC0_SUBTYPE_ASSOC_REQ || subtype == IEEE80211_FC0_SUBTYPE_REASSOC_REQ) tx->timeout = htole16(3); else tx->timeout = htole16(2); } if (ismcast || type != IEEE80211_FC0_TYPE_DATA) tx->id = WPI_ID_BROADCAST; else { if (wn->id == WPI_ID_UNDEFINED) { device_printf(sc->sc_dev, "%s: undefined node id\n", __func__); return (EINVAL); } tx->id = wn->id; } if (!swcrypt) { switch (k->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_AES_CCM: tx->security = WPI_CIPHER_CCMP; break; default: break; } memcpy(tx->key, k->wk_key, k->wk_keylen); } if (wh->i_fc[1] & IEEE80211_FC1_MORE_FRAG) { struct mbuf *next = m->m_nextpkt; tx->lnext = htole16(next->m_pkthdr.len); tx->fnext = htole32(tx->security | (flags & WPI_TX_NEED_ACK) | WPI_NEXT_STA_ID(tx->id)); } tx->len = htole16(totlen); tx->flags = htole32(flags); tx->plcp = rate2plcp(rate); tx->tid = tid; tx->lifetime = htole32(WPI_LIFETIME_INFINITE); tx->ofdm_mask = 0xff; tx->cck_mask = 0x0f; tx->rts_ntries = 7; tx->data_ntries = tp->maxretry; tx_data.ni = ni; tx_data.m = m; tx_data.size = sizeof(struct wpi_cmd_data); tx_data.code = WPI_CMD_TX_DATA; tx_data.ac = ac; return wpi_cmd2(sc, &tx_data); } static int wpi_tx_data_raw(struct wpi_softc *sc, struct mbuf *m, struct ieee80211_node *ni, const struct ieee80211_bpf_params *params) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_key *k = NULL; struct ieee80211_frame *wh; struct wpi_buf tx_data; struct wpi_cmd_data *tx = (struct wpi_cmd_data *)&tx_data.data; uint32_t flags; uint8_t ac, type, rate; int swcrypt, totlen; wh = mtod(m, struct ieee80211_frame *); type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; swcrypt = 1; ac = params->ibp_pri & 3; /* Choose a TX rate index. */ rate = params->ibp_rate0; flags = 0; if (!IEEE80211_QOS_HAS_SEQ(wh)) flags |= WPI_TX_AUTO_SEQ; if ((params->ibp_flags & IEEE80211_BPF_NOACK) == 0) flags |= WPI_TX_NEED_ACK; if (params->ibp_flags & IEEE80211_BPF_RTS) flags |= WPI_TX_NEED_RTS; if (params->ibp_flags & IEEE80211_BPF_CTS) flags |= WPI_TX_NEED_CTS; if (flags & (WPI_TX_NEED_RTS | WPI_TX_NEED_CTS)) flags |= WPI_TX_FULL_TXOP; /* Encrypt the frame if need be. */ if (params->ibp_flags & IEEE80211_BPF_CRYPTO) { /* Retrieve key for TX. */ k = ieee80211_crypto_encap(ni, m); if (k == NULL) return (ENOBUFS); swcrypt = k->wk_flags & IEEE80211_KEY_SWCRYPT; /* 802.11 header may have moved. */ wh = mtod(m, struct ieee80211_frame *); } totlen = m->m_pkthdr.len; if (ieee80211_radiotap_active_vap(vap)) { struct wpi_tx_radiotap_header *tap = &sc->sc_txtap; tap->wt_flags = 0; tap->wt_rate = rate; if (params->ibp_flags & IEEE80211_BPF_CRYPTO) tap->wt_flags |= IEEE80211_RADIOTAP_F_WEP; ieee80211_radiotap_tx(vap, m); } memset(tx, 0, sizeof (struct wpi_cmd_data)); if (type == IEEE80211_FC0_TYPE_MGT) { uint8_t subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK; /* Tell HW to set timestamp in probe responses. 
*/ if (subtype == IEEE80211_FC0_SUBTYPE_PROBE_RESP) flags |= WPI_TX_INSERT_TSTAMP; if (subtype == IEEE80211_FC0_SUBTYPE_ASSOC_REQ || subtype == IEEE80211_FC0_SUBTYPE_REASSOC_REQ) tx->timeout = htole16(3); else tx->timeout = htole16(2); } if (!swcrypt) { switch (k->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_AES_CCM: tx->security = WPI_CIPHER_CCMP; break; default: break; } memcpy(tx->key, k->wk_key, k->wk_keylen); } tx->len = htole16(totlen); tx->flags = htole32(flags); tx->plcp = rate2plcp(rate); tx->id = WPI_ID_BROADCAST; tx->lifetime = htole32(WPI_LIFETIME_INFINITE); tx->rts_ntries = params->ibp_try1; tx->data_ntries = params->ibp_try0; tx_data.ni = ni; tx_data.m = m; tx_data.size = sizeof(struct wpi_cmd_data); tx_data.code = WPI_CMD_TX_DATA; tx_data.ac = ac; return wpi_cmd2(sc, &tx_data); } static __inline int wpi_tx_ring_free_space(struct wpi_softc *sc, uint16_t ac) { struct wpi_tx_ring *ring = &sc->txq[ac]; int retval; WPI_TXQ_STATE_LOCK(sc); retval = WPI_TX_RING_HIMARK - ring->queued; WPI_TXQ_STATE_UNLOCK(sc); return retval; } static int wpi_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { struct ieee80211com *ic = ni->ni_ic; struct wpi_softc *sc = ic->ic_softc; uint16_t ac; int error = 0; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); ac = M_WME_GETAC(m); WPI_TX_LOCK(sc); /* NB: no fragments here */ if (sc->sc_running == 0 || wpi_tx_ring_free_space(sc, ac) < 1) { error = sc->sc_running ? ENOBUFS : ENETDOWN; goto unlock; } if (params == NULL) { /* * Legacy path; interpret frame contents to decide * precisely how to send the frame. */ error = wpi_tx_data(sc, m, ni); } else { /* * Caller supplied explicit parameters to use in * sending the frame. */ error = wpi_tx_data_raw(sc, m, ni, params); } unlock: WPI_TX_UNLOCK(sc); if (error != 0) { m_freem(m); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; } static int wpi_transmit(struct ieee80211com *ic, struct mbuf *m) { struct wpi_softc *sc = ic->ic_softc; struct ieee80211_node *ni; struct mbuf *mnext; uint16_t ac; int error, nmbufs; WPI_TX_LOCK(sc); DPRINTF(sc, WPI_DEBUG_XMIT, "%s: called\n", __func__); /* Check if interface is up & running. */ if (__predict_false(sc->sc_running == 0)) { error = ENXIO; goto unlock; } nmbufs = 1; for (mnext = m->m_nextpkt; mnext != NULL; mnext = mnext->m_nextpkt) nmbufs++; /* Check for available space. */ ac = M_WME_GETAC(m); if (wpi_tx_ring_free_space(sc, ac) < nmbufs) { error = ENOBUFS; goto unlock; } error = 0; ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; do { mnext = m->m_nextpkt; if (wpi_tx_data(sc, m, ni) != 0) { if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, nmbufs); wpi_free_txfrags(sc, ac); ieee80211_free_mbuf(m); ieee80211_free_node(ni); break; } } while((m = mnext) != NULL); DPRINTF(sc, WPI_DEBUG_XMIT, "%s: done\n", __func__); unlock: WPI_TX_UNLOCK(sc); return (error); } static void wpi_watchdog_rfkill(void *arg) { struct wpi_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; DPRINTF(sc, WPI_DEBUG_WATCHDOG, "RFkill Watchdog: tick\n"); /* No need to lock firmware memory. */ if ((wpi_prph_read(sc, WPI_APMG_RFKILL) & 0x1) == 0) { /* Radio kill switch is still off. 
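* Poll again in about a second (hz ticks) until the switch is flipped * back on; the radio-on task then brings the device back up.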
*/ callout_reset(&sc->watchdog_rfkill, hz, wpi_watchdog_rfkill, sc); } else ieee80211_runtask(ic, &sc->sc_radioon_task); } static void wpi_scan_timeout(void *arg) { struct wpi_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; ic_printf(ic, "scan timeout\n"); taskqueue_enqueue(sc->sc_tq, &sc->sc_reinittask); } static void wpi_tx_timeout(void *arg) { struct wpi_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; ic_printf(ic, "device timeout\n"); taskqueue_enqueue(sc->sc_tq, &sc->sc_reinittask); } static void wpi_parent(struct ieee80211com *ic) { struct wpi_softc *sc = ic->ic_softc; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (ic->ic_nrunning > 0) { if (wpi_init(sc) == 0) { ieee80211_notify_radio(ic, 1); ieee80211_start_all(ic); } else { ieee80211_notify_radio(ic, 0); ieee80211_stop(vap); } } else wpi_stop(sc); } /* * Send a command to the firmware. */ static int wpi_cmd(struct wpi_softc *sc, uint8_t code, const void *buf, uint16_t size, int async) { struct wpi_tx_ring *ring = &sc->txq[WPI_CMD_QUEUE_NUM]; struct wpi_tx_desc *desc; struct wpi_tx_data *data; struct wpi_tx_cmd *cmd; struct mbuf *m; bus_addr_t paddr; uint16_t totlen; int error; WPI_TXQ_LOCK(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); if (__predict_false(sc->sc_running == 0)) { /* wpi_stop() was called */ if (code == WPI_CMD_SCAN) error = ENETDOWN; else error = 0; goto fail; } if (async == 0) WPI_LOCK_ASSERT(sc); DPRINTF(sc, WPI_DEBUG_CMD, "%s: cmd %s size %u async %d\n", __func__, wpi_cmd_str(code), size, async); desc = &ring->desc[ring->cur]; data = &ring->data[ring->cur]; totlen = 4 + size; if (size > sizeof cmd->data) { /* Command is too large to fit in a descriptor. */ if (totlen > MCLBYTES) { error = EINVAL; goto fail; } m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (m == NULL) { error = ENOMEM; goto fail; } cmd = mtod(m, struct wpi_tx_cmd *); error = bus_dmamap_load(ring->data_dmat, data->map, cmd, totlen, wpi_dma_map_addr, &paddr, BUS_DMA_NOWAIT); if (error != 0) { m_freem(m); goto fail; } data->m = m; } else { cmd = &ring->cmd[ring->cur]; paddr = data->cmd_paddr; } cmd->code = code; cmd->flags = 0; cmd->qid = ring->qid; cmd->idx = ring->cur; memcpy(cmd->data, buf, size); desc->nsegs = 1 + (WPI_PAD32(size) << 4); desc->segs[0].addr = htole32(paddr); desc->segs[0].len = htole32(totlen); if (size > sizeof cmd->data) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_PREWRITE); } else { bus_dmamap_sync(ring->data_dmat, ring->cmd_dma.map, BUS_DMASYNC_PREWRITE); } bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); /* Kick command ring. */ ring->cur = (ring->cur + 1) % WPI_TX_RING_COUNT; sc->sc_update_tx_ring(sc, ring); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); WPI_TXQ_UNLOCK(sc); return async ? 0 : mtx_sleep(cmd, &sc->sc_mtx, PCATCH, "wpicmd", hz); fail: DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); WPI_TXQ_UNLOCK(sc); return error; } /* * Configure HW multi-rate retries. */ static int wpi_mrr_setup(struct wpi_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct wpi_mrr_setup mrr; uint8_t i; int error; /* CCK rates (not used with 802.11a). */ for (i = WPI_RIDX_CCK1; i <= WPI_RIDX_CCK11; i++) { mrr.rates[i].flags = 0; mrr.rates[i].plcp = wpi_ridx_to_plcp[i]; /* Fallback to the immediate lower CCK rate (if any.) */ mrr.rates[i].next = (i == WPI_RIDX_CCK1) ? WPI_RIDX_CCK1 : i - 1; /* Try twice at this rate before falling back to "next". 
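* Each CCK rate thus falls back to the next lower one; CCK1, having no * lower rate, is chained to itself.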
*/ mrr.rates[i].ntries = WPI_NTRIES_DEFAULT; } /* OFDM rates (not used with 802.11b). */ for (i = WPI_RIDX_OFDM6; i <= WPI_RIDX_OFDM54; i++) { mrr.rates[i].flags = 0; mrr.rates[i].plcp = wpi_ridx_to_plcp[i]; /* Fallback to the immediate lower rate (if any.) */ /* We allow fallback from OFDM/6 to CCK/2 in 11b/g mode. */ mrr.rates[i].next = (i == WPI_RIDX_OFDM6) ? ((ic->ic_curmode == IEEE80211_MODE_11A) ? WPI_RIDX_OFDM6 : WPI_RIDX_CCK2) : i - 1; /* Try twice at this rate before falling back to "next". */ mrr.rates[i].ntries = WPI_NTRIES_DEFAULT; } /* Setup MRR for control frames. */ mrr.which = htole32(WPI_MRR_CTL); error = wpi_cmd(sc, WPI_CMD_MRR_SETUP, &mrr, sizeof mrr, 0); if (error != 0) { device_printf(sc->sc_dev, "could not setup MRR for control frames\n"); return error; } /* Setup MRR for data frames. */ mrr.which = htole32(WPI_MRR_DATA); error = wpi_cmd(sc, WPI_CMD_MRR_SETUP, &mrr, sizeof mrr, 0); if (error != 0) { device_printf(sc->sc_dev, "could not setup MRR for data frames\n"); return error; } return 0; } static int wpi_add_node(struct wpi_softc *sc, struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct wpi_vap *wvp = WPI_VAP(ni->ni_vap); struct wpi_node *wn = WPI_NODE(ni); struct wpi_node_info node; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (wn->id == WPI_ID_UNDEFINED) return EINVAL; memset(&node, 0, sizeof node); IEEE80211_ADDR_COPY(node.macaddr, ni->ni_macaddr); node.id = wn->id; node.plcp = (ic->ic_curmode == IEEE80211_MODE_11A) ? wpi_ridx_to_plcp[WPI_RIDX_OFDM6] : wpi_ridx_to_plcp[WPI_RIDX_CCK1]; node.action = htole32(WPI_ACTION_SET_RATE); node.antenna = WPI_ANTENNA_BOTH; DPRINTF(sc, WPI_DEBUG_NODE, "%s: adding node %d (%s)\n", __func__, wn->id, ether_sprintf(ni->ni_macaddr)); error = wpi_cmd(sc, WPI_CMD_ADD_NODE, &node, sizeof node, 1); if (error != 0) { device_printf(sc->sc_dev, "%s: wpi_cmd() call failed with error code %d\n", __func__, error); return error; } if (wvp->wv_gtk != 0) { error = wpi_set_global_keys(ni); if (error != 0) { device_printf(sc->sc_dev, "%s: error while setting global keys\n", __func__); return ENXIO; } } return 0; } /* * Broadcast node is used to send group-addressed and management frames. */ static int wpi_add_broadcast_node(struct wpi_softc *sc, int async) { struct ieee80211com *ic = &sc->sc_ic; struct wpi_node_info node; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); memset(&node, 0, sizeof node); IEEE80211_ADDR_COPY(node.macaddr, ieee80211broadcastaddr); node.id = WPI_ID_BROADCAST; node.plcp = (ic->ic_curmode == IEEE80211_MODE_11A) ? 
wpi_ridx_to_plcp[WPI_RIDX_OFDM6] : wpi_ridx_to_plcp[WPI_RIDX_CCK1]; node.action = htole32(WPI_ACTION_SET_RATE); node.antenna = WPI_ANTENNA_BOTH; DPRINTF(sc, WPI_DEBUG_NODE, "%s: adding broadcast node\n", __func__); return wpi_cmd(sc, WPI_CMD_ADD_NODE, &node, sizeof node, async); } static int wpi_add_sta_node(struct wpi_softc *sc, struct ieee80211_node *ni) { struct wpi_node *wn = WPI_NODE(ni); int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); wn->id = wpi_add_node_entry_sta(sc); if ((error = wpi_add_node(sc, ni)) != 0) { wpi_del_node_entry(sc, wn->id); wn->id = WPI_ID_UNDEFINED; return error; } return 0; } static int wpi_add_ibss_node(struct wpi_softc *sc, struct ieee80211_node *ni) { struct wpi_node *wn = WPI_NODE(ni); int error; KASSERT(wn->id == WPI_ID_UNDEFINED, ("the node %d was added before", wn->id)); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if ((wn->id = wpi_add_node_entry_adhoc(sc)) == WPI_ID_UNDEFINED) { device_printf(sc->sc_dev, "%s: h/w table is full\n", __func__); return ENOMEM; } if ((error = wpi_add_node(sc, ni)) != 0) { wpi_del_node_entry(sc, wn->id); wn->id = WPI_ID_UNDEFINED; return error; } return 0; } static void wpi_del_node(struct wpi_softc *sc, struct ieee80211_node *ni) { struct wpi_node *wn = WPI_NODE(ni); struct wpi_cmd_del_node node; int error; KASSERT(wn->id != WPI_ID_UNDEFINED, ("undefined node id passed")); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); memset(&node, 0, sizeof node); IEEE80211_ADDR_COPY(node.macaddr, ni->ni_macaddr); node.count = 1; DPRINTF(sc, WPI_DEBUG_NODE, "%s: deleting node %d (%s)\n", __func__, wn->id, ether_sprintf(ni->ni_macaddr)); error = wpi_cmd(sc, WPI_CMD_DEL_NODE, &node, sizeof node, 1); if (error != 0) { device_printf(sc->sc_dev, "%s: could not delete node %u, error %d\n", __func__, wn->id, error); } } static int wpi_updateedca(struct ieee80211com *ic) { #define WPI_EXP2(x) ((1 << (x)) - 1) /* CWmin = 2^ECWmin - 1 */ struct wpi_softc *sc = ic->ic_softc; struct wpi_edca_params cmd; int aci, error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); memset(&cmd, 0, sizeof cmd); cmd.flags = htole32(WPI_EDCA_UPDATE); for (aci = 0; aci < WME_NUM_AC; aci++) { const struct wmeParams *ac = &ic->ic_wme.wme_chanParams.cap_wmeParams[aci]; cmd.ac[aci].aifsn = ac->wmep_aifsn; cmd.ac[aci].cwmin = htole16(WPI_EXP2(ac->wmep_logcwmin)); cmd.ac[aci].cwmax = htole16(WPI_EXP2(ac->wmep_logcwmax)); cmd.ac[aci].txoplimit = htole16(IEEE80211_TXOP_TO_US(ac->wmep_txopLimit)); DPRINTF(sc, WPI_DEBUG_EDCA, "setting WME for queue %d aifsn=%d cwmin=%d cwmax=%d " "txoplimit=%d\n", aci, cmd.ac[aci].aifsn, cmd.ac[aci].cwmin, cmd.ac[aci].cwmax, cmd.ac[aci].txoplimit); } error = wpi_cmd(sc, WPI_CMD_EDCA_PARAMS, &cmd, sizeof cmd, 1); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return error; #undef WPI_EXP2 } static void wpi_set_promisc(struct wpi_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); uint32_t promisc_filter; promisc_filter = WPI_FILTER_CTL; if (vap != NULL && vap->iv_opmode != IEEE80211_M_HOSTAP) promisc_filter |= WPI_FILTER_PROMISC; if (ic->ic_promisc > 0) sc->rxon.filter |= htole32(promisc_filter); else sc->rxon.filter &= ~htole32(promisc_filter); } static void wpi_update_promisc(struct ieee80211com *ic) { struct wpi_softc *sc = ic->ic_softc; WPI_RXON_LOCK(sc); wpi_set_promisc(sc); if (wpi_send_rxon(sc, 1, 1) != 0) { device_printf(sc->sc_dev, "%s: could not send RXON\n", __func__); } WPI_RXON_UNLOCK(sc); } static void 
wpi_update_mcast(struct ieee80211com *ic) { /* Ignore */ } static void wpi_set_led(struct wpi_softc *sc, uint8_t which, uint8_t off, uint8_t on) { struct wpi_cmd_led led; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); led.which = which; led.unit = htole32(100000); /* on/off in unit of 100ms */ led.off = off; led.on = on; (void)wpi_cmd(sc, WPI_CMD_SET_LED, &led, sizeof led, 1); } static int wpi_set_timing(struct wpi_softc *sc, struct ieee80211_node *ni) { struct wpi_cmd_timing cmd; uint64_t val, mod; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); memset(&cmd, 0, sizeof cmd); memcpy(&cmd.tstamp, ni->ni_tstamp.data, sizeof (uint64_t)); cmd.bintval = htole16(ni->ni_intval); cmd.lintval = htole16(10); /* Compute remaining time until next beacon. */ val = (uint64_t)ni->ni_intval * IEEE80211_DUR_TU; mod = le64toh(cmd.tstamp) % val; cmd.binitval = htole32((uint32_t)(val - mod)); DPRINTF(sc, WPI_DEBUG_RESET, "timing bintval=%u tstamp=%ju, init=%u\n", ni->ni_intval, le64toh(cmd.tstamp), (uint32_t)(val - mod)); return wpi_cmd(sc, WPI_CMD_TIMING, &cmd, sizeof cmd, 1); } /* * This function is called periodically (every 60 seconds) to adjust output * power to temperature changes. */ static void wpi_power_calibration(struct wpi_softc *sc) { int temp; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); /* Update sensor data. */ temp = (int)WPI_READ(sc, WPI_UCODE_GP2); DPRINTF(sc, WPI_DEBUG_TEMP, "Temp in calibration is: %d\n", temp); /* Sanity-check read value. */ if (temp < -260 || temp > 25) { /* This can't be correct, ignore. */ DPRINTF(sc, WPI_DEBUG_TEMP, "out-of-range temperature reported: %d\n", temp); return; } DPRINTF(sc, WPI_DEBUG_TEMP, "temperature %d->%d\n", sc->temp, temp); /* Adjust Tx power if need be. */ if (abs(temp - sc->temp) <= 6) return; sc->temp = temp; if (wpi_set_txpower(sc, 1) != 0) { /* just warn, too bad for the automatic calibration... */ device_printf(sc->sc_dev,"could not adjust Tx power\n"); } } /* * Set TX power for current channel. */ static int wpi_set_txpower(struct wpi_softc *sc, int async) { struct wpi_power_group *group; struct wpi_cmd_txpower cmd; uint8_t chan; int idx, is_chan_5ghz, i; /* Retrieve current channel from last RXON. */ chan = sc->rxon.chan; is_chan_5ghz = (sc->rxon.flags & htole32(WPI_RXON_24GHZ)) == 0; /* Find the TX power group to which this channel belongs. */ if (is_chan_5ghz) { for (group = &sc->groups[1]; group < &sc->groups[4]; group++) if (chan <= group->chan) break; } else group = &sc->groups[0]; memset(&cmd, 0, sizeof cmd); cmd.band = is_chan_5ghz ? WPI_BAND_5GHZ : WPI_BAND_2GHZ; cmd.chan = htole16(chan); /* Set TX power for all OFDM and CCK rates. */ for (i = 0; i <= WPI_RIDX_MAX ; i++) { /* Retrieve TX power for this channel/rate. */ idx = wpi_get_power_index(sc, group, chan, is_chan_5ghz, i); cmd.rates[i].plcp = wpi_ridx_to_plcp[i]; if (is_chan_5ghz) { cmd.rates[i].rf_gain = wpi_rf_gain_5ghz[idx]; cmd.rates[i].dsp_gain = wpi_dsp_gain_5ghz[idx]; } else { cmd.rates[i].rf_gain = wpi_rf_gain_2ghz[idx]; cmd.rates[i].dsp_gain = wpi_dsp_gain_2ghz[idx]; } DPRINTF(sc, WPI_DEBUG_TEMP, "chan %d/ridx %d: power index %d\n", chan, i, idx); } return wpi_cmd(sc, WPI_CMD_TXPOWER, &cmd, sizeof cmd, async); } /* * Determine Tx power index for a given channel/rate combination. * This takes into account the regulatory information from EEPROM and the * current temperature. 
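* Worked example (hypothetical sample values): with samples * (power 10, index 20) and (power 14, index 40), a target power of 11 * interpolates to index 20 + round(1 * 20 / 4) = 25. Higher indices * mean lower output power: the CCK adjustment adds 10 steps for -5 dB, * i.e. roughly 0.5 dB per step, and the temperature term below shifts * the index by 11/100 of a step per degree of deviation.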
*/ static int wpi_get_power_index(struct wpi_softc *sc, struct wpi_power_group *group, uint8_t chan, int is_chan_5ghz, int ridx) { /* Fixed-point arithmetic division using a n-bit fractional part. */ #define fdivround(a, b, n) \ ((((1 << n) * (a)) / (b) + (1 << n) / 2) / (1 << n)) /* Linear interpolation. */ #define interpolate(x, x1, y1, x2, y2, n) \ ((y1) + fdivround(((x) - (x1)) * ((y2) - (y1)), (x2) - (x1), n)) struct wpi_power_sample *sample; int pwr, idx; /* Default TX power is group maximum TX power minus 3dB. */ pwr = group->maxpwr / 2; /* Decrease TX power for highest OFDM rates to reduce distortion. */ switch (ridx) { case WPI_RIDX_OFDM36: pwr -= is_chan_5ghz ? 5 : 0; break; case WPI_RIDX_OFDM48: pwr -= is_chan_5ghz ? 10 : 7; break; case WPI_RIDX_OFDM54: pwr -= is_chan_5ghz ? 12 : 9; break; } /* Never exceed the channel maximum allowed TX power. */ pwr = min(pwr, sc->maxpwr[chan]); /* Retrieve TX power index into gain tables from samples. */ for (sample = group->samples; sample < &group->samples[3]; sample++) if (pwr > sample[1].power) break; /* Fixed-point linear interpolation using a 19-bit fractional part. */ idx = interpolate(pwr, sample[0].power, sample[0].index, sample[1].power, sample[1].index, 19); /*- * Adjust power index based on current temperature: * - if cooler than factory-calibrated: decrease output power * - if warmer than factory-calibrated: increase output power */ idx -= (sc->temp - group->temp) * 11 / 100; /* Decrease TX power for CCK rates (-5dB). */ if (ridx >= WPI_RIDX_CCK1) idx += 10; /* Make sure idx stays in a valid range. */ if (idx < 0) return 0; if (idx > WPI_MAX_PWR_INDEX) return WPI_MAX_PWR_INDEX; return idx; #undef interpolate #undef fdivround } /* * Set STA mode power saving level (between 0 and 5). * Level 0 is CAM (Continuously Aware Mode), 5 is for maximum power saving. */ static int wpi_set_pslevel(struct wpi_softc *sc, uint8_t dtim, int level, int async) { struct wpi_pmgt_cmd cmd; const struct wpi_pmgt *pmgt; uint32_t max, reg; uint8_t skip_dtim; int i; DPRINTF(sc, WPI_DEBUG_PWRSAVE, "%s: dtim=%d, level=%d, async=%d\n", __func__, dtim, level, async); /* Select which PS parameters to use. */ if (dtim <= 10) pmgt = &wpi_pmgt[0][level]; else pmgt = &wpi_pmgt[1][level]; memset(&cmd, 0, sizeof cmd); if (level != 0) /* not CAM */ cmd.flags |= htole16(WPI_PS_ALLOW_SLEEP); /* Retrieve PCIe Active State Power Management (ASPM). */ - reg = pci_read_config(sc->sc_dev, sc->sc_cap_off + 0x10, 1); - if (!(reg & 0x1)) /* L0s Entry disabled. */ + reg = pci_read_config(sc->sc_dev, sc->sc_cap_off + PCIER_LINK_CTL, 1); + if (!(reg & PCIEM_LINK_CTL_ASPMC_L0S)) /* L0s Entry disabled. 
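* PCIER_LINK_CTL is the PCIe Link Control register (offset 0x10 in the * PCIe capability) and PCIEM_LINK_CTL_ASPMC_L0S (0x1) its ASPM L0s * entry enable bit.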
*/ cmd.flags |= htole16(WPI_PS_PCI_PMGT); cmd.rxtimeout = htole32(pmgt->rxtimeout * IEEE80211_DUR_TU); cmd.txtimeout = htole32(pmgt->txtimeout * IEEE80211_DUR_TU); if (dtim == 0) { dtim = 1; skip_dtim = 0; } else skip_dtim = pmgt->skip_dtim; if (skip_dtim != 0) { cmd.flags |= htole16(WPI_PS_SLEEP_OVER_DTIM); max = pmgt->intval[4]; if (max == (uint32_t)-1) max = dtim * (skip_dtim + 1); else if (max > dtim) max = (max / dtim) * dtim; } else max = dtim; for (i = 0; i < 5; i++) cmd.intval[i] = htole32(MIN(max, pmgt->intval[i])); return wpi_cmd(sc, WPI_CMD_SET_POWER_MODE, &cmd, sizeof cmd, async); } static int wpi_send_btcoex(struct wpi_softc *sc) { struct wpi_bluetooth cmd; memset(&cmd, 0, sizeof cmd); cmd.flags = WPI_BT_COEX_MODE_4WIRE; cmd.lead_time = WPI_BT_LEAD_TIME_DEF; cmd.max_kill = WPI_BT_MAX_KILL_DEF; DPRINTF(sc, WPI_DEBUG_RESET, "%s: configuring bluetooth coexistence\n", __func__); return wpi_cmd(sc, WPI_CMD_BT_COEX, &cmd, sizeof(cmd), 0); } static int wpi_send_rxon(struct wpi_softc *sc, int assoc, int async) { int error; if (async) WPI_RXON_LOCK_ASSERT(sc); if (assoc && wpi_check_bss_filter(sc) != 0) { struct wpi_assoc rxon_assoc; rxon_assoc.flags = sc->rxon.flags; rxon_assoc.filter = sc->rxon.filter; rxon_assoc.ofdm_mask = sc->rxon.ofdm_mask; rxon_assoc.cck_mask = sc->rxon.cck_mask; rxon_assoc.reserved = 0; error = wpi_cmd(sc, WPI_CMD_RXON_ASSOC, &rxon_assoc, sizeof (struct wpi_assoc), async); if (error != 0) { device_printf(sc->sc_dev, "RXON_ASSOC command failed, error %d\n", error); return error; } } else { if (async) { WPI_NT_LOCK(sc); error = wpi_cmd(sc, WPI_CMD_RXON, &sc->rxon, sizeof (struct wpi_rxon), async); if (error == 0) wpi_clear_node_table(sc); WPI_NT_UNLOCK(sc); } else { error = wpi_cmd(sc, WPI_CMD_RXON, &sc->rxon, sizeof (struct wpi_rxon), async); if (error == 0) wpi_clear_node_table(sc); } if (error != 0) { device_printf(sc->sc_dev, "RXON command failed, error %d\n", error); return error; } /* Add broadcast node. */ error = wpi_add_broadcast_node(sc, async); if (error != 0) { device_printf(sc->sc_dev, "could not add broadcast node, error %d\n", error); return error; } } /* Configuration has changed, set Tx power accordingly. */ if ((error = wpi_set_txpower(sc, async)) != 0) { device_printf(sc->sc_dev, "%s: could not set TX power, error %d\n", __func__, error); return error; } return 0; } /** * Configure the card to listen to a particular channel; this transitions the * card into being able to receive frames from remote devices. */ static int wpi_config(struct wpi_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_channel *c = ic->ic_curchan; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); /* Set power saving level to CAM during initialization. */ if ((error = wpi_set_pslevel(sc, 0, 0, 0)) != 0) { device_printf(sc->sc_dev, "%s: could not set power saving level\n", __func__); return error; } /* Configure bluetooth coexistence. */ if ((error = wpi_send_btcoex(sc)) != 0) { device_printf(sc->sc_dev, "could not configure bluetooth coexistence\n"); return error; } /* Configure adapter. */ memset(&sc->rxon, 0, sizeof (struct wpi_rxon)); IEEE80211_ADDR_COPY(sc->rxon.myaddr, vap->iv_myaddr); /* Set default channel.
*/ sc->rxon.chan = ieee80211_chan2ieee(ic, c); sc->rxon.flags = htole32(WPI_RXON_TSF | WPI_RXON_CTS_TO_SELF); if (IEEE80211_IS_CHAN_2GHZ(c)) sc->rxon.flags |= htole32(WPI_RXON_AUTO | WPI_RXON_24GHZ); sc->rxon.filter = WPI_FILTER_MULTICAST; switch (ic->ic_opmode) { case IEEE80211_M_STA: sc->rxon.mode = WPI_MODE_STA; break; case IEEE80211_M_IBSS: sc->rxon.mode = WPI_MODE_IBSS; sc->rxon.filter |= WPI_FILTER_BEACON; break; case IEEE80211_M_HOSTAP: /* XXX workaround for beaconing */ sc->rxon.mode = WPI_MODE_IBSS; sc->rxon.filter |= WPI_FILTER_ASSOC | WPI_FILTER_PROMISC; break; case IEEE80211_M_AHDEMO: sc->rxon.mode = WPI_MODE_HOSTAP; break; case IEEE80211_M_MONITOR: sc->rxon.mode = WPI_MODE_MONITOR; break; default: device_printf(sc->sc_dev, "unknown opmode %d\n", ic->ic_opmode); return EINVAL; } sc->rxon.filter = htole32(sc->rxon.filter); wpi_set_promisc(sc); sc->rxon.cck_mask = 0x0f; /* not yet negotiated */ sc->rxon.ofdm_mask = 0xff; /* not yet negotiated */ if ((error = wpi_send_rxon(sc, 0, 0)) != 0) { device_printf(sc->sc_dev, "%s: could not send RXON\n", __func__); return error; } /* Set up rate scaling. */ if ((error = wpi_mrr_setup(sc)) != 0) { device_printf(sc->sc_dev, "could not setup MRR, error %d\n", error); return error; } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; } static uint16_t wpi_get_active_dwell_time(struct wpi_softc *sc, struct ieee80211_channel *c, uint8_t n_probes) { /* No channel? Default to 2GHz settings. */ if (c == NULL || IEEE80211_IS_CHAN_2GHZ(c)) { return (WPI_ACTIVE_DWELL_TIME_2GHZ + WPI_ACTIVE_DWELL_FACTOR_2GHZ * (n_probes + 1)); } /* 5GHz dwell time. */ return (WPI_ACTIVE_DWELL_TIME_5GHZ + WPI_ACTIVE_DWELL_FACTOR_5GHZ * (n_probes + 1)); } /* * Limit the total dwell time. * * Returns the dwell time in milliseconds. */ static uint16_t wpi_limit_dwell(struct wpi_softc *sc, uint16_t dwell_time) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); uint16_t bintval = 0; /* bintval is in TU (1.024 ms) */ if (vap != NULL) bintval = vap->iv_bss->ni_intval; /* * If it's non-zero, we should calculate the minimum of * it and the DWELL_BASE. * * XXX Yes, the math should take into account that bintval * is 1.024 ms, not 1 ms. */ if (bintval > 0) { DPRINTF(sc, WPI_DEBUG_SCAN, "%s: bintval=%d\n", __func__, bintval); return (MIN(dwell_time, bintval - WPI_CHANNEL_TUNE_TIME * 2)); } /* No association context? Default. */ return dwell_time; } static uint16_t wpi_get_passive_dwell_time(struct wpi_softc *sc, struct ieee80211_channel *c) { uint16_t passive; if (c == NULL || IEEE80211_IS_CHAN_2GHZ(c)) passive = WPI_PASSIVE_DWELL_BASE + WPI_PASSIVE_DWELL_TIME_2GHZ; else passive = WPI_PASSIVE_DWELL_BASE + WPI_PASSIVE_DWELL_TIME_5GHZ; /* Clamp to the beacon interval if we're associated. */ return (wpi_limit_dwell(sc, passive)); } static uint32_t wpi_get_scan_pause_time(uint32_t time, uint16_t bintval) { uint32_t mod = (time % bintval) * IEEE80211_DUR_TU; uint32_t nbeacons = time / bintval; if (mod > WPI_PAUSE_MAX_TIME) mod = WPI_PAUSE_MAX_TIME; return WPI_PAUSE_SCAN(nbeacons, mod); } /* * Send a scan request to the firmware.
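* The buffer built below is laid out as: scan header, a TX command for * the probe request, WPI_SCAN_MAX_ESSIDS ESSID entries, the probe * request frame itself, and one wpi_scan_chan entry per channel.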
*/ static int wpi_scan(struct wpi_softc *sc, struct ieee80211_channel *c) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_scan_state *ss = ic->ic_scan; struct ieee80211vap *vap = ss->ss_vap; struct wpi_scan_hdr *hdr; struct wpi_cmd_data *tx; struct wpi_scan_essid *essids; struct wpi_scan_chan *chan; struct ieee80211_frame *wh; struct ieee80211_rateset *rs; uint16_t bintval, buflen, dwell_active, dwell_passive; uint8_t *buf, *frm, i, nssid; int bgscan, error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); /* * We are absolutely not allowed to send a scan command when another * scan command is pending. */ if (callout_pending(&sc->scan_timeout)) { device_printf(sc->sc_dev, "%s: called whilst scanning!\n", __func__); error = EAGAIN; goto fail; } bgscan = wpi_check_bss_filter(sc); bintval = vap->iv_bss->ni_intval; if (bgscan != 0 && bintval < WPI_QUIET_TIME_DEFAULT + WPI_CHANNEL_TUNE_TIME * 2) { error = EOPNOTSUPP; goto fail; } buf = malloc(WPI_SCAN_MAXSZ, M_DEVBUF, M_NOWAIT | M_ZERO); if (buf == NULL) { device_printf(sc->sc_dev, "%s: could not allocate buffer for scan command\n", __func__); error = ENOMEM; goto fail; } hdr = (struct wpi_scan_hdr *)buf; /* * Move to the next channel if no packets are received within 10 msecs * after sending the probe request. */ hdr->quiet_time = htole16(WPI_QUIET_TIME_DEFAULT); hdr->quiet_threshold = htole16(1); if (bgscan != 0) { /* * Max needs to be greater than active and passive and quiet! * It's also in microseconds! */ hdr->max_svc = htole32(250 * IEEE80211_DUR_TU); hdr->pause_svc = htole32(wpi_get_scan_pause_time(100, bintval)); } hdr->filter = htole32(WPI_FILTER_MULTICAST | WPI_FILTER_BEACON); tx = (struct wpi_cmd_data *)(hdr + 1); tx->flags = htole32(WPI_TX_AUTO_SEQ); tx->id = WPI_ID_BROADCAST; tx->lifetime = htole32(WPI_LIFETIME_INFINITE); if (IEEE80211_IS_CHAN_5GHZ(c)) { /* Send probe requests at 6Mbps. */ tx->plcp = wpi_ridx_to_plcp[WPI_RIDX_OFDM6]; rs = &ic->ic_sup_rates[IEEE80211_MODE_11A]; } else { hdr->flags = htole32(WPI_RXON_24GHZ | WPI_RXON_AUTO); /* Send probe requests at 1Mbps. */ tx->plcp = wpi_ridx_to_plcp[WPI_RIDX_CCK1]; rs = &ic->ic_sup_rates[IEEE80211_MODE_11G]; } essids = (struct wpi_scan_essid *)(tx + 1); nssid = MIN(ss->ss_nssid, WPI_SCAN_MAX_ESSIDS); for (i = 0; i < nssid; i++) { essids[i].id = IEEE80211_ELEMID_SSID; essids[i].len = MIN(ss->ss_ssid[i].len, IEEE80211_NWID_LEN); memcpy(essids[i].data, ss->ss_ssid[i].ssid, essids[i].len); #ifdef WPI_DEBUG if (sc->sc_debug & WPI_DEBUG_SCAN) { printf("Scanning Essid: "); ieee80211_print_essid(essids[i].data, essids[i].len); printf("\n"); } #endif } /* * Build a probe request frame. Most of the following code is a * copy & paste of what is done in net80211. */ wh = (struct ieee80211_frame *)(essids + WPI_SCAN_MAX_ESSIDS); wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_MGT | IEEE80211_FC0_SUBTYPE_PROBE_REQ; wh->i_fc[1] = IEEE80211_FC1_DIR_NODS; IEEE80211_ADDR_COPY(wh->i_addr1, ieee80211broadcastaddr); IEEE80211_ADDR_COPY(wh->i_addr2, vap->iv_myaddr); IEEE80211_ADDR_COPY(wh->i_addr3, ieee80211broadcastaddr); frm = (uint8_t *)(wh + 1); frm = ieee80211_add_ssid(frm, NULL, 0); frm = ieee80211_add_rates(frm, rs); if (rs->rs_nrates > IEEE80211_RATE_SIZE) frm = ieee80211_add_xrates(frm, rs); /* Set length of probe request. */ tx->len = htole16(frm - (uint8_t *)wh); /* * Construct information about the channel that we * want to scan. 
The firmware expects this to be directly * after the scan probe request */ chan = (struct wpi_scan_chan *)frm; chan->chan = ieee80211_chan2ieee(ic, c); chan->flags = 0; if (nssid) { hdr->crc_threshold = WPI_SCAN_CRC_TH_DEFAULT; chan->flags |= WPI_CHAN_NPBREQS(nssid); } else hdr->crc_threshold = WPI_SCAN_CRC_TH_NEVER; if (!IEEE80211_IS_CHAN_PASSIVE(c)) chan->flags |= WPI_CHAN_ACTIVE; /* * Calculate the active/passive dwell times. */ dwell_active = wpi_get_active_dwell_time(sc, c, nssid); dwell_passive = wpi_get_passive_dwell_time(sc, c); /* Make sure they're valid. */ if (dwell_active > dwell_passive) dwell_active = dwell_passive; chan->active = htole16(dwell_active); chan->passive = htole16(dwell_passive); chan->dsp_gain = 0x6e; /* Default level */ if (IEEE80211_IS_CHAN_5GHZ(c)) chan->rf_gain = 0x3b; else chan->rf_gain = 0x28; DPRINTF(sc, WPI_DEBUG_SCAN, "Scanning %u Passive: %d\n", chan->chan, IEEE80211_IS_CHAN_PASSIVE(c)); hdr->nchan++; if (hdr->nchan == 1 && sc->rxon.chan == chan->chan) { /* XXX Force probe request transmission. */ memcpy(chan + 1, chan, sizeof (struct wpi_scan_chan)); chan++; /* Reduce unnecessary delay. */ chan->flags = 0; chan->passive = chan->active = hdr->quiet_time; hdr->nchan++; } chan++; buflen = (uint8_t *)chan - buf; hdr->len = htole16(buflen); DPRINTF(sc, WPI_DEBUG_CMD, "sending scan command nchan=%d\n", hdr->nchan); error = wpi_cmd(sc, WPI_CMD_SCAN, buf, buflen, 1); free(buf, M_DEVBUF); if (error != 0) goto fail; callout_reset(&sc->scan_timeout, 5*hz, wpi_scan_timeout, sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; fail: DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } static int wpi_auth(struct wpi_softc *sc, struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_node *ni = vap->iv_bss; struct ieee80211_channel *c = ni->ni_chan; int error; WPI_RXON_LOCK(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); /* Update adapter configuration. */ sc->rxon.associd = 0; sc->rxon.filter &= ~htole32(WPI_FILTER_BSS); IEEE80211_ADDR_COPY(sc->rxon.bssid, ni->ni_bssid); sc->rxon.chan = ieee80211_chan2ieee(ic, c); sc->rxon.flags = htole32(WPI_RXON_TSF | WPI_RXON_CTS_TO_SELF); if (IEEE80211_IS_CHAN_2GHZ(c)) sc->rxon.flags |= htole32(WPI_RXON_AUTO | WPI_RXON_24GHZ); if (ic->ic_flags & IEEE80211_F_SHSLOT) sc->rxon.flags |= htole32(WPI_RXON_SHSLOT); if (ic->ic_flags & IEEE80211_F_SHPREAMBLE) sc->rxon.flags |= htole32(WPI_RXON_SHPREAMBLE); if (IEEE80211_IS_CHAN_A(c)) { sc->rxon.cck_mask = 0; sc->rxon.ofdm_mask = 0x15; } else if (IEEE80211_IS_CHAN_B(c)) { sc->rxon.cck_mask = 0x03; sc->rxon.ofdm_mask = 0; } else { /* Assume 802.11b/g. 
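* cck_mask 0x0f enables all four CCK rates; ofdm_mask 0x15 has bits 0, * 2 and 4 set, which in WPI_RIDX_OFDM* order presumably selects the * mandatory 6, 12 and 24 Mb/s rates.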
*/ sc->rxon.cck_mask = 0x0f; sc->rxon.ofdm_mask = 0x15; } DPRINTF(sc, WPI_DEBUG_STATE, "rxon chan %d flags %x cck %x ofdm %x\n", sc->rxon.chan, sc->rxon.flags, sc->rxon.cck_mask, sc->rxon.ofdm_mask); if ((error = wpi_send_rxon(sc, 0, 1)) != 0) { device_printf(sc->sc_dev, "%s: could not send RXON\n", __func__); } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); WPI_RXON_UNLOCK(sc); return error; } static int wpi_config_beacon(struct wpi_vap *wvp) { struct ieee80211vap *vap = &wvp->wv_vap; struct ieee80211com *ic = vap->iv_ic; struct ieee80211_beacon_offsets *bo = &vap->iv_bcn_off; struct wpi_buf *bcn = &wvp->wv_bcbuf; struct wpi_softc *sc = ic->ic_softc; struct wpi_cmd_beacon *cmd = (struct wpi_cmd_beacon *)&bcn->data; struct ieee80211_tim_ie *tie; struct mbuf *m; uint8_t *ptr; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); WPI_VAP_LOCK_ASSERT(wvp); cmd->len = htole16(bcn->m->m_pkthdr.len); cmd->plcp = (ic->ic_curmode == IEEE80211_MODE_11A) ? wpi_ridx_to_plcp[WPI_RIDX_OFDM6] : wpi_ridx_to_plcp[WPI_RIDX_CCK1]; /* XXX seems to be unused */ if (*(bo->bo_tim) == IEEE80211_ELEMID_TIM) { tie = (struct ieee80211_tim_ie *) bo->bo_tim; ptr = mtod(bcn->m, uint8_t *); cmd->tim = htole16(bo->bo_tim - ptr); cmd->timsz = tie->tim_len; } /* Necessary for recursion in ieee80211_beacon_update(). */ m = bcn->m; bcn->m = m_dup(m, M_NOWAIT); if (bcn->m == NULL) { device_printf(sc->sc_dev, "%s: could not copy beacon frame\n", __func__); error = ENOMEM; goto end; } if ((error = wpi_cmd2(sc, bcn)) != 0) { device_printf(sc->sc_dev, "%s: could not update beacon frame, error %d", __func__, error); m_freem(bcn->m); } /* Restore mbuf. */ end: bcn->m = m; return error; } static int wpi_setup_beacon(struct wpi_softc *sc, struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct wpi_vap *wvp = WPI_VAP(vap); struct wpi_buf *bcn = &wvp->wv_bcbuf; struct mbuf *m; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (ni->ni_chan == IEEE80211_CHAN_ANYC) return EINVAL; m = ieee80211_beacon_alloc(ni); if (m == NULL) { device_printf(sc->sc_dev, "%s: could not allocate beacon frame\n", __func__); return ENOMEM; } WPI_VAP_LOCK(wvp); if (bcn->m != NULL) m_freem(bcn->m); bcn->m = m; error = wpi_config_beacon(wvp); WPI_VAP_UNLOCK(wvp); return error; } static void wpi_update_beacon(struct ieee80211vap *vap, int item) { struct wpi_softc *sc = vap->iv_ic->ic_softc; struct wpi_vap *wvp = WPI_VAP(vap); struct wpi_buf *bcn = &wvp->wv_bcbuf; struct ieee80211_beacon_offsets *bo = &vap->iv_bcn_off; struct ieee80211_node *ni = vap->iv_bss; int mcast = 0; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); WPI_VAP_LOCK(wvp); if (bcn->m == NULL) { bcn->m = ieee80211_beacon_alloc(ni); if (bcn->m == NULL) { device_printf(sc->sc_dev, "%s: could not allocate beacon frame\n", __func__); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); WPI_VAP_UNLOCK(wvp); return; } } WPI_VAP_UNLOCK(wvp); if (item == IEEE80211_BEACON_TIM) mcast = 1; /* TODO */ setbit(bo->bo_flags, item); ieee80211_beacon_update(ni, bcn->m, mcast); WPI_VAP_LOCK(wvp); wpi_config_beacon(wvp); WPI_VAP_UNLOCK(wvp); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); } static void wpi_newassoc(struct ieee80211_node *ni, int isnew) { struct ieee80211vap *vap = ni->ni_vap; struct wpi_softc *sc = ni->ni_ic->ic_softc; struct wpi_node *wn = WPI_NODE(ni); int error; WPI_NT_LOCK(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (vap->iv_opmode != IEEE80211_M_STA && wn->id == WPI_ID_UNDEFINED) { if 
((error = wpi_add_ibss_node(sc, ni)) != 0) { device_printf(sc->sc_dev, "%s: could not add IBSS node, error %d\n", __func__, error); } } WPI_NT_UNLOCK(sc); } static int wpi_run(struct wpi_softc *sc, struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_node *ni = vap->iv_bss; struct ieee80211_channel *c = ni->ni_chan; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); if (vap->iv_opmode == IEEE80211_M_MONITOR) { /* Link LED blinks while monitoring. */ wpi_set_led(sc, WPI_LED_LINK, 5, 5); return 0; } /* XXX kernel panic workaround */ if (c == IEEE80211_CHAN_ANYC) { device_printf(sc->sc_dev, "%s: incomplete configuration\n", __func__); return EINVAL; } if ((error = wpi_set_timing(sc, ni)) != 0) { device_printf(sc->sc_dev, "%s: could not set timing, error %d\n", __func__, error); return error; } /* Update adapter configuration. */ WPI_RXON_LOCK(sc); IEEE80211_ADDR_COPY(sc->rxon.bssid, ni->ni_bssid); sc->rxon.associd = htole16(IEEE80211_NODE_AID(ni)); sc->rxon.chan = ieee80211_chan2ieee(ic, c); sc->rxon.flags = htole32(WPI_RXON_TSF | WPI_RXON_CTS_TO_SELF); if (IEEE80211_IS_CHAN_2GHZ(c)) sc->rxon.flags |= htole32(WPI_RXON_AUTO | WPI_RXON_24GHZ); if (ic->ic_flags & IEEE80211_F_SHSLOT) sc->rxon.flags |= htole32(WPI_RXON_SHSLOT); if (ic->ic_flags & IEEE80211_F_SHPREAMBLE) sc->rxon.flags |= htole32(WPI_RXON_SHPREAMBLE); if (IEEE80211_IS_CHAN_A(c)) { sc->rxon.cck_mask = 0; sc->rxon.ofdm_mask = 0x15; } else if (IEEE80211_IS_CHAN_B(c)) { sc->rxon.cck_mask = 0x03; sc->rxon.ofdm_mask = 0; } else { /* Assume 802.11b/g. */ sc->rxon.cck_mask = 0x0f; sc->rxon.ofdm_mask = 0x15; } sc->rxon.filter |= htole32(WPI_FILTER_BSS); DPRINTF(sc, WPI_DEBUG_STATE, "rxon chan %d flags %x\n", sc->rxon.chan, sc->rxon.flags); if ((error = wpi_send_rxon(sc, 0, 1)) != 0) { device_printf(sc->sc_dev, "%s: could not send RXON\n", __func__); return error; } /* Start periodic calibration timer. */ callout_reset(&sc->calib_to, 60*hz, wpi_calib_timeout, sc); WPI_RXON_UNLOCK(sc); if (vap->iv_opmode == IEEE80211_M_IBSS || vap->iv_opmode == IEEE80211_M_HOSTAP) { if ((error = wpi_setup_beacon(sc, ni)) != 0) { device_printf(sc->sc_dev, "%s: could not setup beacon, error %d\n", __func__, error); return error; } } if (vap->iv_opmode == IEEE80211_M_STA) { /* Add BSS node. */ WPI_NT_LOCK(sc); error = wpi_add_sta_node(sc, ni); WPI_NT_UNLOCK(sc); if (error != 0) { device_printf(sc->sc_dev, "%s: could not add BSS node, error %d\n", __func__, error); return error; } } /* Link LED always on while associated. */ wpi_set_led(sc, WPI_LED_LINK, 0, 1); /* Enable power-saving mode if requested by user. 
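*/
/*
 * Editorial note (hedged, by analogy with the similar iwn(4) driver): the
 * arguments to wpi_set_pslevel() below appear to be (dtim hint, power-save
 * level, async flag), with level 0 meaning "constantly awake" and higher
 * levels trading latency for power.  IBSS mode is excluded, presumably
 * because an IBSS station must stay awake to take its turn beaconing.
 */
/*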
*/ if ((vap->iv_flags & IEEE80211_F_PMGTON) && vap->iv_opmode != IEEE80211_M_IBSS) (void)wpi_set_pslevel(sc, 0, 3, 1); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; } static int wpi_load_key(struct ieee80211_node *ni, const struct ieee80211_key *k) { const struct ieee80211_cipher *cip = k->wk_cipher; struct ieee80211vap *vap = ni->ni_vap; struct wpi_softc *sc = ni->ni_ic->ic_softc; struct wpi_node *wn = WPI_NODE(ni); struct wpi_node_info node; uint16_t kflags; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (wpi_check_node_entry(sc, wn->id) == 0) { device_printf(sc->sc_dev, "%s: node does not exist\n", __func__); return 0; } switch (cip->ic_cipher) { case IEEE80211_CIPHER_AES_CCM: kflags = WPI_KFLAG_CCMP; break; default: device_printf(sc->sc_dev, "%s: unknown cipher %d\n", __func__, cip->ic_cipher); return 0; } kflags |= WPI_KFLAG_KID(k->wk_keyix); if (k->wk_flags & IEEE80211_KEY_GROUP) kflags |= WPI_KFLAG_MULTICAST; memset(&node, 0, sizeof node); node.id = wn->id; node.control = WPI_NODE_UPDATE; node.flags = WPI_FLAG_KEY_SET; node.kflags = htole16(kflags); memcpy(node.key, k->wk_key, k->wk_keylen); again: DPRINTF(sc, WPI_DEBUG_KEY, "%s: setting %s key id %d for node %d (%s)\n", __func__, (kflags & WPI_KFLAG_MULTICAST) ? "group" : "ucast", k->wk_keyix, node.id, ether_sprintf(ni->ni_macaddr)); error = wpi_cmd(sc, WPI_CMD_ADD_NODE, &node, sizeof node, 1); if (error != 0) { device_printf(sc->sc_dev, "can't update node info, error %d\n", error); return !error; } if (!(kflags & WPI_KFLAG_MULTICAST) && &vap->iv_nw_keys[0] <= k && k < &vap->iv_nw_keys[IEEE80211_WEP_NKID]) { kflags |= WPI_KFLAG_MULTICAST; node.kflags = htole16(kflags); goto again; } return 1; } static void wpi_load_key_cb(void *arg, struct ieee80211_node *ni) { const struct ieee80211_key *k = arg; struct ieee80211vap *vap = ni->ni_vap; struct wpi_softc *sc = ni->ni_ic->ic_softc; struct wpi_node *wn = WPI_NODE(ni); int error; if (vap->iv_bss == ni && wn->id == WPI_ID_UNDEFINED) return; WPI_NT_LOCK(sc); error = wpi_load_key(ni, k); WPI_NT_UNLOCK(sc); if (error == 0) { device_printf(sc->sc_dev, "%s: error while setting key\n", __func__); } } static int wpi_set_global_keys(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_key *wk = &vap->iv_nw_keys[0]; int error = 1; for (; wk < &vap->iv_nw_keys[IEEE80211_WEP_NKID] && error; wk++) if (wk->wk_keyix != IEEE80211_KEYIX_NONE) error = wpi_load_key(ni, wk); return !error; } static int wpi_del_key(struct ieee80211_node *ni, const struct ieee80211_key *k) { struct ieee80211vap *vap = ni->ni_vap; struct wpi_softc *sc = ni->ni_ic->ic_softc; struct wpi_node *wn = WPI_NODE(ni); struct wpi_node_info node; uint16_t kflags; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (wpi_check_node_entry(sc, wn->id) == 0) { DPRINTF(sc, WPI_DEBUG_KEY, "%s: node was removed\n", __func__); return 1; /* Nothing to do. */ } kflags = WPI_KFLAG_KID(k->wk_keyix); if (k->wk_flags & IEEE80211_KEY_GROUP) kflags |= WPI_KFLAG_MULTICAST; memset(&node, 0, sizeof node); node.id = wn->id; node.control = WPI_NODE_UPDATE; node.flags = WPI_FLAG_KEY_SET; node.kflags = htole16(kflags); again: DPRINTF(sc, WPI_DEBUG_KEY, "%s: deleting %s key %d for node %d (%s)\n", __func__, (kflags & WPI_KFLAG_MULTICAST) ? 
"group" : "ucast", k->wk_keyix, node.id, ether_sprintf(ni->ni_macaddr)); error = wpi_cmd(sc, WPI_CMD_ADD_NODE, &node, sizeof node, 1); if (error != 0) { device_printf(sc->sc_dev, "can't update node info, error %d\n", error); return !error; } if (!(kflags & WPI_KFLAG_MULTICAST) && &vap->iv_nw_keys[0] <= k && k < &vap->iv_nw_keys[IEEE80211_WEP_NKID]) { kflags |= WPI_KFLAG_MULTICAST; node.kflags = htole16(kflags); goto again; } return 1; } static void wpi_del_key_cb(void *arg, struct ieee80211_node *ni) { const struct ieee80211_key *k = arg; struct ieee80211vap *vap = ni->ni_vap; struct wpi_softc *sc = ni->ni_ic->ic_softc; struct wpi_node *wn = WPI_NODE(ni); int error; if (vap->iv_bss == ni && wn->id == WPI_ID_UNDEFINED) return; WPI_NT_LOCK(sc); error = wpi_del_key(ni, k); WPI_NT_UNLOCK(sc); if (error == 0) { device_printf(sc->sc_dev, "%s: error while deleting key\n", __func__); } } static int wpi_process_key(struct ieee80211vap *vap, const struct ieee80211_key *k, int set) { struct ieee80211com *ic = vap->iv_ic; struct wpi_softc *sc = ic->ic_softc; struct wpi_vap *wvp = WPI_VAP(vap); struct ieee80211_node *ni; int error, ni_ref = 0; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (k->wk_flags & IEEE80211_KEY_SWCRYPT) { /* Not for us. */ return 1; } if (!(k->wk_flags & IEEE80211_KEY_RECV)) { /* XMIT keys are handled in wpi_tx_data(). */ return 1; } /* Handle group keys. */ if (&vap->iv_nw_keys[0] <= k && k < &vap->iv_nw_keys[IEEE80211_WEP_NKID]) { WPI_NT_LOCK(sc); if (set) wvp->wv_gtk |= WPI_VAP_KEY(k->wk_keyix); else wvp->wv_gtk &= ~WPI_VAP_KEY(k->wk_keyix); WPI_NT_UNLOCK(sc); if (vap->iv_state == IEEE80211_S_RUN) { ieee80211_iterate_nodes(&ic->ic_sta, set ? wpi_load_key_cb : wpi_del_key_cb, __DECONST(void *, k)); } return 1; } switch (vap->iv_opmode) { case IEEE80211_M_STA: ni = vap->iv_bss; break; case IEEE80211_M_IBSS: case IEEE80211_M_AHDEMO: case IEEE80211_M_HOSTAP: ni = ieee80211_find_vap_node(&ic->ic_sta, vap, k->wk_macaddr); if (ni == NULL) return 0; /* should not happen */ ni_ref = 1; break; default: device_printf(sc->sc_dev, "%s: unknown opmode %d\n", __func__, vap->iv_opmode); return 0; } WPI_NT_LOCK(sc); if (set) error = wpi_load_key(ni, k); else error = wpi_del_key(ni, k); WPI_NT_UNLOCK(sc); if (ni_ref) ieee80211_node_decref(ni); return error; } static int wpi_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) { return wpi_process_key(vap, k, 1); } static int wpi_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k) { return wpi_process_key(vap, k, 0); } /* * This function is called after the runtime firmware notifies us of its * readiness (called in a process context). */ static int wpi_post_alive(struct wpi_softc *sc) { int ntries, error; /* Check (again) that the radio is not disabled. */ if ((error = wpi_nic_lock(sc)) != 0) return error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); /* NB: Runtime firmware must be up and running. */ if (!(wpi_prph_read(sc, WPI_APMG_RFKILL) & 1)) { device_printf(sc->sc_dev, "RF switch: radio disabled (%s)\n", __func__); wpi_nic_unlock(sc); return EPERM; /* :-) */ } wpi_nic_unlock(sc); /* Wait for thermal sensor to calibrate. 
*/ for (ntries = 0; ntries < 1000; ntries++) { if ((sc->temp = (int)WPI_READ(sc, WPI_UCODE_GP2)) != 0) break; DELAY(10); } if (ntries == 1000) { device_printf(sc->sc_dev, "timeout waiting for thermal sensor calibration\n"); return ETIMEDOUT; } DPRINTF(sc, WPI_DEBUG_TEMP, "temperature %d\n", sc->temp); return 0; } /* * The firmware boot code is small and is intended to be copied directly into * the NIC internal memory (no DMA transfer). */ static int wpi_load_bootcode(struct wpi_softc *sc, const uint8_t *ucode, uint32_t size) { int error, ntries; DPRINTF(sc, WPI_DEBUG_HW, "Loading microcode size 0x%x\n", size); size /= sizeof (uint32_t); if ((error = wpi_nic_lock(sc)) != 0) return error; /* Copy microcode image into NIC memory. */ wpi_prph_write_region_4(sc, WPI_BSM_SRAM_BASE, (const uint32_t *)ucode, size); wpi_prph_write(sc, WPI_BSM_WR_MEM_SRC, 0); wpi_prph_write(sc, WPI_BSM_WR_MEM_DST, WPI_FW_TEXT_BASE); wpi_prph_write(sc, WPI_BSM_WR_DWCOUNT, size); /* Start boot load now. */ wpi_prph_write(sc, WPI_BSM_WR_CTRL, WPI_BSM_WR_CTRL_START); /* Wait for transfer to complete. */ for (ntries = 0; ntries < 1000; ntries++) { uint32_t status = WPI_READ(sc, WPI_FH_TX_STATUS); DPRINTF(sc, WPI_DEBUG_HW, "firmware status=0x%x, val=0x%x, result=0x%x\n", status, WPI_FH_TX_STATUS_IDLE(6), status & WPI_FH_TX_STATUS_IDLE(6)); if (status & WPI_FH_TX_STATUS_IDLE(6)) { DPRINTF(sc, WPI_DEBUG_HW, "Status Match! - ntries = %d\n", ntries); break; } DELAY(10); } if (ntries == 1000) { device_printf(sc->sc_dev, "%s: could not load boot firmware\n", __func__); wpi_nic_unlock(sc); return ETIMEDOUT; } /* Enable boot after power up. */ wpi_prph_write(sc, WPI_BSM_WR_CTRL, WPI_BSM_WR_CTRL_START_EN); wpi_nic_unlock(sc); return 0; } static int wpi_load_firmware(struct wpi_softc *sc) { struct wpi_fw_info *fw = &sc->fw; struct wpi_dma_info *dma = &sc->fw_dma; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); /* Copy initialization sections into pre-allocated DMA-safe memory. */ memcpy(dma->vaddr, fw->init.data, fw->init.datasz); bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); memcpy(dma->vaddr + WPI_FW_DATA_MAXSZ, fw->init.text, fw->init.textsz); bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); /* Tell adapter where to find initialization sections. */ if ((error = wpi_nic_lock(sc)) != 0) return error; wpi_prph_write(sc, WPI_BSM_DRAM_DATA_ADDR, dma->paddr); wpi_prph_write(sc, WPI_BSM_DRAM_DATA_SIZE, fw->init.datasz); wpi_prph_write(sc, WPI_BSM_DRAM_TEXT_ADDR, dma->paddr + WPI_FW_DATA_MAXSZ); wpi_prph_write(sc, WPI_BSM_DRAM_TEXT_SIZE, fw->init.textsz); wpi_nic_unlock(sc); /* Load firmware boot code. */ error = wpi_load_bootcode(sc, fw->boot.text, fw->boot.textsz); if (error != 0) { device_printf(sc->sc_dev, "%s: could not load boot firmware\n", __func__); return error; } /* Now press "execute". */ WPI_WRITE(sc, WPI_RESET, 0); /* Wait at most one second for first alive notification. */ if ((error = mtx_sleep(sc, &sc->sc_mtx, PCATCH, "wpiinit", hz)) != 0) { device_printf(sc->sc_dev, "%s: timeout waiting for adapter to initialize, error %d\n", __func__, error); return error; } /* Copy runtime sections into pre-allocated DMA-safe memory. */ memcpy(dma->vaddr, fw->main.data, fw->main.datasz); bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); memcpy(dma->vaddr + WPI_FW_DATA_MAXSZ, fw->main.text, fw->main.textsz); bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); /* Tell adapter where to find runtime sections. 
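*/
/*
 * Editorial sketch of the DMA buffer layout used here (inferred from the
 * memcpy() offsets above, not stated by the commit itself):
 *
 *	dma->vaddr                 dma->vaddr + WPI_FW_DATA_MAXSZ
 *	+--------------------------+--------------------------+
 *	| .data section            | .text section            |
 *	+--------------------------+--------------------------+
 *
 * The BSM_DRAM_{DATA,TEXT}_{ADDR,SIZE} registers then point the Bootstrap
 * State Machine at the two halves.  For the runtime image below,
 * WPI_FW_UPDATED OR'd into the text size appears to mark it as the
 * updated (post-init) image.
 */
/*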
*/ if ((error = wpi_nic_lock(sc)) != 0) return error; wpi_prph_write(sc, WPI_BSM_DRAM_DATA_ADDR, dma->paddr); wpi_prph_write(sc, WPI_BSM_DRAM_DATA_SIZE, fw->main.datasz); wpi_prph_write(sc, WPI_BSM_DRAM_TEXT_ADDR, dma->paddr + WPI_FW_DATA_MAXSZ); wpi_prph_write(sc, WPI_BSM_DRAM_TEXT_SIZE, WPI_FW_UPDATED | fw->main.textsz); wpi_nic_unlock(sc); return 0; } static int wpi_read_firmware(struct wpi_softc *sc) { const struct firmware *fp; struct wpi_fw_info *fw = &sc->fw; const struct wpi_firmware_hdr *hdr; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); DPRINTF(sc, WPI_DEBUG_FIRMWARE, "Attempting Loading Firmware from %s module\n", WPI_FW_NAME); WPI_UNLOCK(sc); fp = firmware_get(WPI_FW_NAME); WPI_LOCK(sc); if (fp == NULL) { device_printf(sc->sc_dev, "could not load firmware image '%s'\n", WPI_FW_NAME); return EINVAL; } sc->fw_fp = fp; if (fp->datasize < sizeof (struct wpi_firmware_hdr)) { device_printf(sc->sc_dev, "firmware file too short: %zu bytes\n", fp->datasize); error = EINVAL; goto fail; } fw->size = fp->datasize; fw->data = (const uint8_t *)fp->data; /* Extract firmware header information. */ hdr = (const struct wpi_firmware_hdr *)fw->data; /* | RUNTIME FIRMWARE | INIT FIRMWARE | BOOT FW | |HDR|<--TEXT-->|<--DATA-->|<--TEXT-->|<--DATA-->|<--TEXT-->| */ fw->main.textsz = le32toh(hdr->rtextsz); fw->main.datasz = le32toh(hdr->rdatasz); fw->init.textsz = le32toh(hdr->itextsz); fw->init.datasz = le32toh(hdr->idatasz); fw->boot.textsz = le32toh(hdr->btextsz); fw->boot.datasz = 0; /* Sanity-check firmware header. */ if (fw->main.textsz > WPI_FW_TEXT_MAXSZ || fw->main.datasz > WPI_FW_DATA_MAXSZ || fw->init.textsz > WPI_FW_TEXT_MAXSZ || fw->init.datasz > WPI_FW_DATA_MAXSZ || fw->boot.textsz > WPI_FW_BOOT_TEXT_MAXSZ || (fw->boot.textsz & 3) != 0) { device_printf(sc->sc_dev, "invalid firmware header\n"); error = EINVAL; goto fail; } /* Check that all firmware sections fit. */ if (fw->size < sizeof (*hdr) + fw->main.textsz + fw->main.datasz + fw->init.textsz + fw->init.datasz + fw->boot.textsz) { device_printf(sc->sc_dev, "firmware file too short: %zu bytes\n", fw->size); error = EINVAL; goto fail; } /* Get pointers to firmware sections. */ fw->main.text = (const uint8_t *)(hdr + 1); fw->main.data = fw->main.text + fw->main.textsz; fw->init.text = fw->main.data + fw->main.datasz; fw->init.data = fw->init.text + fw->init.textsz; fw->boot.text = fw->init.data + fw->init.datasz; DPRINTF(sc, WPI_DEBUG_FIRMWARE, "Firmware Version: Major %d, Minor %d, Driver %d, \n" "runtime (text: %u, data: %u) init (text: %u, data %u) " "boot (text %u)\n", hdr->major, hdr->minor, le32toh(hdr->driver), fw->main.textsz, fw->main.datasz, fw->init.textsz, fw->init.datasz, fw->boot.textsz); DPRINTF(sc, WPI_DEBUG_FIRMWARE, "fw->main.text %p\n", fw->main.text); DPRINTF(sc, WPI_DEBUG_FIRMWARE, "fw->main.data %p\n", fw->main.data); DPRINTF(sc, WPI_DEBUG_FIRMWARE, "fw->init.text %p\n", fw->init.text); DPRINTF(sc, WPI_DEBUG_FIRMWARE, "fw->init.data %p\n", fw->init.data); DPRINTF(sc, WPI_DEBUG_FIRMWARE, "fw->boot.text %p\n", fw->boot.text); return 0; fail: wpi_unload_firmware(sc); return error; } /** * Free the referenced firmware image */ static void wpi_unload_firmware(struct wpi_softc *sc) { if (sc->fw_fp != NULL) { firmware_put(sc->fw_fp, FIRMWARE_UNLOAD); sc->fw_fp = NULL; } } static int wpi_clock_wait(struct wpi_softc *sc) { int ntries; /* Set "initialization complete" bit. */ WPI_SETBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_INIT_DONE); /* Wait for clock stabilization. 
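*/
/*
 * Editorial note: 2500 iterations of DELAY(100) give the MAC clock
 * roughly 2500 * 100 us = 250 ms to assert WPI_GP_CNTRL_MAC_CLOCK_READY
 * after the "initialization complete" bit is set above.
 */
/*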
*/ for (ntries = 0; ntries < 2500; ntries++) { if (WPI_READ(sc, WPI_GP_CNTRL) & WPI_GP_CNTRL_MAC_CLOCK_READY) return 0; DELAY(100); } device_printf(sc->sc_dev, "%s: timeout waiting for clock stabilization\n", __func__); return ETIMEDOUT; } static int wpi_apm_init(struct wpi_softc *sc) { uint32_t reg; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); /* Disable L0s exit timer (NMI bug workaround). */ WPI_SETBITS(sc, WPI_GIO_CHICKEN, WPI_GIO_CHICKEN_DIS_L0S_TIMER); /* Don't wait for ICH L0s (ICH bug workaround). */ WPI_SETBITS(sc, WPI_GIO_CHICKEN, WPI_GIO_CHICKEN_L1A_NO_L0S_RX); /* Set FH wait threshold to max (HW bug under stress workaround). */ WPI_SETBITS(sc, WPI_DBG_HPET_MEM, 0xffff0000); /* Retrieve PCIe Active State Power Management (ASPM). */ - reg = pci_read_config(sc->sc_dev, sc->sc_cap_off + 0x10, 1); + reg = pci_read_config(sc->sc_dev, sc->sc_cap_off + PCIER_LINK_CTL, 1); /* Workaround for HW instability in PCIe L0->L0s->L1 transition. */ - if (reg & 0x02) /* L1 Entry enabled. */ + if (reg & PCIEM_LINK_CTL_ASPMC_L1) /* L1 Entry enabled. */ WPI_SETBITS(sc, WPI_GIO, WPI_GIO_L0S_ENA); else WPI_CLRBITS(sc, WPI_GIO, WPI_GIO_L0S_ENA); WPI_SETBITS(sc, WPI_ANA_PLL, WPI_ANA_PLL_INIT); /* Wait for clock stabilization before accessing prph. */ if ((error = wpi_clock_wait(sc)) != 0) return error; if ((error = wpi_nic_lock(sc)) != 0) return error; /* Cleanup. */ wpi_prph_write(sc, WPI_APMG_CLK_DIS, 0x00000400); wpi_prph_clrbits(sc, WPI_APMG_PS, 0x00000200); /* Enable DMA and BSM (Bootstrap State Machine). */ wpi_prph_write(sc, WPI_APMG_CLK_EN, WPI_APMG_CLK_CTRL_DMA_CLK_RQT | WPI_APMG_CLK_CTRL_BSM_CLK_RQT); DELAY(20); /* Disable L1-Active. */ wpi_prph_setbits(sc, WPI_APMG_PCI_STT, WPI_APMG_PCI_STT_L1A_DIS); wpi_nic_unlock(sc); return 0; } static void wpi_apm_stop_master(struct wpi_softc *sc) { int ntries; /* Stop busmaster DMA activity. */ WPI_SETBITS(sc, WPI_RESET, WPI_RESET_STOP_MASTER); if ((WPI_READ(sc, WPI_GP_CNTRL) & WPI_GP_CNTRL_PS_MASK) == WPI_GP_CNTRL_MAC_PS) return; /* Already asleep. */ for (ntries = 0; ntries < 100; ntries++) { if (WPI_READ(sc, WPI_RESET) & WPI_RESET_MASTER_DISABLED) return; DELAY(10); } device_printf(sc->sc_dev, "%s: timeout waiting for master\n", __func__); } static void wpi_apm_stop(struct wpi_softc *sc) { wpi_apm_stop_master(sc); /* Reset the entire device. */ WPI_SETBITS(sc, WPI_RESET, WPI_RESET_SW); DELAY(10); /* Clear "initialization complete" bit. */ WPI_CLRBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_INIT_DONE); } static void wpi_nic_config(struct wpi_softc *sc) { uint32_t rev; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); /* voodoo from the Linux "driver".. */ rev = pci_read_config(sc->sc_dev, PCIR_REVID, 1); if ((rev & 0xc0) == 0x40) WPI_SETBITS(sc, WPI_HW_IF_CONFIG, WPI_HW_IF_CONFIG_ALM_MB); else if (!(rev & 0x80)) WPI_SETBITS(sc, WPI_HW_IF_CONFIG, WPI_HW_IF_CONFIG_ALM_MM); if (sc->cap == 0x80) WPI_SETBITS(sc, WPI_HW_IF_CONFIG, WPI_HW_IF_CONFIG_SKU_MRC); if ((sc->rev & 0xf0) == 0xd0) WPI_SETBITS(sc, WPI_HW_IF_CONFIG, WPI_HW_IF_CONFIG_REV_D); else WPI_CLRBITS(sc, WPI_HW_IF_CONFIG, WPI_HW_IF_CONFIG_REV_D); if (sc->type > 1) WPI_SETBITS(sc, WPI_HW_IF_CONFIG, WPI_HW_IF_CONFIG_TYPE_B); } static int wpi_hw_init(struct wpi_softc *sc) { uint8_t chnl; int ntries, error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); /* Clear pending interrupts. 
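*/
/*
 * Editorial note: in wpi_apm_init() above, this revision replaces two
 * magic numbers with their <dev/pci/pcireg.h> names: the config-space
 * offset 0x10 becomes PCIER_LINK_CTL (the PCIe Link Control register,
 * relative to the PCIe capability at sc_cap_off) and the bit 0x02
 * becomes PCIEM_LINK_CTL_ASPMC_L1 (ASPM L1 entry enabled).  The write
 * of 0xffffffff to WPI_INT below acknowledges (write-one-to-clear,
 * apparently) every pending interrupt cause.
 */
/*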
*/ WPI_WRITE(sc, WPI_INT, 0xffffffff); if ((error = wpi_apm_init(sc)) != 0) { device_printf(sc->sc_dev, "%s: could not power ON adapter, error %d\n", __func__, error); return error; } /* Select VMAIN power source. */ if ((error = wpi_nic_lock(sc)) != 0) return error; wpi_prph_clrbits(sc, WPI_APMG_PS, WPI_APMG_PS_PWR_SRC_MASK); wpi_nic_unlock(sc); /* Spin until VMAIN gets selected. */ for (ntries = 0; ntries < 5000; ntries++) { if (WPI_READ(sc, WPI_GPIO_IN) & WPI_GPIO_IN_VMAIN) break; DELAY(10); } if (ntries == 5000) { device_printf(sc->sc_dev, "timeout selecting power source\n"); return ETIMEDOUT; } /* Perform adapter initialization. */ wpi_nic_config(sc); /* Initialize RX ring. */ if ((error = wpi_nic_lock(sc)) != 0) return error; /* Set physical address of RX ring. */ WPI_WRITE(sc, WPI_FH_RX_BASE, sc->rxq.desc_dma.paddr); /* Set physical address of RX read pointer. */ WPI_WRITE(sc, WPI_FH_RX_RPTR_ADDR, sc->shared_dma.paddr + offsetof(struct wpi_shared, next)); WPI_WRITE(sc, WPI_FH_RX_WPTR, 0); /* Enable RX. */ WPI_WRITE(sc, WPI_FH_RX_CONFIG, WPI_FH_RX_CONFIG_DMA_ENA | WPI_FH_RX_CONFIG_RDRBD_ENA | WPI_FH_RX_CONFIG_WRSTATUS_ENA | WPI_FH_RX_CONFIG_MAXFRAG | WPI_FH_RX_CONFIG_NRBD(WPI_RX_RING_COUNT_LOG) | WPI_FH_RX_CONFIG_IRQ_DST_HOST | WPI_FH_RX_CONFIG_IRQ_TIMEOUT(1)); (void)WPI_READ(sc, WPI_FH_RSSR_TBL); /* barrier */ wpi_nic_unlock(sc); WPI_WRITE(sc, WPI_FH_RX_WPTR, (WPI_RX_RING_COUNT - 1) & ~7); /* Initialize TX rings. */ if ((error = wpi_nic_lock(sc)) != 0) return error; wpi_prph_write(sc, WPI_ALM_SCHED_MODE, 2); /* bypass mode */ wpi_prph_write(sc, WPI_ALM_SCHED_ARASTAT, 1); /* enable RA0 */ /* Enable all 6 TX rings. */ wpi_prph_write(sc, WPI_ALM_SCHED_TXFACT, 0x3f); wpi_prph_write(sc, WPI_ALM_SCHED_SBYPASS_MODE1, 0x10000); wpi_prph_write(sc, WPI_ALM_SCHED_SBYPASS_MODE2, 0x30002); wpi_prph_write(sc, WPI_ALM_SCHED_TXF4MF, 4); wpi_prph_write(sc, WPI_ALM_SCHED_TXF5MF, 5); /* Set physical address of TX rings. */ WPI_WRITE(sc, WPI_FH_TX_BASE, sc->shared_dma.paddr); WPI_WRITE(sc, WPI_FH_MSG_CONFIG, 0xffff05a5); /* Enable all DMA channels. */ for (chnl = 0; chnl < WPI_NDMACHNLS; chnl++) { WPI_WRITE(sc, WPI_FH_CBBC_CTRL(chnl), 0); WPI_WRITE(sc, WPI_FH_CBBC_BASE(chnl), 0); WPI_WRITE(sc, WPI_FH_TX_CONFIG(chnl), 0x80200008); } wpi_nic_unlock(sc); (void)WPI_READ(sc, WPI_FH_TX_BASE); /* barrier */ /* Clear "radio off" and "commands blocked" bits. */ WPI_WRITE(sc, WPI_UCODE_GP1_CLR, WPI_UCODE_GP1_RFKILL); WPI_WRITE(sc, WPI_UCODE_GP1_CLR, WPI_UCODE_GP1_CMD_BLOCKED); /* Clear pending interrupts. */ WPI_WRITE(sc, WPI_INT, 0xffffffff); /* Enable interrupts. */ WPI_WRITE(sc, WPI_INT_MASK, WPI_INT_MASK_DEF); /* _Really_ make sure "radio off" bit is cleared! */ WPI_WRITE(sc, WPI_UCODE_GP1_CLR, WPI_UCODE_GP1_RFKILL); WPI_WRITE(sc, WPI_UCODE_GP1_CLR, WPI_UCODE_GP1_RFKILL); if ((error = wpi_load_firmware(sc)) != 0) { device_printf(sc->sc_dev, "%s: could not load firmware, error %d\n", __func__, error); return error; } /* Wait at most one second for firmware alive notification. */ if ((error = mtx_sleep(sc, &sc->sc_mtx, PCATCH, "wpiinit", hz)) != 0) { device_printf(sc->sc_dev, "%s: timeout waiting for adapter to initialize, error %d\n", __func__, error); return error; } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); /* Do post-firmware initialization. 
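*/
/*
 * Editorial note: "(WPI_RX_RING_COUNT - 1) & ~7" above rounds the initial
 * RX write pointer down to a multiple of 8; the Flow Handler apparently
 * requires WPI_FH_RX_WPTR to be 8-entry aligned.
 */
/*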
*/ return wpi_post_alive(sc); } static void wpi_hw_stop(struct wpi_softc *sc) { uint8_t chnl, qid; int ntries; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (WPI_READ(sc, WPI_UCODE_GP1) & WPI_UCODE_GP1_MAC_SLEEP) wpi_nic_lock(sc); WPI_WRITE(sc, WPI_RESET, WPI_RESET_NEVO); /* Disable interrupts. */ WPI_WRITE(sc, WPI_INT_MASK, 0); WPI_WRITE(sc, WPI_INT, 0xffffffff); WPI_WRITE(sc, WPI_FH_INT, 0xffffffff); /* Make sure we no longer hold the NIC lock. */ wpi_nic_unlock(sc); if (wpi_nic_lock(sc) == 0) { /* Stop TX scheduler. */ wpi_prph_write(sc, WPI_ALM_SCHED_MODE, 0); wpi_prph_write(sc, WPI_ALM_SCHED_TXFACT, 0); /* Stop all DMA channels. */ for (chnl = 0; chnl < WPI_NDMACHNLS; chnl++) { WPI_WRITE(sc, WPI_FH_TX_CONFIG(chnl), 0); for (ntries = 0; ntries < 200; ntries++) { if (WPI_READ(sc, WPI_FH_TX_STATUS) & WPI_FH_TX_STATUS_IDLE(chnl)) break; DELAY(10); } } wpi_nic_unlock(sc); } /* Stop RX ring. */ wpi_reset_rx_ring(sc); /* Reset all TX rings. */ for (qid = 0; qid < WPI_DRV_NTXQUEUES; qid++) wpi_reset_tx_ring(sc, &sc->txq[qid]); if (wpi_nic_lock(sc) == 0) { wpi_prph_write(sc, WPI_APMG_CLK_DIS, WPI_APMG_CLK_CTRL_DMA_CLK_RQT); wpi_nic_unlock(sc); } DELAY(5); /* Power OFF adapter. */ wpi_apm_stop(sc); } static void wpi_radio_on(void *arg0, int pending) { struct wpi_softc *sc = arg0; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); device_printf(sc->sc_dev, "RF switch: radio enabled\n"); WPI_LOCK(sc); callout_stop(&sc->watchdog_rfkill); WPI_UNLOCK(sc); if (vap != NULL) ieee80211_init(vap); } static void wpi_radio_off(void *arg0, int pending) { struct wpi_softc *sc = arg0; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); device_printf(sc->sc_dev, "RF switch: radio disabled\n"); ieee80211_notify_radio(ic, 0); wpi_stop(sc); if (vap != NULL) ieee80211_stop(vap); WPI_LOCK(sc); callout_reset(&sc->watchdog_rfkill, hz, wpi_watchdog_rfkill, sc); WPI_UNLOCK(sc); } static int wpi_init(struct wpi_softc *sc) { int error = 0; WPI_LOCK(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); if (sc->sc_running != 0) goto end; /* Check that the radio is not disabled by hardware switch. */ if (!(WPI_READ(sc, WPI_GP_CNTRL) & WPI_GP_CNTRL_RFKILL)) { device_printf(sc->sc_dev, "RF switch: radio disabled (%s)\n", __func__); callout_reset(&sc->watchdog_rfkill, hz, wpi_watchdog_rfkill, sc); error = EINPROGRESS; goto end; } /* Read firmware images from the filesystem. */ if ((error = wpi_read_firmware(sc)) != 0) { device_printf(sc->sc_dev, "%s: could not read firmware, error %d\n", __func__, error); goto end; } sc->sc_running = 1; /* Initialize hardware and upload firmware. */ error = wpi_hw_init(sc); wpi_unload_firmware(sc); if (error != 0) { device_printf(sc->sc_dev, "%s: could not initialize hardware, error %d\n", __func__, error); goto fail; } /* Configure adapter now that it is ready. 
*/ if ((error = wpi_config(sc)) != 0) { device_printf(sc->sc_dev, "%s: could not configure device, error %d\n", __func__, error); goto fail; } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); WPI_UNLOCK(sc); return 0; fail: wpi_stop_locked(sc); end: DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); WPI_UNLOCK(sc); return error; } static void wpi_stop_locked(struct wpi_softc *sc) { WPI_LOCK_ASSERT(sc); if (sc->sc_running == 0) return; WPI_TX_LOCK(sc); WPI_TXQ_LOCK(sc); sc->sc_running = 0; WPI_TXQ_UNLOCK(sc); WPI_TX_UNLOCK(sc); WPI_TXQ_STATE_LOCK(sc); callout_stop(&sc->tx_timeout); WPI_TXQ_STATE_UNLOCK(sc); WPI_RXON_LOCK(sc); callout_stop(&sc->scan_timeout); callout_stop(&sc->calib_to); WPI_RXON_UNLOCK(sc); /* Power OFF hardware. */ wpi_hw_stop(sc); } static void wpi_stop(struct wpi_softc *sc) { WPI_LOCK(sc); wpi_stop_locked(sc); WPI_UNLOCK(sc); } /* * Callback from net80211 to start a scan. */ static void wpi_scan_start(struct ieee80211com *ic) { struct wpi_softc *sc = ic->ic_softc; wpi_set_led(sc, WPI_LED_LINK, 20, 2); } /* * Callback from net80211 to terminate a scan. */ static void wpi_scan_end(struct ieee80211com *ic) { struct wpi_softc *sc = ic->ic_softc; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap->iv_state == IEEE80211_S_RUN) wpi_set_led(sc, WPI_LED_LINK, 0, 1); } /** * Called by the net80211 framework to indicate to the driver * that the channel should be changed. */ static void wpi_set_channel(struct ieee80211com *ic) { const struct ieee80211_channel *c = ic->ic_curchan; struct wpi_softc *sc = ic->ic_softc; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); WPI_LOCK(sc); sc->sc_rxtap.wr_chan_freq = htole16(c->ic_freq); sc->sc_rxtap.wr_chan_flags = htole16(c->ic_flags); WPI_UNLOCK(sc); WPI_TX_LOCK(sc); sc->sc_txtap.wt_chan_freq = htole16(c->ic_freq); sc->sc_txtap.wt_chan_flags = htole16(c->ic_flags); WPI_TX_UNLOCK(sc); /* * Only need to set the channel in Monitor mode. AP scanning and auth * are already taken care of by their respective firmware commands. */ if (ic->ic_opmode == IEEE80211_M_MONITOR) { WPI_RXON_LOCK(sc); sc->rxon.chan = ieee80211_chan2ieee(ic, c); if (IEEE80211_IS_CHAN_2GHZ(c)) { sc->rxon.flags |= htole32(WPI_RXON_AUTO | WPI_RXON_24GHZ); } else { sc->rxon.flags &= ~htole32(WPI_RXON_AUTO | WPI_RXON_24GHZ); } if ((error = wpi_send_rxon(sc, 0, 1)) != 0) device_printf(sc->sc_dev, "%s: error %d setting channel\n", __func__, error); WPI_RXON_UNLOCK(sc); } } /** * Called by net80211 to indicate that we need to scan the current * channel. The channel has previously been set via the wpi_set_channel * callback. */ static void wpi_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { struct ieee80211vap *vap = ss->ss_vap; struct ieee80211com *ic = vap->iv_ic; struct wpi_softc *sc = ic->ic_softc; int error; WPI_RXON_LOCK(sc); error = wpi_scan(sc, ic->ic_curchan); WPI_RXON_UNLOCK(sc); if (error != 0) ieee80211_cancel_scan(vap); } /** * Called by the net80211 framework to indicate that * the minimum dwell time has been met and the scan may be terminated. * We don't actually terminate the scan as the firmware will notify * us when it's finished and we have no way to interrupt it.
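*/
/*
 * Editorial sketch (hypothetical, not in the tree): the spin-until-bit
 * pattern used by wpi_clock_wait(), wpi_load_bootcode() and
 * wpi_post_alive() could be factored into one helper along these lines.
 * It is placed here only for illustration.
 */
static int
wpi_poll_bit(struct wpi_softc *sc, uint32_t reg, uint32_t mask, int ntries,
    int delay_us)
{
	/* Poll "reg" until any bit in "mask" is set or the budget runs out. */
	while (ntries-- > 0) {
		if (WPI_READ(sc, reg) & mask)
			return (0);
		DELAY(delay_us);
	}
	return (ETIMEDOUT);
}
/*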
*/ static void wpi_scan_mindwell(struct ieee80211_scan_state *ss) { /* NB: don't try to abort scan; wait for firmware to finish */ } static void wpi_hw_reset(void *arg, int pending) { struct wpi_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); ieee80211_notify_radio(ic, 0); if (vap != NULL && (ic->ic_flags & IEEE80211_F_SCAN)) ieee80211_cancel_scan(vap); wpi_stop(sc); if (vap != NULL) { ieee80211_stop(vap); ieee80211_init(vap); } } Index: projects/powernv/fs/nfsclient/nfs_clvfsops.c =================================================================== --- projects/powernv/fs/nfsclient/nfs_clvfsops.c (revision 290990) +++ projects/powernv/fs/nfsclient/nfs_clvfsops.c (revision 290991) @@ -1,1884 +1,1886 @@ /*- * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_bootp.h" #include "opt_nfsroot.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(nfscl, "NFSv4 client"); extern int nfscl_ticks; extern struct timeval nfsboottime; extern struct nfsstats newnfsstats; extern int nfsrv_useacl; extern int nfscl_debuglevel; extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON]; extern struct mtx ncl_iod_mutex; NFSCLSTATEMUTEX; MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header"); MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct"); SYSCTL_DECL(_vfs_nfs); static int nfs_ip_paranoia = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, &nfs_ip_paranoia, 0, ""); static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY, downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, ""); /* how long between console messages "nfs server foo not responding" */ static int nfs_tprintf_delay = NFS_TPRINTF_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY, downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, ""); #ifdef NFS_DEBUG int nfs_debug; SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, "Toggle debug flag"); #endif static int nfs_mountroot(struct mount *); static void nfs_sec_name(char *, int *); static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, const char *, struct ucred *, struct thread *); static int mountnfs(struct nfs_args *, struct mount *, struct sockaddr *, char *, u_char *, int, u_char *, int, u_char *, int, struct vnode **, struct ucred *, struct thread *, int, int, int); static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *, struct sockaddr_storage *, int *, off_t *, struct timeval *); static vfs_mount_t nfs_mount; static vfs_cmount_t nfs_cmount; static vfs_unmount_t nfs_unmount; static vfs_root_t nfs_root; static vfs_statfs_t nfs_statfs; static vfs_sync_t nfs_sync; static vfs_sysctl_t nfs_sysctl; static vfs_purge_t nfs_purge; /* * nfs vfs operations. */ static struct vfsops nfs_vfsops = { .vfs_init = ncl_init, .vfs_mount = nfs_mount, .vfs_cmount = nfs_cmount, .vfs_root = nfs_root, .vfs_statfs = nfs_statfs, .vfs_sync = nfs_sync, .vfs_uninit = ncl_uninit, .vfs_unmount = nfs_unmount, .vfs_sysctl = nfs_sysctl, .vfs_purge = nfs_purge, }; VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfs, 1); MODULE_DEPEND(nfs, nfscommon, 1, 1, 1); MODULE_DEPEND(nfs, krpc, 1, 1, 1); MODULE_DEPEND(nfs, nfssvc, 1, 1, 1); MODULE_DEPEND(nfs, nfslock, 1, 1, 1); /* * This structure is now defined in sys/nfs/nfs_diskless.c so that it * can be shared by both NFS clients. It is declared here so that it * will be defined for kernels built without NFS_ROOT, although it * isn't used in that case. 
*/ #if !defined(NFS_ROOT) struct nfs_diskless nfs_diskless = { { { 0 } } }; struct nfsv3_diskless nfsv3_diskless = { { { 0 } } }; int nfs_diskless_valid = 0; #endif SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD, &nfs_diskless_valid, 0, "Has the diskless struct been filled correctly"); SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD, nfsv3_diskless.root_hostnam, 0, "Path to nfs root"); SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD, &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr), "%Ssockaddr_in", "Diskless root nfs address"); void newnfsargs_ntoh(struct nfs_args *); static int nfs_mountdiskless(char *, struct sockaddr_in *, struct nfs_args *, struct thread *, struct vnode **, struct mount *); static void nfs_convert_diskless(void); static void nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs); int newnfs_iosize(struct nfsmount *nmp) { int iosize, maxio; /* First, set the upper limit for iosize */ if (nmp->nm_flag & NFSMNT_NFSV4) { maxio = NFS_MAXBSIZE; } else if (nmp->nm_flag & NFSMNT_NFSV3) { if (nmp->nm_sotype == SOCK_DGRAM) maxio = NFS_MAXDGRAMDATA; else maxio = NFS_MAXBSIZE; } else { maxio = NFS_V2MAXDATA; } if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0) nmp->nm_rsize = maxio; if (nmp->nm_rsize > NFS_MAXBSIZE) nmp->nm_rsize = NFS_MAXBSIZE; if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0) nmp->nm_readdirsize = maxio; if (nmp->nm_readdirsize > nmp->nm_rsize) nmp->nm_readdirsize = nmp->nm_rsize; if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0) nmp->nm_wsize = maxio; if (nmp->nm_wsize > NFS_MAXBSIZE) nmp->nm_wsize = NFS_MAXBSIZE; /* * Calculate the size used for io buffers. Use the larger * of the two sizes to minimise nfs requests but make sure * that it is at least one VM page to avoid wasting buffer - * space. + * space. It must also be at least NFS_DIRBLKSIZ, since + * that is the buffer size used for directories. 
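*/
/*
 * Editorial worked example for the clamp added above (not from the
 * commit): with rsize = wsize = 4096 on a 4 KB-page machine the old code
 * chose f_iosize = 4096, smaller than an NFS_DIRBLKSIZ-sized directory
 * buffer (commonly 8192, i.e. 16 * DIRBLKSIZ); the new imax() raises it
 * to 8192, so f_iosize is never smaller than a directory buffer.
 */
/*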
*/ iosize = imax(nmp->nm_rsize, nmp->nm_wsize); iosize = imax(iosize, PAGE_SIZE); + iosize = imax(iosize, NFS_DIRBLKSIZ); nmp->nm_mountp->mnt_stat.f_iosize = iosize; return (iosize); } static void nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs) { args->version = NFS_ARGSVERSION; args->addr = oargs->addr; args->addrlen = oargs->addrlen; args->sotype = oargs->sotype; args->proto = oargs->proto; args->fh = oargs->fh; args->fhsize = oargs->fhsize; args->flags = oargs->flags; args->wsize = oargs->wsize; args->rsize = oargs->rsize; args->readdirsize = oargs->readdirsize; args->timeo = oargs->timeo; args->retrans = oargs->retrans; args->readahead = oargs->readahead; args->hostname = oargs->hostname; } static void nfs_convert_diskless(void) { bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif, sizeof(struct ifaliasreq)); bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway, sizeof(struct sockaddr_in)); nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args); if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) { nfsv3_diskless.root_fhsize = NFSX_MYFH; bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH); } else { nfsv3_diskless.root_fhsize = NFSX_V2FH; bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH); } bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr, sizeof(struct sockaddr_in)); bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN); nfsv3_diskless.root_time = nfs_diskless.root_time; bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam, MAXHOSTNAMELEN); nfs_diskless_valid = 3; } /* * nfs statfs call */ static int nfs_statfs(struct mount *mp, struct statfs *sbp) { struct vnode *vp; struct thread *td; struct nfsmount *nmp = VFSTONFS(mp); struct nfsvattr nfsva; struct nfsfsinfo fs; struct nfsstatfs sb; int error = 0, attrflag, gotfsinfo = 0, ret; struct nfsnode *np; td = curthread; error = vfs_busy(mp, MBF_NOWAIT); if (error) return (error); error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) { vfs_unbusy(mp); return (error); } vp = NFSTOV(np); mtx_lock(&nmp->nm_mtx); if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { mtx_unlock(&nmp->nm_mtx); error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva, &attrflag, NULL); if (!error) gotfsinfo = 1; } else mtx_unlock(&nmp->nm_mtx); if (!error) error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva, &attrflag, NULL); if (error != 0) NFSCL_DEBUG(2, "statfs=%d\n", error); if (attrflag == 0) { ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, td->td_ucred, td, &nfsva, NULL, NULL); if (ret) { /* * Just set default values to get things going. 
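*/
/*
 * Editorial note: these synthetic attributes (a mode-0777 directory, the
 * traditional root fileid 2, a 512 KB size) exist only so the mount can
 * proceed when even the fallback getattr RPC fails; they are presumably
 * replaced in the attribute cache once a real RPC succeeds.
 */
/*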
*/ NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); nfsva.na_vattr.va_type = VDIR; nfsva.na_vattr.va_mode = 0777; nfsva.na_vattr.va_nlink = 100; nfsva.na_vattr.va_uid = (uid_t)0; nfsva.na_vattr.va_gid = (gid_t)0; nfsva.na_vattr.va_fileid = 2; nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; } } (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { mtx_lock(&nmp->nm_mtx); if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4)) nfscl_loadfsinfo(nmp, &fs); nfscl_loadsbinfo(nmp, &sb, sbp); sbp->f_iosize = newnfs_iosize(nmp); mtx_unlock(&nmp->nm_mtx); if (sbp != &mp->mnt_stat) { bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } vput(vp); vfs_unbusy(mp); return (error); } /* * nfs version 3 fsinfo rpc call */ int ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred, struct thread *td) { struct nfsfsinfo fs; struct nfsvattr nfsva; int error, attrflag; error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL); if (!error) { if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); mtx_lock(&nmp->nm_mtx); nfscl_loadfsinfo(nmp, &fs); mtx_unlock(&nmp->nm_mtx); } return (error); } /* * Mount a remote root fs via. nfs. This depends on the info in the * nfs_diskless structure that has been filled in properly by some primary * bootstrap. * It goes something like this: * - do enough of "ifconfig" by calling ifioctl() so that the system * can talk to the server * - If nfs_diskless.mygateway is filled in, use that address as * a default gateway. * - build the rootfs mount point and call mountnfs() to do the rest. * * It is assumed to be safe to read, modify, and write the nfsv3_diskless * structure, as well as other global NFS client variables here, as * nfs_mountroot() will be called once in the boot before any other NFS * client activity occurs. */ static int nfs_mountroot(struct mount *mp) { struct thread *td = curthread; struct nfsv3_diskless *nd = &nfsv3_diskless; struct socket *so; struct vnode *vp; struct ifreq ir; int error; u_long l; char buf[128]; char *cp; #if defined(BOOTP_NFSROOT) && defined(BOOTP) bootpc_init(); /* use bootp to get nfs_diskless filled in */ #elif defined(NFS_ROOT) nfs_setup_diskless(); #endif if (nfs_diskless_valid == 0) return (-1); if (nfs_diskless_valid == 1) nfs_convert_diskless(); /* * XXX splnet, so networks will receive... */ splnet(); /* * Do enough of ifconfig(8) so that the critical net interface can * talk to the server. */ error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0, td->td_ucred, td); if (error) panic("nfs_mountroot: socreate(%04x): %d", nd->myif.ifra_addr.sa_family, error); #if 0 /* XXX Bad idea */ /* * We might not have been told the right interface, so we pass * over the first ten interfaces of the same kind, until we get * one of them configured. 
*/ for (i = strlen(nd->myif.ifra_name) - 1; nd->myif.ifra_name[i] >= '0' && nd->myif.ifra_name[i] <= '9'; nd->myif.ifra_name[i] ++) { error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); if(!error) break; } #endif error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); if (error) panic("nfs_mountroot: SIOCAIFADDR: %d", error); if ((cp = kern_getenv("boot.netif.mtu")) != NULL) { ir.ifr_mtu = strtol(cp, NULL, 10); bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ); freeenv(cp); error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td); if (error) printf("nfs_mountroot: SIOCSIFMTU: %d", error); } soclose(so); /* * If the gateway field is filled in, set it as the default route. * Note that pxeboot will set a default route of 0 if the route * is not set by the DHCP server. Check also for a value of 0 * to avoid panicking inappropriately in that situation. */ if (nd->mygateway.sin_len != 0 && nd->mygateway.sin_addr.s_addr != 0) { struct sockaddr_in mask, sin; bzero((caddr_t)&mask, sizeof(mask)); sin = mask; sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); /* XXX MRT use table 0 for this sort of thing */ CURVNET_SET(TD_TO_VNET(td)); error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB); CURVNET_RESTORE(); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); } /* * Create the rootfs mount point. */ nd->root_args.fh = nd->root_fh; nd->root_args.fhsize = nd->root_fhsize; l = ntohl(nd->root_saddr.sin_addr.s_addr); snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", (l >> 24) & 0xff, (l >> 16) & 0xff, (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam); printf("NFS ROOT: %s\n", buf); nd->root_args.hostname = buf; if ((error = nfs_mountdiskless(buf, &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) { return (error); } /* * This is not really an nfs issue, but it is much easier to * set hostname here and then let the "/etc/rc.xxx" files * mount the right /var based upon its preset value. */ mtx_lock(&prison0.pr_mtx); strlcpy(prison0.pr_hostname, nd->my_hostnam, sizeof(prison0.pr_hostname)); mtx_unlock(&prison0.pr_mtx); inittodr(ntohl(nd->root_time)); return (0); } /* * Internal version of mount system call for diskless setup. */ static int nfs_mountdiskless(char *path, struct sockaddr_in *sin, struct nfs_args *args, struct thread *td, struct vnode **vpp, struct mount *mp) { struct sockaddr *nam; int dirlen, error; char *dirpath; /* * Find the directory path in "path", which also has the server's * name/ip address in it. */ dirpath = strchr(path, ':'); if (dirpath != NULL) dirlen = strlen(++dirpath); else dirlen = 0; nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen, NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO, NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) { printf("nfs_mountroot: mount %s on /: %d\n", path, error); return (error); } return (0); } static void nfs_sec_name(char *sec, int *flagsp) { if (!strcmp(sec, "krb5")) *flagsp |= NFSMNT_KERB; else if (!strcmp(sec, "krb5i")) *flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY); else if (!strcmp(sec, "krb5p")) *flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY); } static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, const char *hostname, struct ucred *cred, struct thread *td) { int s; int adjsock; char *p; s = splnet(); /* * Set read-only flag if requested; otherwise, clear it if this is * an update. 
If this is not an update, then either the read-only * flag is already clear, or this is a root mount and it was set * intentionally at some previous point. */ if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_RDONLY; MNT_IUNLOCK(mp); } else if (mp->mnt_flag & MNT_UPDATE) { MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); } /* * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes * no sense in that context. Also, set up appropriate retransmit * and soft timeout behavior. */ if (argp->sotype == SOCK_STREAM) { nmp->nm_flag &= ~NFSMNT_NOCONN; nmp->nm_timeo = NFS_MAXTIMEO; if ((argp->flags & NFSMNT_NFSV4) != 0) nmp->nm_retry = INT_MAX; else nmp->nm_retry = NFS_RETRANS_TCP; } /* Also clear RDIRPLUS if NFSv2, it crashes some servers */ if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) { argp->flags &= ~NFSMNT_RDIRPLUS; nmp->nm_flag &= ~NFSMNT_RDIRPLUS; } /* Re-bind if rsrvd port requested and wasn't on one */ adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT) && (argp->flags & NFSMNT_RESVPORT); /* Also re-bind if we're switching to/from a connected UDP socket */ adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) != (argp->flags & NFSMNT_NOCONN)); /* Update flags atomically. Don't change the lock bits. */ nmp->nm_flag = argp->flags | nmp->nm_flag; splx(s); if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; if (nmp->nm_timeo < NFS_MINTIMEO) nmp->nm_timeo = NFS_MINTIMEO; else if (nmp->nm_timeo > NFS_MAXTIMEO) nmp->nm_timeo = NFS_MAXTIMEO; } if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { nmp->nm_retry = argp->retrans; if (nmp->nm_retry > NFS_MAXREXMIT) nmp->nm_retry = NFS_MAXREXMIT; } if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { nmp->nm_wsize = argp->wsize; /* * Clip at the power of 2 below the size. There is an * issue (not isolated) that causes intermittent page * faults if this is not done. */ if (nmp->nm_wsize > NFS_FABLKSIZE) nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1); else nmp->nm_wsize = NFS_FABLKSIZE; } if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { nmp->nm_rsize = argp->rsize; /* * Clip at the power of 2 below the size. There is an * issue (not isolated) that causes intermittent page * faults if this is not done. 
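*/
/*
 * Editorial worked example: fls() returns the index of the most
 * significant set bit (fls(1) == 1), so "1 << (fls(x) - 1)" rounds x
 * down to a power of 2.  E.g. rsize = 50000 -> fls() = 16 ->
 * nm_rsize = 1 << 15 = 32768.
 */
/*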
*/ if (nmp->nm_rsize > NFS_FABLKSIZE) nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1); else nmp->nm_rsize = NFS_FABLKSIZE; } if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { nmp->nm_readdirsize = argp->readdirsize; } if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) nmp->nm_acregmin = argp->acregmin; else nmp->nm_acregmin = NFS_MINATTRTIMO; if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) nmp->nm_acregmax = argp->acregmax; else nmp->nm_acregmax = NFS_MAXATTRTIMO; if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) nmp->nm_acdirmin = argp->acdirmin; else nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) nmp->nm_acdirmax = argp->acdirmax; else nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; if (nmp->nm_acdirmin > nmp->nm_acdirmax) nmp->nm_acdirmin = nmp->nm_acdirmax; if (nmp->nm_acregmin > nmp->nm_acregmax) nmp->nm_acregmin = nmp->nm_acregmax; if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) { if (argp->readahead <= NFS_MAXRAHEAD) nmp->nm_readahead = argp->readahead; else nmp->nm_readahead = NFS_MAXRAHEAD; } if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) { if (argp->wcommitsize < nmp->nm_wsize) nmp->nm_wcommitsize = nmp->nm_wsize; else nmp->nm_wcommitsize = argp->wcommitsize; } adjsock |= ((nmp->nm_sotype != argp->sotype) || (nmp->nm_soproto != argp->proto)); if (nmp->nm_client != NULL && adjsock) { int haslock = 0, error = 0; if (nmp->nm_sotype == SOCK_STREAM) { error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock); if (!error) haslock = 1; } if (!error) { newnfs_disconnect(&nmp->nm_sockreq); if (haslock) newnfs_sndunlock(&nmp->nm_sockreq.nr_lock); nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; if (nmp->nm_sotype == SOCK_DGRAM) while (newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)) { printf("newnfs_args: retrying connect\n"); (void) nfs_catnap(PSOCK, 0, "nfscon"); } } } else { nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; } if (hostname != NULL) { strlcpy(nmp->nm_hostname, hostname, sizeof(nmp->nm_hostname)); p = strchr(nmp->nm_hostname, ':'); if (p != NULL) *p = '\0'; } } static const char *nfs_opts[] = { "from", "nfs_args", "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union", "noclusterr", "noclusterw", "multilabel", "acls", "force", "update", "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize", "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh", "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath", "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr", "pnfs", "wcommitsize", NULL }; /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the sockargs() call because sockargs() allocates an mbuf and * an error after that means that I have to release the mbuf. 
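*/
/*
 * Editorial sketch (hypothetical helper, not in the tree): nfs_mount()
 * below repeats the same vfs_getopt()/sscanf() validation for more than
 * a dozen integer options; it could be factored roughly as follows.
 * The name and the minval convention are invented for illustration.
 */
static int
nfs_parse_int_opt(struct mount *mp, const char *name, int *valp, int minval)
{
	char *opt;

	if (vfs_getopt(mp->mnt_optnew, name, (void **)&opt, NULL) != 0)
		return (ENOENT);	/* Option not supplied; not an error. */
	if (opt == NULL || sscanf(opt, "%d", valp) != 1 || *valp < minval) {
		vfs_mount_error(mp, "illegal %s: %s", name,
		    opt == NULL ? "(null)" : opt);
		return (EINVAL);
	}
	return (0);
}
/*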
*/ /* ARGSUSED */ static int nfs_mount(struct mount *mp) { struct nfs_args args = { .version = NFS_ARGSVERSION, .addr = NULL, .addrlen = sizeof (struct sockaddr_in), .sotype = SOCK_STREAM, .proto = 0, .fh = NULL, .fhsize = 0, .flags = NFSMNT_RESVPORT, .wsize = NFS_WSIZE, .rsize = NFS_RSIZE, .readdirsize = NFS_READDIRSIZE, .timeo = 10, .retrans = NFS_RETRANS, .readahead = NFS_DEFRAHEAD, .wcommitsize = 0, /* was: NQ_DEFLEASE */ .hostname = NULL, .acregmin = NFS_MINATTRTIMO, .acregmax = NFS_MAXATTRTIMO, .acdirmin = NFS_MINDIRATTRTIMO, .acdirmax = NFS_MAXDIRATTRTIMO, }; int error = 0, ret, len; struct sockaddr *nam = NULL; struct vnode *vp; struct thread *td; char hst[MNAMELEN]; u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100]; char *cp, *opt, *name, *secname; int nametimeo = NFS_DEFAULT_NAMETIMEO; int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO; int minvers = 0; int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen; size_t hstlen; has_nfs_args_opt = 0; if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) { error = EINVAL; goto out; } td = curthread; if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) { error = nfs_mountroot(mp); goto out; } nfscl_init(); /* * The old mount_nfs program passed the struct nfs_args * from userspace to kernel. The new mount_nfs program * passes string options via nmount() from userspace to kernel * and we populate the struct nfs_args in the kernel. */ if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) { error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof(args)); if (error != 0) goto out; if (args.version != NFS_ARGSVERSION) { error = EPROGMISMATCH; goto out; } has_nfs_args_opt = 1; } /* Handle the new style options. */ if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) { args.acdirmin = args.acdirmax = args.acregmin = args.acregmax = 0; args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX | NFSMNT_ACREGMIN | NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0) args.flags |= NFSMNT_NOCONN; if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0) args.flags &= ~NFSMNT_NOCONN; if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0) args.flags |= NFSMNT_NOLOCKD; if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0) args.flags &= ~NFSMNT_NOLOCKD; if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0) args.flags |= NFSMNT_INT; if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0) args.flags |= NFSMNT_RDIRPLUS; if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0) args.flags |= NFSMNT_RESVPORT; if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0) args.flags &= ~NFSMNT_RESVPORT; if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0) args.flags |= NFSMNT_SOFT; if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0) args.flags &= ~NFSMNT_SOFT; if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0) args.sotype = SOCK_DGRAM; if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0) args.sotype = SOCK_DGRAM; if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0) args.sotype = SOCK_STREAM; if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0) args.flags |= NFSMNT_NFSV3; if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) { args.flags |= NFSMNT_NFSV4; args.sotype = SOCK_STREAM; } if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0) args.flags |= NFSMNT_ALLGSSNAME; if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0) args.flags |= NFSMNT_NOCTO; if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0) args.flags |= NFSMNT_NONCONTIGWR; if 
(vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0) args.flags |= NFSMNT_PNFS; if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readdirsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.readdirsize); if (ret != 1 || args.readdirsize <= 0) { vfs_mount_error(mp, "illegal readdirsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_READDIRSIZE; } if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readahead"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.readahead); if (ret != 1 || args.readahead <= 0) { vfs_mount_error(mp, "illegal readahead: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_READAHEAD; } if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal wsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.wsize); if (ret != 1 || args.wsize <= 0) { vfs_mount_error(mp, "illegal wsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_WSIZE; } if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal rsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.rsize); if (ret != 1 || args.rsize <= 0) { vfs_mount_error(mp, "illegal wsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_RSIZE; } if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal retrans"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.retrans); if (ret != 1 || args.retrans <= 0) { vfs_mount_error(mp, "illegal retrans: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_RETRANS; } if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmin); if (ret != 1 || args.acregmin < 0) { vfs_mount_error(mp, "illegal actimeo: %s", opt); error = EINVAL; goto out; } args.acdirmin = args.acdirmax = args.acregmax = args.acregmin; args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX | NFSMNT_ACREGMIN | NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmin); if (ret != 1 || args.acregmin < 0) { vfs_mount_error(mp, "illegal acregmin: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACREGMIN; } if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmax); if (ret != 1 || args.acregmax < 0) { vfs_mount_error(mp, "illegal acregmax: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acdirmin); if (ret != 1 || args.acdirmin < 0) { vfs_mount_error(mp, "illegal acdirmin: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACDIRMIN; } if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acdirmax); if (ret != 1 || args.acdirmax < 0) { vfs_mount_error(mp, "illegal acdirmax: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACDIRMAX; } if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.wcommitsize); if (ret != 1 || args.wcommitsize < 0) { vfs_mount_error(mp, "illegal wcommitsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_WCOMMITSIZE; } if (vfs_getopt(mp->mnt_optnew, "timeo", (void 
**)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.timeo); if (ret != 1 || args.timeo <= 0) { vfs_mount_error(mp, "illegal timeo: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_TIMEO; } if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.timeo); if (ret != 1 || args.timeo <= 0) { vfs_mount_error(mp, "illegal timeout: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_TIMEO; } if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &nametimeo); if (ret != 1 || nametimeo < 0) { vfs_mount_error(mp, "illegal nametimeo: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &negnametimeo); if (ret != 1 || negnametimeo < 0) { vfs_mount_error(mp, "illegal negnametimeo: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &minvers); if (ret != 1 || minvers < 0 || minvers > 1 || (args.flags & NFSMNT_NFSV4) == 0) { vfs_mount_error(mp, "illegal minorversion: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "sec", (void **) &secname, NULL) == 0) nfs_sec_name(secname, &args.flags); if (mp->mnt_flag & MNT_UPDATE) { struct nfsmount *nmp = VFSTONFS(mp); if (nmp == NULL) { error = EIO; goto out; } /* * If a change from TCP->UDP is done and there are thread(s) * that have I/O RPC(s) in progress with a transfer size * greater than NFS_MAXDGRAMDATA, those thread(s) will be * hung, retrying the RPC(s) forever. Usually these threads * will be seen doing an uninterruptible sleep on wait channel * "nfsreq". */ if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM) tprintf(td->td_proc, LOG_WARNING, "Warning: mount -u that changes TCP->UDP can result in hung threads\n"); /* * When doing an update, we can't change version, * security, switch lockd strategies or change cookie * translation */ args.flags = (args.flags & ~(NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) | (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)); nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td); goto out; } /* * Make the nfs_ip_paranoia sysctl serve as the default connection * or no-connection mode for those protocols that support * no-connection mode (the flag will be cleared later for protocols * that do not support no-connection mode). This will allow a client * to receive replies from a different IP than the request was * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid), * not 0. */ if (nfs_ip_paranoia == 0) args.flags |= NFSMNT_NOCONN; if (has_nfs_args_opt != 0) { /* * In the 'nfs_args' case, the pointers in the args * structure are in userland - we copy them in here. 
*/ if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) { vfs_mount_error(mp, "Bad file handle"); error = EINVAL; goto out; } error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); if (error != 0) goto out; error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen); if (error != 0) goto out; bzero(&hst[hstlen], MNAMELEN - hstlen); args.hostname = hst; /* sockargs() call must be after above copyin() calls */ error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen); if (error != 0) goto out; } else { if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh, &args.fhsize) == 0) { if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) { vfs_mount_error(mp, "Bad file handle"); error = EINVAL; goto out; } bcopy(args.fh, nfh, args.fhsize); } else { args.fhsize = 0; } (void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname, &len); if (args.hostname == NULL) { vfs_mount_error(mp, "Invalid hostname"); error = EINVAL; goto out; } bcopy(args.hostname, hst, MNAMELEN); hst[MNAMELEN - 1] = '\0'; } if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0) strlcpy(srvkrbname, name, sizeof (srvkrbname)); else { snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst); cp = strchr(srvkrbname, ':'); if (cp != NULL) *cp = '\0'; } srvkrbnamelen = strlen(srvkrbname); if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0) strlcpy(krbname, name, sizeof (krbname)); else krbname[0] = '\0'; krbnamelen = strlen(krbname); if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0) strlcpy(dirpath, name, sizeof (dirpath)); else dirpath[0] = '\0'; dirlen = strlen(dirpath); if (has_nfs_args_opt == 0) { if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr, &args.addrlen) == 0) { if (args.addrlen > SOCK_MAXADDRLEN) { error = ENAMETOOLONG; goto out; } nam = malloc(args.addrlen, M_SONAME, M_WAITOK); bcopy(args.addr, nam, args.addrlen); nam->sa_len = args.addrlen; } else { vfs_mount_error(mp, "No server address"); error = EINVAL; goto out; } } args.fh = nfh; error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath, dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td, nametimeo, negnametimeo, minvers); out: if (!error) { MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF | MNTK_USES_BCACHE; MNT_IUNLOCK(mp); } return (error); } /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the sockargs() call because sockargs() allocates an mbuf and * an error after that means that I have to release the mbuf. 
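 *
 * (Aside: the string options parsed earlier ("rsize", "wsize", "timeo",
 * and friends) arrive from userland as name/value iovec pairs via
 * nmount(2). A minimal, hypothetical userland sketch of that calling
 * convention; the mount point, server path and rsize below are made up,
 * and a real NFS mount also needs an "addr" sockaddr option that
 * mount_nfs(8) normally builds:
 *
 *	#include <sys/uio.h>
 *	#include <sys/mount.h>
 *	#include <string.h>
 *	#include <err.h>
 *
 *	static void
 *	set_iov(struct iovec *iov, const char *str)
 *	{
 *		iov->iov_base = __DECONST(char *, str);
 *		iov->iov_len = strlen(str) + 1;
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		struct iovec iov[8];
 *
 *		set_iov(&iov[0], "fstype");   set_iov(&iov[1], "nfs");
 *		set_iov(&iov[2], "fspath");   set_iov(&iov[3], "/mnt");
 *		set_iov(&iov[4], "hostname"); set_iov(&iov[5], "server:/export");
 *		set_iov(&iov[6], "rsize");    set_iov(&iov[7], "32768");
 *		if (nmount(iov, 8, 0) == -1)
 *			err(1, "nmount");
 *		return (0);
 *	}
 *
 * End of aside.)
 *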
*/ /* ARGSUSED */ static int nfs_cmount(struct mntarg *ma, void *data, uint64_t flags) { int error; struct nfs_args args; error = copyin(data, &args, sizeof (struct nfs_args)); if (error) return error; ma = mount_arg(ma, "nfs_args", &args, sizeof args); error = kernel_mount(ma, flags); return (error); } /* * Common code for mount and mountroot */ static int mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen, u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp, struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo, int minvers) { struct nfsmount *nmp; struct nfsnode *np; int error, trycnt, ret; struct nfsvattr nfsva; struct nfsclclient *clp; struct nfsclds *dsp, *tdsp; uint32_t lease; static u_int64_t clval = 0; NFSCL_DEBUG(3, "in mnt\n"); clp = NULL; if (mp->mnt_flag & MNT_UPDATE) { nmp = VFSTONFS(mp); printf("%s: MNT_UPDATE is no longer handled here\n", __func__); FREE(nam, M_SONAME); return (0); } else { MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) + krbnamelen + dirlen + srvkrbnamelen + 2, M_NEWNFSMNT, M_WAITOK | M_ZERO); TAILQ_INIT(&nmp->nm_bufq); if (clval == 0) clval = (u_int64_t)nfsboottime.tv_sec; nmp->nm_clval = clval++; nmp->nm_krbnamelen = krbnamelen; nmp->nm_dirpathlen = dirlen; nmp->nm_srvkrbnamelen = srvkrbnamelen; if (td->td_ucred->cr_uid != (uid_t)0) { /* * nm_uid is used to get KerberosV credentials for * the nfsv4 state handling operations if there is * no host based principal set. Use the uid of * this user if not root, since they are doing the * mount. I don't think setting this for root will * work, since root normally does not have user * credentials in a credentials cache. */ nmp->nm_uid = td->td_ucred->cr_uid; } else { /* * Just set to -1, so it won't be used. */ nmp->nm_uid = (uid_t)-1; } /* Copy and null terminate all the names */ if (nmp->nm_krbnamelen > 0) { bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen); nmp->nm_name[nmp->nm_krbnamelen] = '\0'; } if (nmp->nm_dirpathlen > 0) { bcopy(dirpath, NFSMNT_DIRPATH(nmp), nmp->nm_dirpathlen); nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen + 1] = '\0'; } if (nmp->nm_srvkrbnamelen > 0) { bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp), nmp->nm_srvkrbnamelen); nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen + nmp->nm_srvkrbnamelen + 2] = '\0'; } nmp->nm_sockreq.nr_cred = crhold(cred); mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF); mp->mnt_data = nmp; nmp->nm_getinfo = nfs_getnlminfo; nmp->nm_vinvalbuf = ncl_vinvalbuf; } vfs_getnewfsid(mp); nmp->nm_mountp = mp; mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK); /* * Since nfs_decode_args() might optionally set them, these * need to be set to defaults before the call, so that the * optional settings aren't overwritten. */ nmp->nm_nametimeo = nametimeo; nmp->nm_negnametimeo = negnametimeo; nmp->nm_timeo = NFS_TIMEO; nmp->nm_retry = NFS_RETRANS; nmp->nm_readahead = NFS_DEFRAHEAD; /* This is empirical approximation of sqrt(hibufspace) * 256. */ nmp->nm_wcommitsize = NFS_MAXBSIZE / 256; while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace) nmp->nm_wcommitsize *= 2; nmp->nm_wcommitsize *= 256; if ((argp->flags & NFSMNT_NFSV4) != 0) nmp->nm_minorvers = minvers; else nmp->nm_minorvers = 0; nfs_decode_args(mp, nmp, argp, hst, cred, td); /* * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too * high, depending on whether we end up with negative offsets in * the client or server somewhere. 
2GB-1 may be safer. * * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum * that we can handle until we find out otherwise. */ if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) nmp->nm_maxfilesize = 0xffffffffLL; else nmp->nm_maxfilesize = OFF_MAX; if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) { nmp->nm_wsize = NFS_WSIZE; nmp->nm_rsize = NFS_RSIZE; nmp->nm_readdirsize = NFS_READDIRSIZE; } nmp->nm_numgrps = NFS_MAXGRPS; nmp->nm_tprintf_delay = nfs_tprintf_delay; if (nmp->nm_tprintf_delay < 0) nmp->nm_tprintf_delay = 0; nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay; if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; nmp->nm_fhsize = argp->fhsize; if (nmp->nm_fhsize > 0) bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); nmp->nm_nam = nam; /* Set up the sockets and per-host congestion */ nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; nmp->nm_sockreq.nr_prog = NFS_PROG; if ((argp->flags & NFSMNT_NFSV4)) nmp->nm_sockreq.nr_vers = NFS_VER4; else if ((argp->flags & NFSMNT_NFSV3)) nmp->nm_sockreq.nr_vers = NFS_VER3; else nmp->nm_sockreq.nr_vers = NFS_VER2; if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0))) goto bad; /* For NFSv4.1, get the clientid now. */ if (nmp->nm_minorvers > 0) { NFSCL_DEBUG(3, "at getcl\n"); error = nfscl_getcl(mp, cred, td, 0, &clp); NFSCL_DEBUG(3, "aft getcl=%d\n", error); if (error != 0) goto bad; } if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) && nmp->nm_dirpathlen > 0) { NFSCL_DEBUG(3, "in dirp\n"); /* * If the fhsize on the mount point == 0 for V4, the mount * path needs to be looked up. */ trycnt = 3; do { error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, td); NFSCL_DEBUG(3, "aft dirp=%d\n", error); if (error) (void) nfs_catnap(PZERO, error, "nfsgetdirp"); } while (error && --trycnt > 0); if (error) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); goto bad; } } /* * A reference count is needed on the nfsnode representing the * remote root. If this object is not persistent, then backward * traversals of the mount point (i.e. "..") will not work if * the nfsnode gets flushed out of the cache. Ufs does not have * this problem, because one can identify root inodes by their * number == ROOTINO (2). */ if (nmp->nm_fhsize > 0) { /* * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set * non-zero for the root vnode. f_iosize will be set correctly * by nfs_statfs() before any I/O occurs. */ mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ; error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) goto bad; *vpp = NFSTOV(np); /* * Get file attributes and transfer parameters for the * mountpoint. This has the side effect of filling in * (*vpp)->v_type with the correct value. */ ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, cred, td, &nfsva, NULL, &lease); if (ret) { /* * Just set default values to get things going. 
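 *
 * (Aside: the nm_wcommitsize default set earlier in mountnfs() uses a
 * doubling loop to approximate sqrt(hibufspace) * 256. A standalone
 * sketch of that arithmetic, assuming NFS_MAXBSIZE == 65536 and a
 * made-up 64MB hibufspace:
 *
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		long hibufspace = 64L * 1024 * 1024;	// hypothetical
 *		long wcommitsize = 65536 / 256;		// NFS_MAXBSIZE / 256
 *
 *		// Double until wcommitsize is roughly sqrt(hibufspace)...
 *		while (wcommitsize * wcommitsize < hibufspace)
 *			wcommitsize *= 2;
 *		// ...then scale by 256, as the mount code does.
 *		wcommitsize *= 256;
 *		printf("%ld\n", wcommitsize);	// prints 2097152 (2MB)
 *		return (0);
 *	}
 *
 * End of aside.)
 *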
*/ NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); nfsva.na_vattr.va_type = VDIR; nfsva.na_vattr.va_mode = 0777; nfsva.na_vattr.va_nlink = 100; nfsva.na_vattr.va_uid = (uid_t)0; nfsva.na_vattr.va_gid = (gid_t)0; nfsva.na_vattr.va_fileid = 2; nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; lease = 60; } (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1); if (nmp->nm_minorvers > 0) { NFSCL_DEBUG(3, "lease=%d\n", (int)lease); NFSLOCKCLSTATE(); clp->nfsc_renew = NFSCL_RENEW(lease); clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clp->nfsc_clientidrev++; if (clp->nfsc_clientidrev == 0) clp->nfsc_clientidrev++; NFSUNLOCKCLSTATE(); /* * Mount will succeed, so the renew thread can be * started now. */ nfscl_start_renewthread(clp); nfscl_clientrelease(clp); } if (argp->flags & NFSMNT_NFSV3) ncl_fsinfo(nmp, *vpp, cred, td); /* Mark if the mount point supports NFSv4 ACLs. */ if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 && ret == 0 && NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); } /* * Lose the lock but keep the ref. */ NFSVOPUNLOCK(*vpp, 0); return (0); } error = EIO; bad: if (clp != NULL) nfscl_clientrelease(clp); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); if (nmp->nm_sockreq.nr_auth != NULL) AUTH_DESTROY(nmp->nm_sockreq.nr_auth); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); if (nmp->nm_clp != NULL) { NFSLOCKCLSTATE(); LIST_REMOVE(nmp->nm_clp, nfsc_list); NFSUNLOCKCLSTATE(); free(nmp->nm_clp, M_NFSCLCLIENT); } TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) nfscl_freenfsclds(dsp); FREE(nmp, M_NEWNFSMNT); FREE(nam, M_SONAME); return (error); } /* * unmount system call */ static int nfs_unmount(struct mount *mp, int mntflags) { struct thread *td; struct nfsmount *nmp; int error, flags = 0, i, trycnt = 0; struct nfsclds *dsp, *tdsp; td = curthread; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; nmp = VFSTONFS(mp); /* * Goes something like this.. * - Call vflush() to clear out vnodes for this filesystem * - Close the socket * - Free up the data structures */ /* In the forced case, cancel any outstanding requests. */ if (mntflags & MNT_FORCE) { error = newnfs_nmcancelreqs(nmp); if (error) goto out; /* For a forced close, get rid of the renew thread now */ nfscl_umount(nmp, td); } /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */ do { error = vflush(mp, 1, flags, td); if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30) (void) nfs_catnap(PSOCK, error, "newndm"); } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30); if (error) goto out; /* * We are now committed to the unmount. */ if ((mntflags & MNT_FORCE) == 0) nfscl_umount(nmp, td); /* Make sure no nfsiods are assigned to this mount. 
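 *
 * (Aside: the forced-unmount path above retries vflush() up to 30 times,
 * napping via nfs_catnap() between attempts rather than spinning. A
 * generic standalone sketch of that bounded retry shape, with stand-ins
 * for the flush and the nap:
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <errno.h>
 *
 *	// Stand-in for vflush(): pretend it succeeds on the fifth try.
 *	static int
 *	try_flush(int attempt)
 *	{
 *		return (attempt < 5 ? EBUSY : 0);
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		int error, trycnt = 0;
 *
 *		do {
 *			error = try_flush(trycnt);
 *			if (error != 0 && ++trycnt < 30)
 *				usleep(100000);	// stand-in for nfs_catnap()
 *		} while (error != 0 && trycnt < 30);
 *		printf("%s after %d tries\n",
 *		    error ? "gave up" : "flushed", trycnt);
 *		return (0);
 *	}
 *
 * End of aside.)
 *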
*/ mtx_lock(&ncl_iod_mutex); for (i = 0; i < NFS_MAXASYNCDAEMON; i++) if (ncl_iodmount[i] == nmp) { ncl_iodwant[i] = NFSIOD_AVAILABLE; ncl_iodmount[i] = NULL; } mtx_unlock(&ncl_iod_mutex); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); FREE(nmp->nm_nam, M_SONAME); if (nmp->nm_sockreq.nr_auth != NULL) AUTH_DESTROY(nmp->nm_sockreq.nr_auth); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) nfscl_freenfsclds(dsp); FREE(nmp, M_NEWNFSMNT); out: return (error); } /* * Return root of a filesystem */ static int nfs_root(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *vp; struct nfsmount *nmp; struct nfsnode *np; int error; nmp = VFSTONFS(mp); error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags); if (error) return error; vp = NFSTOV(np); /* * Get transfer parameters and attributes for root vnode once. */ mtx_lock(&nmp->nm_mtx); if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { mtx_unlock(&nmp->nm_mtx); ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread); } else mtx_unlock(&nmp->nm_mtx); if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; *vpp = vp; return (0); } /* * Flush out the buffer cache */ /* ARGSUSED */ static int nfs_sync(struct mount *mp, int waitfor) { struct vnode *vp, *mvp; struct thread *td; int error, allerror = 0; td = curthread; MNT_ILOCK(mp); /* * If a forced dismount is in progress, return from here so that * the umount(2) syscall doesn't get stuck in VFS_SYNC() before * calling VFS_UNMOUNT(). */ if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { MNT_IUNLOCK(mp); return (EBADF); } MNT_IUNLOCK(mp); /* * Force stale buffer cache information to be flushed. */ loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* XXX Racy bv_cnt check. */ if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 || waitfor == MNT_LAZY) { VI_UNLOCK(vp); continue; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } error = VOP_FSYNC(vp, waitfor, td); if (error) allerror = error; NFSVOPUNLOCK(vp, 0); vrele(vp); } return (allerror); } static int nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req) { struct nfsmount *nmp = VFSTONFS(mp); struct vfsquery vq; int error; bzero(&vq, sizeof(vq)); switch (op) { #if 0 case VFS_CTL_NOLOCKS: val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 
1 : 0; if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &val, sizeof(val)); if (error) return (error); } if (req->newptr != NULL) { error = SYSCTL_IN(req, &val, sizeof(val)); if (error) return (error); if (val) nmp->nm_flag |= NFSMNT_NOLOCKS; else nmp->nm_flag &= ~NFSMNT_NOLOCKS; } break; #endif case VFS_CTL_QUERY: mtx_lock(&nmp->nm_mtx); if (nmp->nm_state & NFSSTA_TIMEO) vq.vq_flags |= VQ_NOTRESP; mtx_unlock(&nmp->nm_mtx); #if 0 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) vq.vq_flags |= VQ_NOTRESPLOCK; #endif error = SYSCTL_OUT(req, &vq, sizeof(vq)); break; case VFS_CTL_TIMEO: if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); } if (req->newptr != NULL) { error = vfs_suser(mp, req->td); if (error) return (error); error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; } break; default: return (ENOTSUP); } return (0); } /* * Purge any RPCs in progress, so that they will all return errors. * This allows dounmount() to continue as far as VFS_UNMOUNT() for a * forced dismount. */ static void nfs_purge(struct mount *mp) { struct nfsmount *nmp = VFSTONFS(mp); newnfs_nmcancelreqs(nmp); } /* * Extract the information needed by the nlm from the nfs vnode. */ static void nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp, struct sockaddr_storage *sp, int *is_v3p, off_t *sizep, struct timeval *timeop) { struct nfsmount *nmp; struct nfsnode *np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); if (fhlenp != NULL) *fhlenp = (size_t)np->n_fhp->nfh_len; if (fhp != NULL) bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len); if (sp != NULL) bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp))); if (is_v3p != NULL) *is_v3p = NFS_ISV3(vp); if (sizep != NULL) *sizep = np->n_size; if (timeop != NULL) { timeop->tv_sec = nmp->nm_timeo / NFS_HZ; timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ); } } /* * This function prints out an option name, based on the conditional * argument. */ static __inline void nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf, size_t *blen) { int len; if (testval != 0 && *blen > strlen(opt)) { len = snprintf(*buf, *blen, "%s", opt); if (len != strlen(opt)) printf("EEK!!\n"); *buf += len; *blen -= len; } } /* * This function prints out an option's integer value. */ static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf, size_t *blen) { int len; if (*blen > strlen(opt) + 1) { /* Could result in truncated output string. */ len = snprintf(*buf, *blen, "%s=%d", opt, optval); if (len < *blen) { *buf += len; *blen -= len; } } } /* * Load the option flags and values into the buffer. 
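 *
 * (Aside: nfscl_printopt() and nfscl_printoptval() above append into a
 * caller-supplied buffer, advancing the cursor and shrinking the
 * remaining length as they go. A minimal standalone sketch of the same
 * append pattern, with illustrative option names:
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	static void
 *	append_opt(int testval, const char *opt, char **buf, size_t *blen)
 *	{
 *		int len;
 *
 *		// Append only when the flag is set and the option fits.
 *		if (testval != 0 && *blen > strlen(opt)) {
 *			len = snprintf(*buf, *blen, "%s", opt);
 *			*buf += len;
 *			*blen -= len;
 *		}
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		char options[64] = "", *buf = options;
 *		size_t blen = sizeof(options);
 *
 *		append_opt(1, "nfsv3", &buf, &blen);
 *		append_opt(1, ",tcp", &buf, &blen);
 *		append_opt(0, ",udp", &buf, &blen);	// skipped
 *		printf("%s\n", options);		// prints "nfsv3,tcp"
 *		return (0);
 *	}
 *
 * End of aside.)
 *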
*/ void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen) { char *buf; size_t blen; buf = buffer; blen = buflen; nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf, &blen); if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) { nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs", &buf, &blen); } nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0, "nfsv2", &buf, &blen); nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen); nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0, ",noncontigwr", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) == 0, ",lockd", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) == NFSMNT_NOLOCKD, ",nolockd", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen); } Index: projects/powernv/kern/kern_dump.c =================================================================== --- projects/powernv/kern/kern_dump.c (revision 290990) +++ projects/powernv/kern/kern_dump.c (revision 290991) @@ -1,393 +1,399 @@ /*- * Copyright (c) 2002 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_watchdog.h" #include #include #include #include #include #include #include #ifdef SW_WATCHDOG #include #endif #include #include #include #include #include #include #include CTASSERT(sizeof(struct kerneldumpheader) == 512); /* * Don't touch the first SIZEOF_METADATA bytes on the dump device. This * is to protect us from metadata and to protect metadata from us. */ #define SIZEOF_METADATA (64*1024) #define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) #define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) off_t dumplo; /* Handle buffered writes. */ static char buffer[DEV_BSIZE]; static size_t fragsz; struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS]; +#if !defined(__powerpc__) && !defined(__sparc__) void dumpsys_gen_pa_init(void) { -#if !defined(__sparc__) && !defined(__powerpc__) int n, idx; bzero(dump_map, sizeof(dump_map)); for (n = 0; n < sizeof(dump_map) / sizeof(dump_map[0]); n++) { idx = n * 2; if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0) break; dump_map[n].pa_start = dump_avail[idx]; dump_map[n].pa_size = dump_avail[idx + 1] - dump_avail[idx]; } -#endif } +#endif struct dump_pa * dumpsys_gen_pa_next(struct dump_pa *mdp) { if (mdp == NULL) return (&dump_map[0]); mdp++; if (mdp->pa_size == 0) mdp = NULL; return (mdp); } void dumpsys_gen_wbinv_all(void) { + } void dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused, void *va __unused) { + } +#if !defined(__sparc__) int dumpsys_gen_write_aux_headers(struct dumperinfo *di) { return (0); } +#endif int dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz) { size_t len; int error; while (sz) { len = DEV_BSIZE - fragsz; if (len > sz) len = sz; bcopy(ptr, buffer + fragsz, len); fragsz += len; ptr += len; sz -= len; if (fragsz == DEV_BSIZE) { error = dump_write(di, buffer, 0, dumplo, DEV_BSIZE); if (error) return (error); dumplo += DEV_BSIZE; fragsz = 0; } } return (0); } int dumpsys_buf_flush(struct dumperinfo *di) { int error; if (fragsz == 0) return (0); error = dump_write(di, buffer, 0, dumplo, DEV_BSIZE); dumplo += DEV_BSIZE; fragsz = 0; return (error); } CTASSERT(PAGE_SHIFT < 20); #define PG2MB(pgs) ((pgs + (1 << (20 - PAGE_SHIFT)) - 1) >> (20 - PAGE_SHIFT)) int dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg) { struct dumperinfo *di = (struct dumperinfo*)arg; vm_paddr_t pa; void *va; uint64_t pgs; size_t counter, sz, chunk; int c, error; u_int maxdumppgs; error = 0; /* catch case 
in which chunk size is 0 */ counter = 0; /* Update twiddle every 16MB */ va = 0; pgs = mdp->pa_size / PAGE_SIZE; pa = mdp->pa_start; maxdumppgs = min(di->maxiosize / PAGE_SIZE, MAXDUMPPGS); if (maxdumppgs == 0) /* seatbelt */ maxdumppgs = 1; printf(" chunk %d: %juMB (%ju pages)", seqnr, (uintmax_t)PG2MB(pgs), (uintmax_t)pgs); dumpsys_wbinv_all(); while (pgs) { chunk = pgs; if (chunk > maxdumppgs) chunk = maxdumppgs; sz = chunk << PAGE_SHIFT; counter += sz; if (counter >> 24) { printf(" %ju", (uintmax_t)PG2MB(pgs)); counter &= (1 << 24) - 1; } dumpsys_map_chunk(pa, chunk, &va); #ifdef SW_WATCHDOG wdog_kern_pat(WD_LASTVAL); #endif error = dump_write(di, va, 0, dumplo, sz); dumpsys_unmap_chunk(pa, chunk, va); if (error) break; dumplo += sz; pgs -= chunk; pa += sz; /* Check for user abort. */ c = cncheckc(); if (c == 0x03) return (ECANCELED); if (c != -1) printf(" (CTRL-C to abort) "); } printf(" ... %s\n", (error) ? "fail" : "ok"); return (error); } int dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg) { struct dump_pa *mdp; int error, seqnr; seqnr = 0; mdp = dumpsys_pa_next(NULL); while (mdp != NULL) { error = (*cb)(mdp, seqnr++, arg); if (error) return (-error); mdp = dumpsys_pa_next(mdp); } return (seqnr); } +#if !defined(__sparc__) static off_t fileofs; static int cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg) { struct dumperinfo *di = (struct dumperinfo*)arg; Elf_Phdr phdr; uint64_t size; int error; size = mdp->pa_size; bzero(&phdr, sizeof(phdr)); phdr.p_type = PT_LOAD; phdr.p_flags = PF_R; /* XXX */ phdr.p_offset = fileofs; #ifdef __powerpc__ phdr.p_vaddr = (do_minidump? mdp->pa_start : ~0L); phdr.p_paddr = (do_minidump? ~0L : mdp->pa_start); #else phdr.p_vaddr = mdp->pa_start; phdr.p_paddr = mdp->pa_start; #endif phdr.p_filesz = size; phdr.p_memsz = size; phdr.p_align = PAGE_SIZE; error = dumpsys_buf_write(di, (char*)&phdr, sizeof(phdr)); fileofs += phdr.p_filesz; return (error); } static int cb_size(struct dump_pa *mdp, int seqnr, void *arg) { uint64_t *sz; sz = (uint64_t *)arg; *sz += (uint64_t)mdp->pa_size; return (0); } int dumpsys_generic(struct dumperinfo *di) { static struct kerneldumpheader kdh; Elf_Ehdr ehdr; uint64_t dumpsize; off_t hdrgap; size_t hdrsz; int error; #ifndef __powerpc__ if (do_minidump) return (minidumpsys(di)); #endif bzero(&ehdr, sizeof(ehdr)); ehdr.e_ident[EI_MAG0] = ELFMAG0; ehdr.e_ident[EI_MAG1] = ELFMAG1; ehdr.e_ident[EI_MAG2] = ELFMAG2; ehdr.e_ident[EI_MAG3] = ELFMAG3; ehdr.e_ident[EI_CLASS] = ELF_CLASS; #if BYTE_ORDER == LITTLE_ENDIAN ehdr.e_ident[EI_DATA] = ELFDATA2LSB; #else ehdr.e_ident[EI_DATA] = ELFDATA2MSB; #endif ehdr.e_ident[EI_VERSION] = EV_CURRENT; ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE; /* XXX big picture? */ ehdr.e_type = ET_CORE; ehdr.e_machine = EM_VALUE; ehdr.e_phoff = sizeof(ehdr); ehdr.e_flags = 0; ehdr.e_ehsize = sizeof(ehdr); ehdr.e_phentsize = sizeof(Elf_Phdr); ehdr.e_shentsize = sizeof(Elf_Shdr); dumpsys_pa_init(); /* Calculate dump size. */ dumpsize = 0L; ehdr.e_phnum = dumpsys_foreach_chunk(cb_size, &dumpsize) + DUMPSYS_NUM_AUX_HDRS; hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize; fileofs = MD_ALIGN(hdrsz); dumpsize += fileofs; hdrgap = fileofs - DEV_ALIGN(hdrsz); /* Determine dump offset on device. 
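 *
 * (A worked sketch of the placement math below, with made-up sizes: the
 * leader header, the dump data and the trailer header are packed against
 * the end of the device, and SIZEOF_METADATA bytes are kept clear:
 *
 *	#include <stdio.h>
 *	#include <stdint.h>
 *
 *	int
 *	main(void)
 *	{
 *		int64_t mediaoffset = 0;		// hypothetical
 *		int64_t mediasize = 8LL << 30;		// 8GB dump device
 *		int64_t dumpsize = 2LL << 30;		// 2GB dump
 *		int64_t kdhsize = 512;	// sizeof(struct kerneldumpheader)
 *		int64_t metadata = 64 * 1024;		// SIZEOF_METADATA
 *		int64_t dumplo;
 *
 *		// Refuse when the dump plus both headers would reach
 *		// the protected metadata at the front of the device.
 *		if (mediasize < metadata + dumpsize + kdhsize * 2)
 *			return (1);			// ENOSPC
 *		dumplo = mediaoffset + mediasize - dumpsize - kdhsize * 2;
 *		printf("dump starts at byte %jd\n", (intmax_t)dumplo);
 *		return (0);
 *	}
 *
 * End of aside.)
 *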
*/ if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { error = ENOSPC; goto fail; } dumplo = di->mediaoffset + di->mediasize - dumpsize; dumplo -= sizeof(kdh) * 2; mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION, dumpsize, di->blocksize); printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20, ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS); /* Dump leader */ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); if (error) goto fail; dumplo += sizeof(kdh); /* Dump ELF header */ error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr)); if (error) goto fail; /* Dump program headers */ error = dumpsys_foreach_chunk(cb_dumphdr, di); if (error < 0) goto fail; error = dumpsys_write_aux_headers(di); if (error < 0) goto fail; dumpsys_buf_flush(di); /* * All headers are written using blocked I/O, so we know the * current offset is (still) block aligned. Skip the alignment * in the file to have the segment contents aligned at page * boundary. We cannot use MD_ALIGN on dumplo, because we don't * care and may very well be unaligned within the dump device. */ dumplo += hdrgap; /* Dump memory chunks (updates dumplo) */ error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di); if (error < 0) goto fail; /* Dump trailer */ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); if (error) goto fail; /* Signal completion, signoff and exit stage left. */ dump_write(di, NULL, 0, 0, 0); printf("\nDump complete\n"); return (0); fail: if (error < 0) error = -error; if (error == ECANCELED) printf("\nDump aborted\n"); else if (error == ENOSPC) printf("\nDump failed. Partition too small.\n"); else printf("\n** DUMP FAILED (ERROR %d) **\n", error); return (error); } +#endif Index: projects/powernv/kern/kern_jail.c =================================================================== --- projects/powernv/kern/kern_jail.c (revision 290990) +++ projects/powernv/kern/kern_jail.c (revision 290991) @@ -1,4769 +1,4774 @@ /*- * Copyright (c) 1999 Poul-Henning Kamp. * Copyright (c) 2008 Bjoern A. Zeeb. * Copyright (c) 2009 James Gritton. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* DDB */ #include #define DEFAULT_HOSTUUID "00000000-0000-0000-0000-000000000000" MALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); static MALLOC_DEFINE(M_PRISON_RACCT, "prison_racct", "Prison racct structures"); /* Keep struct prison prison0 and some code in kern_jail_set() readable. */ #ifdef INET #ifdef INET6 #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL|PR_IP6_SADDRSEL #else #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL #endif #else /* !INET */ #ifdef INET6 #define _PR_IP_SADDRSEL PR_IP6_SADDRSEL #else #define _PR_IP_SADDRSEL 0 #endif #endif /* prison0 describes what is "real" about the system. */ struct prison prison0 = { .pr_id = 0, .pr_name = "0", .pr_ref = 1, .pr_uref = 1, .pr_path = "/", .pr_securelevel = -1, .pr_devfs_rsnum = 0, .pr_childmax = JAIL_MAX, .pr_hostuuid = DEFAULT_HOSTUUID, .pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children), #ifdef VIMAGE .pr_flags = PR_HOST|PR_VNET|_PR_IP_SADDRSEL, #else .pr_flags = PR_HOST|_PR_IP_SADDRSEL, #endif .pr_allow = PR_ALLOW_ALL, }; MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF); /* allprison, allprison_racct and lastprid are protected by allprison_lock. */ struct sx allprison_lock; SX_SYSINIT(allprison_lock, &allprison_lock, "allprison"); struct prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison); LIST_HEAD(, prison_racct) allprison_racct; int lastprid = 0; static int do_jail_attach(struct thread *td, struct prison *pr); static void prison_complete(void *context, int pending); static void prison_deref(struct prison *pr, int flags); static char *prison_path(struct prison *pr1, struct prison *pr2); static void prison_remove_one(struct prison *pr); #ifdef RACCT static void prison_racct_attach(struct prison *pr); static void prison_racct_modify(struct prison *pr); static void prison_racct_detach(struct prison *pr); #endif #ifdef INET static int _prison_check_ip4(const struct prison *, const struct in_addr *); static int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4); #endif #ifdef INET6 static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6); static int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6); #endif /* Flags for prison_deref */ #define PD_DEREF 0x01 #define PD_DEUREF 0x02 #define PD_LOCKED 0x04 #define PD_LIST_SLOCKED 0x08 #define PD_LIST_XLOCKED 0x10 /* * Parameter names corresponding to PR_* flag values. Size values are for kvm * as we cannot figure out the size of a sparse array, or an array without a * terminating entry. 
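 *
 * (A standalone sketch of the sparse designated-initializer layout used
 * for pr_flag_names below, and the NULL-skipping walk that pairs with
 * the exported sizeof; the names shown are the real flag names, the
 * driver around them is illustrative:
 *
 *	#include <stdio.h>
 *
 *	static const char *flag_names[] = {
 *		[0] = "persist",
 *		[7] = "ip4.saddrsel",	// slots 1..6 stay NULL
 *	};
 *
 *	int
 *	main(void)
 *	{
 *		size_t fi;
 *
 *		for (fi = 0;
 *		    fi < sizeof(flag_names) / sizeof(flag_names[0]); fi++) {
 *			if (flag_names[fi] == NULL)	// hole in the array
 *				continue;
 *			printf("bit %zu -> %s\n", fi, flag_names[fi]);
 *		}
 *		return (0);
 *	}
 *
 * End of aside.)
 *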
*/ static char *pr_flag_names[] = { [0] = "persist", #ifdef INET [7] = "ip4.saddrsel", #endif #ifdef INET6 [8] = "ip6.saddrsel", #endif }; const size_t pr_flag_names_size = sizeof(pr_flag_names); static char *pr_flag_nonames[] = { [0] = "nopersist", #ifdef INET [7] = "ip4.nosaddrsel", #endif #ifdef INET6 [8] = "ip6.nosaddrsel", #endif }; const size_t pr_flag_nonames_size = sizeof(pr_flag_nonames); struct jailsys_flags { const char *name; unsigned disable; unsigned new; } pr_flag_jailsys[] = { { "host", 0, PR_HOST }, #ifdef VIMAGE { "vnet", 0, PR_VNET }, #endif #ifdef INET { "ip4", PR_IP4_USER, PR_IP4_USER }, #endif #ifdef INET6 { "ip6", PR_IP6_USER, PR_IP6_USER }, #endif }; const size_t pr_flag_jailsys_size = sizeof(pr_flag_jailsys); static char *pr_allow_names[] = { "allow.set_hostname", "allow.sysvipc", "allow.raw_sockets", "allow.chflags", "allow.mount", "allow.quotas", "allow.socket_af", "allow.mount.devfs", "allow.mount.nullfs", "allow.mount.zfs", "allow.mount.procfs", "allow.mount.tmpfs", "allow.mount.fdescfs", "allow.mount.linprocfs", "allow.mount.linsysfs", }; const size_t pr_allow_names_size = sizeof(pr_allow_names); static char *pr_allow_nonames[] = { "allow.noset_hostname", "allow.nosysvipc", "allow.noraw_sockets", "allow.nochflags", "allow.nomount", "allow.noquotas", "allow.nosocket_af", "allow.mount.nodevfs", "allow.mount.nonullfs", "allow.mount.nozfs", "allow.mount.noprocfs", "allow.mount.notmpfs", "allow.mount.nofdescfs", "allow.mount.nolinprocfs", "allow.mount.nolinsysfs", }; const size_t pr_allow_nonames_size = sizeof(pr_allow_nonames); #define JAIL_DEFAULT_ALLOW PR_ALLOW_SET_HOSTNAME #define JAIL_DEFAULT_ENFORCE_STATFS 2 #define JAIL_DEFAULT_DEVFS_RSNUM 0 static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW; static int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; static int jail_default_devfs_rsnum = JAIL_DEFAULT_DEVFS_RSNUM; #if defined(INET) || defined(INET6) static unsigned jail_max_af_ips = 255; #endif /* * Initialize the parts of prison0 that can't be static-initialized with * constants. This is called from proc0_init() after creating thread0 cpuset. */ void prison0_init(void) { prison0.pr_cpuset = cpuset_ref(thread0.td_cpuset); prison0.pr_osreldate = osreldate; strlcpy(prison0.pr_osrelease, osrelease, sizeof(prison0.pr_osrelease)); } #ifdef INET static int qcmp_v4(const void *ip1, const void *ip2) { in_addr_t iaa, iab; /* * We need to compare in HBO here to get the list sorted as expected * by the result of the code. Sorting NBO addresses gives you * interesting results. If you do not understand, do not try. */ iaa = ntohl(((const struct in_addr *)ip1)->s_addr); iab = ntohl(((const struct in_addr *)ip2)->s_addr); /* * Do not simply return the difference of the two numbers, the int is * not wide enough. 
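 *
 * (A sketch of the overflow being avoided, with arbitrary addresses: a
 * comparator that returns the plain difference misorders values whose
 * difference does not fit in an int:
 *
 *	#include <stdio.h>
 *	#include <stdint.h>
 *
 *	// Broken: the 32-bit difference is truncated into an int.
 *	static int
 *	qcmp_broken(uint32_t a, uint32_t b)
 *	{
 *		return ((int)(a - b));
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		uint32_t a = 0x90000000;	// 144.0.0.0 in host order
 *		uint32_t b = 0x00000001;	// 0.0.0.1 in host order
 *
 *		// a > b, yet a - b == 0x8fffffff, negative as an int.
 *		printf("broken comparator says %d\n", qcmp_broken(a, b));
 *		return (0);
 *	}
 *
 * End of aside.)
 *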
*/ if (iaa > iab) return (1); else if (iaa < iab) return (-1); else return (0); } #endif #ifdef INET6 static int qcmp_v6(const void *ip1, const void *ip2) { const struct in6_addr *ia6a, *ia6b; int i, rc; ia6a = (const struct in6_addr *)ip1; ia6b = (const struct in6_addr *)ip2; rc = 0; for (i = 0; rc == 0 && i < sizeof(struct in6_addr); i++) { if (ia6a->s6_addr[i] > ia6b->s6_addr[i]) rc = 1; else if (ia6a->s6_addr[i] < ia6b->s6_addr[i]) rc = -1; } return (rc); } #endif /* * struct jail_args { * struct jail *jail; * }; */ int sys_jail(struct thread *td, struct jail_args *uap) { uint32_t version; int error; struct jail j; error = copyin(uap->jail, &version, sizeof(uint32_t)); if (error) return (error); switch (version) { case 0: { struct jail_v0 j0; /* FreeBSD single IPv4 jails. */ bzero(&j, sizeof(struct jail)); error = copyin(uap->jail, &j0, sizeof(struct jail_v0)); if (error) return (error); j.version = j0.version; j.path = j0.path; j.hostname = j0.hostname; j.ip4s = htonl(j0.ip_number); /* jail_v0 is host order */ break; } case 1: /* * Version 1 was used by multi-IPv4 jail implementations * that never made it into the official kernel. */ return (EINVAL); case 2: /* JAIL_API_VERSION */ /* FreeBSD multi-IPv4/IPv6,noIP jails. */ error = copyin(uap->jail, &j, sizeof(struct jail)); if (error) return (error); break; default: /* Sci-Fi jails are not supported, sorry. */ return (EINVAL); } return (kern_jail(td, &j)); } int kern_jail(struct thread *td, struct jail *j) { struct iovec optiov[2 * (4 + sizeof(pr_allow_names) / sizeof(pr_allow_names[0]) #ifdef INET + 1 #endif #ifdef INET6 + 1 #endif )]; struct uio opt; char *u_path, *u_hostname, *u_name; #ifdef INET uint32_t ip4s; struct in_addr *u_ip4; #endif #ifdef INET6 struct in6_addr *u_ip6; #endif size_t tmplen; int error, enforce_statfs, fi; bzero(&optiov, sizeof(optiov)); opt.uio_iov = optiov; opt.uio_iovcnt = 0; opt.uio_offset = -1; opt.uio_resid = -1; opt.uio_segflg = UIO_SYSSPACE; opt.uio_rw = UIO_READ; opt.uio_td = td; /* Set permissions for top-level jails from sysctls. */ if (!jailed(td->td_ucred)) { for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); fi++) { optiov[opt.uio_iovcnt].iov_base = (jail_default_allow & (1 << fi)) ? pr_allow_names[fi] : pr_allow_nonames[fi]; optiov[opt.uio_iovcnt].iov_len = strlen(optiov[opt.uio_iovcnt].iov_base) + 1; opt.uio_iovcnt += 2; } optiov[opt.uio_iovcnt].iov_base = "enforce_statfs"; optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs"); opt.uio_iovcnt++; enforce_statfs = jail_default_enforce_statfs; optiov[opt.uio_iovcnt].iov_base = &enforce_statfs; optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs); opt.uio_iovcnt++; } tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN; #ifdef INET ip4s = (j->version == 0) ? 
1 : j->ip4s; if (ip4s > jail_max_af_ips) return (EINVAL); tmplen += ip4s * sizeof(struct in_addr); #else if (j->ip4s > 0) return (EINVAL); #endif #ifdef INET6 if (j->ip6s > jail_max_af_ips) return (EINVAL); tmplen += j->ip6s * sizeof(struct in6_addr); #else if (j->ip6s > 0) return (EINVAL); #endif u_path = malloc(tmplen, M_TEMP, M_WAITOK); u_hostname = u_path + MAXPATHLEN; u_name = u_hostname + MAXHOSTNAMELEN; #ifdef INET u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN); #endif #ifdef INET6 #ifdef INET u_ip6 = (struct in6_addr *)(u_ip4 + ip4s); #else u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN); #endif #endif optiov[opt.uio_iovcnt].iov_base = "path"; optiov[opt.uio_iovcnt].iov_len = sizeof("path"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_path; error = copyinstr(j->path, u_path, MAXPATHLEN, &optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = "host.hostname"; optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_hostname; error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN, &optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; if (j->jailname != NULL) { optiov[opt.uio_iovcnt].iov_base = "name"; optiov[opt.uio_iovcnt].iov_len = sizeof("name"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_name; error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN, &optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; } #ifdef INET optiov[opt.uio_iovcnt].iov_base = "ip4.addr"; optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_ip4; optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr); if (j->version == 0) u_ip4->s_addr = j->ip4s; else { error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } } opt.uio_iovcnt++; #endif #ifdef INET6 optiov[opt.uio_iovcnt].iov_base = "ip6.addr"; optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_ip6; optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr); error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; #endif KASSERT(opt.uio_iovcnt <= sizeof(optiov) / sizeof(optiov[0]), ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt)); error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH); free(u_path, M_TEMP); return (error); } /* * struct jail_set_args { * struct iovec *iovp; * unsigned int iovcnt; * int flags; * }; */ int sys_jail_set(struct thread *td, struct jail_set_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. 
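 *
 * (Aside: the name/value pairs come from a userland jail_set(2) call. A
 * hypothetical sketch creating a persistent jail named "example"; note
 * the even iovec count checked below, and that a boolean parameter such
 * as "persist" carries a NULL value:
 *
 *	#include <sys/param.h>
 *	#include <sys/uio.h>
 *	#include <sys/jail.h>
 *	#include <err.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct iovec iov[6];
 *		int jid;
 *
 *		iov[0].iov_base = "name";    iov[0].iov_len = sizeof("name");
 *		iov[1].iov_base = "example"; iov[1].iov_len = sizeof("example");
 *		iov[2].iov_base = "path";    iov[2].iov_len = sizeof("path");
 *		iov[3].iov_base = "/";       iov[3].iov_len = sizeof("/");
 *		iov[4].iov_base = "persist"; iov[4].iov_len = sizeof("persist");
 *		iov[5].iov_base = NULL;      iov[5].iov_len = 0;
 *
 *		jid = jail_set(iov, 6, JAIL_CREATE);
 *		if (jid == -1)
 *			err(1, "jail_set");
 *		return (0);
 *	}
 *
 * End of aside.)
 *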
*/ if (uap->iovcnt & 1) return (EINVAL); error = copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_set(td, auio, uap->flags); free(auio, M_IOV); return (error); } int kern_jail_set(struct thread *td, struct uio *optuio, int flags) { struct nameidata nd; #ifdef INET struct in_addr *ip4; #endif #ifdef INET6 struct in6_addr *ip6; #endif struct vfsopt *opt; struct vfsoptlist *opts; struct prison *pr, *deadpr, *mypr, *ppr, *tpr; struct vnode *root; char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid; char *g_path, *osrelstr; #if defined(INET) || defined(INET6) struct prison *tppr; void *op; #endif unsigned long hid; size_t namelen, onamelen; int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos; int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel; int fi, jid, jsys, len, level; int childmax, osreldt, rsnum, slevel; int fullpath_disabled; #if defined(INET) || defined(INET6) int ii, ij; #endif #ifdef INET int ip4s, redo_ip4; #endif #ifdef INET6 int ip6s, redo_ip6; #endif uint64_t pr_allow, ch_allow, pr_flags, ch_flags; unsigned tallow; char numbuf[12]; error = priv_check(td, PRIV_JAIL_SET); if (!error && (flags & JAIL_ATTACH)) error = priv_check(td, PRIV_JAIL_ATTACH); if (error) return (error); mypr = ppr = td->td_ucred->cr_prison; if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) return (EPERM); if (flags & ~JAIL_SET_MASK) return (EINVAL); /* * Check all the parameters before committing to anything. Not all * errors can be caught early, but we may as well try. Also, this * takes care of some expensive stuff (path lookup) before getting * the allprison lock. * * XXX Jails are not filesystems, and jail parameters are not mount * options. But it makes more sense to re-use the vfsopt code * than duplicate it under a different name. 
*/ error = vfs_buildopts(optuio, &opts); if (error) return (error); #ifdef INET ip4 = NULL; #endif #ifdef INET6 ip6 = NULL; #endif g_path = NULL; error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); if (error == ENOENT) jid = 0; else if (error != 0) goto done_free; error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel)); if (error == ENOENT) gotslevel = 0; else if (error != 0) goto done_free; else gotslevel = 1; error = vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax)); if (error == ENOENT) gotchildmax = 0; else if (error != 0) goto done_free; else gotchildmax = 1; error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce)); if (error == ENOENT) gotenforce = 0; else if (error != 0) goto done_free; else if (enforce < 0 || enforce > 2) { error = EINVAL; goto done_free; } else gotenforce = 1; error = vfs_copyopt(opts, "devfs_ruleset", &rsnum, sizeof(rsnum)); if (error == ENOENT) gotrsnum = 0; else if (error != 0) goto done_free; else gotrsnum = 1; pr_flags = ch_flags = 0; for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); fi++) { if (pr_flag_names[fi] == NULL) continue; vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi); vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi); } ch_flags |= pr_flags; for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); fi++) { error = vfs_copyopt(opts, pr_flag_jailsys[fi].name, &jsys, sizeof(jsys)); if (error == ENOENT) continue; if (error != 0) goto done_free; switch (jsys) { case JAIL_SYS_DISABLE: if (!pr_flag_jailsys[fi].disable) { error = EINVAL; goto done_free; } pr_flags |= pr_flag_jailsys[fi].disable; break; case JAIL_SYS_NEW: pr_flags |= pr_flag_jailsys[fi].new; break; case JAIL_SYS_INHERIT: break; default: error = EINVAL; goto done_free; } ch_flags |= pr_flag_jailsys[fi].new | pr_flag_jailsys[fi].disable; } if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE && !(pr_flags & PR_PERSIST)) { error = EINVAL; vfs_opterror(opts, "new jail must persist or attach"); goto done_errmsg; } #ifdef VIMAGE if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) { error = EINVAL; vfs_opterror(opts, "vnet cannot be changed after creation"); goto done_errmsg; } #endif #ifdef INET if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) { error = EINVAL; vfs_opterror(opts, "ip4 cannot be changed after creation"); goto done_errmsg; } #endif #ifdef INET6 if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) { error = EINVAL; vfs_opterror(opts, "ip6 cannot be changed after creation"); goto done_errmsg; } #endif pr_allow = ch_allow = 0; for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); fi++) { vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi); vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi); } ch_allow |= pr_allow; error = vfs_getopt(opts, "name", (void **)&name, &len); if (error == ENOENT) name = NULL; else if (error != 0) goto done_free; else { if (len == 0 || name[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > MAXHOSTNAMELEN) { error = ENAMETOOLONG; goto done_free; } } error = vfs_getopt(opts, "host.hostname", (void **)&host, &len); if (error == ENOENT) host = NULL; else if (error != 0) goto done_free; else { ch_flags |= PR_HOST; pr_flags |= PR_HOST; if (len == 0 || host[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > MAXHOSTNAMELEN) { error = ENAMETOOLONG; goto done_free; } } error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len); if (error == ENOENT) domain = NULL; else if (error != 0) goto 
done_free; else { ch_flags |= PR_HOST; pr_flags |= PR_HOST; if (len == 0 || domain[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > MAXHOSTNAMELEN) { error = ENAMETOOLONG; goto done_free; } } error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len); if (error == ENOENT) uuid = NULL; else if (error != 0) goto done_free; else { ch_flags |= PR_HOST; pr_flags |= PR_HOST; if (len == 0 || uuid[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > HOSTUUIDLEN) { error = ENAMETOOLONG; goto done_free; } } #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { uint32_t hid32; error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32)); hid = hid32; } else #endif error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid)); if (error == ENOENT) gothid = 0; else if (error != 0) goto done_free; else { gothid = 1; ch_flags |= PR_HOST; pr_flags |= PR_HOST; } #ifdef INET error = vfs_getopt(opts, "ip4.addr", &op, &ip4s); if (error == ENOENT) ip4s = 0; else if (error != 0) goto done_free; else if (ip4s & (sizeof(*ip4) - 1)) { error = EINVAL; goto done_free; } else { ch_flags |= PR_IP4_USER; pr_flags |= PR_IP4_USER; if (ip4s > 0) { ip4s /= sizeof(*ip4); if (ip4s > jail_max_af_ips) { error = EINVAL; vfs_opterror(opts, "too many IPv4 addresses"); goto done_errmsg; } ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); bcopy(op, ip4, ip4s * sizeof(*ip4)); /* * IP addresses are all sorted but ip[0] to preserve * the primary IP address as given from userland. * This special IP is used for unbound outgoing * connections as well for "loopback" traffic in case * source address selection cannot find any more fitting * address to connect from. */ if (ip4s > 1) qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4); /* * Check for duplicate addresses and do some simple * zero and broadcast checks. If users give other bogus * addresses it is their problem. * * We do not have to care about byte order for these * checks so we will do them in NBO. 
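 *
 * (A standalone sketch of the sort-all-but-the-first trick used above,
 * keeping element 0, the primary address, in place while ordering the
 * rest; the addresses are arbitrary:
 *
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <stdint.h>
 *
 *	static int
 *	qcmp(const void *p1, const void *p2)
 *	{
 *		uint32_t a = *(const uint32_t *)p1;
 *		uint32_t b = *(const uint32_t *)p2;
 *
 *		return (a > b ? 1 : (a < b ? -1 : 0));
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		// Element 0 is the primary address and must stay first.
 *		uint32_t ip[] =
 *		    { 0x0a000005, 0x0a000009, 0x0a000001, 0x0a000003 };
 *		size_t i, n = sizeof(ip) / sizeof(ip[0]);
 *
 *		qsort(ip + 1, n - 1, sizeof(ip[0]), qcmp);
 *		for (i = 0; i < n; i++)
 *			printf("0x%08x\n", ip[i]);	// 05, 01, 03, 09
 *		return (0);
 *	}
 *
 * End of aside.)
 *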
*/ for (ii = 0; ii < ip4s; ii++) { if (ip4[ii].s_addr == INADDR_ANY || ip4[ii].s_addr == INADDR_BROADCAST) { error = EINVAL; goto done_free; } if ((ii+1) < ip4s && (ip4[0].s_addr == ip4[ii+1].s_addr || ip4[ii].s_addr == ip4[ii+1].s_addr)) { error = EINVAL; goto done_free; } } } } #endif #ifdef INET6 error = vfs_getopt(opts, "ip6.addr", &op, &ip6s); if (error == ENOENT) ip6s = 0; else if (error != 0) goto done_free; else if (ip6s & (sizeof(*ip6) - 1)) { error = EINVAL; goto done_free; } else { ch_flags |= PR_IP6_USER; pr_flags |= PR_IP6_USER; if (ip6s > 0) { ip6s /= sizeof(*ip6); if (ip6s > jail_max_af_ips) { error = EINVAL; vfs_opterror(opts, "too many IPv6 addresses"); goto done_errmsg; } ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); bcopy(op, ip6, ip6s * sizeof(*ip6)); if (ip6s > 1) qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6); for (ii = 0; ii < ip6s; ii++) { if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) { error = EINVAL; goto done_free; } if ((ii+1) < ip6s && (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) || IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1]))) { error = EINVAL; goto done_free; } } } } #endif #if defined(VIMAGE) && (defined(INET) || defined(INET6)) if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { error = EINVAL; vfs_opterror(opts, "vnet jails cannot have IP address restrictions"); goto done_errmsg; } #endif fullpath_disabled = 0; root = NULL; error = vfs_getopt(opts, "path", (void **)&path, &len); if (error == ENOENT) path = NULL; else if (error != 0) goto done_free; else { if (flags & JAIL_UPDATE) { error = EINVAL; vfs_opterror(opts, "path cannot be changed after creation"); goto done_errmsg; } if (len == 0 || path[len - 1] != '\0') { error = EINVAL; goto done_free; } NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, td); error = namei(&nd); if (error) goto done_free; root = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); g_path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); strlcpy(g_path, path, MAXPATHLEN); error = vn_path_to_global_path(td, root, g_path, MAXPATHLEN); if (error == 0) path = g_path; else if (error == ENODEV) { /* proceed if sysctl debug.disablefullpath == 1 */ fullpath_disabled = 1; if (len < 2 || (len == 2 && path[0] == '/')) path = NULL; } else { /* exit on other errors */ goto done_free; } if (root->v_type != VDIR) { error = ENOTDIR; vput(root); goto done_free; } VOP_UNLOCK(root, 0); if (fullpath_disabled) { /* Leave room for a real-root full pathname. */ if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/") ? strlen(mypr->pr_path) : 0) > MAXPATHLEN) { error = ENAMETOOLONG; goto done_free; } } } error = vfs_getopt(opts, "osrelease", (void **)&osrelstr, &len); if (error == ENOENT) osrelstr = NULL; else if (error != 0) goto done_free; else { if (flags & JAIL_UPDATE) { error = EINVAL; vfs_opterror(opts, "osrelease cannot be changed after creation"); goto done_errmsg; } if (len == 0 || len >= OSRELEASELEN) { error = EINVAL; vfs_opterror(opts, "osrelease string must be 1-%d bytes long", OSRELEASELEN - 1); goto done_errmsg; } } error = vfs_copyopt(opts, "osreldate", &osreldt, sizeof(osreldt)); if (error == ENOENT) osreldt = 0; else if (error != 0) goto done_free; else { if (flags & JAIL_UPDATE) { error = EINVAL; vfs_opterror(opts, "osreldate cannot be changed after creation"); goto done_errmsg; } if (osreldt == 0) { error = EINVAL; vfs_opterror(opts, "osreldate cannot be 0"); goto done_errmsg; } } /* * Grab the allprison lock before letting modules check their * parameters. 
Once we have it, do not let go so we'll have a * consistent view of the OSD list. */ sx_xlock(&allprison_lock); error = osd_jail_call(NULL, PR_METHOD_CHECK, opts); if (error) goto done_unlock_list; /* By now, all parameters should have been noted. */ TAILQ_FOREACH(opt, opts, link) { if (!opt->seen && strcmp(opt->name, "errmsg")) { error = EINVAL; vfs_opterror(opts, "unknown parameter: %s", opt->name); goto done_unlock_list; } } /* * See if we are creating a new record or updating an existing one. * This abuses the file error codes ENOENT and EEXIST. */ cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); if (!cuflags) { error = EINVAL; vfs_opterror(opts, "no valid operation (create or update)"); goto done_unlock_list; } pr = NULL; namelc = NULL; if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) { namelc = strrchr(name, '.'); jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10); if (*p != '\0') jid = 0; } if (jid != 0) { /* * See if a requested jid already exists. There is an * information leak here if the jid exists but is not within * the caller's jail hierarchy. Jail creators will get EEXIST * even though they cannot see the jail, and CREATE | UPDATE * will return ENOENT which is not normally a valid error. */ if (jid < 0) { error = EINVAL; vfs_opterror(opts, "negative jid"); goto done_unlock_list; } pr = prison_find(jid); if (pr != NULL) { ppr = pr->pr_parent; /* Create: jid must not exist. */ if (cuflags == JAIL_CREATE) { mtx_unlock(&pr->pr_mtx); error = EEXIST; vfs_opterror(opts, "jail %d already exists", jid); goto done_unlock_list; } if (!prison_ischild(mypr, pr)) { mtx_unlock(&pr->pr_mtx); pr = NULL; } else if (pr->pr_uref == 0) { if (!(flags & JAIL_DYING)) { mtx_unlock(&pr->pr_mtx); error = ENOENT; vfs_opterror(opts, "jail %d is dying", jid); goto done_unlock_list; } else if ((flags & JAIL_ATTACH) || (pr_flags & PR_PERSIST)) { /* * A dying jail might be resurrected * (via attach or persist), but first * it must determine if another jail * has claimed its name. Accomplish * this by implicitly re-setting the * name. */ if (name == NULL) name = prison_name(mypr, pr); } } } if (pr == NULL) { /* Update: jid must exist. */ if (cuflags == JAIL_UPDATE) { error = ENOENT; vfs_opterror(opts, "jail %d not found", jid); goto done_unlock_list; } } } /* * If the caller provided a name, look for a jail by that name. * This has different semantics for creates and updates keyed by jid * (where the name must not already exist in a different jail), * and updates keyed by the name itself (where the name must exist * because that is the jail being updated). */ if (name != NULL) { namelc = strrchr(name, '.'); if (namelc == NULL) namelc = name; else { /* * This is a hierarchical name. Split it into the * parent and child names, and make sure the parent * exists or matches an already found jail. */ *namelc = '\0'; if (pr != NULL) { if (strncmp(name, ppr->pr_name, namelc - name) || ppr->pr_name[namelc - name] != '\0') { mtx_unlock(&pr->pr_mtx); error = EINVAL; vfs_opterror(opts, "cannot change jail's parent"); goto done_unlock_list; } } else { ppr = prison_find_name(mypr, name); if (ppr == NULL) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", name); goto done_unlock_list; } mtx_unlock(&ppr->pr_mtx); } name = ++namelc; } if (name[0] != '\0') { namelen = (ppr == &prison0) ? 
0 : strlen(ppr->pr_name) + 1; name_again: deadpr = NULL; FOREACH_PRISON_CHILD(ppr, tpr) { if (tpr != pr && tpr->pr_ref > 0 && !strcmp(tpr->pr_name + namelen, name)) { if (pr == NULL && cuflags != JAIL_CREATE) { mtx_lock(&tpr->pr_mtx); if (tpr->pr_ref > 0) { /* * Use this jail * for updates. */ if (tpr->pr_uref > 0) { pr = tpr; break; } deadpr = tpr; } mtx_unlock(&tpr->pr_mtx); } else if (tpr->pr_uref > 0) { /* * Create, or update(jid): * name must not exist in an * active sibling jail. */ error = EEXIST; if (pr != NULL) mtx_unlock(&pr->pr_mtx); vfs_opterror(opts, "jail \"%s\" already exists", name); goto done_unlock_list; } } } /* If no active jail is found, use a dying one. */ if (deadpr != NULL && pr == NULL) { if (flags & JAIL_DYING) { mtx_lock(&deadpr->pr_mtx); if (deadpr->pr_ref == 0) { mtx_unlock(&deadpr->pr_mtx); goto name_again; } pr = deadpr; } else if (cuflags == JAIL_UPDATE) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" is dying", name); goto done_unlock_list; } } /* Update: name must exist if no jid. */ else if (cuflags == JAIL_UPDATE && pr == NULL) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", name); goto done_unlock_list; } } } /* Update: must provide a jid or name. */ else if (cuflags == JAIL_UPDATE && pr == NULL) { error = ENOENT; vfs_opterror(opts, "update specified no jail"); goto done_unlock_list; } /* If there's no prison to update, create a new one and link it in. */ if (pr == NULL) { for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent) if (tpr->pr_childcount >= tpr->pr_childmax) { error = EPERM; vfs_opterror(opts, "prison limit exceeded"); goto done_unlock_list; } created = 1; mtx_lock(&ppr->pr_mtx); if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) { mtx_unlock(&ppr->pr_mtx); error = ENOENT; vfs_opterror(opts, "parent jail went away!"); goto done_unlock_list; } ppr->pr_ref++; ppr->pr_uref++; mtx_unlock(&ppr->pr_mtx); pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); if (jid == 0) { /* Find the next free jid. */ jid = lastprid + 1; findnext: if (jid == JAIL_MAX) jid = 1; TAILQ_FOREACH(tpr, &allprison, pr_list) { if (tpr->pr_id < jid) continue; if (tpr->pr_id > jid || tpr->pr_ref == 0) { TAILQ_INSERT_BEFORE(tpr, pr, pr_list); break; } if (jid == lastprid) { error = EAGAIN; vfs_opterror(opts, "no available jail IDs"); free(pr, M_PRISON); prison_deref(ppr, PD_DEREF | PD_DEUREF | PD_LIST_XLOCKED); goto done_releroot; } jid++; goto findnext; } lastprid = jid; } else { /* * The jail already has a jid (that did not yet exist), * so just find where to insert it. */ TAILQ_FOREACH(tpr, &allprison, pr_list) if (tpr->pr_id >= jid) { TAILQ_INSERT_BEFORE(tpr, pr, pr_list); break; } } if (tpr == NULL) TAILQ_INSERT_TAIL(&allprison, pr, pr_list); LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) tpr->pr_childcount++; pr->pr_parent = ppr; pr->pr_id = jid; /* Set some default values, and inherit some from the parent. 
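 * (They are filled in just below, after this aside.)
 */

/*
 * Standalone sketch (separate from this file) of the "findnext" jid
 * allocation above: candidate ids are probed from a hint upward,
 * wrapping at the maximum, until an unused one is found.  The kernel
 * folds the probe into a single walk of the id-sorted prison list;
 * this simplified version just rescans per candidate.
 */
static int
next_free_id(const int *ids, int n, int hint, int id_max)
{
	int cand, i, wrapped;

	wrapped = 0;
	for (cand = hint + 1;; cand++) {
		if (cand >= id_max) {
			cand = 1;
			if (wrapped++)
				return (-1);	/* every id is in use */
		}
		for (i = 0; i < n && ids[i] != cand; i++)
			;
		if (i == n)
			return (cand);
	}
}

/*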
*/ if (name == NULL) name = ""; if (path == NULL) { path = "/"; root = mypr->pr_root; vref(root); } strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN); pr->pr_flags |= PR_HOST; #if defined(INET) || defined(INET6) #ifdef VIMAGE if (!(pr_flags & PR_VNET)) #endif { #ifdef INET if (!(ch_flags & PR_IP4_USER)) pr->pr_flags |= PR_IP4 | PR_IP4_USER; else if (!(pr_flags & PR_IP4_USER)) { pr->pr_flags |= ppr->pr_flags & PR_IP4; if (ppr->pr_ip4 != NULL) { pr->pr_ip4s = ppr->pr_ip4s; pr->pr_ip4 = malloc(pr->pr_ip4s * sizeof(struct in_addr), M_PRISON, M_WAITOK); bcopy(ppr->pr_ip4, pr->pr_ip4, pr->pr_ip4s * sizeof(*pr->pr_ip4)); } } #endif #ifdef INET6 if (!(ch_flags & PR_IP6_USER)) pr->pr_flags |= PR_IP6 | PR_IP6_USER; else if (!(pr_flags & PR_IP6_USER)) { pr->pr_flags |= ppr->pr_flags & PR_IP6; if (ppr->pr_ip6 != NULL) { pr->pr_ip6s = ppr->pr_ip6s; pr->pr_ip6 = malloc(pr->pr_ip6s * sizeof(struct in6_addr), M_PRISON, M_WAITOK); bcopy(ppr->pr_ip6, pr->pr_ip6, pr->pr_ip6s * sizeof(*pr->pr_ip6)); } } #endif } #endif /* Source address selection is always on by default. */ pr->pr_flags |= _PR_IP_SADDRSEL; pr->pr_securelevel = ppr->pr_securelevel; pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow; pr->pr_enforce_statfs = jail_default_enforce_statfs; pr->pr_devfs_rsnum = ppr->pr_devfs_rsnum; pr->pr_osreldate = osreldt ? osreldt : ppr->pr_osreldate; if (osrelstr == NULL) strcpy(pr->pr_osrelease, ppr->pr_osrelease); else strcpy(pr->pr_osrelease, osrelstr); LIST_INIT(&pr->pr_children); mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); #ifdef VIMAGE /* Allocate a new vnet if specified. */ pr->pr_vnet = (pr_flags & PR_VNET) ? vnet_alloc() : ppr->pr_vnet; #endif /* * Allocate a dedicated cpuset for each jail. * Unlike other initial settings, this may return an error. */ error = cpuset_create_root(ppr, &pr->pr_cpuset); if (error) { prison_deref(pr, PD_LIST_XLOCKED); goto done_releroot; } mtx_lock(&pr->pr_mtx); /* * New prisons do not yet have a reference, because we do not * want others to see the incomplete prison once the * allprison_lock is downgraded. */ } else { created = 0; /* * Grab a reference for existing prisons, to ensure they * continue to exist for the duration of the call. */ pr->pr_ref++; #if defined(VIMAGE) && (defined(INET) || defined(INET6)) if ((pr->pr_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { error = EINVAL; vfs_opterror(opts, "vnet jails cannot have IP address restrictions"); goto done_deref_locked; } #endif #ifdef INET if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { error = EINVAL; vfs_opterror(opts, "ip4 cannot be changed after creation"); goto done_deref_locked; } #endif #ifdef INET6 if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { error = EINVAL; vfs_opterror(opts, "ip6 cannot be changed after creation"); goto done_deref_locked; } #endif } /* Do final error checking before setting anything.
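 * (Those checks follow after this aside.)
 */

/*
 * The ip4/ip6 immutability tests in the update branch above rely on a
 * small bit trick: ch_flags has a bit set for every flag the caller
 * mentioned, pr_flags carries the requested values, and pr->pr_flags
 * the current ones, so FLAG & ch_flags & (pr_flags ^ pr->pr_flags) is
 * nonzero exactly when the caller asked to change FLAG.  A runnable
 * standalone illustration with a made-up flag:
 */
#include <assert.h>

#define FLAG_X	0x01

static int
tries_to_change(unsigned ch, unsigned req, unsigned cur)
{
	return ((FLAG_X & ch & (req ^ cur)) != 0);
}

int
main(void)
{
	assert(!tries_to_change(0, 0, FLAG_X));		  /* not mentioned */
	assert(!tries_to_change(FLAG_X, FLAG_X, FLAG_X)); /* same value */
	assert(tries_to_change(FLAG_X, 0, FLAG_X));	  /* tries to clear */
	return (0);
}

/*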
*/ if (gotslevel) { if (slevel < ppr->pr_securelevel) { error = EPERM; goto done_deref_locked; } } if (gotchildmax) { if (childmax >= ppr->pr_childmax) { error = EPERM; goto done_deref_locked; } } if (gotenforce) { if (enforce < ppr->pr_enforce_statfs) { error = EPERM; goto done_deref_locked; } } if (gotrsnum) { /* * devfs_rsnum is a uint16_t */ if (rsnum < 0 || rsnum > 65535) { error = EINVAL; goto done_deref_locked; } /* * Nested jails always inherit parent's devfs ruleset */ if (jailed(td->td_ucred)) { if (rsnum > 0 && rsnum != ppr->pr_devfs_rsnum) { error = EPERM; goto done_deref_locked; } else rsnum = ppr->pr_devfs_rsnum; } } #ifdef INET if (ip4s > 0) { if (ppr->pr_flags & PR_IP4) { /* * Make sure the new set of IP addresses is a * subset of the parent's list. Don't worry * about the parent being unlocked, as any * setting is done with allprison_lock held. */ for (ij = 0; ij < ppr->pr_ip4s; ij++) if (ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) break; if (ij == ppr->pr_ip4s) { error = EPERM; goto done_deref_locked; } if (ip4s > 1) { for (ii = ij = 1; ii < ip4s; ii++) { if (ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) continue; for (; ij < ppr->pr_ip4s; ij++) if (ip4[ii].s_addr == ppr->pr_ip4[ij].s_addr) break; if (ij == ppr->pr_ip4s) break; } if (ij == ppr->pr_ip4s) { error = EPERM; goto done_deref_locked; } } } /* * Check for conflicting IP addresses. We permit them * if there is no more than one IP on each jail. If * there is a duplicate on a jail with more than one * IP stop checking and return error. */ tppr = ppr; #ifdef VIMAGE for (; tppr != &prison0; tppr = tppr->pr_parent) if (tppr->pr_flags & PR_VNET) break; #endif FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { if (tpr == pr || #ifdef VIMAGE (tpr != tppr && (tpr->pr_flags & PR_VNET)) || #endif tpr->pr_uref == 0) { descend = 0; continue; } if (!(tpr->pr_flags & PR_IP4_USER)) continue; descend = 0; if (tpr->pr_ip4 == NULL || (ip4s == 1 && tpr->pr_ip4s == 1)) continue; for (ii = 0; ii < ip4s; ii++) { if (_prison_check_ip4(tpr, &ip4[ii]) == 0) { error = EADDRINUSE; vfs_opterror(opts, "IPv4 addresses clash"); goto done_deref_locked; } } } } #endif #ifdef INET6 if (ip6s > 0) { if (ppr->pr_flags & PR_IP6) { /* * Make sure the new set of IP addresses is a * subset of the parent's list. */ for (ij = 0; ij < ppr->pr_ip6s; ij++) if (IN6_ARE_ADDR_EQUAL(&ip6[0], &ppr->pr_ip6[ij])) break; if (ij == ppr->pr_ip6s) { error = EPERM; goto done_deref_locked; } if (ip6s > 1) { for (ii = ij = 1; ii < ip6s; ii++) { if (IN6_ARE_ADDR_EQUAL(&ip6[ii], &ppr->pr_ip6[0])) continue; for (; ij < ppr->pr_ip6s; ij++) if (IN6_ARE_ADDR_EQUAL( &ip6[ii], &ppr->pr_ip6[ij])) break; if (ij == ppr->pr_ip6s) break; } if (ij == ppr->pr_ip6s) { error = EPERM; goto done_deref_locked; } } } /* Check for conflicting IP addresses. */ tppr = ppr; #ifdef VIMAGE for (; tppr != &prison0; tppr = tppr->pr_parent) if (tppr->pr_flags & PR_VNET) break; #endif FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { if (tpr == pr || #ifdef VIMAGE (tpr != tppr && (tpr->pr_flags & PR_VNET)) || #endif tpr->pr_uref == 0) { descend = 0; continue; } if (!(tpr->pr_flags & PR_IP6_USER)) continue; descend = 0; if (tpr->pr_ip6 == NULL || (ip6s == 1 && tpr->pr_ip6s == 1)) continue; for (ii = 0; ii < ip6s; ii++) { if (_prison_check_ip6(tpr, &ip6[ii]) == 0) { error = EADDRINUSE; vfs_opterror(opts, "IPv6 addresses clash"); goto done_deref_locked; } } } } #endif onamelen = namelen = 0; if (name != NULL) { /* Give a default name of the jid. 
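 * (That happens just below, after this aside.)
 */

/*
 * Standalone sketch of the parent-subset test above: aside from the
 * pinned primary entry, which the kernel special-cases, both address
 * lists are sorted, so a single two-pointer pass decides whether every
 * child address also appears in the parent's list.
 */
#include <stdint.h>

/* a[] and b[] sorted ascending; returns 1 if every a[i] occurs in b[]. */
static int
sorted_subset(const uint32_t *a, int na, const uint32_t *b, int nb)
{
	int i, j;

	for (i = j = 0; i < na; i++) {
		while (j < nb && b[j] < a[i])
			j++;
		if (j == nb || b[j] != a[i])
			return (0);
	}
	return (1);
}

/*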
*/ if (name[0] == '\0') snprintf(name = numbuf, sizeof(numbuf), "%d", jid); else if (*namelc == '0' || (strtoul(namelc, &p, 10) != jid && *p == '\0')) { error = EINVAL; vfs_opterror(opts, "name cannot be numeric (unless it is the jid)"); goto done_deref_locked; } /* * Make sure the name isn't too long for the prison or its * children. */ onamelen = strlen(pr->pr_name); namelen = strlen(name); if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) { error = ENAMETOOLONG; goto done_deref_locked; } FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { if (strlen(tpr->pr_name) + (namelen - onamelen) >= sizeof(pr->pr_name)) { error = ENAMETOOLONG; goto done_deref_locked; } } } if (pr_allow & ~ppr->pr_allow) { error = EPERM; goto done_deref_locked; } /* Set the parameters of the prison. */ #ifdef INET redo_ip4 = 0; if (pr_flags & PR_IP4_USER) { pr->pr_flags |= PR_IP4; free(pr->pr_ip4, M_PRISON); pr->pr_ip4s = ip4s; pr->pr_ip4 = ip4; ip4 = NULL; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (prison_restrict_ip4(tpr, NULL)) { redo_ip4 = 1; descend = 0; } } } #endif #ifdef INET6 redo_ip6 = 0; if (pr_flags & PR_IP6_USER) { pr->pr_flags |= PR_IP6; free(pr->pr_ip6, M_PRISON); pr->pr_ip6s = ip6s; pr->pr_ip6 = ip6; ip6 = NULL; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (prison_restrict_ip6(tpr, NULL)) { redo_ip6 = 1; descend = 0; } } } #endif if (gotslevel) { pr->pr_securelevel = slevel; /* Set all child jails to be at least this level. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) if (tpr->pr_securelevel < slevel) tpr->pr_securelevel = slevel; } if (gotchildmax) { pr->pr_childmax = childmax; /* Set all child jails to under this limit. */ FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level) if (tpr->pr_childmax > childmax - level) tpr->pr_childmax = childmax > level ? childmax - level : 0; } if (gotenforce) { pr->pr_enforce_statfs = enforce; /* Pass this restriction on to the children. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) if (tpr->pr_enforce_statfs < enforce) tpr->pr_enforce_statfs = enforce; } if (gotrsnum) { pr->pr_devfs_rsnum = rsnum; /* Pass this restriction on to the children. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) tpr->pr_devfs_rsnum = rsnum; } if (name != NULL) { if (ppr == &prison0) strlcpy(pr->pr_name, name, sizeof(pr->pr_name)); else snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s", ppr->pr_name, name); /* Change this component of child names. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen, strlen(tpr->pr_name + onamelen) + 1); bcopy(pr->pr_name, tpr->pr_name, namelen); } } if (path != NULL) { /* Try to keep a real-rooted full pathname. */ if (fullpath_disabled && path[0] == '/' && strcmp(mypr->pr_path, "/")) snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s", mypr->pr_path, path); else strlcpy(pr->pr_path, path, sizeof(pr->pr_path)); pr->pr_root = root; } if (PR_HOST & ch_flags & ~pr_flags) { if (pr->pr_flags & PR_HOST) { /* * Copy the parent's host info. As with pr_ip4 above, * the lack of a lock on the parent is not a problem; * it is always set with allprison_lock at least * shared, and is held exclusively here. 
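 */

/*
 * Standalone userland sketch of the descendant rename above: every
 * child's pr_name begins with its ancestor's full name, so renaming the
 * ancestor means shifting the tail over and splicing the new prefix in.
 * Userland needs memmove() for the overlapping copy; the kernel's
 * bcopy() already handles overlap.  The caller guarantees the buffer is
 * large enough, as the ENAMETOOLONG checks above do.
 */
#include <stdio.h>
#include <string.h>

static void
splice_prefix(char *name, size_t oldlen, const char *newpfx)
{
	size_t newlen = strlen(newpfx);

	memmove(name + newlen, name + oldlen, strlen(name + oldlen) + 1);
	memcpy(name, newpfx, newlen);
}

int
main(void)
{
	char buf[64] = "foo.old.child";

	splice_prefix(buf, strlen("foo.old"), "foo.newname");
	printf("%s\n", buf);		/* prints "foo.newname.child" */
	return (0);
}

/*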
*/ strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname, sizeof(pr->pr_hostname)); strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname, sizeof(pr->pr_domainname)); strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid, sizeof(pr->pr_hostuuid)); pr->pr_hostid = pr->pr_parent->pr_hostid; } } else if (host != NULL || domain != NULL || uuid != NULL || gothid) { /* Set this prison, and any descendants without PR_HOST. */ if (host != NULL) strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname)); if (domain != NULL) strlcpy(pr->pr_domainname, domain, sizeof(pr->pr_domainname)); if (uuid != NULL) strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid)); if (gothid) pr->pr_hostid = hid; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { if (tpr->pr_flags & PR_HOST) descend = 0; else { if (host != NULL) strlcpy(tpr->pr_hostname, pr->pr_hostname, sizeof(tpr->pr_hostname)); if (domain != NULL) strlcpy(tpr->pr_domainname, pr->pr_domainname, sizeof(tpr->pr_domainname)); if (uuid != NULL) strlcpy(tpr->pr_hostuuid, pr->pr_hostuuid, sizeof(tpr->pr_hostuuid)); if (gothid) tpr->pr_hostid = hid; } } } if ((tallow = ch_allow & ~pr_allow)) { /* Clear allow bits in all children. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) tpr->pr_allow &= ~tallow; } pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow; /* * Persistent prisons get an extra reference, and prisons losing their * persist flag lose that reference. Only do this for existing prisons * for now, so new ones will remain unseen until after the module * handlers have completed. */ if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { if (pr_flags & PR_PERSIST) { pr->pr_ref++; pr->pr_uref++; } else { pr->pr_ref--; pr->pr_uref--; } } pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; mtx_unlock(&pr->pr_mtx); #ifdef RACCT if (racct_enable && created) prison_racct_attach(pr); #endif /* Locks may have prevented a complete restriction of child IP * addresses. If so, allocate some more memory and try again. */ #ifdef INET while (redo_ip4) { ip4s = pr->pr_ip4s; ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); mtx_lock(&pr->pr_mtx); redo_ip4 = 0; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (prison_restrict_ip4(tpr, ip4)) { if (ip4 != NULL) ip4 = NULL; else redo_ip4 = 1; } } mtx_unlock(&pr->pr_mtx); } #endif #ifdef INET6 while (redo_ip6) { ip6s = pr->pr_ip6s; ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); mtx_lock(&pr->pr_mtx); redo_ip6 = 0; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (prison_restrict_ip6(tpr, ip6)) { if (ip6 != NULL) ip6 = NULL; else redo_ip6 = 1; } } mtx_unlock(&pr->pr_mtx); } #endif /* Let the modules do their work. */ sx_downgrade(&allprison_lock); if (created) { error = osd_jail_call(pr, PR_METHOD_CREATE, opts); if (error) { prison_deref(pr, PD_LIST_SLOCKED); goto done_errmsg; } } error = osd_jail_call(pr, PR_METHOD_SET, opts); if (error) { prison_deref(pr, created ? PD_LIST_SLOCKED : PD_DEREF | PD_LIST_SLOCKED); goto done_errmsg; } /* Attach this process to the prison if requested. 
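 * (That attach follows after this aside.)
 */

/*
 * The redo_ip4/redo_ip6 loops above follow a common kernel shape: work
 * under a mutex may need memory, but a sleeping allocation is forbidden
 * while the lock is held, so a failed in-line non-sleeping attempt only
 * sets a "redo" flag, and the buffer is allocated with the lock dropped
 * before retrying.  A userland stand-in for the shape (names
 * illustrative; malloc() plays the role of the sleeping M_WAITOK
 * allocation, and error handling is elided):
 */
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

static void
update_with_retry(size_t need, void (*consume)(void *))
{
	void *buf = NULL;
	int redo;

	do {
		redo = 0;
		pthread_mutex_lock(&m);
		if (buf == NULL)
			redo = 1;	/* must not allocate while locked */
		else
			consume(buf);	/* consume() takes ownership */
		pthread_mutex_unlock(&m);
		if (redo)
			buf = malloc(need);	/* may block; lock dropped */
	} while (redo);
}

/*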
*/ if (flags & JAIL_ATTACH) { mtx_lock(&pr->pr_mtx); error = do_jail_attach(td, pr); if (error) { vfs_opterror(opts, "attach failed"); if (!created) prison_deref(pr, PD_DEREF); goto done_errmsg; } } #ifdef RACCT if (racct_enable && !created) { if (!(flags & JAIL_ATTACH)) sx_sunlock(&allprison_lock); prison_racct_modify(pr); if (!(flags & JAIL_ATTACH)) sx_slock(&allprison_lock); } #endif td->td_retval[0] = pr->pr_id; /* * Now that it is all there, drop the temporary reference from existing * prisons. Or add a reference to newly created persistent prisons * (which was not done earlier so that the prison would not be publicly * visible). */ if (!created) { prison_deref(pr, (flags & JAIL_ATTACH) ? PD_DEREF : PD_DEREF | PD_LIST_SLOCKED); } else { if (pr_flags & PR_PERSIST) { mtx_lock(&pr->pr_mtx); pr->pr_ref++; pr->pr_uref++; mtx_unlock(&pr->pr_mtx); } if (!(flags & JAIL_ATTACH)) sx_sunlock(&allprison_lock); } goto done_errmsg; done_deref_locked: prison_deref(pr, created ? PD_LOCKED | PD_LIST_XLOCKED : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); goto done_releroot; done_unlock_list: sx_xunlock(&allprison_lock); done_releroot: if (root != NULL) vrele(root); done_errmsg: if (error) { vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); if (errmsg_len > 0) { errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1; if (errmsg_pos > 0) { if (optuio->uio_segflg == UIO_SYSSPACE) bcopy(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); else copyout(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); } } } done_free: #ifdef INET free(ip4, M_PRISON); #endif #ifdef INET6 free(ip6, M_PRISON); #endif if (g_path != NULL) free(g_path, M_TEMP); vfs_freeopts(opts); return (error); } /* * struct jail_get_args { * struct iovec *iovp; * unsigned int iovcnt; * int flags; * }; */ int sys_jail_get(struct thread *td, struct jail_get_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_get(td, auio, uap->flags); if (error == 0) error = copyout(auio->uio_iov, uap->iovp, uap->iovcnt * sizeof (struct iovec)); free(auio, M_IOV); return (error); } int kern_jail_get(struct thread *td, struct uio *optuio, int flags) { struct prison *pr, *mypr; struct vfsopt *opt; struct vfsoptlist *opts; char *errmsg, *name; int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos; if (flags & ~JAIL_GET_MASK) return (EINVAL); /* Get the parameter list. */ error = vfs_buildopts(optuio, &opts); if (error) return (error); errmsg_pos = vfs_getopt_pos(opts, "errmsg"); mypr = td->td_ucred->cr_prison; /* * Find the prison specified by one of: lastjid, jid, name. 
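 */

/*
 * A userland sketch of how "lastjid" is meant to be driven: feeding the
 * previously returned jid back in walks every jail visible to the
 * caller, one jail_get(2) call each.  (Error handling trimmed; see
 * jail(8) and libjail for the full treatment.)
 */
#include <sys/param.h>
#include <sys/uio.h>
#include <sys/jail.h>
#include <stdio.h>

int
main(void)
{
	char name[MAXHOSTNAMELEN];
	struct iovec iov[4];
	int jid, lastjid = 0;

	for (;;) {
		iov[0].iov_base = "lastjid";
		iov[0].iov_len = sizeof("lastjid");
		iov[1].iov_base = &lastjid;
		iov[1].iov_len = sizeof(lastjid);
		iov[2].iov_base = "name";
		iov[2].iov_len = sizeof("name");
		iov[3].iov_base = name;
		iov[3].iov_len = sizeof(name);
		jid = jail_get(iov, 4, 0);
		if (jid < 0)
			break;		/* ENOENT: no jail after lastjid */
		printf("%d\t%s\n", jid, name);
		lastjid = jid;
	}
	return (0);
}

/*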
*/ sx_slock(&allprison_lock); error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); if (error == 0) { TAILQ_FOREACH(pr, &allprison, pr_list) { if (pr->pr_id > jid && prison_ischild(mypr, pr)) { mtx_lock(&pr->pr_mtx); if (pr->pr_ref > 0 && (pr->pr_uref > 0 || (flags & JAIL_DYING))) break; mtx_unlock(&pr->pr_mtx); } } if (pr != NULL) goto found_prison; error = ENOENT; vfs_opterror(opts, "no jail after %d", jid); goto done_unlock_list; } else if (error != ENOENT) goto done_unlock_list; error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); if (error == 0) { if (jid != 0) { pr = prison_find_child(mypr, jid); if (pr != NULL) { if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { mtx_unlock(&pr->pr_mtx); error = ENOENT; vfs_opterror(opts, "jail %d is dying", jid); goto done_unlock_list; } goto found_prison; } error = ENOENT; vfs_opterror(opts, "jail %d not found", jid); goto done_unlock_list; } } else if (error != ENOENT) goto done_unlock_list; error = vfs_getopt(opts, "name", (void **)&name, &len); if (error == 0) { if (len == 0 || name[len - 1] != '\0') { error = EINVAL; goto done_unlock_list; } pr = prison_find_name(mypr, name); if (pr != NULL) { if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { mtx_unlock(&pr->pr_mtx); error = ENOENT; vfs_opterror(opts, "jail \"%s\" is dying", name); goto done_unlock_list; } goto found_prison; } error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", name); goto done_unlock_list; } else if (error != ENOENT) goto done_unlock_list; vfs_opterror(opts, "no jail specified"); error = ENOENT; goto done_unlock_list; found_prison: /* Get the parameters of the prison. */ pr->pr_ref++; locked = PD_LOCKED; td->td_retval[0] = pr->pr_id; error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id)); if (error != 0 && error != ENOENT) goto done_deref; i = (pr->pr_parent == mypr) ? 
0 : pr->pr_parent->pr_id; error = vfs_setopt(opts, "parent", &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopts(opts, "name", prison_name(mypr, pr)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id, sizeof(pr->pr_cpuset->cs_id)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopts(opts, "path", prison_path(mypr, pr)); if (error != 0 && error != ENOENT) goto done_deref; #ifdef INET error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4, pr->pr_ip4s * sizeof(*pr->pr_ip4)); if (error != 0 && error != ENOENT) goto done_deref; #endif #ifdef INET6 error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6, pr->pr_ip6s * sizeof(*pr->pr_ip6)); if (error != 0 && error != ENOENT) goto done_deref; #endif error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel, sizeof(pr->pr_securelevel)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopt(opts, "children.cur", &pr->pr_childcount, sizeof(pr->pr_childcount)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopt(opts, "children.max", &pr->pr_childmax, sizeof(pr->pr_childmax)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopts(opts, "host.hostname", pr->pr_hostname); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopts(opts, "host.domainname", pr->pr_domainname); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid); if (error != 0 && error != ENOENT) goto done_deref; #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { uint32_t hid32 = pr->pr_hostid; error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32)); } else #endif error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid, sizeof(pr->pr_hostid)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs, sizeof(pr->pr_enforce_statfs)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopt(opts, "devfs_ruleset", &pr->pr_devfs_rsnum, sizeof(pr->pr_devfs_rsnum)); if (error != 0 && error != ENOENT) goto done_deref; for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); fi++) { if (pr_flag_names[fi] == NULL) continue; i = (pr->pr_flags & (1 << fi)) ? 1 : 0; error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; i = !i; error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; } for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); fi++) { i = pr->pr_flags & (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new); i = pr_flag_jailsys[fi].disable && (i == pr_flag_jailsys[fi].disable) ? JAIL_SYS_DISABLE : (i == pr_flag_jailsys[fi].new) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; error = vfs_setopt(opts, pr_flag_jailsys[fi].name, &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; } for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); fi++) { if (pr_allow_names[fi] == NULL) continue; i = (pr->pr_allow & (1 << fi)) ? 
1 : 0; error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; i = !i; error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; } i = (pr->pr_uref == 0); error = vfs_setopt(opts, "dying", &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; i = !i; error = vfs_setopt(opts, "nodying", &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopt(opts, "osreldate", &pr->pr_osreldate, sizeof(pr->pr_osreldate)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopts(opts, "osrelease", pr->pr_osrelease); if (error != 0 && error != ENOENT) goto done_deref; /* Get the module parameters. */ mtx_unlock(&pr->pr_mtx); locked = 0; error = osd_jail_call(pr, PR_METHOD_GET, opts); if (error) goto done_deref; prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED); /* By now, all parameters should have been noted. */ TAILQ_FOREACH(opt, opts, link) { if (!opt->seen && strcmp(opt->name, "errmsg")) { error = EINVAL; vfs_opterror(opts, "unknown parameter: %s", opt->name); goto done_errmsg; } } /* Write the fetched parameters back to userspace. */ error = 0; TAILQ_FOREACH(opt, opts, link) { if (opt->pos >= 0 && opt->pos != errmsg_pos) { pos = 2 * opt->pos + 1; optuio->uio_iov[pos].iov_len = opt->len; if (opt->value != NULL) { if (optuio->uio_segflg == UIO_SYSSPACE) { bcopy(opt->value, optuio->uio_iov[pos].iov_base, opt->len); } else { error = copyout(opt->value, optuio->uio_iov[pos].iov_base, opt->len); if (error) break; } } } } goto done_errmsg; done_deref: prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED); goto done_errmsg; done_unlock_list: sx_sunlock(&allprison_lock); done_errmsg: if (error && errmsg_pos >= 0) { vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); errmsg_pos = 2 * errmsg_pos + 1; if (errmsg_len > 0) { if (optuio->uio_segflg == UIO_SYSSPACE) bcopy(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); else copyout(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); } } vfs_freeopts(opts); return (error); } /* * struct jail_remove_args { * int jid; * }; */ int sys_jail_remove(struct thread *td, struct jail_remove_args *uap) { struct prison *pr, *cpr, *lpr, *tpr; int descend, error; error = priv_check(td, PRIV_JAIL_REMOVE); if (error) return (error); sx_xlock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); if (pr == NULL) { sx_xunlock(&allprison_lock); return (EINVAL); } /* Remove all descendants of this prison, then remove this prison. */ pr->pr_ref++; pr->pr_flags |= PR_REMOVE; if (!LIST_EMPTY(&pr->pr_children)) { mtx_unlock(&pr->pr_mtx); lpr = NULL; FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { mtx_lock(&cpr->pr_mtx); if (cpr->pr_ref > 0) { tpr = cpr; cpr->pr_ref++; cpr->pr_flags |= PR_REMOVE; } else { /* Already removed - do not do it again. */ tpr = NULL; } mtx_unlock(&cpr->pr_mtx); if (lpr != NULL) { mtx_lock(&lpr->pr_mtx); prison_remove_one(lpr); sx_xlock(&allprison_lock); } lpr = tpr; } if (lpr != NULL) { mtx_lock(&lpr->pr_mtx); prison_remove_one(lpr); sx_xlock(&allprison_lock); } mtx_lock(&pr->pr_mtx); } prison_remove_one(pr); return (0); } static void prison_remove_one(struct prison *pr) { struct proc *p; int deuref; /* If the prison was persistent, it is not anymore. */ deuref = 0; if (pr->pr_flags & PR_PERSIST) { pr->pr_ref--; deuref = PD_DEUREF; pr->pr_flags &= ~PR_PERSIST; } /* * jail_remove added a reference. If that's the only one, remove * the prison now. 
*/ KASSERT(pr->pr_ref > 0, ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id)); if (pr->pr_ref == 1) { prison_deref(pr, deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); return; } mtx_unlock(&pr->pr_mtx); sx_xunlock(&allprison_lock); /* * Kill all processes unfortunate enough to be attached to this prison. */ sx_slock(&allproc_lock); LIST_FOREACH(p, &allproc, p_list) { PROC_LOCK(p); if (p->p_state != PRS_NEW && p->p_ucred && p->p_ucred->cr_prison == pr) kern_psignal(p, SIGKILL); PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); /* Remove the temporary reference added by jail_remove. */ prison_deref(pr, deuref | PD_DEREF); } /* * struct jail_attach_args { * int jid; * }; */ int sys_jail_attach(struct thread *td, struct jail_attach_args *uap) { struct prison *pr; int error; error = priv_check(td, PRIV_JAIL_ATTACH); if (error) return (error); sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); if (pr == NULL) { sx_sunlock(&allprison_lock); return (EINVAL); } /* * Do not allow a process to attach to a prison that is not * considered to be "alive". */ if (pr->pr_uref == 0) { mtx_unlock(&pr->pr_mtx); sx_sunlock(&allprison_lock); return (EINVAL); } return (do_jail_attach(td, pr)); } static int do_jail_attach(struct thread *td, struct prison *pr) { struct prison *ppr; struct proc *p; struct ucred *newcred, *oldcred; int error; /* * XXX: Note that there is a slight race here if two threads * in the same privileged process attempt to attach to two * different jails at the same time. It is important for * user processes not to do this, or they might end up with * a process root from one prison, but attached to the jail * of another. */ pr->pr_ref++; pr->pr_uref++; mtx_unlock(&pr->pr_mtx); /* Let modules do whatever they need to prepare for attaching. */ error = osd_jail_call(pr, PR_METHOD_ATTACH, td); if (error) { prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED); return (error); } sx_sunlock(&allprison_lock); /* * Reparent the newly attached process to this jail. */ ppr = td->td_ucred->cr_prison; p = td->td_proc; error = cpuset_setproc_update_set(p, pr->pr_cpuset); if (error) goto e_revert_osd; vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); if ((error = change_dir(pr->pr_root, td)) != 0) goto e_unlock; #ifdef MAC if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) goto e_unlock; #endif VOP_UNLOCK(pr->pr_root, 0); if ((error = pwd_chroot(td, pr->pr_root))) goto e_revert_osd; newcred = crget(); PROC_LOCK(p); oldcred = p->p_ucred; setsugid(p); crcopy(newcred, oldcred); newcred->cr_prison = pr; proc_set_cred(p, newcred); PROC_UNLOCK(p); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); #endif crfree(oldcred); prison_deref(ppr, PD_DEREF | PD_DEUREF); return (0); e_unlock: VOP_UNLOCK(pr->pr_root, 0); e_revert_osd: /* Tell modules this thread is still in its old jail after all. */ (void)osd_jail_call(ppr, PR_METHOD_ATTACH, td); prison_deref(pr, PD_DEREF | PD_DEUREF); return (error); } /* * Returns a locked prison instance, or NULL on failure. */ struct prison * prison_find(int prid) { struct prison *pr; sx_assert(&allprison_lock, SX_LOCKED); TAILQ_FOREACH(pr, &allprison, pr_list) { if (pr->pr_id == prid) { mtx_lock(&pr->pr_mtx); if (pr->pr_ref > 0) return (pr); mtx_unlock(&pr->pr_mtx); } } return (NULL); } /* * Find a prison that is a descendant of mypr. Returns a locked prison or NULL. 
*/ struct prison * prison_find_child(struct prison *mypr, int prid) { struct prison *pr; int descend; sx_assert(&allprison_lock, SX_LOCKED); FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { if (pr->pr_id == prid) { mtx_lock(&pr->pr_mtx); if (pr->pr_ref > 0) return (pr); mtx_unlock(&pr->pr_mtx); } } return (NULL); } /* * Look for the name relative to mypr. Returns a locked prison or NULL. */ struct prison * prison_find_name(struct prison *mypr, const char *name) { struct prison *pr, *deadpr; size_t mylen; int descend; sx_assert(&allprison_lock, SX_LOCKED); mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1; again: deadpr = NULL; FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { if (!strcmp(pr->pr_name + mylen, name)) { mtx_lock(&pr->pr_mtx); if (pr->pr_ref > 0) { if (pr->pr_uref > 0) return (pr); deadpr = pr; } mtx_unlock(&pr->pr_mtx); } } /* There was no valid prison - perhaps there was a dying one. */ if (deadpr != NULL) { mtx_lock(&deadpr->pr_mtx); if (deadpr->pr_ref == 0) { mtx_unlock(&deadpr->pr_mtx); goto again; } } return (deadpr); } /* * See if a prison has the specific flag set. */ int prison_flag(struct ucred *cred, unsigned flag) { /* This is an atomic read, so no locking is necessary. */ return (cred->cr_prison->pr_flags & flag); } int prison_allow(struct ucred *cred, unsigned flag) { /* This is an atomic read, so no locking is necessary. */ return (cred->cr_prison->pr_allow & flag); } /* * Remove a prison reference. If that was the last reference, remove the * prison itself - but not in this context in case there are locks held. */ void prison_free_locked(struct prison *pr) { mtx_assert(&pr->pr_mtx, MA_OWNED); pr->pr_ref--; if (pr->pr_ref == 0) { mtx_unlock(&pr->pr_mtx); TASK_INIT(&pr->pr_task, 0, prison_complete, pr); taskqueue_enqueue(taskqueue_thread, &pr->pr_task); return; } mtx_unlock(&pr->pr_mtx); } void prison_free(struct prison *pr) { mtx_lock(&pr->pr_mtx); prison_free_locked(pr); } static void prison_complete(void *context, int pending) { prison_deref((struct prison *)context, 0); } /* * Remove a prison reference (usually). This internal version assumes no * mutexes are held, except perhaps the prison itself. If there are no more * references, release and delist the prison. On completion, the prison lock * and the allprison lock are both unlocked. */ static void prison_deref(struct prison *pr, int flags) { struct prison *ppr, *tpr; if (!(flags & PD_LOCKED)) mtx_lock(&pr->pr_mtx); for (;;) { if (flags & PD_DEUREF) { pr->pr_uref--; KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0")); } if (flags & PD_DEREF) pr->pr_ref--; /* If the prison still has references, nothing else to do. 
*/ if (pr->pr_ref > 0) { mtx_unlock(&pr->pr_mtx); if (flags & PD_LIST_SLOCKED) sx_sunlock(&allprison_lock); else if (flags & PD_LIST_XLOCKED) sx_xunlock(&allprison_lock); return; } mtx_unlock(&pr->pr_mtx); if (flags & PD_LIST_SLOCKED) { if (!sx_try_upgrade(&allprison_lock)) { sx_sunlock(&allprison_lock); sx_xlock(&allprison_lock); } } else if (!(flags & PD_LIST_XLOCKED)) sx_xlock(&allprison_lock); TAILQ_REMOVE(&allprison, pr, pr_list); LIST_REMOVE(pr, pr_sibling); ppr = pr->pr_parent; for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) tpr->pr_childcount--; sx_xunlock(&allprison_lock); #ifdef VIMAGE if (pr->pr_vnet != ppr->pr_vnet) vnet_destroy(pr->pr_vnet); #endif if (pr->pr_root != NULL) vrele(pr->pr_root); mtx_destroy(&pr->pr_mtx); #ifdef INET free(pr->pr_ip4, M_PRISON); #endif #ifdef INET6 free(pr->pr_ip6, M_PRISON); #endif if (pr->pr_cpuset != NULL) cpuset_rel(pr->pr_cpuset); osd_jail_exit(pr); #ifdef RACCT if (racct_enable) prison_racct_detach(pr); #endif free(pr, M_PRISON); /* Removing a prison frees a reference on its parent. */ pr = ppr; mtx_lock(&pr->pr_mtx); flags = PD_DEREF | PD_DEUREF; } } void prison_hold_locked(struct prison *pr) { mtx_assert(&pr->pr_mtx, MA_OWNED); KASSERT(pr->pr_ref > 0, ("Trying to hold dead prison (jid=%d).", pr->pr_id)); pr->pr_ref++; } void prison_hold(struct prison *pr) { mtx_lock(&pr->pr_mtx); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); } void prison_proc_hold(struct prison *pr) { mtx_lock(&pr->pr_mtx); KASSERT(pr->pr_uref > 0, ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id)); pr->pr_uref++; mtx_unlock(&pr->pr_mtx); } void prison_proc_free(struct prison *pr) { mtx_lock(&pr->pr_mtx); KASSERT(pr->pr_uref > 0, ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); prison_deref(pr, PD_DEUREF | PD_LOCKED); } #ifdef INET /* * Restrict a prison's IP address list with its parent's, possibly replacing * it. Return true if the replacement buffer was used (or would have been). */ static int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4) { int ii, ij, used; struct prison *ppr; ppr = pr->pr_parent; if (!(pr->pr_flags & PR_IP4_USER)) { /* This has no user settings, so just copy the parent's list. */ if (pr->pr_ip4s < ppr->pr_ip4s) { /* * There's no room for the parent's list. Use the * new list buffer, which is assumed to be big enough * (if it was passed). If there's no buffer, try to * allocate one. */ used = 1; if (newip4 == NULL) { newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4), M_PRISON, M_NOWAIT); if (newip4 != NULL) used = 0; } if (newip4 != NULL) { bcopy(ppr->pr_ip4, newip4, ppr->pr_ip4s * sizeof(*newip4)); free(pr->pr_ip4, M_PRISON); pr->pr_ip4 = newip4; pr->pr_ip4s = ppr->pr_ip4s; } return (used); } pr->pr_ip4s = ppr->pr_ip4s; if (pr->pr_ip4s > 0) bcopy(ppr->pr_ip4, pr->pr_ip4, pr->pr_ip4s * sizeof(*newip4)); else if (pr->pr_ip4 != NULL) { free(pr->pr_ip4, M_PRISON); pr->pr_ip4 = NULL; } } else if (pr->pr_ip4s > 0) { /* Remove addresses that aren't in the parent. */ for (ij = 0; ij < ppr->pr_ip4s; ij++) if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) break; if (ij < ppr->pr_ip4s) ii = 1; else { bcopy(pr->pr_ip4 + 1, pr->pr_ip4, --pr->pr_ip4s * sizeof(*pr->pr_ip4)); ii = 0; } for (ij = 1; ii < pr->pr_ip4s; ) { if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) { ii++; continue; } switch (ij >= ppr->pr_ip4s ? 
-1 : qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) { case -1: bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii, (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4)); break; case 0: ii++; ij++; break; case 1: ij++; break; } } if (pr->pr_ip4s == 0) { free(pr->pr_ip4, M_PRISON); pr->pr_ip4 = NULL; } } return (0); } /* * Pass back primary IPv4 address of this jail. * * If not restricted return success but do not alter the address. Caller has * to make sure to initialize it correctly (e.g. INADDR_ANY). * * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. * Address returned in NBO. */ int prison_get_ip4(struct ucred *cred, struct in_addr *ia) { struct prison *pr; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP4)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP4)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip4 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } ia->s_addr = pr->pr_ip4[0].s_addr; mtx_unlock(&pr->pr_mtx); return (0); } /* * Return 1 if we should do proper source address selection or are not jailed. * We will return 0 if we should bypass source address selection in favour * of the primary jail IPv4 address. Only in this case *ia will be updated and * returned in NBO. * Return EAFNOSUPPORT, in case this jail does not allow IPv4. */ int prison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia) { struct prison *pr; struct in_addr lia; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); if (!jailed(cred)) return (1); pr = cred->cr_prison; if (pr->pr_flags & PR_IP4_SADDRSEL) return (1); lia.s_addr = INADDR_ANY; error = prison_get_ip4(cred, &lia); if (error) return (error); if (lia.s_addr == INADDR_ANY) return (1); ia->s_addr = lia.s_addr; return (0); } /* * Return true if pr1 and pr2 have the same IPv4 address restrictions. */ int prison_equal_ip4(struct prison *pr1, struct prison *pr2) { if (pr1 == pr2) return (1); /* * No need to lock since the PR_IP4_USER flag can't be altered for * existing prisons. */ while (pr1 != &prison0 && #ifdef VIMAGE !(pr1->pr_flags & PR_VNET) && #endif !(pr1->pr_flags & PR_IP4_USER)) pr1 = pr1->pr_parent; while (pr2 != &prison0 && #ifdef VIMAGE !(pr2->pr_flags & PR_VNET) && #endif !(pr2->pr_flags & PR_IP4_USER)) pr2 = pr2->pr_parent; return (pr1 == pr2); } /* * Make sure our (source) address is set to something meaningful to this * jail. * * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail * doesn't allow IPv4. Address passed in in NBO and returned in NBO. */ int prison_local_ip4(struct ucred *cred, struct in_addr *ia) { struct prison *pr; struct in_addr ia0; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP4)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP4)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip4 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } ia0.s_addr = ntohl(ia->s_addr); if (ia0.s_addr == INADDR_LOOPBACK) { ia->s_addr = pr->pr_ip4[0].s_addr; mtx_unlock(&pr->pr_mtx); return (0); } if (ia0.s_addr == INADDR_ANY) { /* * In case there is only 1 IPv4 address, bind directly. 
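 * (That single-address case is handled just below.)
 */

/*
 * Standalone sketch of the rewriting rules prison_local_ip4() applies
 * (byte-order conversions omitted): loopback maps to the jail's primary
 * address, INADDR_ANY stays wild unless the jail has exactly one
 * address, and anything else must pass the membership check.
 */
#include <sys/types.h>
#include <netinet/in.h>
#include <stdint.h>

/* ip[0] is the jail's primary address; addr in host byte order. */
static uint32_t
jail_localize_v4(const uint32_t *ip, int n, uint32_t addr)
{
	if (addr == INADDR_LOOPBACK)
		return (ip[0]);
	if (addr == INADDR_ANY && n == 1)
		return (ip[0]);
	return (addr);		/* caller still validates membership */
}

/*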
*/ if (pr->pr_ip4s == 1) ia->s_addr = pr->pr_ip4[0].s_addr; mtx_unlock(&pr->pr_mtx); return (0); } error = _prison_check_ip4(pr, ia); mtx_unlock(&pr->pr_mtx); return (error); } /* * Rewrite destination address in case we will connect to loopback address. * * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. * Address passed in in NBO and returned in NBO. */ int prison_remote_ip4(struct ucred *cred, struct in_addr *ia) { struct prison *pr; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP4)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP4)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip4 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } if (ntohl(ia->s_addr) == INADDR_LOOPBACK) { ia->s_addr = pr->pr_ip4[0].s_addr; mtx_unlock(&pr->pr_mtx); return (0); } /* * Return success because nothing had to be changed. */ mtx_unlock(&pr->pr_mtx); return (0); } /* * Check if given address belongs to the jail referenced by cred/prison. * * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail * doesn't allow IPv4. Address passed in in NBO. */ static int _prison_check_ip4(const struct prison *pr, const struct in_addr *ia) { int i, a, z, d; /* * Check the primary IP. */ if (pr->pr_ip4[0].s_addr == ia->s_addr) return (0); /* * All the other IPs are sorted so we can do a binary search. */ a = 0; z = pr->pr_ip4s - 2; while (a <= z) { i = (a + z) / 2; d = qcmp_v4(&pr->pr_ip4[i+1], ia); if (d > 0) z = i - 1; else if (d < 0) a = i + 1; else return (0); } return (EADDRNOTAVAIL); } int prison_check_ip4(const struct ucred *cred, const struct in_addr *ia) { struct prison *pr; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP4)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP4)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip4 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } error = _prison_check_ip4(pr, ia); mtx_unlock(&pr->pr_mtx); return (error); } #endif #ifdef INET6 static int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6) { int ii, ij, used; struct prison *ppr; ppr = pr->pr_parent; if (!(pr->pr_flags & PR_IP6_USER)) { /* This has no user settings, so just copy the parent's list. */ if (pr->pr_ip6s < ppr->pr_ip6s) { /* * There's no room for the parent's list. Use the * new list buffer, which is assumed to be big enough * (if it was passed). If there's no buffer, try to * allocate one. */ used = 1; if (newip6 == NULL) { newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6), M_PRISON, M_NOWAIT); if (newip6 != NULL) used = 0; } if (newip6 != NULL) { bcopy(ppr->pr_ip6, newip6, ppr->pr_ip6s * sizeof(*newip6)); free(pr->pr_ip6, M_PRISON); pr->pr_ip6 = newip6; pr->pr_ip6s = ppr->pr_ip6s; } return (used); } pr->pr_ip6s = ppr->pr_ip6s; if (pr->pr_ip6s > 0) bcopy(ppr->pr_ip6, pr->pr_ip6, pr->pr_ip6s * sizeof(*newip6)); else if (pr->pr_ip6 != NULL) { free(pr->pr_ip6, M_PRISON); pr->pr_ip6 = NULL; } } else if (pr->pr_ip6s > 0) { /* Remove addresses that aren't in the parent. 
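 * (The IPv6 variant of that pruning continues below.)
 */

/*
 * Standalone sketch of the membership test in _prison_check_ip4()
 * above, on plain integers: slot 0 holds the pinned primary address and
 * is tested first; the remaining entries are kept sorted, so a binary
 * search over ip[1..n-1] settles the rest.  Assumes n >= 1.
 */
#include <stdint.h>

/* ip[1..n-1] sorted ascending; returns 1 if x is in the list. */
static int
addr_is_member(const uint32_t *ip, int n, uint32_t x)
{
	int a, i, z;

	if (ip[0] == x)
		return (1);
	a = 0;
	z = n - 2;
	while (a <= z) {
		i = (a + z) / 2;
		if (ip[i + 1] > x)
			z = i - 1;
		else if (ip[i + 1] < x)
			a = i + 1;
		else
			return (1);
	}
	return (0);
}

/*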
*/ for (ij = 0; ij < ppr->pr_ip6s; ij++) if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], &ppr->pr_ip6[ij])) break; if (ij < ppr->pr_ip6s) ii = 1; else { bcopy(pr->pr_ip6 + 1, pr->pr_ip6, --pr->pr_ip6s * sizeof(*pr->pr_ip6)); ii = 0; } for (ij = 1; ii < pr->pr_ip6s; ) { if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii], &ppr->pr_ip6[0])) { ii++; continue; } switch (ij >= ppr->pr_ip6s ? -1 : qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) { case -1: bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii, (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6)); break; case 0: ii++; ij++; break; case 1: ij++; break; } } if (pr->pr_ip6s == 0) { free(pr->pr_ip6, M_PRISON); pr->pr_ip6 = NULL; } } return 0; } /* * Pass back primary IPv6 address for this jail. * * If not restricted return success but do not alter the address. Caller has * to make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT). * * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. */ int prison_get_ip6(struct ucred *cred, struct in6_addr *ia6) { struct prison *pr; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP6)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP6)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip6 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); mtx_unlock(&pr->pr_mtx); return (0); } /* * Return 1 if we should do proper source address selection or are not jailed. * We will return 0 if we should bypass source address selection in favour * of the primary jail IPv6 address. Only in this case *ia will be updated and * returned in NBO. * Return EAFNOSUPPORT, in case this jail does not allow IPv6. */ int prison_saddrsel_ip6(struct ucred *cred, struct in6_addr *ia6) { struct prison *pr; struct in6_addr lia6; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); if (!jailed(cred)) return (1); pr = cred->cr_prison; if (pr->pr_flags & PR_IP6_SADDRSEL) return (1); lia6 = in6addr_any; error = prison_get_ip6(cred, &lia6); if (error) return (error); if (IN6_IS_ADDR_UNSPECIFIED(&lia6)) return (1); bcopy(&lia6, ia6, sizeof(struct in6_addr)); return (0); } /* * Return true if pr1 and pr2 have the same IPv6 address restrictions. */ int prison_equal_ip6(struct prison *pr1, struct prison *pr2) { if (pr1 == pr2) return (1); while (pr1 != &prison0 && #ifdef VIMAGE !(pr1->pr_flags & PR_VNET) && #endif !(pr1->pr_flags & PR_IP6_USER)) pr1 = pr1->pr_parent; while (pr2 != &prison0 && #ifdef VIMAGE !(pr2->pr_flags & PR_VNET) && #endif !(pr2->pr_flags & PR_IP6_USER)) pr2 = pr2->pr_parent; return (pr1 == pr2); } /* * Make sure our (source) address is set to something meaningful to this jail. * * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0) * when needed while binding. * * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail * doesn't allow IPv6. 
*/ int prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only) { struct prison *pr; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP6)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP6)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip6 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } if (IN6_IS_ADDR_LOOPBACK(ia6)) { bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); mtx_unlock(&pr->pr_mtx); return (0); } if (IN6_IS_ADDR_UNSPECIFIED(ia6)) { /* * In case there is only 1 IPv6 address, and v6only is true, * then bind directly. */ if (v6only != 0 && pr->pr_ip6s == 1) bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); mtx_unlock(&pr->pr_mtx); return (0); } error = _prison_check_ip6(pr, ia6); mtx_unlock(&pr->pr_mtx); return (error); } /* * Rewrite destination address in case we will connect to loopback address. * * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. */ int prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6) { struct prison *pr; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP6)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP6)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip6 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } if (IN6_IS_ADDR_LOOPBACK(ia6)) { bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); mtx_unlock(&pr->pr_mtx); return (0); } /* * Return success because nothing had to be changed. */ mtx_unlock(&pr->pr_mtx); return (0); } /* * Check if given address belongs to the jail referenced by cred/prison. * * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail * doesn't allow IPv6. */ static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6) { int i, a, z, d; /* * Check the primary IP. */ if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6)) return (0); /* * All the other IPs are sorted so we can do a binary search. */ a = 0; z = pr->pr_ip6s - 2; while (a <= z) { i = (a + z) / 2; d = qcmp_v6(&pr->pr_ip6[i+1], ia6); if (d > 0) z = i - 1; else if (d < 0) a = i + 1; else return (0); } return (EADDRNOTAVAIL); } int prison_check_ip6(struct ucred *cred, struct in6_addr *ia6) { struct prison *pr; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP6)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP6)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip6 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } error = _prison_check_ip6(pr, ia6); mtx_unlock(&pr->pr_mtx); return (error); } #endif /* * Check if a jail supports the given address family. * * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT * if not. */ int prison_check_af(struct ucred *cred, int af) { struct prison *pr; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); pr = cred->cr_prison; #ifdef VIMAGE /* Prisons with their own network stack are not limited. 
*/ if (prison_owns_vnet(cred)) return (0); #endif error = 0; switch (af) { #ifdef INET case AF_INET: if (pr->pr_flags & PR_IP4) { mtx_lock(&pr->pr_mtx); if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL) error = EAFNOSUPPORT; mtx_unlock(&pr->pr_mtx); } break; #endif #ifdef INET6 case AF_INET6: if (pr->pr_flags & PR_IP6) { mtx_lock(&pr->pr_mtx); if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL) error = EAFNOSUPPORT; mtx_unlock(&pr->pr_mtx); } break; #endif case AF_LOCAL: case AF_ROUTE: break; default: if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF)) error = EAFNOSUPPORT; } return (error); } /* * Check if given address belongs to the jail referenced by cred (wrapper to * prison_check_ip[46]). * * Returns 0 if jail doesn't restrict the address family or if address belongs * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if * the jail doesn't allow the address family. IPv4 Address passed in in NBO. */ int prison_if(struct ucred *cred, struct sockaddr *sa) { #ifdef INET struct sockaddr_in *sai; #endif #ifdef INET6 struct sockaddr_in6 *sai6; #endif int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); #ifdef VIMAGE if (prison_owns_vnet(cred)) return (0); #endif error = 0; switch (sa->sa_family) { #ifdef INET case AF_INET: sai = (struct sockaddr_in *)sa; error = prison_check_ip4(cred, &sai->sin_addr); break; #endif #ifdef INET6 case AF_INET6: sai6 = (struct sockaddr_in6 *)sa; error = prison_check_ip6(cred, &sai6->sin6_addr); break; #endif default: if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF)) error = EAFNOSUPPORT; } return (error); } /* * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. */ int prison_check(struct ucred *cred1, struct ucred *cred2) { return ((cred1->cr_prison == cred2->cr_prison || prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH); } /* * Return 1 if p2 is a child of p1, otherwise 0. */ int prison_ischild(struct prison *pr1, struct prison *pr2) { for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent) if (pr1 == pr2) return (1); return (0); } /* * Return 1 if the passed credential is in a jail, otherwise 0. */ int jailed(struct ucred *cred) { return (cred->cr_prison != &prison0); } /* * Return 1 if the passed credential is in a jail and that jail does not * have its own virtual network stack, otherwise 0. */ int jailed_without_vnet(struct ucred *cred) { if (!jailed(cred)) return (0); #ifdef VIMAGE if (prison_owns_vnet(cred)) return (0); #endif return (1); } /* * Return the correct hostname (domainname, et al) for the passed credential. */ void getcredhostname(struct ucred *cred, char *buf, size_t size) { struct prison *pr; /* * A NULL credential can be used to shortcut to the physical * system's hostname. */ pr = (cred != NULL) ? 
cred->cr_prison : &prison0; mtx_lock(&pr->pr_mtx); strlcpy(buf, pr->pr_hostname, size); mtx_unlock(&pr->pr_mtx); } void getcreddomainname(struct ucred *cred, char *buf, size_t size) { mtx_lock(&cred->cr_prison->pr_mtx); strlcpy(buf, cred->cr_prison->pr_domainname, size); mtx_unlock(&cred->cr_prison->pr_mtx); } void getcredhostuuid(struct ucred *cred, char *buf, size_t size) { mtx_lock(&cred->cr_prison->pr_mtx); strlcpy(buf, cred->cr_prison->pr_hostuuid, size); mtx_unlock(&cred->cr_prison->pr_mtx); } void getcredhostid(struct ucred *cred, unsigned long *hostid) { mtx_lock(&cred->cr_prison->pr_mtx); *hostid = cred->cr_prison->pr_hostid; mtx_unlock(&cred->cr_prison->pr_mtx); } #ifdef VIMAGE /* * Determine whether the prison represented by cred owns * its vnet rather than having it inherited. * * Returns 1 in case the prison owns the vnet, 0 otherwise. */ int prison_owns_vnet(struct ucred *cred) { /* * vnets cannot be added/removed after jail creation, * so no need to lock here. */ return (cred->cr_prison->pr_flags & PR_VNET ? 1 : 0); } #endif /* * Determine whether the subject represented by cred can "see" * status of a mount point. * Returns: 0 for permitted, ENOENT otherwise. * XXX: This function should be called cr_canseemount() and should be * placed in kern_prot.c. */ int prison_canseemount(struct ucred *cred, struct mount *mp) { struct prison *pr; struct statfs *sp; size_t len; pr = cred->cr_prison; if (pr->pr_enforce_statfs == 0) return (0); if (pr->pr_root->v_mount == mp) return (0); if (pr->pr_enforce_statfs == 2) return (ENOENT); /* * If jail's chroot directory is set to "/" we should be able to see * all mount-points from inside a jail. * This is an ugly check, but this is the only situation when jail's * directory ends with '/'. */ if (strcmp(pr->pr_path, "/") == 0) return (0); len = strlen(pr->pr_path); sp = &mp->mnt_stat; if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) return (ENOENT); /* * Be sure that we don't have a situation where jail's root directory * is "/some/path" and mount point is "/some/pathpath". */ if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') return (ENOENT); return (0); } void prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) { char jpath[MAXPATHLEN]; struct prison *pr; size_t len; pr = cred->cr_prison; if (pr->pr_enforce_statfs == 0) return; if (prison_canseemount(cred, mp) != 0) { bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); strlcpy(sp->f_mntonname, "[restricted]", sizeof(sp->f_mntonname)); return; } if (pr->pr_root->v_mount == mp) { /* * Clear current buffer data, so we are sure nothing from * the valid path is left there. */ bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); *sp->f_mntonname = '/'; return; } /* * If jail's chroot directory is set to "/" we should be able to see * all mount-points from inside a jail. */ if (strcmp(pr->pr_path, "/") == 0) return; len = strlen(pr->pr_path); strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); /* * Clear current buffer data, so we are sure nothing from * the valid path is left there. */ bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); if (*jpath == '\0') { /* Should never happen. */ *sp->f_mntonname = '/'; } else { strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); } } /* * Check whether permission for a specific privilege is granted within jail. We * have a specific list of accepted privileges; the rest are denied.
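 * (prison_priv_check() follows after this aside.)
 */

/*
 * Standalone sketch of the containment test in prison_canseemount()
 * above: the jail's path must be a prefix of the mount point *and* end
 * on a component boundary, so that "/some/path" matches "/some/path/x"
 * but not "/some/pathpath".
 */
#include <string.h>

static int
path_contains(const char *root, const char *path)
{
	size_t len;

	if (strcmp(root, "/") == 0)	/* "/" contains everything */
		return (1);
	len = strlen(root);
	if (strncmp(root, path, len) != 0)
		return (0);
	return (path[len] == '\0' || path[len] == '/');
}

/*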
*/ int prison_priv_check(struct ucred *cred, int priv) { if (!jailed(cred)) return (0); #ifdef VIMAGE /* * Privileges specific to prisons with a virtual network stack. * There might be a duplicate entry here in case the privilege * is only granted conditionally in the legacy jail case. */ switch (priv) { #ifdef notyet /* * NFS-specific privileges. */ case PRIV_NFS_DAEMON: case PRIV_NFS_LOCKD: #endif /* * Network stack privileges. */ case PRIV_NET_BRIDGE: case PRIV_NET_GRE: case PRIV_NET_BPF: case PRIV_NET_RAW: /* Dup, cond. in legacy jail case. */ case PRIV_NET_ROUTE: case PRIV_NET_TAP: case PRIV_NET_SETIFMTU: case PRIV_NET_SETIFFLAGS: case PRIV_NET_SETIFCAP: case PRIV_NET_SETIFDESCR: case PRIV_NET_SETIFNAME : case PRIV_NET_SETIFMETRIC: case PRIV_NET_SETIFPHYS: case PRIV_NET_SETIFMAC: case PRIV_NET_ADDMULTI: case PRIV_NET_DELMULTI: case PRIV_NET_HWIOCTL: case PRIV_NET_SETLLADDR: case PRIV_NET_ADDIFGROUP: case PRIV_NET_DELIFGROUP: case PRIV_NET_IFCREATE: case PRIV_NET_IFDESTROY: case PRIV_NET_ADDIFADDR: case PRIV_NET_DELIFADDR: case PRIV_NET_LAGG: case PRIV_NET_GIF: case PRIV_NET_SETIFVNET: case PRIV_NET_SETIFFIB: /* * 802.11-related privileges. */ case PRIV_NET80211_GETKEY: #ifdef notyet case PRIV_NET80211_MANAGE: /* XXX-BZ discuss with sam@ */ #endif #ifdef notyet /* * ATM privileges. */ case PRIV_NETATM_CFG: case PRIV_NETATM_ADD: case PRIV_NETATM_DEL: case PRIV_NETATM_SET: /* * Bluetooth privileges. */ case PRIV_NETBLUETOOTH_RAW: #endif /* * Netgraph and netgraph module privileges. */ case PRIV_NETGRAPH_CONTROL: #ifdef notyet case PRIV_NETGRAPH_TTY: #endif /* * IPv4 and IPv6 privileges. */ case PRIV_NETINET_IPFW: case PRIV_NETINET_DIVERT: case PRIV_NETINET_PF: case PRIV_NETINET_DUMMYNET: case PRIV_NETINET_CARP: case PRIV_NETINET_MROUTE: case PRIV_NETINET_RAW: case PRIV_NETINET_ADDRCTRL6: case PRIV_NETINET_ND6: case PRIV_NETINET_SCOPE6: case PRIV_NETINET_ALIFETIME6: case PRIV_NETINET_IPSEC: case PRIV_NETINET_BINDANY: #ifdef notyet /* * NCP privileges. */ case PRIV_NETNCP: /* * SMB privileges. */ case PRIV_NETSMB: #endif /* * No default: or deny here. * In case of no permit fall through to next switch(). */ if (cred->cr_prison->pr_flags & PR_VNET) return (0); } #endif /* VIMAGE */ switch (priv) { /* * Allow ktrace privileges for root in jail. */ case PRIV_KTRACE: #if 0 /* * Allow jailed processes to configure audit identity and * submit audit records (login, etc). In the future we may * want to further refine the relationship between audit and * jail. */ case PRIV_AUDIT_GETAUDIT: case PRIV_AUDIT_SETAUDIT: case PRIV_AUDIT_SUBMIT: #endif /* * Allow jailed processes to manipulate process UNIX * credentials in any way they see fit. */ case PRIV_CRED_SETUID: case PRIV_CRED_SETEUID: case PRIV_CRED_SETGID: case PRIV_CRED_SETEGID: case PRIV_CRED_SETGROUPS: case PRIV_CRED_SETREUID: case PRIV_CRED_SETREGID: case PRIV_CRED_SETRESUID: case PRIV_CRED_SETRESGID: /* * Jail implements visibility constraints already, so allow * jailed root to override uid/gid-based constraints. */ case PRIV_SEEOTHERGIDS: case PRIV_SEEOTHERUIDS: /* * Jail implements inter-process debugging limits already, so * allow jailed root various debugging privileges. */ case PRIV_DEBUG_DIFFCRED: case PRIV_DEBUG_SUGID: case PRIV_DEBUG_UNPRIV: /* * Allow jail to set various resource limits and login * properties, and for now, exceed process resource limits. */ case PRIV_PROC_LIMIT: case PRIV_PROC_SETLOGIN: case PRIV_PROC_SETRLIMIT: /* * System V and POSIX IPC privileges are granted in jail. 
*/ case PRIV_IPC_READ: case PRIV_IPC_WRITE: case PRIV_IPC_ADMIN: case PRIV_IPC_MSGSIZE: case PRIV_MQ_ADMIN: /* * Jail operations within a jail work on child jails. */ case PRIV_JAIL_ATTACH: case PRIV_JAIL_SET: case PRIV_JAIL_REMOVE: /* * Jail implements its own inter-process limits, so allow * root processes in jail to change scheduling on other * processes in the same jail. Likewise for signalling. */ case PRIV_SCHED_DIFFCRED: case PRIV_SCHED_CPUSET: case PRIV_SIGNAL_DIFFCRED: case PRIV_SIGNAL_SUGID: /* * Allow jailed processes to write to sysctls marked as jail * writable. */ case PRIV_SYSCTL_WRITEJAIL: /* * Allow root in jail to manage a variety of quota * properties. These should likely be conditional on a * configuration option. */ case PRIV_VFS_GETQUOTA: case PRIV_VFS_SETQUOTA: /* * Since Jail relies on chroot() to implement file system * protections, grant many VFS privileges to root in jail. * Be careful to exclude mount-related and NFS-related * privileges. */ case PRIV_VFS_READ: case PRIV_VFS_WRITE: case PRIV_VFS_ADMIN: case PRIV_VFS_EXEC: case PRIV_VFS_LOOKUP: case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ case PRIV_VFS_CHFLAGS_DEV: case PRIV_VFS_CHOWN: case PRIV_VFS_CHROOT: case PRIV_VFS_RETAINSUGID: case PRIV_VFS_FCHROOT: case PRIV_VFS_LINK: case PRIV_VFS_SETGID: case PRIV_VFS_STAT: case PRIV_VFS_STICKYFILE: /* * As in the non-jail case, non-root users are expected to be * able to read kernel/physical memory (provided /dev/[k]mem * exists in the jail and they have permission to access it). */ case PRIV_KMEM_READ: return (0); /* * Depending on the global setting, allow privilege of * setting system flags. */ case PRIV_VFS_SYSFLAGS: if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS) return (0); else return (EPERM); /* * Depending on the global setting, allow privilege of * mounting/unmounting file systems. */ case PRIV_VFS_MOUNT: case PRIV_VFS_UNMOUNT: case PRIV_VFS_MOUNT_NONUSER: case PRIV_VFS_MOUNT_OWNER: if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT && cred->cr_prison->pr_enforce_statfs < 2) return (0); else return (EPERM); /* * Allow jailed root to bind reserved ports and reuse in-use * ports. */ case PRIV_NETINET_RESERVEDPORT: case PRIV_NETINET_REUSEPORT: return (0); /* * Allow jailed root to set certain IPv4/6 (option) headers. */ case PRIV_NETINET_SETHDROPTS: return (0); /* * Conditionally allow creating raw sockets in jail. */ case PRIV_NETINET_RAW: if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS) return (0); else return (EPERM); /* * Since jail implements its own visibility limits on netstat * sysctls, allow getcred. This allows identd to work in * jail. */ case PRIV_NETINET_GETCRED: return (0); /* * Allow jailed root to set loginclass. */ case PRIV_PROC_SETLOGINCLASS: return (0); default: /* * In all remaining cases, deny the privilege request. This * includes almost all network privileges and many system * configuration privileges. */ return (EPERM); } } /* * Return the part of pr2's name that is relative to pr1, or the whole name * if it does not directly follow. */ char * prison_name(struct prison *pr1, struct prison *pr2) { char *name; /* Jails see themselves as "0" (if they see themselves at all). */ if (pr1 == pr2) return "0"; name = pr2->pr_name; if (prison_ischild(pr1, pr2)) { /* * pr1 isn't locked (and allprison_lock may not be either) * so its length can't be counted on. But the number of dots * can be counted on - and counted.
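 *
 * For example (illustrative names): if pr1 is the jail "foo" and pr2
 * is its grandchild "foo.bar.baz", the loop below skips one dot for
 * each ancestor of pr1 below prison0 and returns "bar.baz".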
*/ for (; pr1 != &prison0; pr1 = pr1->pr_parent) name = strchr(name, '.') + 1; } return (name); } /* * Return the part of pr2's path that is relative to pr1, or the whole path * if it does not directly follow. */ static char * prison_path(struct prison *pr1, struct prison *pr2) { char *path1, *path2; int len1; path1 = pr1->pr_path; path2 = pr2->pr_path; if (!strcmp(path1, "/")) return (path2); len1 = strlen(path1); if (strncmp(path1, path2, len1)) return (path2); if (path2[len1] == '\0') return "/"; if (path2[len1] == '/') return (path2 + len1); return (path2); } /* * Jail-related sysctls. */ static SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, "Jails"); static int sysctl_jail_list(SYSCTL_HANDLER_ARGS) { struct xprison *xp; struct prison *pr, *cpr; #ifdef INET struct in_addr *ip4 = NULL; int ip4s = 0; #endif #ifdef INET6 struct in6_addr *ip6 = NULL; int ip6s = 0; #endif int descend, error; xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK); pr = req->td->td_ucred->cr_prison; error = 0; sx_slock(&allprison_lock); FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { #if defined(INET) || defined(INET6) again: #endif mtx_lock(&cpr->pr_mtx); #ifdef INET if (cpr->pr_ip4s > 0) { if (ip4s < cpr->pr_ip4s) { ip4s = cpr->pr_ip4s; mtx_unlock(&cpr->pr_mtx); ip4 = realloc(ip4, ip4s * sizeof(struct in_addr), M_TEMP, M_WAITOK); goto again; } bcopy(cpr->pr_ip4, ip4, cpr->pr_ip4s * sizeof(struct in_addr)); } #endif #ifdef INET6 if (cpr->pr_ip6s > 0) { if (ip6s < cpr->pr_ip6s) { ip6s = cpr->pr_ip6s; mtx_unlock(&cpr->pr_mtx); ip6 = realloc(ip6, ip6s * sizeof(struct in6_addr), M_TEMP, M_WAITOK); goto again; } bcopy(cpr->pr_ip6, ip6, cpr->pr_ip6s * sizeof(struct in6_addr)); } #endif if (cpr->pr_ref == 0) { mtx_unlock(&cpr->pr_mtx); continue; } bzero(xp, sizeof(*xp)); xp->pr_version = XPRISON_VERSION; xp->pr_id = cpr->pr_id; xp->pr_state = cpr->pr_uref > 0 ? 
PRISON_STATE_ALIVE : PRISON_STATE_DYING; strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host)); strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); #ifdef INET xp->pr_ip4s = cpr->pr_ip4s; #endif #ifdef INET6 xp->pr_ip6s = cpr->pr_ip6s; #endif mtx_unlock(&cpr->pr_mtx); error = SYSCTL_OUT(req, xp, sizeof(*xp)); if (error) break; #ifdef INET if (xp->pr_ip4s > 0) { error = SYSCTL_OUT(req, ip4, xp->pr_ip4s * sizeof(struct in_addr)); if (error) break; } #endif #ifdef INET6 if (xp->pr_ip6s > 0) { error = SYSCTL_OUT(req, ip6, xp->pr_ip6s * sizeof(struct in6_addr)); if (error) break; } #endif } sx_sunlock(&allprison_lock); free(xp, M_TEMP); #ifdef INET free(ip4, M_TEMP); #endif #ifdef INET6 free(ip6, M_TEMP); #endif return (error); } SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_list, "S", "List of active jails"); static int sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) { int error, injail; injail = jailed(req->td->td_ucred); error = SYSCTL_OUT(req, &injail, sizeof(injail)); return (error); } SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); static int sysctl_jail_vnet(SYSCTL_HANDLER_ARGS) { int error, havevnet; #ifdef VIMAGE struct ucred *cred = req->td->td_ucred; havevnet = jailed(cred) && prison_owns_vnet(cred); #else havevnet = 0; #endif error = SYSCTL_OUT(req, &havevnet, sizeof(havevnet)); return (error); } SYSCTL_PROC(_security_jail, OID_AUTO, vnet, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_vnet, "I", "Jail owns VNET?"); #if defined(INET) || defined(INET6) SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW, &jail_max_af_ips, 0, "Number of IP addresses a jail may have at most per address family"); #endif /* * Default parameters for jail(2) compatibility. For historical reasons, * the sysctl names have varying similarity to the parameter names. Prisons * just see their own parameters, and can't change them. */ static int sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS) { struct prison *pr; int allow, error, i; pr = req->td->td_ucred->cr_prison; allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow; /* Get the current flag value, and convert it to a boolean. */ i = (allow & arg2) ? 1 : 0; if (arg1 != NULL) i = !i; error = sysctl_handle_int(oidp, &i, 0, req); if (error || !req->newptr) return (error); i = i ? arg2 : 0; if (arg1 != NULL) i ^= arg2; /* * The sysctls don't have CTLFLAGS_PRISON, so assume prison0 * for writing.
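 *
 * Illustrative administrator usage: setting
 * "sysctl security.jail.mount_allowed=1" flips PR_ALLOW_MOUNT in
 * jail_default_allow, changing the default only for jails created
 * afterwards; existing prisons keep their own pr_allow copies.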
*/ mtx_lock(&prison0.pr_mtx); jail_default_allow = (jail_default_allow & ~arg2) | i; mtx_unlock(&prison0.pr_mtx); return (0); } SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I", "Processes in jail can set their hostnames"); SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I", "Processes in jail are limited to creating UNIX/IP/route sockets only"); SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I", "Processes in jail can use System V IPC primitives"); SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I", "Prison root can create raw sockets"); SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I", "Processes in jail can alter system file flags"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I", "Processes in jail can mount/unmount jail-friendly file systems"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_devfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_DEVFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the devfs file system"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_fdescfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_FDESCFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the fdescfs file system"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_nullfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_NULLFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the nullfs file system"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_procfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_PROCFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the procfs file system"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_linprocfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_LINPROCFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the linprocfs file system"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_linsysfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_LINSYSFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the linsysfs file system"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_tmpfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_TMPFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the tmpfs file system"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_zfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_ZFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the zfs file system"); static int sysctl_jail_default_level(SYSCTL_HANDLER_ARGS) { struct prison *pr; int level, error; pr = req->td->td_ucred->cr_prison; level = (pr == &prison0) ? 
*(int *)arg1 : *(int *)((char *)pr + arg2); error = sysctl_handle_int(oidp, &level, 0, req); if (error || !req->newptr) return (error); *(int *)arg1 = level; return (0); } SYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs), sysctl_jail_default_level, "I", "Processes in jail cannot see all mounted file systems"); SYSCTL_PROC(_security_jail, OID_AUTO, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &jail_default_devfs_rsnum, offsetof(struct prison, pr_devfs_rsnum), sysctl_jail_default_level, "I", "Ruleset for the devfs filesystem in jail"); /* * Nodes to describe jail parameters. Maximum length of string parameters * is returned in the string itself, and the other parameters exist merely * to make themselves and their types known. */ SYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW, 0, "Jail parameters"); int sysctl_jail_param(SYSCTL_HANDLER_ARGS) { int i; long l; size_t s; char numbuf[12]; switch (oidp->oid_kind & CTLTYPE) { case CTLTYPE_LONG: case CTLTYPE_ULONG: l = 0; #ifdef SCTL_MASK32 if (!(req->flags & SCTL_MASK32)) #endif return (SYSCTL_OUT(req, &l, sizeof(l))); case CTLTYPE_INT: case CTLTYPE_UINT: i = 0; return (SYSCTL_OUT(req, &i, sizeof(i))); case CTLTYPE_STRING: snprintf(numbuf, sizeof(numbuf), "%jd", (intmax_t)arg2); return (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req)); case CTLTYPE_STRUCT: s = (size_t)arg2; return (SYSCTL_OUT(req, &s, sizeof(s))); } return (0); } /* * CTLFLAG_RDTUN in the following indicates jail parameters that can be set at * jail creation time but cannot be changed in an existing jail. */ SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID"); SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID"); SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name"); SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path"); SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail secure level"); SYSCTL_JAIL_PARAM(, osreldate, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail value for kern.osreldate and uname -K"); SYSCTL_JAIL_PARAM_STRING(, osrelease, CTLFLAG_RDTUN, OSRELEASELEN, "Jail value for kern.osrelease and uname -r"); SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail cannot see all mounted file systems"); SYSCTL_JAIL_PARAM(, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RW, "I", "Ruleset for in-jail devfs mounts"); SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail persistence"); #ifdef VIMAGE SYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN, "E,jailsys", "Virtual network stack"); #endif SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD, "B", "Jail is in the process of shutting down"); SYSCTL_JAIL_PARAM_NODE(children, "Number of child jails"); SYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD, "I", "Current number of child jails"); SYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW, "I", "Maximum number of child jails"); SYSCTL_JAIL_PARAM_SYS_NODE(host, CTLFLAG_RW, "Jail host info"); SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail hostname"); SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail NIS domainname"); SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN, "Jail host UUID"); SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW, "LU", "Jail host ID"); SYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset"); 
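/*
 * A self-contained userland sketch (editorial illustration, not part of
 * this file) of how the parameter nodes declared here can be probed:
 * reading an integer parameter returns a zero of the appropriate width,
 * and reading a string parameter returns its maximum length as a numeric
 * string, as implemented by sysctl_jail_param() above.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		char buf[16];
 *		size_t len = sizeof(buf);
 *
 *		if (sysctlbyname("security.jail.param.name", buf, &len,
 *		    NULL, 0) == 0)
 *			printf("jail name length limit: %s\n", buf);
 *		return (0);
 *	}
 */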
SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID"); #ifdef INET SYSCTL_JAIL_PARAM_SYS_NODE(ip4, CTLFLAG_RDTUN, "Jail IPv4 address virtualization"); SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr), "S,in_addr,a", "Jail IPv4 addresses"); SYSCTL_JAIL_PARAM(_ip4, saddrsel, CTLTYPE_INT | CTLFLAG_RW, "B", "Do (not) use IPv4 source address selection rather than the " "primary jail IPv4 address."); #endif #ifdef INET6 SYSCTL_JAIL_PARAM_SYS_NODE(ip6, CTLFLAG_RDTUN, "Jail IPv6 address virtualization"); SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr), "S,in6_addr,a", "Jail IPv6 addresses"); SYSCTL_JAIL_PARAM(_ip6, saddrsel, CTLTYPE_INT | CTLFLAG_RW, "B", "Do (not) use IPv6 source address selection rather than the " "primary jail IPv6 address."); #endif SYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags"); SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may set hostname"); SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may use SYSV IPC"); SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may create raw sockets"); SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may alter system file flags"); SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may set file quotas"); SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route"); SYSCTL_JAIL_PARAM_SUBNODE(allow, mount, "Jail mount/unmount permission flags"); SYSCTL_JAIL_PARAM(_allow_mount, , CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount/unmount jail-friendly file systems in general"); SYSCTL_JAIL_PARAM(_allow_mount, devfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the devfs file system"); SYSCTL_JAIL_PARAM(_allow_mount, fdescfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the fdescfs file system"); SYSCTL_JAIL_PARAM(_allow_mount, nullfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the nullfs file system"); SYSCTL_JAIL_PARAM(_allow_mount, procfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the procfs file system"); SYSCTL_JAIL_PARAM(_allow_mount, linprocfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the linprocfs file system"); SYSCTL_JAIL_PARAM(_allow_mount, linsysfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the linsysfs file system"); SYSCTL_JAIL_PARAM(_allow_mount, tmpfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the tmpfs file system"); SYSCTL_JAIL_PARAM(_allow_mount, zfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the zfs file system"); #ifdef RACCT void prison_racct_foreach(void (*callback)(struct racct *racct, - void *arg2, void *arg3), void *arg2, void *arg3) + void *arg2, void *arg3), void (*pre)(void), void (*post)(void), + void *arg2, void *arg3) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_slock(&allprison_lock); + if (pre != NULL) + (pre)(); LIST_FOREACH(prr, &allprison_racct, prr_next) (callback)(prr->prr_racct, arg2, arg3); + if (post != NULL) + (post)(); sx_sunlock(&allprison_lock); } static struct prison_racct * prison_racct_find_locked(const char *name) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); if (name[0] == '\0' || strlen(name) >= MAXHOSTNAMELEN) return (NULL); LIST_FOREACH(prr, &allprison_racct, prr_next) { if (strcmp(name, prr->prr_name) != 0) continue; /* Found prison_racct with a matching name? */ prison_racct_hold(prr); return (prr); } /* Add new prison_racct. 
*/ prr = malloc(sizeof(*prr), M_PRISON_RACCT, M_ZERO | M_WAITOK); racct_create(&prr->prr_racct); strcpy(prr->prr_name, name); refcount_init(&prr->prr_refcount, 1); LIST_INSERT_HEAD(&allprison_racct, prr, prr_next); return (prr); } struct prison_racct * prison_racct_find(const char *name) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_xlock(&allprison_lock); prr = prison_racct_find_locked(name); sx_xunlock(&allprison_lock); return (prr); } void prison_racct_hold(struct prison_racct *prr) { ASSERT_RACCT_ENABLED(); refcount_acquire(&prr->prr_refcount); } static void prison_racct_free_locked(struct prison_racct *prr) { ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); if (refcount_release(&prr->prr_refcount)) { racct_destroy(&prr->prr_racct); LIST_REMOVE(prr, prr_next); free(prr, M_PRISON_RACCT); } } void prison_racct_free(struct prison_racct *prr) { int old; ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_UNLOCKED); old = prr->prr_refcount; if (old > 1 && atomic_cmpset_int(&prr->prr_refcount, old, old - 1)) return; sx_xlock(&allprison_lock); prison_racct_free_locked(prr); sx_xunlock(&allprison_lock); } static void prison_racct_attach(struct prison *pr) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); prr = prison_racct_find_locked(pr->pr_name); KASSERT(prr != NULL, ("cannot find prison_racct")); pr->pr_prison_racct = prr; } /* * Handle jail renaming. From the racct point of view, renaming means * moving from one prison_racct to another. */ static void prison_racct_modify(struct prison *pr) { struct proc *p; struct ucred *cred; struct prison_racct *oldprr; ASSERT_RACCT_ENABLED(); sx_slock(&allproc_lock); sx_xlock(&allprison_lock); if (strcmp(pr->pr_name, pr->pr_prison_racct->prr_name) == 0) { sx_xunlock(&allprison_lock); sx_sunlock(&allproc_lock); return; } oldprr = pr->pr_prison_racct; pr->pr_prison_racct = NULL; prison_racct_attach(pr); /* * Move resource utilisation records. */ racct_move(pr->pr_prison_racct->prr_racct, oldprr->prr_racct); /* * Force rctl to reattach rules to processes. */ FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); cred = crhold(p->p_ucred); PROC_UNLOCK(p); racct_proc_ucred_changed(p, cred, cred); crfree(cred); } sx_sunlock(&allproc_lock); prison_racct_free_locked(oldprr); sx_xunlock(&allprison_lock); } static void prison_racct_detach(struct prison *pr) { ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_UNLOCKED); if (pr->pr_prison_racct == NULL) return; prison_racct_free(pr->pr_prison_racct); pr->pr_prison_racct = NULL; } #endif /* RACCT */ #ifdef DDB static void db_show_prison(struct prison *pr) { int fi; #if defined(INET) || defined(INET6) int ii; #endif unsigned jsf; #ifdef INET6 char ip6buf[INET6_ADDRSTRLEN]; #endif db_printf("prison %p:\n", pr); db_printf(" jid = %d\n", pr->pr_id); db_printf(" name = %s\n", pr->pr_name); db_printf(" parent = %p\n", pr->pr_parent); db_printf(" ref = %d\n", pr->pr_ref); db_printf(" uref = %d\n", pr->pr_uref); db_printf(" path = %s\n", pr->pr_path); db_printf(" cpuset = %d\n", pr->pr_cpuset ? 
pr->pr_cpuset->cs_id : -1); #ifdef VIMAGE db_printf(" vnet = %p\n", pr->pr_vnet); #endif db_printf(" root = %p\n", pr->pr_root); db_printf(" securelevel = %d\n", pr->pr_securelevel); db_printf(" devfs_rsnum = %d\n", pr->pr_devfs_rsnum); db_printf(" children.max = %d\n", pr->pr_childmax); db_printf(" children.cur = %d\n", pr->pr_childcount); db_printf(" child = %p\n", LIST_FIRST(&pr->pr_children)); db_printf(" sibling = %p\n", LIST_NEXT(pr, pr_sibling)); db_printf(" flags = 0x%x", pr->pr_flags); for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); fi++) if (pr_flag_names[fi] != NULL && (pr->pr_flags & (1 << fi))) db_printf(" %s", pr_flag_names[fi]); for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); fi++) { jsf = pr->pr_flags & (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new); db_printf(" %-16s= %s\n", pr_flag_jailsys[fi].name, pr_flag_jailsys[fi].disable && (jsf == pr_flag_jailsys[fi].disable) ? "disable" : (jsf == pr_flag_jailsys[fi].new) ? "new" : "inherit"); } db_printf(" allow = 0x%x", pr->pr_allow); for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); fi++) if (pr_allow_names[fi] != NULL && (pr->pr_allow & (1 << fi))) db_printf(" %s", pr_allow_names[fi]); db_printf("\n"); db_printf(" enforce_statfs = %d\n", pr->pr_enforce_statfs); db_printf(" host.hostname = %s\n", pr->pr_hostname); db_printf(" host.domainname = %s\n", pr->pr_domainname); db_printf(" host.hostuuid = %s\n", pr->pr_hostuuid); db_printf(" host.hostid = %lu\n", pr->pr_hostid); #ifdef INET db_printf(" ip4s = %d\n", pr->pr_ip4s); for (ii = 0; ii < pr->pr_ip4s; ii++) db_printf(" %s %s\n", ii == 0 ? "ip4.addr =" : " ", inet_ntoa(pr->pr_ip4[ii])); #endif #ifdef INET6 db_printf(" ip6s = %d\n", pr->pr_ip6s); for (ii = 0; ii < pr->pr_ip6s; ii++) db_printf(" %s %s\n", ii == 0 ? "ip6.addr =" : " ", ip6_sprintf(ip6buf, &pr->pr_ip6[ii])); #endif } DB_SHOW_COMMAND(prison, db_show_prison_command) { struct prison *pr; if (!have_addr) { /* * Show all prisons in the list, and prison0 which is not * listed. */ db_show_prison(&prison0); if (!db_pager_quit) { TAILQ_FOREACH(pr, &allprison, pr_list) { db_show_prison(pr); if (db_pager_quit) break; } } return; } if (addr == 0) pr = &prison0; else { /* Look for a prison with the ID and with references. */ TAILQ_FOREACH(pr, &allprison, pr_list) if (pr->pr_id == addr && pr->pr_ref > 0) break; if (pr == NULL) /* Look again, without requiring a reference. */ TAILQ_FOREACH(pr, &allprison, pr_list) if (pr->pr_id == addr) break; if (pr == NULL) /* Assume address points to a valid prison. */ pr = (struct prison *)addr; } db_show_prison(pr); } #endif /* DDB */ Index: projects/powernv/kern/kern_loginclass.c =================================================================== --- projects/powernv/kern/kern_loginclass.c (revision 290990) +++ projects/powernv/kern/kern_loginclass.c (revision 290991) @@ -1,245 +1,250 @@ /*- * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Processes may set login class name using setloginclass(2). This * is usually done through call to setusercontext(3), by programs * such as login(1), based on information from master.passwd(5). Kernel * uses this information to enforce per-class resource limits. Current * login class can be determined using id(1). Login class is inherited * from the parent process during fork(2). If not set, it defaults * to "default". * * Code in this file implements setloginclass(2) and getloginclass(2) * system calls, and maintains class name storage and retrieval. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_LOGINCLASS, "loginclass", "loginclass structures"); LIST_HEAD(, loginclass) loginclasses; /* * Lock protecting loginclasses list. */ static struct rwlock loginclasses_lock; RW_SYSINIT(loginclasses_init, &loginclasses_lock, "loginclasses lock"); void loginclass_hold(struct loginclass *lc) { refcount_acquire(&lc->lc_refcount); } void loginclass_free(struct loginclass *lc) { int old; old = lc->lc_refcount; if (old > 1 && atomic_cmpset_int(&lc->lc_refcount, old, old - 1)) return; rw_wlock(&loginclasses_lock); if (!refcount_release(&lc->lc_refcount)) { rw_wunlock(&loginclasses_lock); return; } racct_destroy(&lc->lc_racct); LIST_REMOVE(lc, lc_next); rw_wunlock(&loginclasses_lock); free(lc, M_LOGINCLASS); } /* * Look up a loginclass struct for the parameter name. * loginclasses_lock must be locked. * Increase refcount on loginclass struct returned. */ static struct loginclass * loginclass_lookup(const char *name) { struct loginclass *lc; rw_assert(&loginclasses_lock, RA_LOCKED); LIST_FOREACH(lc, &loginclasses, lc_next) if (strcmp(name, lc->lc_name) == 0) { loginclass_hold(lc); break; } return (lc); } /* * Return loginclass structure with a corresponding name. Not * performance critical, as it's used mainly by setloginclass(2), * which happens once per login session. Caller has to use * loginclass_free() on the returned value when it's no longer * needed. 
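 *
 * Illustrative caller pattern (the class name is an arbitrary example;
 * sys_setloginclass() below is the real consumer):
 *
 *	lc = loginclass_find("daemon");
 *	if (lc == NULL)
 *		return (EINVAL);
 *	(... use lc, e.g. hang it off a new credential ...)
 *	loginclass_free(lc);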
*/ struct loginclass * loginclass_find(const char *name) { struct loginclass *lc, *new_lc; if (name[0] == '\0' || strlen(name) >= MAXLOGNAME) return (NULL); rw_rlock(&loginclasses_lock); lc = loginclass_lookup(name); rw_runlock(&loginclasses_lock); if (lc != NULL) return (lc); new_lc = malloc(sizeof(*new_lc), M_LOGINCLASS, M_ZERO | M_WAITOK); racct_create(&new_lc->lc_racct); refcount_init(&new_lc->lc_refcount, 1); strcpy(new_lc->lc_name, name); rw_wlock(&loginclasses_lock); /* * There's a chance someone created our loginclass while we * were in malloc and not holding the lock, so we have to * make sure we don't insert a duplicate loginclass. */ if ((lc = loginclass_lookup(name)) == NULL) { LIST_INSERT_HEAD(&loginclasses, new_lc, lc_next); rw_wunlock(&loginclasses_lock); lc = new_lc; } else { rw_wunlock(&loginclasses_lock); racct_destroy(&new_lc->lc_racct); free(new_lc, M_LOGINCLASS); } return (lc); } /* * Get login class name. */ #ifndef _SYS_SYSPROTO_H_ struct getloginclass_args { char *namebuf; size_t namelen; }; #endif /* ARGSUSED */ int sys_getloginclass(struct thread *td, struct getloginclass_args *uap) { struct loginclass *lc; size_t lcnamelen; lc = td->td_ucred->cr_loginclass; lcnamelen = strlen(lc->lc_name) + 1; if (lcnamelen > uap->namelen) return (ERANGE); return (copyout(lc->lc_name, uap->namebuf, lcnamelen)); } /* * Set login class name. */ #ifndef _SYS_SYSPROTO_H_ struct setloginclass_args { const char *namebuf; }; #endif /* ARGSUSED */ int sys_setloginclass(struct thread *td, struct setloginclass_args *uap) { struct proc *p = td->td_proc; int error; char lcname[MAXLOGNAME]; struct loginclass *newlc; struct ucred *newcred, *oldcred; error = priv_check(td, PRIV_PROC_SETLOGINCLASS); if (error != 0) return (error); error = copyinstr(uap->namebuf, lcname, sizeof(lcname), NULL); if (error != 0) return (error); newlc = loginclass_find(lcname); if (newlc == NULL) return (EINVAL); newcred = crget(); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); newcred->cr_loginclass = newlc; proc_set_cred(p, newcred); PROC_UNLOCK(p); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); #endif loginclass_free(oldcred->cr_loginclass); crfree(oldcred); return (0); } void loginclass_racct_foreach(void (*callback)(struct racct *racct, - void *arg2, void *arg3), void *arg2, void *arg3) + void *arg2, void *arg3), void (*pre)(void), void (*post)(void), + void *arg2, void *arg3) { struct loginclass *lc; rw_rlock(&loginclasses_lock); + if (pre != NULL) + (pre)(); LIST_FOREACH(lc, &loginclasses, lc_next) (callback)(lc->lc_racct, arg2, arg3); + if (post != NULL) + (post)(); rw_runlock(&loginclasses_lock); } Index: projects/powernv/kern/kern_racct.c =================================================================== --- projects/powernv/kern/kern_racct.c (revision 290990) +++ projects/powernv/kern/kern_racct.c (revision 290991) @@ -1,1300 +1,1316 @@ /*- * Copyright (c) 2010 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RCTL #include #endif #ifdef RACCT FEATURE(racct, "Resource Accounting"); /* * Do not block processes that have their %cpu usage <= pcpu_threshold. */ static int pcpu_threshold = 1; #ifdef RACCT_DEFAULT_TO_DISABLED int racct_enable = 0; #else int racct_enable = 1; #endif SYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW, 0, "Resource Accounting"); SYSCTL_UINT(_kern_racct, OID_AUTO, enable, CTLFLAG_RDTUN, &racct_enable, 0, "Enable RACCT/RCTL"); SYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold, 0, "Processes with higher %cpu usage than this value can be throttled."); /* * How many seconds it takes to use the scheduler %cpu calculations. When a * process starts, we compute its %cpu usage by dividing its runtime by the * process wall clock time. After RACCT_PCPU_SECS pass, we use the value * provided by the scheduler. 
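 *
 * Worked example (illustrative numbers): a process that has been alive
 * for 2 wall-clock seconds and has consumed 1 second of cpu time gets
 * an estimate of (1000000 * 1000000 * 100) / 2000000 = 50000000,
 * i.e. 50% in the RACCT_IN_MILLIONS representation, until the
 * scheduler's own figure takes over after RACCT_PCPU_SECS seconds.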
*/ #define RACCT_PCPU_SECS 3 static struct mtx racct_lock; MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF); static uma_zone_t racct_zone; static void racct_sub_racct(struct racct *dest, const struct racct *src); static void racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount); static void racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount); SDT_PROVIDER_DEFINE(racct); SDT_PROBE_DEFINE3(racct, kernel, rusage, add, "struct proc *", "int", "uint64_t"); SDT_PROBE_DEFINE3(racct, kernel, rusage, add__failure, "struct proc *", "int", "uint64_t"); SDT_PROBE_DEFINE3(racct, kernel, rusage, add__cred, "struct ucred *", "int", "uint64_t"); SDT_PROBE_DEFINE3(racct, kernel, rusage, add__force, "struct proc *", "int", "uint64_t"); SDT_PROBE_DEFINE3(racct, kernel, rusage, set, "struct proc *", "int", "uint64_t"); SDT_PROBE_DEFINE3(racct, kernel, rusage, set__failure, "struct proc *", "int", "uint64_t"); SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, "struct proc *", "int", "uint64_t"); SDT_PROBE_DEFINE3(racct, kernel, rusage, sub__cred, "struct ucred *", "int", "uint64_t"); SDT_PROBE_DEFINE1(racct, kernel, racct, create, "struct racct *"); SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, "struct racct *"); SDT_PROBE_DEFINE2(racct, kernel, racct, join, "struct racct *", "struct racct *"); SDT_PROBE_DEFINE2(racct, kernel, racct, join__failure, "struct racct *", "struct racct *"); SDT_PROBE_DEFINE2(racct, kernel, racct, leave, "struct racct *", "struct racct *"); int racct_types[] = { [RACCT_CPU] = RACCT_IN_MILLIONS, [RACCT_DATA] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, [RACCT_STACK] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, [RACCT_CORE] = RACCT_DENIABLE, [RACCT_RSS] = RACCT_RECLAIMABLE, [RACCT_MEMLOCK] = RACCT_RECLAIMABLE | RACCT_DENIABLE, [RACCT_NPROC] = RACCT_RECLAIMABLE | RACCT_DENIABLE, [RACCT_NOFILE] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, [RACCT_VMEM] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, [RACCT_NPTS] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_SWAP] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_NTHR] = RACCT_RECLAIMABLE | RACCT_DENIABLE, [RACCT_MSGQQUEUED] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_MSGQSIZE] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_NMSGQ] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_NSEM] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_NSEMOP] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, [RACCT_NSHM] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_SHMSIZE] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_WALLCLOCK] = RACCT_IN_MILLIONS, [RACCT_PCTCPU] = RACCT_DECAYING | RACCT_DENIABLE | RACCT_IN_MILLIONS }; static const fixpt_t RACCT_DECAY_FACTOR = 0.3 * FSCALE; #ifdef SCHED_4BSD /* * Contains intermediate values for %cpu calculations to avoid using floating * point in the kernel. * ccpu_exp[k] = FSCALE * (ccpu/FSCALE)^k = FSCALE * exp(-k/20) * It is needed only for the 4BSD scheduler, because in ULE the ccpu equals * zero, so the calculations are more straightforward.
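 *
 * The table below can be regenerated with a standalone program along
 * these lines (illustrative sketch, not part of the build):
 *
 *	#include <math.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int k;
 *
 *		for (k = 0; k <= 110; k++)
 *			printf("\t[%d] = FSCALE * %.20f,\n",
 *			    k, exp(-k / 20.0));
 *		return (0);
 *	}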
*/ fixpt_t ccpu_exp[] = { [0] = FSCALE * 1, [1] = FSCALE * 0.95122942450071400909, [2] = FSCALE * 0.90483741803595957316, [3] = FSCALE * 0.86070797642505780722, [4] = FSCALE * 0.81873075307798185866, [5] = FSCALE * 0.77880078307140486824, [6] = FSCALE * 0.74081822068171786606, [7] = FSCALE * 0.70468808971871343435, [8] = FSCALE * 0.67032004603563930074, [9] = FSCALE * 0.63762815162177329314, [10] = FSCALE * 0.60653065971263342360, [11] = FSCALE * 0.57694981038048669531, [12] = FSCALE * 0.54881163609402643262, [13] = FSCALE * 0.52204577676101604789, [14] = FSCALE * 0.49658530379140951470, [15] = FSCALE * 0.47236655274101470713, [16] = FSCALE * 0.44932896411722159143, [17] = FSCALE * 0.42741493194872666992, [18] = FSCALE * 0.40656965974059911188, [19] = FSCALE * 0.38674102345450120691, [20] = FSCALE * 0.36787944117144232159, [21] = FSCALE * 0.34993774911115535467, [22] = FSCALE * 0.33287108369807955328, [23] = FSCALE * 0.31663676937905321821, [24] = FSCALE * 0.30119421191220209664, [25] = FSCALE * 0.28650479686019010032, [26] = FSCALE * 0.27253179303401260312, [27] = FSCALE * 0.25924026064589150757, [28] = FSCALE * 0.24659696394160647693, [29] = FSCALE * 0.23457028809379765313, [30] = FSCALE * 0.22313016014842982893, [31] = FSCALE * 0.21224797382674305771, [32] = FSCALE * 0.20189651799465540848, [33] = FSCALE * 0.19204990862075411423, [34] = FSCALE * 0.18268352405273465022, [35] = FSCALE * 0.17377394345044512668, [36] = FSCALE * 0.16529888822158653829, [37] = FSCALE * 0.15723716631362761621, [38] = FSCALE * 0.14956861922263505264, [39] = FSCALE * 0.14227407158651357185, [40] = FSCALE * 0.13533528323661269189, [41] = FSCALE * 0.12873490358780421886, [42] = FSCALE * 0.12245642825298191021, [43] = FSCALE * 0.11648415777349695786, [44] = FSCALE * 0.11080315836233388333, [45] = FSCALE * 0.10539922456186433678, [46] = FSCALE * 0.10025884372280373372, [47] = FSCALE * 0.09536916221554961888, [48] = FSCALE * 0.09071795328941250337, [49] = FSCALE * 0.08629358649937051097, [50] = FSCALE * 0.08208499862389879516, [51] = FSCALE * 0.07808166600115315231, [52] = FSCALE * 0.07427357821433388042, [53] = FSCALE * 0.07065121306042958674, [54] = FSCALE * 0.06720551273974976512, [55] = FSCALE * 0.06392786120670757270, [56] = FSCALE * 0.06081006262521796499, [57] = FSCALE * 0.05784432087483846296, [58] = FSCALE * 0.05502322005640722902, [59] = FSCALE * 0.05233970594843239308, [60] = FSCALE * 0.04978706836786394297, [61] = FSCALE * 0.04735892439114092119, [62] = FSCALE * 0.04504920239355780606, [63] = FSCALE * 0.04285212686704017991, [64] = FSCALE * 0.04076220397836621516, [65] = FSCALE * 0.03877420783172200988, [66] = FSCALE * 0.03688316740124000544, [67] = FSCALE * 0.03508435410084502588, [68] = FSCALE * 0.03337326996032607948, [69] = FSCALE * 0.03174563637806794323, [70] = FSCALE * 0.03019738342231850073, [71] = FSCALE * 0.02872463965423942912, [72] = FSCALE * 0.02732372244729256080, [73] = FSCALE * 0.02599112877875534358, [74] = FSCALE * 0.02472352647033939120, [75] = FSCALE * 0.02351774585600910823, [76] = FSCALE * 0.02237077185616559577, [77] = FSCALE * 0.02127973643837716938, [78] = FSCALE * 0.02024191144580438847, [79] = FSCALE * 0.01925470177538692429, [80] = FSCALE * 0.01831563888873418029, [81] = FSCALE * 0.01742237463949351138, [82] = FSCALE * 0.01657267540176124754, [83] = FSCALE * 0.01576441648485449082, [84] = FSCALE * 0.01499557682047770621, [85] = FSCALE * 0.01426423390899925527, [86] = FSCALE * 0.01356855901220093175, [87] = FSCALE * 0.01290681258047986886, [88] = FSCALE * 0.01227733990306844117, 
[89] = FSCALE * 0.01167856697039544521, [90] = FSCALE * 0.01110899653824230649, [91] = FSCALE * 0.01056720438385265337, [92] = FSCALE * 0.01005183574463358164, [93] = FSCALE * 0.00956160193054350793, [94] = FSCALE * 0.00909527710169581709, [95] = FSCALE * 0.00865169520312063417, [96] = FSCALE * 0.00822974704902002884, [97] = FSCALE * 0.00782837754922577143, [98] = FSCALE * 0.00744658307092434051, [99] = FSCALE * 0.00708340892905212004, [100] = FSCALE * 0.00673794699908546709, [101] = FSCALE * 0.00640933344625638184, [102] = FSCALE * 0.00609674656551563610, [103] = FSCALE * 0.00579940472684214321, [104] = FSCALE * 0.00551656442076077241, [105] = FSCALE * 0.00524751839918138427, [106] = FSCALE * 0.00499159390691021621, [107] = FSCALE * 0.00474815099941147558, [108] = FSCALE * 0.00451658094261266798, [109] = FSCALE * 0.00429630469075234057, [110] = FSCALE * 0.00408677143846406699, }; #endif #define CCPU_EXP_MAX 110 /* * This function is analogous to the getpcpu() function in the ps(1) command. * They should both calculate in the same way so that the racct %cpu * calculations are consistent with the values shown by the ps(1) tool. * The calculations are more complex in the 4BSD scheduler because of the value * of the ccpu variable. In ULE it is defined to be zero, which saves us some * work. */ static uint64_t racct_getpcpu(struct proc *p, u_int pcpu) { u_int swtime; #ifdef SCHED_4BSD fixpt_t pctcpu, pctcpu_next; #endif #ifdef SMP struct pcpu *pc; int found; #endif fixpt_t p_pctcpu; struct thread *td; ASSERT_RACCT_ENABLED(); /* * If the process is swapped out, we count its %cpu usage as zero. * This behaviour is consistent with the userland ps(1) tool. */ if ((p->p_flag & P_INMEM) == 0) return (0); swtime = (ticks - p->p_swtick) / hz; /* * For short-lived processes, sched_pctcpu() returns small * values even for cpu-intensive processes. Therefore we use * our own estimate in this case. */ if (swtime < RACCT_PCPU_SECS) return (pcpu); p_pctcpu = 0; FOREACH_THREAD_IN_PROC(p, td) { if (td == PCPU_GET(idlethread)) continue; #ifdef SMP found = 0; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (td == pc->pc_idlethread) { found = 1; break; } } if (found) continue; #endif thread_lock(td); #ifdef SCHED_4BSD pctcpu = sched_pctcpu(td); /* Count also the yet unfinished second. */ pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT; pctcpu_next += sched_pctcpu_delta(td); p_pctcpu += max(pctcpu, pctcpu_next); #else /* * In ULE the %cpu statistics are updated on every * sched_pctcpu() call. So special calculations to * account for the latest (unfinished) second are * not needed. */ p_pctcpu += sched_pctcpu(td); #endif thread_unlock(td); } #ifdef SCHED_4BSD if (swtime <= CCPU_EXP_MAX) return ((100 * (uint64_t)p_pctcpu * 1000000) / (FSCALE - ccpu_exp[swtime])); #endif return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE); } static void racct_add_racct(struct racct *dest, const struct racct *src) { int i; ASSERT_RACCT_ENABLED(); mtx_assert(&racct_lock, MA_OWNED); /* * Update resource usage in dest. */ for (i = 0; i <= RACCT_MAX; i++) { KASSERT(dest->r_resources[i] >= 0, ("%s: resource %d propagation meltdown: dest < 0", __func__, i)); KASSERT(src->r_resources[i] >= 0, ("%s: resource %d propagation meltdown: src < 0", __func__, i)); dest->r_resources[i] += src->r_resources[i]; } } static void racct_sub_racct(struct racct *dest, const struct racct *src) { int i; ASSERT_RACCT_ENABLED(); mtx_assert(&racct_lock, MA_OWNED); /* * Update resource usage in dest.
*/ for (i = 0; i <= RACCT_MAX; i++) { if (!RACCT_IS_SLOPPY(i) && !RACCT_IS_DECAYING(i)) { KASSERT(dest->r_resources[i] >= 0, ("%s: resource %d propagation meltdown: dest < 0", __func__, i)); KASSERT(src->r_resources[i] >= 0, ("%s: resource %d propagation meltdown: src < 0", __func__, i)); KASSERT(src->r_resources[i] <= dest->r_resources[i], ("%s: resource %d propagation meltdown: src > dest", __func__, i)); } if (RACCT_CAN_DROP(i)) { dest->r_resources[i] -= src->r_resources[i]; if (dest->r_resources[i] < 0) { KASSERT(RACCT_IS_SLOPPY(i) || RACCT_IS_DECAYING(i), ("%s: resource %d usage < 0", __func__, i)); dest->r_resources[i] = 0; } } } } void racct_create(struct racct **racctp) { if (!racct_enable) return; SDT_PROBE1(racct, kernel, racct, create, racctp); KASSERT(*racctp == NULL, ("racct already allocated")); *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO); } static void racct_destroy_locked(struct racct **racctp) { int i; struct racct *racct; ASSERT_RACCT_ENABLED(); SDT_PROBE1(racct, kernel, racct, destroy, racctp); mtx_assert(&racct_lock, MA_OWNED); KASSERT(racctp != NULL, ("NULL racctp")); KASSERT(*racctp != NULL, ("NULL racct")); racct = *racctp; for (i = 0; i <= RACCT_MAX; i++) { if (RACCT_IS_SLOPPY(i)) continue; if (!RACCT_IS_RECLAIMABLE(i)) continue; KASSERT(racct->r_resources[i] == 0, ("destroying non-empty racct: " "%ju allocated for resource %d\n", racct->r_resources[i], i)); } uma_zfree(racct_zone, racct); *racctp = NULL; } void racct_destroy(struct racct **racct) { if (!racct_enable) return; mtx_lock(&racct_lock); racct_destroy_locked(racct); mtx_unlock(&racct_lock); } /* * Increase consumption of 'resource' by 'amount' for 'racct' * and all its parents. Unlike in other cases, 'amount' here * may be less than zero. */ static void racct_adjust_resource(struct racct *racct, int resource, uint64_t amount) { ASSERT_RACCT_ENABLED(); mtx_assert(&racct_lock, MA_OWNED); KASSERT(racct != NULL, ("NULL racct")); racct->r_resources[resource] += amount; if (racct->r_resources[resource] < 0) { KASSERT(RACCT_IS_SLOPPY(resource) || RACCT_IS_DECAYING(resource), ("%s: resource %d usage < 0", __func__, resource)); racct->r_resources[resource] = 0; } /* * There are some cases where the racct %cpu resource would grow * beyond 100% per core. For example in racct_proc_exit() we add * the process %cpu usage to the ucred racct containers. If too * many processes terminated in a short time span, the ucred %cpu * resource could grow too much. Also, the 4BSD scheduler sometimes * returns more than 100% cpu usage for a thread. So we set a sane * boundary here to 100% * the maximum number of CPUs. */ if ((resource == RACCT_PCTCPU) && (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000 * (int64_t)MAXCPU)) racct->r_resources[RACCT_PCTCPU] = 100 * 1000000 * (int64_t)MAXCPU; } static int racct_add_locked(struct proc *p, int resource, uint64_t amount) { #ifdef RCTL int error; #endif ASSERT_RACCT_ENABLED(); SDT_PROBE3(racct, kernel, rusage, add, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. */ PROC_LOCK_ASSERT(p, MA_OWNED); #ifdef RCTL error = rctl_enforce(p, resource, amount); if (error && RACCT_IS_DENIABLE(resource)) { SDT_PROBE3(racct, kernel, rusage, add__failure, p, resource, amount); return (error); } #endif racct_adjust_resource(p->p_racct, resource, amount); racct_add_cred_locked(p->p_ucred, resource, amount); return (0); } /* * Increase allocation of 'resource' by 'amount' for process 'p'. * Return 0 if it's below limits, or an errno value if it's not.
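 *
 * Illustrative caller pattern (patterned after the file descriptor
 * code; the error mapping is an example only):
 *
 *	PROC_LOCK(p);
 *	error = racct_add(p, RACCT_NOFILE, 1);
 *	PROC_UNLOCK(p);
 *	if (error != 0)
 *		return (EMFILE);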
*/ int racct_add(struct proc *p, int resource, uint64_t amount) { int error; if (!racct_enable) return (0); mtx_lock(&racct_lock); error = racct_add_locked(p, resource, amount); mtx_unlock(&racct_lock); return (error); } static void racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount) { struct prison *pr; ASSERT_RACCT_ENABLED(); SDT_PROBE3(racct, kernel, rusage, add__cred, cred, resource, amount); racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, amount); for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource, amount); racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, amount); } /* * Increase allocation of 'resource' by 'amount' for credential 'cred'. * Doesn't check for limits and never fails. * * XXX: Shouldn't this ever return an error? */ void racct_add_cred(struct ucred *cred, int resource, uint64_t amount) { if (!racct_enable) return; mtx_lock(&racct_lock); racct_add_cred_locked(cred, resource, amount); mtx_unlock(&racct_lock); } /* * Increase allocation of 'resource' by 'amount' for process 'p'. * Doesn't check for limits and never fails. */ void racct_add_force(struct proc *p, int resource, uint64_t amount) { if (!racct_enable) return; SDT_PROBE3(racct, kernel, rusage, add__force, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. */ PROC_LOCK_ASSERT(p, MA_OWNED); mtx_lock(&racct_lock); racct_adjust_resource(p->p_racct, resource, amount); mtx_unlock(&racct_lock); racct_add_cred(p->p_ucred, resource, amount); } static int racct_set_locked(struct proc *p, int resource, uint64_t amount) { int64_t old_amount, decayed_amount; int64_t diff_proc, diff_cred; #ifdef RCTL int error; #endif ASSERT_RACCT_ENABLED(); SDT_PROBE3(racct, kernel, rusage, set, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. */ PROC_LOCK_ASSERT(p, MA_OWNED); old_amount = p->p_racct->r_resources[resource]; /* * The diffs may be negative. */ diff_proc = amount - old_amount; if (RACCT_IS_DECAYING(resource)) { /* * Resources in per-credential racct containers may decay. * If this is the case, we need to calculate the difference * between the new amount and the proportional value of the * old amount that has decayed in the ucred racct containers. */ decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; diff_cred = amount - decayed_amount; } else diff_cred = diff_proc; #ifdef notyet KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource), ("%s: usage of non-droppable resource %d dropping", __func__, resource)); #endif #ifdef RCTL if (diff_proc > 0) { error = rctl_enforce(p, resource, diff_proc); if (error && RACCT_IS_DENIABLE(resource)) { SDT_PROBE3(racct, kernel, rusage, set__failure, p, resource, amount); return (error); } } #endif racct_adjust_resource(p->p_racct, resource, diff_proc); if (diff_cred > 0) racct_add_cred_locked(p->p_ucred, resource, diff_cred); else if (diff_cred < 0) racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); return (0); } /* * Set allocation of 'resource' to 'amount' for process 'p'. * Return 0 if it's below limits, or errno, if it's not. * * Note that decreasing the allocation always returns 0, * even if it's above the limit. 
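 *
 * Illustrative caller, patterned after the VM code (the 'map' variable
 * is an example placeholder; the process lock must be held around the
 * call, as asserted in racct_set_locked()):
 *
 *	PROC_LOCK(p);
 *	error = racct_set(p, RACCT_VMEM, map->size);
 *	PROC_UNLOCK(p);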
*/ int racct_set(struct proc *p, int resource, uint64_t amount) { int error; if (!racct_enable) return (0); mtx_lock(&racct_lock); error = racct_set_locked(p, resource, amount); mtx_unlock(&racct_lock); return (error); } static void racct_set_force_locked(struct proc *p, int resource, uint64_t amount) { int64_t old_amount, decayed_amount; int64_t diff_proc, diff_cred; ASSERT_RACCT_ENABLED(); SDT_PROBE3(racct, kernel, rusage, set, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. */ PROC_LOCK_ASSERT(p, MA_OWNED); old_amount = p->p_racct->r_resources[resource]; /* * The diffs may be negative. */ diff_proc = amount - old_amount; if (RACCT_IS_DECAYING(resource)) { /* * Resources in per-credential racct containers may decay. * If this is the case, we need to calculate the difference * between the new amount and the proportional value of the * old amount that has decayed in the ucred racct containers. */ decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; diff_cred = amount - decayed_amount; } else diff_cred = diff_proc; racct_adjust_resource(p->p_racct, resource, diff_proc); if (diff_cred > 0) racct_add_cred_locked(p->p_ucred, resource, diff_cred); else if (diff_cred < 0) racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); } void racct_set_force(struct proc *p, int resource, uint64_t amount) { if (!racct_enable) return; mtx_lock(&racct_lock); racct_set_force_locked(p, resource, amount); mtx_unlock(&racct_lock); } /* * Returns amount of 'resource' the process 'p' can keep allocated. * Allocating more than that would be denied, unless the resource * is marked undeniable. Amount of already allocated resource does * not matter. */ uint64_t racct_get_limit(struct proc *p, int resource) { if (!racct_enable) return (UINT64_MAX); #ifdef RCTL return (rctl_get_limit(p, resource)); #else return (UINT64_MAX); #endif } /* * Returns amount of 'resource' the process 'p' can keep allocated. * Allocating more than that would be denied, unless the resource * is marked undeniable. Amount of already allocated resource does * matter. */ uint64_t racct_get_available(struct proc *p, int resource) { if (!racct_enable) return (UINT64_MAX); #ifdef RCTL return (rctl_get_available(p, resource)); #else return (UINT64_MAX); #endif } /* * Returns amount of the %cpu resource that process 'p' can add to its %cpu * utilization. Adding more than that would lead to the process being * throttled. */ static int64_t racct_pcpu_available(struct proc *p) { ASSERT_RACCT_ENABLED(); #ifdef RCTL return (rctl_pcpu_available(p)); #else return (INT64_MAX); #endif } /* * Decrease allocation of 'resource' by 'amount' for process 'p'. */ void racct_sub(struct proc *p, int resource, uint64_t amount) { if (!racct_enable) return; SDT_PROBE3(racct, kernel, rusage, sub, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. 
*/ PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(RACCT_CAN_DROP(resource), ("%s: called for non-droppable resource %d", __func__, resource)); mtx_lock(&racct_lock); KASSERT(amount <= p->p_racct->r_resources[resource], ("%s: freeing %ju of resource %d, which is more " "than allocated %jd for %s (pid %d)", __func__, amount, resource, (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid)); racct_adjust_resource(p->p_racct, resource, -amount); racct_sub_cred_locked(p->p_ucred, resource, amount); mtx_unlock(&racct_lock); } static void racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount) { struct prison *pr; ASSERT_RACCT_ENABLED(); SDT_PROBE3(racct, kernel, rusage, sub__cred, cred, resource, amount); #ifdef notyet KASSERT(RACCT_CAN_DROP(resource), ("%s: called for resource %d which can not drop", __func__, resource)); #endif racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, -amount); for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource, -amount); racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, -amount); } /* * Decrease allocation of 'resource' by 'amount' for credential 'cred'. */ void racct_sub_cred(struct ucred *cred, int resource, uint64_t amount) { if (!racct_enable) return; mtx_lock(&racct_lock); racct_sub_cred_locked(cred, resource, amount); mtx_unlock(&racct_lock); } /* * Inherit resource usage information from the parent process. */ int racct_proc_fork(struct proc *parent, struct proc *child) { int i, error = 0; if (!racct_enable) return (0); /* * Create racct for the child process. */ racct_create(&child->p_racct); PROC_LOCK(parent); PROC_LOCK(child); mtx_lock(&racct_lock); #ifdef RCTL error = rctl_proc_fork(parent, child); if (error != 0) goto out; #endif /* Init process cpu time. */ child->p_prev_runtime = 0; child->p_throttled = 0; /* * Inherit resource usage. */ for (i = 0; i <= RACCT_MAX; i++) { if (parent->p_racct->r_resources[i] == 0 || !RACCT_IS_INHERITABLE(i)) continue; error = racct_set_locked(child, i, parent->p_racct->r_resources[i]); if (error != 0) goto out; } error = racct_add_locked(child, RACCT_NPROC, 1); error += racct_add_locked(child, RACCT_NTHR, 1); out: mtx_unlock(&racct_lock); PROC_UNLOCK(child); PROC_UNLOCK(parent); if (error != 0) racct_proc_exit(child); return (error); } /* * Called at the end of fork1(), to handle rules that require the process * to be fully initialized. */ void racct_proc_fork_done(struct proc *child) { #ifdef RCTL if (!racct_enable) return; PROC_LOCK(child); mtx_lock(&racct_lock); rctl_enforce(child, RACCT_NPROC, 0); rctl_enforce(child, RACCT_NTHR, 0); mtx_unlock(&racct_lock); PROC_UNLOCK(child); #endif } void racct_proc_exit(struct proc *p) { int i; uint64_t runtime; struct timeval wallclock; uint64_t pct_estimate, pct; if (!racct_enable) return; PROC_LOCK(p); /* * We don't need to calculate rux, proc_reap() has already done this. 
*/ runtime = cputick2usec(p->p_rux.rux_runtime); #ifdef notyet KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); #else if (runtime < p->p_prev_runtime) runtime = p->p_prev_runtime; #endif microuptime(&wallclock); timevalsub(&wallclock, &p->p_stats->p_start); if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { pct_estimate = (1000000 * runtime * 100) / ((uint64_t)wallclock.tv_sec * 1000000 + wallclock.tv_usec); } else pct_estimate = 0; pct = racct_getpcpu(p, pct_estimate); mtx_lock(&racct_lock); racct_set_locked(p, RACCT_CPU, runtime); racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct); for (i = 0; i <= RACCT_MAX; i++) { if (p->p_racct->r_resources[i] == 0) continue; if (!RACCT_IS_RECLAIMABLE(i)) continue; racct_set_locked(p, i, 0); } mtx_unlock(&racct_lock); PROC_UNLOCK(p); #ifdef RCTL rctl_racct_release(p->p_racct); #endif racct_destroy(&p->p_racct); } /* * Called after credentials change, to move resource utilisation * between raccts. */ void racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred, struct ucred *newcred) { struct uidinfo *olduip, *newuip; struct loginclass *oldlc, *newlc; struct prison *oldpr, *newpr, *pr; if (!racct_enable) return; PROC_LOCK_ASSERT(p, MA_NOTOWNED); newuip = newcred->cr_ruidinfo; olduip = oldcred->cr_ruidinfo; newlc = newcred->cr_loginclass; oldlc = oldcred->cr_loginclass; newpr = newcred->cr_prison; oldpr = oldcred->cr_prison; mtx_lock(&racct_lock); if (newuip != olduip) { racct_sub_racct(olduip->ui_racct, p->p_racct); racct_add_racct(newuip->ui_racct, p->p_racct); } if (newlc != oldlc) { racct_sub_racct(oldlc->lc_racct, p->p_racct); racct_add_racct(newlc->lc_racct, p->p_racct); } if (newpr != oldpr) { for (pr = oldpr; pr != NULL; pr = pr->pr_parent) racct_sub_racct(pr->pr_prison_racct->prr_racct, p->p_racct); for (pr = newpr; pr != NULL; pr = pr->pr_parent) racct_add_racct(pr->pr_prison_racct->prr_racct, p->p_racct); } mtx_unlock(&racct_lock); #ifdef RCTL rctl_proc_ucred_changed(p, newcred); #endif } void racct_move(struct racct *dest, struct racct *src) { ASSERT_RACCT_ENABLED(); mtx_lock(&racct_lock); racct_add_racct(dest, src); racct_sub_racct(src, src); mtx_unlock(&racct_lock); } static void racct_proc_throttle(struct proc *p) { struct thread *td; #ifdef SMP int cpuid; #endif ASSERT_RACCT_ENABLED(); PROC_LOCK_ASSERT(p, MA_OWNED); /* * Do not block kernel processes. Also do not block processes with * low %cpu utilization to improve interactivity. */ if (((p->p_flag & (P_SYSTEM | P_KTHREAD)) != 0) || (p->p_racct->r_resources[RACCT_PCTCPU] <= pcpu_threshold)) return; p->p_throttled = 1; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); switch (td->td_state) { case TDS_RUNQ: /* * If the thread is on the scheduler run-queue, we can * not just remove it from there. So we set the flag * TDF_NEEDRESCHED for the thread, so that once it is * running, it is taken off the cpu as soon as possible. */ td->td_flags |= TDF_NEEDRESCHED; break; case TDS_RUNNING: /* * If the thread is running, we request a context * switch for it by setting the TDF_NEEDRESCHED flag. 
*/ td->td_flags |= TDF_NEEDRESCHED; #ifdef SMP cpuid = td->td_oncpu; if ((cpuid != NOCPU) && (td != curthread)) ipi_cpu(cpuid, IPI_AST); #endif break; default: break; } thread_unlock(td); } } static void racct_proc_wakeup(struct proc *p) { ASSERT_RACCT_ENABLED(); PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_throttled) { p->p_throttled = 0; wakeup(p->p_racct); } } static void racct_decay_resource(struct racct *racct, void * res, void* dummy) { int resource; int64_t r_old, r_new; ASSERT_RACCT_ENABLED(); + mtx_assert(&racct_lock, MA_OWNED); resource = *(int *)res; r_old = racct->r_resources[resource]; /* If there is nothing to decay, just exit. */ if (r_old <= 0) return; - mtx_lock(&racct_lock); r_new = r_old * RACCT_DECAY_FACTOR / FSCALE; racct->r_resources[resource] = r_new; +} + +static void +racct_decay_pre(void) +{ + + mtx_lock(&racct_lock); +} + +static void +racct_decay_post(void) +{ + mtx_unlock(&racct_lock); } static void racct_decay(int resource) { ASSERT_RACCT_ENABLED(); - ui_racct_foreach(racct_decay_resource, &resource, NULL); - loginclass_racct_foreach(racct_decay_resource, &resource, NULL); - prison_racct_foreach(racct_decay_resource, &resource, NULL); + ui_racct_foreach(racct_decay_resource, racct_decay_pre, + racct_decay_post, &resource, NULL); + loginclass_racct_foreach(racct_decay_resource, racct_decay_pre, + racct_decay_post, &resource, NULL); + prison_racct_foreach(racct_decay_resource, racct_decay_pre, + racct_decay_post, &resource, NULL); } static void racctd(void) { struct thread *td; struct proc *p; struct timeval wallclock; uint64_t runtime; uint64_t pct, pct_estimate; ASSERT_RACCT_ENABLED(); for (;;) { racct_decay(RACCT_PCTCPU); sx_slock(&allproc_lock); LIST_FOREACH(p, &zombproc, p_list) { PROC_LOCK(p); racct_set(p, RACCT_PCTCPU, 0); PROC_UNLOCK(p); } FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state != PRS_NORMAL) { PROC_UNLOCK(p); continue; } microuptime(&wallclock); timevalsub(&wallclock, &p->p_stats->p_start); PROC_STATLOCK(p); FOREACH_THREAD_IN_PROC(p, td) ruxagg(p, td); runtime = cputick2usec(p->p_rux.rux_runtime); PROC_STATUNLOCK(p); #ifdef notyet KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); #else if (runtime < p->p_prev_runtime) runtime = p->p_prev_runtime; #endif p->p_prev_runtime = runtime; if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { pct_estimate = (1000000 * runtime * 100) / ((uint64_t)wallclock.tv_sec * 1000000 + wallclock.tv_usec); } else pct_estimate = 0; pct = racct_getpcpu(p, pct_estimate); mtx_lock(&racct_lock); racct_set_force_locked(p, RACCT_PCTCPU, pct); racct_set_locked(p, RACCT_CPU, runtime); racct_set_locked(p, RACCT_WALLCLOCK, (uint64_t)wallclock.tv_sec * 1000000 + wallclock.tv_usec); mtx_unlock(&racct_lock); PROC_UNLOCK(p); } /* * To ensure that processes are throttled in a fair way, we need * to iterate over all processes again and check the limits * for %cpu resource only after ucred racct containers have been * properly filled. 
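/*
 * Illustrative sketch (userland): racct_decay_resource() above is a
 * fixed-point multiplication by a factor below one. The kernel's
 * RACCT_DECAY_FACTOR and FSCALE come from this file and sys/param.h;
 * the values below are assumed purely for demonstration.
 */
#include <stdint.h>
#include <stdio.h>

#define FSCALE		2048			/* assumed fixed-point base */
#define DECAY_FACTOR	(3 * FSCALE / 10)	/* assumed ~0.3 factor */

int
main(void)
{
	int64_t r = 50000000;	/* 50% of a cpu, in millionths */
	int pass;

	/* Each racctd pass shrinks the accounted %cpu geometrically. */
	for (pass = 1; pass <= 4; pass++) {
		r = r * DECAY_FACTOR / FSCALE;
		printf("pass %d: %jd\n", pass, (intmax_t)r);
	}
	return (0);
}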
*/ FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state != PRS_NORMAL) { PROC_UNLOCK(p); continue; } if (racct_pcpu_available(p) <= 0) racct_proc_throttle(p); else if (p->p_throttled) racct_proc_wakeup(p); PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); pause("-", hz); } } static struct kproc_desc racctd_kp = { "racctd", racctd, NULL }; static void racctd_init(void) { if (!racct_enable) return; kproc_start(&racctd_kp); } SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, racctd_init, NULL); static void racct_init(void) { if (!racct_enable) return; racct_zone = uma_zcreate("racct", sizeof(struct racct), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); /* * XXX: Move this somewhere. */ prison0.pr_prison_racct = prison_racct_find("0"); } SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL); #endif /* !RACCT */ Index: projects/powernv/kern/kern_rctl.c =================================================================== --- projects/powernv/kern/kern_rctl.c (revision 290990) +++ projects/powernv/kern/kern_rctl.c (revision 290991) @@ -1,1951 +1,1972 @@ /*- * Copyright (c) 2010 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RCTL #ifndef RACCT #error "The RCTL option requires the RACCT option" #endif FEATURE(rctl, "Resource Limits"); #define HRF_DEFAULT 0 #define HRF_DONT_INHERIT 1 #define HRF_DONT_ACCUMULATE 2 #define RCTL_MAX_INBUFSIZE 4 * 1024 #define RCTL_MAX_OUTBUFSIZE 16 * 1024 * 1024 #define RCTL_LOG_BUFSIZE 128 #define RCTL_PCPU_SHIFT (10 * 1000000) unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE; SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW, 0, "Resource Limits"); SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN, &rctl_maxbufsize, 0, "Maximum output buffer size"); /* * 'rctl_rule_link' connects a rule with every racct it's related to. 
* For example, rule 'user:X:openfiles:deny=N/process' is linked * with uidinfo for user X, and to each process of that user. */ struct rctl_rule_link { LIST_ENTRY(rctl_rule_link) rrl_next; struct rctl_rule *rrl_rule; int rrl_exceeded; }; struct dict { const char *d_name; int d_value; }; static struct dict subjectnames[] = { { "process", RCTL_SUBJECT_TYPE_PROCESS }, { "user", RCTL_SUBJECT_TYPE_USER }, { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS }, { "jail", RCTL_SUBJECT_TYPE_JAIL }, { NULL, -1 }}; static struct dict resourcenames[] = { { "cputime", RACCT_CPU }, { "datasize", RACCT_DATA }, { "stacksize", RACCT_STACK }, { "coredumpsize", RACCT_CORE }, { "memoryuse", RACCT_RSS }, { "memorylocked", RACCT_MEMLOCK }, { "maxproc", RACCT_NPROC }, { "openfiles", RACCT_NOFILE }, { "vmemoryuse", RACCT_VMEM }, { "pseudoterminals", RACCT_NPTS }, { "swapuse", RACCT_SWAP }, { "nthr", RACCT_NTHR }, { "msgqqueued", RACCT_MSGQQUEUED }, { "msgqsize", RACCT_MSGQSIZE }, { "nmsgq", RACCT_NMSGQ }, { "nsem", RACCT_NSEM }, { "nsemop", RACCT_NSEMOP }, { "nshm", RACCT_NSHM }, { "shmsize", RACCT_SHMSIZE }, { "wallclock", RACCT_WALLCLOCK }, { "pcpu", RACCT_PCTCPU }, { NULL, -1 }}; static struct dict actionnames[] = { { "sighup", RCTL_ACTION_SIGHUP }, { "sigint", RCTL_ACTION_SIGINT }, { "sigquit", RCTL_ACTION_SIGQUIT }, { "sigill", RCTL_ACTION_SIGILL }, { "sigtrap", RCTL_ACTION_SIGTRAP }, { "sigabrt", RCTL_ACTION_SIGABRT }, { "sigemt", RCTL_ACTION_SIGEMT }, { "sigfpe", RCTL_ACTION_SIGFPE }, { "sigkill", RCTL_ACTION_SIGKILL }, { "sigbus", RCTL_ACTION_SIGBUS }, { "sigsegv", RCTL_ACTION_SIGSEGV }, { "sigsys", RCTL_ACTION_SIGSYS }, { "sigpipe", RCTL_ACTION_SIGPIPE }, { "sigalrm", RCTL_ACTION_SIGALRM }, { "sigterm", RCTL_ACTION_SIGTERM }, { "sigurg", RCTL_ACTION_SIGURG }, { "sigstop", RCTL_ACTION_SIGSTOP }, { "sigtstp", RCTL_ACTION_SIGTSTP }, { "sigchld", RCTL_ACTION_SIGCHLD }, { "sigttin", RCTL_ACTION_SIGTTIN }, { "sigttou", RCTL_ACTION_SIGTTOU }, { "sigio", RCTL_ACTION_SIGIO }, { "sigxcpu", RCTL_ACTION_SIGXCPU }, { "sigxfsz", RCTL_ACTION_SIGXFSZ }, { "sigvtalrm", RCTL_ACTION_SIGVTALRM }, { "sigprof", RCTL_ACTION_SIGPROF }, { "sigwinch", RCTL_ACTION_SIGWINCH }, { "siginfo", RCTL_ACTION_SIGINFO }, { "sigusr1", RCTL_ACTION_SIGUSR1 }, { "sigusr2", RCTL_ACTION_SIGUSR2 }, { "sigthr", RCTL_ACTION_SIGTHR }, { "deny", RCTL_ACTION_DENY }, { "log", RCTL_ACTION_LOG }, { "devctl", RCTL_ACTION_DEVCTL }, { NULL, -1 }}; static void rctl_init(void); SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL); static uma_zone_t rctl_rule_link_zone; static uma_zone_t rctl_rule_zone; static struct rwlock rctl_lock; RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock"); static int rctl_rule_fully_specified(const struct rctl_rule *rule); static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule); static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits"); static const char * rctl_subject_type_name(int subject) { int i; for (i = 0; subjectnames[i].d_name != NULL; i++) { if (subjectnames[i].d_value == subject) return (subjectnames[i].d_name); } panic("rctl_subject_type_name: unknown subject type %d", subject); } static const char * rctl_action_name(int action) { int i; for (i = 0; actionnames[i].d_name != NULL; i++) { if (actionnames[i].d_value == action) return (actionnames[i].d_name); } panic("rctl_action_name: unknown action %d", action); } const char * rctl_resource_name(int resource) { int i; for (i = 0; resourcenames[i].d_name != NULL; i++) { if (resourcenames[i].d_value == resource) return (resourcenames[i].d_name); } 
panic("rctl_resource_name: unknown resource %d", resource); } /* * Return the amount of resource that can be allocated by 'p' before * hitting 'rule'. */ static int64_t rctl_available_resource(const struct proc *p, const struct rctl_rule *rule) { int resource; int64_t available = INT64_MAX; struct ucred *cred = p->p_ucred; ASSERT_RACCT_ENABLED(); rw_assert(&rctl_lock, RA_LOCKED); resource = rule->rr_resource; switch (rule->rr_per) { case RCTL_SUBJECT_TYPE_PROCESS: available = rule->rr_amount - p->p_racct->r_resources[resource]; break; case RCTL_SUBJECT_TYPE_USER: available = rule->rr_amount - cred->cr_ruidinfo->ui_racct->r_resources[resource]; break; case RCTL_SUBJECT_TYPE_LOGINCLASS: available = rule->rr_amount - cred->cr_loginclass->lc_racct->r_resources[resource]; break; case RCTL_SUBJECT_TYPE_JAIL: available = rule->rr_amount - cred->cr_prison->pr_prison_racct->prr_racct-> r_resources[resource]; break; default: panic("rctl_compute_available: unknown per %d", rule->rr_per); } return (available); } /* * Return non-zero if allocating 'amount' by proc 'p' would exceed * resource limit specified by 'rule'. */ static int rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule, int64_t amount) { int64_t available; ASSERT_RACCT_ENABLED(); rw_assert(&rctl_lock, RA_LOCKED); available = rctl_available_resource(p, rule); if (available >= amount) return (0); return (1); } /* * Special version of rctl_available() function for the %cpu resource. * We slightly cheat here and return less than we normally would. */ int64_t rctl_pcpu_available(const struct proc *p) { struct rctl_rule *rule; struct rctl_rule_link *link; int64_t available, minavailable, limit; ASSERT_RACCT_ENABLED(); minavailable = INT64_MAX; limit = 0; rw_rlock(&rctl_lock); LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { rule = link->rrl_rule; if (rule->rr_resource != RACCT_PCTCPU) continue; if (rule->rr_action != RCTL_ACTION_DENY) continue; available = rctl_available_resource(p, rule); if (available < minavailable) { minavailable = available; limit = rule->rr_amount; } } rw_runlock(&rctl_lock); /* * Return slightly less than actual value of the available * %cpu resource. This makes %cpu throttling more agressive * and lets us act sooner than the limits are already exceeded. */ if (limit != 0) { if (limit > 2 * RCTL_PCPU_SHIFT) minavailable -= RCTL_PCPU_SHIFT; else minavailable -= (limit / 2); } return (minavailable); } /* * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition * to what it keeps allocated now. Returns non-zero if the allocation should * be denied, 0 otherwise. */ int rctl_enforce(struct proc *p, int resource, uint64_t amount) { struct rctl_rule *rule; struct rctl_rule_link *link; struct sbuf sb; int should_deny = 0; char *buf; static int curtime = 0; static struct timeval lasttime; ASSERT_RACCT_ENABLED(); rw_rlock(&rctl_lock); /* * There may be more than one matching rule; go through all of them. * Denial should be done last, after logging and sending signals. */ LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { rule = link->rrl_rule; if (rule->rr_resource != resource) continue; if (!rctl_would_exceed(p, rule, amount)) { link->rrl_exceeded = 0; continue; } switch (rule->rr_action) { case RCTL_ACTION_DENY: should_deny = 1; continue; case RCTL_ACTION_LOG: /* * If rrl_exceeded != 0, it means we've already * logged a warning for this process. 
*/ if (link->rrl_exceeded != 0) continue; /* * If the process state is not fully initialized yet, * we can't access most of the required fields, e.g. * p->p_comm. This happens when called from fork1(). * Ignore this rule for now; it will be processed just * after fork, when called from racct_proc_fork_done(). */ if (p->p_state != PRS_NORMAL) continue; if (!ppsratecheck(&lasttime, &curtime, 10)) continue; buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); if (buf == NULL) { printf("rctl_enforce: out of memory\n"); continue; } sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); rctl_rule_to_sbuf(&sb, rule); sbuf_finish(&sb); printf("rctl: rule \"%s\" matched by pid %d " "(%s), uid %d, jail %s\n", sbuf_data(&sb), p->p_pid, p->p_comm, p->p_ucred->cr_uid, p->p_ucred->cr_prison->pr_prison_racct->prr_name); sbuf_delete(&sb); free(buf, M_RCTL); link->rrl_exceeded = 1; continue; case RCTL_ACTION_DEVCTL: if (link->rrl_exceeded != 0) continue; if (p->p_state != PRS_NORMAL) continue; buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); if (buf == NULL) { printf("rctl_enforce: out of memory\n"); continue; } sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); sbuf_printf(&sb, "rule="); rctl_rule_to_sbuf(&sb, rule); sbuf_printf(&sb, " pid=%d ruid=%d jail=%s", p->p_pid, p->p_ucred->cr_ruid, p->p_ucred->cr_prison->pr_prison_racct->prr_name); sbuf_finish(&sb); devctl_notify_f("RCTL", "rule", "matched", sbuf_data(&sb), M_NOWAIT); sbuf_delete(&sb); free(buf, M_RCTL); link->rrl_exceeded = 1; continue; default: if (link->rrl_exceeded != 0) continue; if (p->p_state != PRS_NORMAL) continue; KASSERT(rule->rr_action > 0 && rule->rr_action <= RCTL_ACTION_SIGNAL_MAX, ("rctl_enforce: unknown action %d", rule->rr_action)); /* * We're using the fact that RCTL_ACTION_SIG* values * are equal to their counterparts from sys/signal.h. */ kern_psignal(p, rule->rr_action); link->rrl_exceeded = 1; continue; } } rw_runlock(&rctl_lock); if (should_deny) { /* * Return fake error code; the caller should change it * into one proper for the situation - EFSIZ, ENOMEM etc. */ return (EDOOFUS); } return (0); } uint64_t rctl_get_limit(struct proc *p, int resource) { struct rctl_rule *rule; struct rctl_rule_link *link; uint64_t amount = UINT64_MAX; ASSERT_RACCT_ENABLED(); rw_rlock(&rctl_lock); /* * There may be more than one matching rule; go through all of them. * Denial should be done last, after logging and sending signals. */ LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { rule = link->rrl_rule; if (rule->rr_resource != resource) continue; if (rule->rr_action != RCTL_ACTION_DENY) continue; if (rule->rr_amount < amount) amount = rule->rr_amount; } rw_runlock(&rctl_lock); return (amount); } uint64_t rctl_get_available(struct proc *p, int resource) { struct rctl_rule *rule; struct rctl_rule_link *link; int64_t available, minavailable, allocated; minavailable = INT64_MAX; ASSERT_RACCT_ENABLED(); rw_rlock(&rctl_lock); /* * There may be more than one matching rule; go through all of them. * Denial should be done last, after logging and sending signals. */ LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { rule = link->rrl_rule; if (rule->rr_resource != resource) continue; if (rule->rr_action != RCTL_ACTION_DENY) continue; available = rctl_available_resource(p, rule); if (available < minavailable) minavailable = available; } rw_runlock(&rctl_lock); /* * XXX: Think about this _hard_. 
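/*
 * Illustrative sketch (FreeBSD userland, hypothetical caller): the
 * EDOOFUS returned by rctl_enforce() above is only a placeholder; each
 * caller is expected to rewrite it into an errno that fits its context,
 * e.g. ENOMEM for a denied memory allocation.
 */
#include <errno.h>
#include <stdio.h>

static int
fake_enforce(int would_exceed)
{
	return (would_exceed ? EDOOFUS : 0);	/* placeholder error code */
}

int
main(void)
{
	int error;

	error = fake_enforce(1);
	if (error != 0)
		error = ENOMEM;		/* translate for this call site */
	printf("error = %d\n", error);
	return (0);
}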
*/ allocated = p->p_racct->r_resources[resource]; if (minavailable < INT64_MAX - allocated) minavailable += allocated; if (minavailable < 0) minavailable = 0; return (minavailable); } static int rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter) { ASSERT_RACCT_ENABLED(); if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) { if (rule->rr_subject_type != filter->rr_subject_type) return (0); switch (filter->rr_subject_type) { case RCTL_SUBJECT_TYPE_PROCESS: if (filter->rr_subject.rs_proc != NULL && rule->rr_subject.rs_proc != filter->rr_subject.rs_proc) return (0); break; case RCTL_SUBJECT_TYPE_USER: if (filter->rr_subject.rs_uip != NULL && rule->rr_subject.rs_uip != filter->rr_subject.rs_uip) return (0); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: if (filter->rr_subject.rs_loginclass != NULL && rule->rr_subject.rs_loginclass != filter->rr_subject.rs_loginclass) return (0); break; case RCTL_SUBJECT_TYPE_JAIL: if (filter->rr_subject.rs_prison_racct != NULL && rule->rr_subject.rs_prison_racct != filter->rr_subject.rs_prison_racct) return (0); break; default: panic("rctl_rule_matches: unknown subject type %d", filter->rr_subject_type); } } if (filter->rr_resource != RACCT_UNDEFINED) { if (rule->rr_resource != filter->rr_resource) return (0); } if (filter->rr_action != RCTL_ACTION_UNDEFINED) { if (rule->rr_action != filter->rr_action) return (0); } if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) { if (rule->rr_amount != filter->rr_amount) return (0); } if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) { if (rule->rr_per != filter->rr_per) return (0); } return (1); } static int str2value(const char *str, int *value, struct dict *table) { int i; if (value == NULL) return (EINVAL); for (i = 0; table[i].d_name != NULL; i++) { if (strcasecmp(table[i].d_name, str) == 0) { *value = table[i].d_value; return (0); } } return (EINVAL); } static int str2id(const char *str, id_t *value) { char *end; if (str == NULL) return (EINVAL); *value = strtoul(str, &end, 10); if ((size_t)(end - str) != strlen(str)) return (EINVAL); return (0); } static int str2int64(const char *str, int64_t *value) { char *end; if (str == NULL) return (EINVAL); *value = strtoul(str, &end, 10); if ((size_t)(end - str) != strlen(str)) return (EINVAL); return (0); } /* * Connect the rule to the racct, increasing refcount for the rule. */ static void rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule) { struct rctl_rule_link *link; ASSERT_RACCT_ENABLED(); KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); rctl_rule_acquire(rule); link = uma_zalloc(rctl_rule_link_zone, M_WAITOK); link->rrl_rule = rule; link->rrl_exceeded = 0; rw_wlock(&rctl_lock); LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); rw_wunlock(&rctl_lock); } static int rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule) { struct rctl_rule_link *link; ASSERT_RACCT_ENABLED(); KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); rw_assert(&rctl_lock, RA_WLOCKED); link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT); if (link == NULL) return (ENOMEM); rctl_rule_acquire(rule); link->rrl_rule = rule; link->rrl_exceeded = 0; LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); return (0); } /* * Remove limits for a rules matching the filter and release * the refcounts for the rules, possibly freeing them. Returns * the number of limit structures removed. 
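/*
 * Illustrative sketch (userland): rctl_rule_matches() above treats every
 * UNDEFINED field in the filter as a wildcard; only fields that are set
 * must compare equal. A reduced model with two fields:
 */
#include <stdio.h>

#define UNDEFINED	(-1)

struct mini_rule {
	int resource;
	int action;
};

static int
matches(const struct mini_rule *rule, const struct mini_rule *filter)
{
	if (filter->resource != UNDEFINED && rule->resource != filter->resource)
		return (0);
	if (filter->action != UNDEFINED && rule->action != filter->action)
		return (0);
	return (1);
}

int
main(void)
{
	struct mini_rule rule = { 5, 2 };
	struct mini_rule any = { UNDEFINED, UNDEFINED };
	struct mini_rule wrong = { 5, 3 };

	printf("%d %d\n", matches(&rule, &any), matches(&rule, &wrong));
	return (0);
}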
*/ static int rctl_racct_remove_rules(struct racct *racct, const struct rctl_rule *filter) { int removed = 0; struct rctl_rule_link *link, *linktmp; ASSERT_RACCT_ENABLED(); rw_assert(&rctl_lock, RA_WLOCKED); LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) { if (!rctl_rule_matches(link->rrl_rule, filter)) continue; LIST_REMOVE(link, rrl_next); rctl_rule_release(link->rrl_rule); uma_zfree(rctl_rule_link_zone, link); removed++; } return (removed); } static void rctl_rule_acquire_subject(struct rctl_rule *rule) { ASSERT_RACCT_ENABLED(); switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_UNDEFINED: case RCTL_SUBJECT_TYPE_PROCESS: break; case RCTL_SUBJECT_TYPE_JAIL: if (rule->rr_subject.rs_prison_racct != NULL) prison_racct_hold(rule->rr_subject.rs_prison_racct); break; case RCTL_SUBJECT_TYPE_USER: if (rule->rr_subject.rs_uip != NULL) uihold(rule->rr_subject.rs_uip); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: if (rule->rr_subject.rs_loginclass != NULL) loginclass_hold(rule->rr_subject.rs_loginclass); break; default: panic("rctl_rule_acquire_subject: unknown subject type %d", rule->rr_subject_type); } } static void rctl_rule_release_subject(struct rctl_rule *rule) { ASSERT_RACCT_ENABLED(); switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_UNDEFINED: case RCTL_SUBJECT_TYPE_PROCESS: break; case RCTL_SUBJECT_TYPE_JAIL: if (rule->rr_subject.rs_prison_racct != NULL) prison_racct_free(rule->rr_subject.rs_prison_racct); break; case RCTL_SUBJECT_TYPE_USER: if (rule->rr_subject.rs_uip != NULL) uifree(rule->rr_subject.rs_uip); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: if (rule->rr_subject.rs_loginclass != NULL) loginclass_free(rule->rr_subject.rs_loginclass); break; default: panic("rctl_rule_release_subject: unknown subject type %d", rule->rr_subject_type); } } struct rctl_rule * rctl_rule_alloc(int flags) { struct rctl_rule *rule; ASSERT_RACCT_ENABLED(); rule = uma_zalloc(rctl_rule_zone, flags); if (rule == NULL) return (NULL); rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; rule->rr_subject.rs_proc = NULL; rule->rr_subject.rs_uip = NULL; rule->rr_subject.rs_loginclass = NULL; rule->rr_subject.rs_prison_racct = NULL; rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; rule->rr_resource = RACCT_UNDEFINED; rule->rr_action = RCTL_ACTION_UNDEFINED; rule->rr_amount = RCTL_AMOUNT_UNDEFINED; refcount_init(&rule->rr_refcount, 1); return (rule); } struct rctl_rule * rctl_rule_duplicate(const struct rctl_rule *rule, int flags) { struct rctl_rule *copy; ASSERT_RACCT_ENABLED(); copy = uma_zalloc(rctl_rule_zone, flags); if (copy == NULL) return (NULL); copy->rr_subject_type = rule->rr_subject_type; copy->rr_subject.rs_proc = rule->rr_subject.rs_proc; copy->rr_subject.rs_uip = rule->rr_subject.rs_uip; copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass; copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct; copy->rr_per = rule->rr_per; copy->rr_resource = rule->rr_resource; copy->rr_action = rule->rr_action; copy->rr_amount = rule->rr_amount; refcount_init(&copy->rr_refcount, 1); rctl_rule_acquire_subject(copy); return (copy); } void rctl_rule_acquire(struct rctl_rule *rule) { ASSERT_RACCT_ENABLED(); KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); refcount_acquire(&rule->rr_refcount); } static void rctl_rule_free(void *context, int pending) { struct rctl_rule *rule; rule = (struct rctl_rule *)context; ASSERT_RACCT_ENABLED(); KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0")); /* * We don't need locking here; rule is guaranteed to be inaccessible.
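/*
 * Illustrative sketch (userland, single-threaded model; the kernel
 * version uses atomics): the refcounting used here frees a rule only
 * when the final reference goes away; refcount_release() reports true
 * exactly once, for the last drop.
 */
#include <stdio.h>

static int refcount = 2;

static int
release(void)
{
	return (--refcount == 0);	/* non-zero only for the last drop */
}

int
main(void)
{
	printf("first release frees: %d\n", release());	/* 0 */
	printf("last release frees: %d\n", release());	/* 1 */
	return (0);
}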
*/ rctl_rule_release_subject(rule); uma_zfree(rctl_rule_zone, rule); } void rctl_rule_release(struct rctl_rule *rule) { ASSERT_RACCT_ENABLED(); KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); if (refcount_release(&rule->rr_refcount)) { /* * rctl_rule_release() is often called when iterating * over all the uidinfo structures in the system, * holding uihashtbl_lock. Since rctl_rule_free() * might end up calling uifree(), this would lead * to lock recursion. Use taskqueue to avoid this. */ TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule); taskqueue_enqueue(taskqueue_thread, &rule->rr_task); } } static int rctl_rule_fully_specified(const struct rctl_rule *rule) { ASSERT_RACCT_ENABLED(); switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_UNDEFINED: return (0); case RCTL_SUBJECT_TYPE_PROCESS: if (rule->rr_subject.rs_proc == NULL) return (0); break; case RCTL_SUBJECT_TYPE_USER: if (rule->rr_subject.rs_uip == NULL) return (0); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: if (rule->rr_subject.rs_loginclass == NULL) return (0); break; case RCTL_SUBJECT_TYPE_JAIL: if (rule->rr_subject.rs_prison_racct == NULL) return (0); break; default: panic("rctl_rule_fully_specified: unknown subject type %d", rule->rr_subject_type); } if (rule->rr_resource == RACCT_UNDEFINED) return (0); if (rule->rr_action == RCTL_ACTION_UNDEFINED) return (0); if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED) return (0); if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED) return (0); return (1); } static int rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep) { int error = 0; char *subjectstr, *subject_idstr, *resourcestr, *actionstr, *amountstr, *perstr; struct rctl_rule *rule; id_t id; ASSERT_RACCT_ENABLED(); rule = rctl_rule_alloc(M_WAITOK); subjectstr = strsep(&rulestr, ":"); subject_idstr = strsep(&rulestr, ":"); resourcestr = strsep(&rulestr, ":"); actionstr = strsep(&rulestr, "=/"); amountstr = strsep(&rulestr, "/"); perstr = rulestr; if (subjectstr == NULL || subjectstr[0] == '\0') rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; else { error = str2value(subjectstr, &rule->rr_subject_type, subjectnames); if (error != 0) goto out; } if (subject_idstr == NULL || subject_idstr[0] == '\0') { rule->rr_subject.rs_proc = NULL; rule->rr_subject.rs_uip = NULL; rule->rr_subject.rs_loginclass = NULL; rule->rr_subject.rs_prison_racct = NULL; } else { switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_UNDEFINED: error = EINVAL; goto out; case RCTL_SUBJECT_TYPE_PROCESS: error = str2id(subject_idstr, &id); if (error != 0) goto out; sx_assert(&allproc_lock, SA_LOCKED); rule->rr_subject.rs_proc = pfind(id); if (rule->rr_subject.rs_proc == NULL) { error = ESRCH; goto out; } PROC_UNLOCK(rule->rr_subject.rs_proc); break; case RCTL_SUBJECT_TYPE_USER: error = str2id(subject_idstr, &id); if (error != 0) goto out; rule->rr_subject.rs_uip = uifind(id); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: rule->rr_subject.rs_loginclass = loginclass_find(subject_idstr); if (rule->rr_subject.rs_loginclass == NULL) { error = ENAMETOOLONG; goto out; } break; case RCTL_SUBJECT_TYPE_JAIL: rule->rr_subject.rs_prison_racct = prison_racct_find(subject_idstr); if (rule->rr_subject.rs_prison_racct == NULL) { error = ENAMETOOLONG; goto out; } break; default: panic("rctl_string_to_rule: unknown subject type %d", rule->rr_subject_type); } } if (resourcestr == NULL || resourcestr[0] == '\0') rule->rr_resource = RACCT_UNDEFINED; else { error = str2value(resourcestr, &rule->rr_resource, resourcenames); if (error != 0) goto out; } if (actionstr == 
NULL || actionstr[0] == '\0') rule->rr_action = RCTL_ACTION_UNDEFINED; else { error = str2value(actionstr, &rule->rr_action, actionnames); if (error != 0) goto out; } if (amountstr == NULL || amountstr[0] == '\0') rule->rr_amount = RCTL_AMOUNT_UNDEFINED; else { error = str2int64(amountstr, &rule->rr_amount); if (error != 0) goto out; if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) rule->rr_amount *= 1000000; } if (perstr == NULL || perstr[0] == '\0') rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; else { error = str2value(perstr, &rule->rr_per, subjectnames); if (error != 0) goto out; } out: if (error == 0) *rulep = rule; else rctl_rule_release(rule); return (error); } /* * Link a rule with all the subjects it applies to. */ int rctl_rule_add(struct rctl_rule *rule) { struct proc *p; struct ucred *cred; struct uidinfo *uip; struct prison *pr; struct prison_racct *prr; struct loginclass *lc; struct rctl_rule *rule2; int match; ASSERT_RACCT_ENABLED(); KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); /* * Some rules just don't make sense. Note that the one below * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU, * for example, is not deniable in the racct sense, but the * limit is enforced in a different way, so "deny" rules for %CPU * do make sense. */ if (rule->rr_action == RCTL_ACTION_DENY && (rule->rr_resource == RACCT_CPU || rule->rr_resource == RACCT_WALLCLOCK)) return (EOPNOTSUPP); if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS && RACCT_IS_SLOPPY(rule->rr_resource)) return (EOPNOTSUPP); /* * Make sure there are no duplicated rules. Also, for the "deny" * rules, remove ones differing only by "amount". */ if (rule->rr_action == RCTL_ACTION_DENY) { rule2 = rctl_rule_duplicate(rule, M_WAITOK); rule2->rr_amount = RCTL_AMOUNT_UNDEFINED; rctl_rule_remove(rule2); rctl_rule_release(rule2); } else rctl_rule_remove(rule); switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_PROCESS: p = rule->rr_subject.rs_proc; KASSERT(p != NULL, ("rctl_rule_add: NULL proc")); rctl_racct_add_rule(p->p_racct, rule); /* * In case of per-process rule, we don't have anything more * to do. */ return (0); case RCTL_SUBJECT_TYPE_USER: uip = rule->rr_subject.rs_uip; KASSERT(uip != NULL, ("rctl_rule_add: NULL uip")); rctl_racct_add_rule(uip->ui_racct, rule); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: lc = rule->rr_subject.rs_loginclass; KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass")); rctl_racct_add_rule(lc->lc_racct, rule); break; case RCTL_SUBJECT_TYPE_JAIL: prr = rule->rr_subject.rs_prison_racct; KASSERT(prr != NULL, ("rctl_rule_add: NULL pr")); rctl_racct_add_rule(prr->prr_racct, rule); break; default: panic("rctl_rule_add: unknown subject type %d", rule->rr_subject_type); } /* * Now go through all the processes and add the new rule to the ones * it applies to. 
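/*
 * Illustrative sketch (userland): the strsep() sequence in
 * rctl_string_to_rule() above splits "subject:id:resource:action=amount/per"
 * into six fields; the "=/" separator set lets the action be terminated by
 * either '=' or '/'. The example input follows the
 * 'user:X:openfiles:deny=N/process' form mentioned earlier.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	char buf[] = "user:1001:openfiles:deny=128/process";
	char *rulestr = buf;
	char *subject, *id, *resource, *action, *amount, *per;

	subject = strsep(&rulestr, ":");
	id = strsep(&rulestr, ":");
	resource = strsep(&rulestr, ":");
	action = strsep(&rulestr, "=/");
	amount = strsep(&rulestr, "/");
	per = rulestr;

	printf("%s | %s | %s | %s | %s | %s\n",
	    subject, id, resource, action, amount, per);
	return (0);
}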
*/ sx_assert(&allproc_lock, SA_LOCKED); FOREACH_PROC_IN_SYSTEM(p) { cred = p->p_ucred; switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_USER: if (cred->cr_uidinfo == rule->rr_subject.rs_uip || cred->cr_ruidinfo == rule->rr_subject.rs_uip) break; continue; case RCTL_SUBJECT_TYPE_LOGINCLASS: if (cred->cr_loginclass == rule->rr_subject.rs_loginclass) break; continue; case RCTL_SUBJECT_TYPE_JAIL: match = 0; for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) { if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) { match = 1; break; } } if (match) break; continue; default: panic("rctl_rule_add: unknown subject type %d", rule->rr_subject_type); } rctl_racct_add_rule(p->p_racct, rule); } return (0); } static void +rctl_rule_pre_callback(void) +{ + + rw_wlock(&rctl_lock); +} + +static void +rctl_rule_post_callback(void) +{ + + rw_wunlock(&rctl_lock); +} + +static void rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3) { struct rctl_rule *filter = (struct rctl_rule *)arg2; int found = 0; ASSERT_RACCT_ENABLED(); + rw_assert(&rctl_lock, RA_WLOCKED); - rw_wlock(&rctl_lock); found += rctl_racct_remove_rules(racct, filter); - rw_wunlock(&rctl_lock); *((int *)arg3) += found; } /* * Remove all rules that match the filter. */ int rctl_rule_remove(struct rctl_rule *filter) { int found = 0; struct proc *p; ASSERT_RACCT_ENABLED(); if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS && filter->rr_subject.rs_proc != NULL) { p = filter->rr_subject.rs_proc; rw_wlock(&rctl_lock); found = rctl_racct_remove_rules(p->p_racct, filter); rw_wunlock(&rctl_lock); if (found) return (0); return (ESRCH); } - loginclass_racct_foreach(rctl_rule_remove_callback, filter, - (void *)&found); - ui_racct_foreach(rctl_rule_remove_callback, filter, - (void *)&found); - prison_racct_foreach(rctl_rule_remove_callback, filter, - (void *)&found); + loginclass_racct_foreach(rctl_rule_remove_callback, + rctl_rule_pre_callback, rctl_rule_post_callback, + filter, (void *)&found); + ui_racct_foreach(rctl_rule_remove_callback, + rctl_rule_pre_callback, rctl_rule_post_callback, + filter, (void *)&found); + prison_racct_foreach(rctl_rule_remove_callback, + rctl_rule_pre_callback, rctl_rule_post_callback, + filter, (void *)&found); sx_assert(&allproc_lock, SA_LOCKED); rw_wlock(&rctl_lock); FOREACH_PROC_IN_SYSTEM(p) { found += rctl_racct_remove_rules(p->p_racct, filter); } rw_wunlock(&rctl_lock); if (found) return (0); return (ESRCH); } /* * Appends a rule to the sbuf. 
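/*
 * Illustrative sketch (userland, pthreads standing in for the kernel
 * rwlock): the pre/post callbacks added above let each *_racct_foreach()
 * helper take the lock once around the whole iteration, instead of the
 * per-racct callback locking and unlocking on every item.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void
pre(void)
{
	pthread_mutex_lock(&lock);
}

static void
post(void)
{
	pthread_mutex_unlock(&lock);
}

static void
foreach(void (*cb)(int), void (*precb)(void), void (*postcb)(void),
    const int *items, int n)
{
	int i;

	precb();		/* one acquisition for the whole walk */
	for (i = 0; i < n; i++)
		cb(items[i]);
	postcb();
}

static void
visit(int item)
{
	printf("visiting %d\n", item);	/* runs with the lock held */
}

int
main(void)
{
	const int items[] = { 1, 2, 3 };

	foreach(visit, pre, post, items, 3);
	return (0);
}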
*/ static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule) { int64_t amount; ASSERT_RACCT_ENABLED(); sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type)); switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_PROCESS: if (rule->rr_subject.rs_proc == NULL) sbuf_printf(sb, ":"); else sbuf_printf(sb, "%d:", rule->rr_subject.rs_proc->p_pid); break; case RCTL_SUBJECT_TYPE_USER: if (rule->rr_subject.rs_uip == NULL) sbuf_printf(sb, ":"); else sbuf_printf(sb, "%d:", rule->rr_subject.rs_uip->ui_uid); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: if (rule->rr_subject.rs_loginclass == NULL) sbuf_printf(sb, ":"); else sbuf_printf(sb, "%s:", rule->rr_subject.rs_loginclass->lc_name); break; case RCTL_SUBJECT_TYPE_JAIL: if (rule->rr_subject.rs_prison_racct == NULL) sbuf_printf(sb, ":"); else sbuf_printf(sb, "%s:", rule->rr_subject.rs_prison_racct->prr_name); break; default: panic("rctl_rule_to_sbuf: unknown subject type %d", rule->rr_subject_type); } amount = rule->rr_amount; if (amount != RCTL_AMOUNT_UNDEFINED && RACCT_IS_IN_MILLIONS(rule->rr_resource)) amount /= 1000000; sbuf_printf(sb, "%s:%s=%jd", rctl_resource_name(rule->rr_resource), rctl_action_name(rule->rr_action), amount); if (rule->rr_per != rule->rr_subject_type) sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per)); } /* * Routine used by RCTL syscalls to read in input string. */ static int rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen) { int error; char *str; ASSERT_RACCT_ENABLED(); if (inbuflen <= 0) return (EINVAL); if (inbuflen > RCTL_MAX_INBUFSIZE) return (E2BIG); str = malloc(inbuflen + 1, M_RCTL, M_WAITOK); error = copyinstr(inbufp, str, inbuflen, NULL); if (error != 0) { free(str, M_RCTL); return (error); } *inputstr = str; return (0); } /* * Routine used by RCTL syscalls to write out output string. 
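/*
 * Illustrative sketch (userland): rctl_rule_to_sbuf() above renders a
 * rule as "subject:id:resource:action=amount", appending "/per" only
 * when 'per' differs from the subject type; %cpu-style amounts are
 * divided back down from millionths first. A snprintf model of the
 * same layout:
 */
#include <stdio.h>

int
main(void)
{
	char out[64];
	long long amount = 128;
	const char *per = "process";	/* differs from subject "user" */

	snprintf(out, sizeof(out), "%s:%s:%s:%s=%lld/%s",
	    "user", "1001", "openfiles", "deny", amount, per);
	printf("%s\n", out);	/* user:1001:openfiles:deny=128/process */
	return (0);
}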
*/ static int rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen) { int error; ASSERT_RACCT_ENABLED(); if (outputsbuf == NULL) return (0); sbuf_finish(outputsbuf); if (outbuflen < sbuf_len(outputsbuf) + 1) { sbuf_delete(outputsbuf); return (ERANGE); } error = copyout(sbuf_data(outputsbuf), outbufp, sbuf_len(outputsbuf) + 1); sbuf_delete(outputsbuf); return (error); } static struct sbuf * rctl_racct_to_sbuf(struct racct *racct, int sloppy) { int i; int64_t amount; struct sbuf *sb; ASSERT_RACCT_ENABLED(); sb = sbuf_new_auto(); for (i = 0; i <= RACCT_MAX; i++) { if (sloppy == 0 && RACCT_IS_SLOPPY(i)) continue; amount = racct->r_resources[i]; if (RACCT_IS_IN_MILLIONS(i)) amount /= 1000000; sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount); } sbuf_setpos(sb, sbuf_len(sb) - 1); return (sb); } int sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) { int error; char *inputstr; struct rctl_rule *filter; struct sbuf *outputsbuf = NULL; struct proc *p; struct uidinfo *uip; struct loginclass *lc; struct prison_racct *prr; if (!racct_enable) return (ENOSYS); error = priv_check(td, PRIV_RCTL_GET_RACCT); if (error != 0) return (error); error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); if (error != 0) return (error); sx_slock(&allproc_lock); error = rctl_string_to_rule(inputstr, &filter); free(inputstr, M_RCTL); if (error != 0) { sx_sunlock(&allproc_lock); return (error); } switch (filter->rr_subject_type) { case RCTL_SUBJECT_TYPE_PROCESS: p = filter->rr_subject.rs_proc; if (p == NULL) { error = EINVAL; goto out; } outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0); break; case RCTL_SUBJECT_TYPE_USER: uip = filter->rr_subject.rs_uip; if (uip == NULL) { error = EINVAL; goto out; } outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: lc = filter->rr_subject.rs_loginclass; if (lc == NULL) { error = EINVAL; goto out; } outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1); break; case RCTL_SUBJECT_TYPE_JAIL: prr = filter->rr_subject.rs_prison_racct; if (prr == NULL) { error = EINVAL; goto out; } outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1); break; default: error = EINVAL; } out: rctl_rule_release(filter); sx_sunlock(&allproc_lock); if (error != 0) return (error); error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen); return (error); } static void rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3) { struct rctl_rule *filter = (struct rctl_rule *)arg2; struct rctl_rule_link *link; struct sbuf *sb = (struct sbuf *)arg3; ASSERT_RACCT_ENABLED(); + rw_assert(&rctl_lock, RA_LOCKED); - rw_rlock(&rctl_lock); LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { if (!rctl_rule_matches(link->rrl_rule, filter)) continue; rctl_rule_to_sbuf(sb, link->rrl_rule); sbuf_printf(sb, ","); } - rw_runlock(&rctl_lock); } int sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) { int error; size_t bufsize; char *inputstr, *buf; struct sbuf *sb; struct rctl_rule *filter; struct rctl_rule_link *link; struct proc *p; if (!racct_enable) return (ENOSYS); error = priv_check(td, PRIV_RCTL_GET_RULES); if (error != 0) return (error); error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); if (error != 0) return (error); sx_slock(&allproc_lock); error = rctl_string_to_rule(inputstr, &filter); free(inputstr, M_RCTL); if (error != 0) { sx_sunlock(&allproc_lock); return (error); } bufsize = uap->outbuflen; if (bufsize > rctl_maxbufsize) { sx_sunlock(&allproc_lock); return (E2BIG); } buf = 
malloc(bufsize, M_RCTL, M_WAITOK); sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); KASSERT(sb != NULL, ("sbuf_new failed")); FOREACH_PROC_IN_SYSTEM(p) { rw_rlock(&rctl_lock); LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { /* * Non-process rules will be added to the buffer later. * Adding them here would result in duplicated output. */ if (link->rrl_rule->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) continue; if (!rctl_rule_matches(link->rrl_rule, filter)) continue; rctl_rule_to_sbuf(sb, link->rrl_rule); sbuf_printf(sb, ","); } rw_runlock(&rctl_lock); } - loginclass_racct_foreach(rctl_get_rules_callback, filter, sb); - ui_racct_foreach(rctl_get_rules_callback, filter, sb); - prison_racct_foreach(rctl_get_rules_callback, filter, sb); + loginclass_racct_foreach(rctl_get_rules_callback, + rctl_rule_pre_callback, rctl_rule_post_callback, + filter, sb); + ui_racct_foreach(rctl_get_rules_callback, + rctl_rule_pre_callback, rctl_rule_post_callback, + filter, sb); + prison_racct_foreach(rctl_get_rules_callback, + rctl_rule_pre_callback, rctl_rule_post_callback, + filter, sb); if (sbuf_error(sb) == ENOMEM) { error = ERANGE; goto out; } /* * Remove trailing ",". */ if (sbuf_len(sb) > 0) sbuf_setpos(sb, sbuf_len(sb) - 1); error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); out: rctl_rule_release(filter); sx_sunlock(&allproc_lock); free(buf, M_RCTL); return (error); } int sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) { int error; size_t bufsize; char *inputstr, *buf; struct sbuf *sb; struct rctl_rule *filter; struct rctl_rule_link *link; if (!racct_enable) return (ENOSYS); error = priv_check(td, PRIV_RCTL_GET_LIMITS); if (error != 0) return (error); error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); if (error != 0) return (error); sx_slock(&allproc_lock); error = rctl_string_to_rule(inputstr, &filter); free(inputstr, M_RCTL); if (error != 0) { sx_sunlock(&allproc_lock); return (error); } if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) { rctl_rule_release(filter); sx_sunlock(&allproc_lock); return (EINVAL); } if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) { rctl_rule_release(filter); sx_sunlock(&allproc_lock); return (EOPNOTSUPP); } if (filter->rr_subject.rs_proc == NULL) { rctl_rule_release(filter); sx_sunlock(&allproc_lock); return (EINVAL); } bufsize = uap->outbuflen; if (bufsize > rctl_maxbufsize) { rctl_rule_release(filter); sx_sunlock(&allproc_lock); return (E2BIG); } buf = malloc(bufsize, M_RCTL, M_WAITOK); sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); KASSERT(sb != NULL, ("sbuf_new failed")); rw_rlock(&rctl_lock); LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links, rrl_next) { rctl_rule_to_sbuf(sb, link->rrl_rule); sbuf_printf(sb, ","); } rw_runlock(&rctl_lock); if (sbuf_error(sb) == ENOMEM) { error = ERANGE; goto out; } /* * Remove trailing ",". 
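/*
 * Illustrative sketch (userland): both rctl_racct_to_sbuf() and the rule
 * listing above append a ',' after every entry and then rewind the sbuf
 * by one byte to drop the trailing separator. The same trick with a
 * plain buffer:
 */
#include <stdio.h>

int
main(void)
{
	char buf[64];
	const char *names[] = { "cputime", "maxproc", "openfiles" };
	int amounts[] = { 12, 3, 128 };
	size_t len = 0;
	int i;

	for (i = 0; i < 3; i++)
		len += snprintf(buf + len, sizeof(buf) - len, "%s=%d,",
		    names[i], amounts[i]);
	if (len > 0)
		buf[len - 1] = '\0';	/* drop the trailing ',' */
	printf("%s\n", buf);
	return (0);
}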
*/ if (sbuf_len(sb) > 0) sbuf_setpos(sb, sbuf_len(sb) - 1); error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); out: rctl_rule_release(filter); sx_sunlock(&allproc_lock); free(buf, M_RCTL); return (error); } int sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) { int error; struct rctl_rule *rule; char *inputstr; if (!racct_enable) return (ENOSYS); error = priv_check(td, PRIV_RCTL_ADD_RULE); if (error != 0) return (error); error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); if (error != 0) return (error); sx_slock(&allproc_lock); error = rctl_string_to_rule(inputstr, &rule); free(inputstr, M_RCTL); if (error != 0) { sx_sunlock(&allproc_lock); return (error); } /* * The 'per' part of a rule is optional. */ if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED && rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) rule->rr_per = rule->rr_subject_type; if (!rctl_rule_fully_specified(rule)) { error = EINVAL; goto out; } error = rctl_rule_add(rule); out: rctl_rule_release(rule); sx_sunlock(&allproc_lock); return (error); } int sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) { int error; struct rctl_rule *filter; char *inputstr; if (!racct_enable) return (ENOSYS); error = priv_check(td, PRIV_RCTL_REMOVE_RULE); if (error != 0) return (error); error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); if (error != 0) return (error); sx_slock(&allproc_lock); error = rctl_string_to_rule(inputstr, &filter); free(inputstr, M_RCTL); if (error != 0) { sx_sunlock(&allproc_lock); return (error); } error = rctl_rule_remove(filter); rctl_rule_release(filter); sx_sunlock(&allproc_lock); return (error); } /* * Update RCTL rule list after credential change. */ void rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred) { int rulecnt, i; struct rctl_rule_link *link, *newlink; struct uidinfo *newuip; struct loginclass *newlc; struct prison_racct *newprr; LIST_HEAD(, rctl_rule_link) newrules; ASSERT_RACCT_ENABLED(); newuip = newcred->cr_ruidinfo; newlc = newcred->cr_loginclass; newprr = newcred->cr_prison->pr_prison_racct; LIST_INIT(&newrules); again: /* * First, count the rules that apply to the process with new * credentials. */ rulecnt = 0; rw_rlock(&rctl_lock); LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { if (link->rrl_rule->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS) rulecnt++; } LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) rulecnt++; LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) rulecnt++; LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) rulecnt++; rw_runlock(&rctl_lock); /* * Create temporary list. We've dropped the rctl_lock in order * to use M_WAITOK. */ for (i = 0; i < rulecnt; i++) { newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK); newlink->rrl_rule = NULL; LIST_INSERT_HEAD(&newrules, newlink, rrl_next); } newlink = LIST_FIRST(&newrules); /* * Assign rules to the newly allocated list entries. 
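/*
 * Illustrative sketch (userland): the 'again:' loop above counts rules
 * under the lock, drops the lock to allocate (M_WAITOK may sleep), then
 * rechecks the count under the write lock and retries if the lists
 * changed in the meantime. The shape of that count/allocate/revalidate
 * pattern:
 */
#include <stdio.h>
#include <stdlib.h>

static int rule_count = 4;	/* stands in for the locked rule lists */

int
main(void)
{
	int *slots, count;

	for (;;) {
		count = rule_count;	/* counted under the (read) lock */
		/* Lock dropped here: the allocation may sleep. */
		slots = calloc(count, sizeof(*slots));
		if (slots == NULL)
			abort();
		if (count == rule_count)	/* recheck under write lock */
			break;
		free(slots);			/* stale count: retry */
	}
	printf("prepared %d slots\n", count);
	free(slots);
	return (0);
}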
*/ rw_wlock(&rctl_lock); LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { if (link->rrl_rule->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS) { if (newlink == NULL) goto goaround; rctl_rule_acquire(link->rrl_rule); newlink->rrl_rule = link->rrl_rule; newlink = LIST_NEXT(newlink, rrl_next); rulecnt--; } } LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) { if (newlink == NULL) goto goaround; rctl_rule_acquire(link->rrl_rule); newlink->rrl_rule = link->rrl_rule; newlink = LIST_NEXT(newlink, rrl_next); rulecnt--; } LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) { if (newlink == NULL) goto goaround; rctl_rule_acquire(link->rrl_rule); newlink->rrl_rule = link->rrl_rule; newlink = LIST_NEXT(newlink, rrl_next); rulecnt--; } LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) { if (newlink == NULL) goto goaround; rctl_rule_acquire(link->rrl_rule); newlink->rrl_rule = link->rrl_rule; newlink = LIST_NEXT(newlink, rrl_next); rulecnt--; } if (rulecnt == 0) { /* * Free the old rule list. */ while (!LIST_EMPTY(&p->p_racct->r_rule_links)) { link = LIST_FIRST(&p->p_racct->r_rule_links); LIST_REMOVE(link, rrl_next); rctl_rule_release(link->rrl_rule); uma_zfree(rctl_rule_link_zone, link); } /* * Replace lists and we're done. * * XXX: Is there any way to switch list heads instead * of iterating here? */ while (!LIST_EMPTY(&newrules)) { newlink = LIST_FIRST(&newrules); LIST_REMOVE(newlink, rrl_next); LIST_INSERT_HEAD(&p->p_racct->r_rule_links, newlink, rrl_next); } rw_wunlock(&rctl_lock); return; } goaround: rw_wunlock(&rctl_lock); /* * Rule list changed while we were not holding the rctl_lock. * Free the new list and try again. */ while (!LIST_EMPTY(&newrules)) { newlink = LIST_FIRST(&newrules); LIST_REMOVE(newlink, rrl_next); if (newlink->rrl_rule != NULL) rctl_rule_release(newlink->rrl_rule); uma_zfree(rctl_rule_link_zone, newlink); } goto again; } /* * Assign RCTL rules to the newly created process. */ int rctl_proc_fork(struct proc *parent, struct proc *child) { int error; struct rctl_rule_link *link; struct rctl_rule *rule; LIST_INIT(&child->p_racct->r_rule_links); ASSERT_RACCT_ENABLED(); KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent)); rw_wlock(&rctl_lock); /* * Go through limits applicable to the parent and assign them * to the child. Rules with 'process' subject have to be duplicated * in order to make their rr_subject point to the new process. */ LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) { if (link->rrl_rule->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS) { rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT); if (rule == NULL) goto fail; KASSERT(rule->rr_subject.rs_proc == parent, ("rule->rr_subject.rs_proc != parent")); rule->rr_subject.rs_proc = child; error = rctl_racct_add_rule_locked(child->p_racct, rule); rctl_rule_release(rule); if (error != 0) goto fail; } else { error = rctl_racct_add_rule_locked(child->p_racct, link->rrl_rule); if (error != 0) goto fail; } } rw_wunlock(&rctl_lock); return (0); fail: while (!LIST_EMPTY(&child->p_racct->r_rule_links)) { link = LIST_FIRST(&child->p_racct->r_rule_links); LIST_REMOVE(link, rrl_next); rctl_rule_release(link->rrl_rule); uma_zfree(rctl_rule_link_zone, link); } rw_wunlock(&rctl_lock); return (EAGAIN); } /* * Release rules attached to the racct. 
*/ void rctl_racct_release(struct racct *racct) { struct rctl_rule_link *link; ASSERT_RACCT_ENABLED(); rw_wlock(&rctl_lock); while (!LIST_EMPTY(&racct->r_rule_links)) { link = LIST_FIRST(&racct->r_rule_links); LIST_REMOVE(link, rrl_next); rctl_rule_release(link->rrl_rule); uma_zfree(rctl_rule_link_zone, link); } rw_wunlock(&rctl_lock); } static void rctl_init(void) { if (!racct_enable) return; rctl_rule_link_zone = uma_zcreate("rctl_rule_link", sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); } #else /* !RCTL */ int sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) { return (ENOSYS); } int sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) { return (ENOSYS); } int sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) { return (ENOSYS); } int sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) { return (ENOSYS); } int sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) { return (ENOSYS); } #endif /* !RCTL */ Index: projects/powernv/kern/kern_resource.c =================================================================== --- projects/powernv/kern/kern_resource.c (revision 290990) +++ projects/powernv/kern/kern_resource.c (revision 290991) @@ -1,1438 +1,1443 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures"); static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures"); #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) static struct rwlock uihashtbl_lock; static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; static u_long uihash; /* size of hash table - 1 */ static void calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, struct timeval *sp); static int donice(struct thread *td, struct proc *chgp, int n); static struct uidinfo *uilookup(uid_t uid); static void ruxagg_locked(struct rusage_ext *rux, struct thread *td); static __inline int lim_shared(struct plimit *limp); /* * Resource controls and accounting. */ #ifndef _SYS_SYSPROTO_H_ struct getpriority_args { int which; int who; }; #endif int sys_getpriority(struct thread *td, register struct getpriority_args *uap) { struct proc *p; struct pgrp *pg; int error, low; error = 0; low = PRIO_MAX + 1; switch (uap->which) { case PRIO_PROCESS: if (uap->who == 0) low = td->td_proc->p_nice; else { p = pfind(uap->who); if (p == NULL) break; if (p_cansee(td, p) == 0) low = p->p_nice; PROC_UNLOCK(p); } break; case PRIO_PGRP: sx_slock(&proctree_lock); if (uap->who == 0) { pg = td->td_proc->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(uap->who); if (pg == NULL) { sx_sunlock(&proctree_lock); break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p_cansee(td, p) == 0) { if (p->p_nice < low) low = p->p_nice; } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p_cansee(td, p) == 0 && p->p_ucred->cr_uid == uap->who) { if (p->p_nice < low) low = p->p_nice; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (low == PRIO_MAX + 1 && error == 0) error = ESRCH; td->td_retval[0] = low; return (error); } #ifndef _SYS_SYSPROTO_H_ struct setpriority_args { int which; int who; int prio; }; #endif int sys_setpriority(struct thread *td, struct setpriority_args *uap) { struct proc *curp, *p; struct pgrp *pg; int found = 0, error = 0; curp = td->td_proc; switch (uap->which) { case PRIO_PROCESS: if (uap->who == 0) { PROC_LOCK(curp); error = donice(td, curp, uap->prio); PROC_UNLOCK(curp); } else { p = pfind(uap->who); if (p == NULL) break; error = p_cansee(td, p); if (error == 0) error = donice(td, p, uap->prio); PROC_UNLOCK(p); } found++; break; case PRIO_PGRP: sx_slock(&proctree_lock); if (uap->who == 0) { pg = curp->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(uap->who); if (pg == NULL) { sx_sunlock(&proctree_lock); break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p_cansee(td, p) == 0) { error = donice(td, p, uap->prio); found++; } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p->p_ucred->cr_uid == uap->who && p_cansee(td, p) == 0) { error = donice(td, p, uap->prio); 
found++; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (found == 0 && error == 0) error = ESRCH; return (error); } /* * Set "nice" for a (whole) process. */ static int donice(struct thread *td, struct proc *p, int n) { int error; PROC_LOCK_ASSERT(p, MA_OWNED); if ((error = p_cansched(td, p))) return (error); if (n > PRIO_MAX) n = PRIO_MAX; if (n < PRIO_MIN) n = PRIO_MIN; if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0) return (EACCES); sched_nice(p, n); return (0); } static int unprivileged_idprio; SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_idprio, CTLFLAG_RW, &unprivileged_idprio, 0, "Allow non-root users to set an idle priority"); /* * Set realtime priority for LWP. */ #ifndef _SYS_SYSPROTO_H_ struct rtprio_thread_args { int function; lwpid_t lwpid; struct rtprio *rtp; }; #endif int sys_rtprio_thread(struct thread *td, struct rtprio_thread_args *uap) { struct proc *p; struct rtprio rtp; struct thread *td1; int cierror, error; /* Perform copyin before acquiring locks if needed. */ if (uap->function == RTP_SET) cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); else cierror = 0; if (uap->lwpid == 0 || uap->lwpid == td->td_tid) { p = td->td_proc; td1 = td; PROC_LOCK(p); } else { /* Only look up thread in current process */ td1 = tdfind(uap->lwpid, curproc->p_pid); if (td1 == NULL) return (ESRCH); p = td1->td_proc; } switch (uap->function) { case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; pri_to_rtp(td1, &rtp); PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: if ((error = p_cansched(td, p)) || (error = cierror)) break; /* Disallow setting rtprio in most cases if not superuser. */ /* * Realtime priority has to be restricted for reasons which * should be obvious. However, for idleprio processes, there is * a potential for system deadlock if an idleprio process gains * a lock on a resource that other processes need (and the * idleprio process can't run due to a CPU-bound normal * process). Fix me! XXX * * This problem is not only related to idleprio process. * A user level program can obtain a file lock and hold it * indefinitely. Additionally, without idleprio processes it is * still conceivable that a program with low priority will never * get to run. In short, allowing this feature might make it * easier to lock a resource indefinitely, but it is not the * only thing that makes it possible. */ if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME || (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE && unprivileged_idprio == 0)) { error = priv_check(td, PRIV_SCHED_RTPRIO); if (error) break; } error = rtp_to_pri(&rtp, td1); break; default: error = EINVAL; break; } PROC_UNLOCK(p); return (error); } /* * Set realtime priority. */ #ifndef _SYS_SYSPROTO_H_ struct rtprio_args { int function; pid_t pid; struct rtprio *rtp; }; #endif int sys_rtprio(struct thread *td, register struct rtprio_args *uap) { struct proc *p; struct thread *tdp; struct rtprio rtp; int cierror, error; /* Perform copyin before acquiring locks if needed. */ if (uap->function == RTP_SET) cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); else cierror = 0; if (uap->pid == 0) { p = td->td_proc; PROC_LOCK(p); } else { p = pfind(uap->pid); if (p == NULL) return (ESRCH); } switch (uap->function) { case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; /* * Return OUR priority if no pid specified, * or if one is, report the highest priority * in the process. 
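/*
 * Illustrative sketch (userland): donice() above clamps the requested
 * nice value into [PRIO_MIN, PRIO_MAX] before applying it; lowering nice
 * additionally requires privilege. The clamping by itself:
 */
#include <stdio.h>
#include <sys/resource.h>	/* PRIO_MIN, PRIO_MAX */

static int
clamp_nice(int n)
{
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	return (n);
}

int
main(void)
{
	printf("%d %d %d\n", clamp_nice(100), clamp_nice(-100),
	    clamp_nice(5));	/* 20 -20 5 */
	return (0);
}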
There isn't much more you can do as * there is only room to return a single priority. * Note: specifying our own pid is not the same * as leaving it zero. */ if (uap->pid == 0) { pri_to_rtp(td, &rtp); } else { struct rtprio rtp2; rtp.type = RTP_PRIO_IDLE; rtp.prio = RTP_PRIO_MAX; FOREACH_THREAD_IN_PROC(p, tdp) { pri_to_rtp(tdp, &rtp2); if (rtp2.type < rtp.type || (rtp2.type == rtp.type && rtp2.prio < rtp.prio)) { rtp.type = rtp2.type; rtp.prio = rtp2.prio; } } } PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: if ((error = p_cansched(td, p)) || (error = cierror)) break; /* * Disallow setting rtprio in most cases if not superuser. * See the comment in sys_rtprio_thread about idprio * threads holding a lock. */ if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME || (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE && !unprivileged_idprio)) { error = priv_check(td, PRIV_SCHED_RTPRIO); if (error) break; } /* * If we are setting our own priority, set just our * thread but if we are doing another process, * do all the threads on that process. If we * specify our own pid we do the latter. */ if (uap->pid == 0) { error = rtp_to_pri(&rtp, td); } else { FOREACH_THREAD_IN_PROC(p, td) { if ((error = rtp_to_pri(&rtp, td)) != 0) break; } } break; default: error = EINVAL; break; } PROC_UNLOCK(p); return (error); } int rtp_to_pri(struct rtprio *rtp, struct thread *td) { u_char newpri, oldclass, oldpri; switch (RTP_PRIO_BASE(rtp->type)) { case RTP_PRIO_REALTIME: if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); newpri = PRI_MIN_REALTIME + rtp->prio; break; case RTP_PRIO_NORMAL: if (rtp->prio > (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE)) return (EINVAL); newpri = PRI_MIN_TIMESHARE + rtp->prio; break; case RTP_PRIO_IDLE: if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); newpri = PRI_MIN_IDLE + rtp->prio; break; default: return (EINVAL); } thread_lock(td); oldclass = td->td_pri_class; sched_class(td, rtp->type); /* XXX fix */ oldpri = td->td_user_pri; sched_user_prio(td, newpri); if (td->td_user_pri != oldpri && (oldclass != RTP_PRIO_NORMAL || td->td_pri_class != RTP_PRIO_NORMAL)) sched_prio(td, td->td_user_pri); if (TD_ON_UPILOCK(td) && oldpri != newpri) { critical_enter(); thread_unlock(td); umtx_pi_adjust(td, oldpri); critical_exit(); } else thread_unlock(td); return (0); } void pri_to_rtp(struct thread *td, struct rtprio *rtp) { thread_lock(td); switch (PRI_BASE(td->td_pri_class)) { case PRI_REALTIME: rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME; break; case PRI_TIMESHARE: rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE; break; case PRI_IDLE: rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE; break; default: break; } rtp->type = td->td_pri_class; thread_unlock(td); } #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct osetrlimit_args { u_int which; struct orlimit *rlp; }; #endif int osetrlimit(struct thread *td, register struct osetrlimit_args *uap) { struct orlimit olim; struct rlimit lim; int error; if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit)))) return (error); lim.rlim_cur = olim.rlim_cur; lim.rlim_max = olim.rlim_max; error = kern_setrlimit(td, uap->which, &lim); return (error); } #ifndef _SYS_SYSPROTO_H_ struct ogetrlimit_args { u_int which; struct orlimit *rlp; }; #endif int ogetrlimit(struct thread *td, register struct ogetrlimit_args *uap) { struct orlimit olim; struct rlimit rl; int error; if (uap->which >= RLIM_NLIMITS) return (EINVAL); lim_rlimit(td, uap->which, &rl); /* * XXX would be more correct to convert only RLIM_INFINITY to the * old 
RLIM_INFINITY and fail with EOVERFLOW for other larger * values. Most 64->32 and 32->16 conversions, including not * unimportant ones of uids are even more broken than what we * do here (they blindly truncate). We don't do this correctly * here since we have little experience with EOVERFLOW yet. * Elsewhere, getuid() can't fail... */ olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur; olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max; error = copyout(&olim, uap->rlp, sizeof(olim)); return (error); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct __setrlimit_args { u_int which; struct rlimit *rlp; }; #endif int sys_setrlimit(struct thread *td, register struct __setrlimit_args *uap) { struct rlimit alim; int error; if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit)))) return (error); error = kern_setrlimit(td, uap->which, &alim); return (error); } static void lim_cb(void *arg) { struct rlimit rlim; struct thread *td; struct proc *p; p = arg; PROC_LOCK_ASSERT(p, MA_OWNED); /* * Check if the process exceeds its cpu resource allocation. If * it reaches the max, arrange to kill the process in ast(). */ if (p->p_cpulimit == RLIM_INFINITY) return; PROC_STATLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { ruxagg(p, td); } PROC_STATUNLOCK(p); if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) { lim_rlimit_proc(p, RLIMIT_CPU, &rlim); if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) { killproc(p, "exceeded maximum CPU limit"); } else { if (p->p_cpulimit < rlim.rlim_max) p->p_cpulimit += 5; kern_psignal(p, SIGXCPU); } } if ((p->p_flag & P_WEXIT) == 0) callout_reset_sbt(&p->p_limco, SBT_1S, 0, lim_cb, p, C_PREL(1)); } int kern_setrlimit(struct thread *td, u_int which, struct rlimit *limp) { return (kern_proc_setrlimit(td, td->td_proc, which, limp)); } int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which, struct rlimit *limp) { struct plimit *newlim, *oldlim; register struct rlimit *alimp; struct rlimit oldssiz; int error; if (which >= RLIM_NLIMITS) return (EINVAL); /* * Preserve historical bugs by treating negative limits as unsigned. 
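	 * For example, a limit passed in as -1 is taken by the checks
	 * just below as RLIM_INFINITY instead of failing with EINVAL.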
*/ if (limp->rlim_cur < 0) limp->rlim_cur = RLIM_INFINITY; if (limp->rlim_max < 0) limp->rlim_max = RLIM_INFINITY; oldssiz.rlim_cur = 0; newlim = lim_alloc(); PROC_LOCK(p); oldlim = p->p_limit; alimp = &oldlim->pl_rlimit[which]; if (limp->rlim_cur > alimp->rlim_max || limp->rlim_max > alimp->rlim_max) if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) { PROC_UNLOCK(p); lim_free(newlim); return (error); } if (limp->rlim_cur > limp->rlim_max) limp->rlim_cur = limp->rlim_max; lim_copy(newlim, oldlim); alimp = &newlim->pl_rlimit[which]; switch (which) { case RLIMIT_CPU: if (limp->rlim_cur != RLIM_INFINITY && p->p_cpulimit == RLIM_INFINITY) callout_reset_sbt(&p->p_limco, SBT_1S, 0, lim_cb, p, C_PREL(1)); p->p_cpulimit = limp->rlim_cur; break; case RLIMIT_DATA: if (limp->rlim_cur > maxdsiz) limp->rlim_cur = maxdsiz; if (limp->rlim_max > maxdsiz) limp->rlim_max = maxdsiz; break; case RLIMIT_STACK: if (limp->rlim_cur > maxssiz) limp->rlim_cur = maxssiz; if (limp->rlim_max > maxssiz) limp->rlim_max = maxssiz; oldssiz = *alimp; if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(&oldssiz, RLIMIT_STACK); break; case RLIMIT_NOFILE: if (limp->rlim_cur > maxfilesperproc) limp->rlim_cur = maxfilesperproc; if (limp->rlim_max > maxfilesperproc) limp->rlim_max = maxfilesperproc; break; case RLIMIT_NPROC: if (limp->rlim_cur > maxprocperuid) limp->rlim_cur = maxprocperuid; if (limp->rlim_max > maxprocperuid) limp->rlim_max = maxprocperuid; if (limp->rlim_cur < 1) limp->rlim_cur = 1; if (limp->rlim_max < 1) limp->rlim_max = 1; break; } if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(limp, which); *alimp = *limp; p->p_limit = newlim; PROC_UPDATE_COW(p); PROC_UNLOCK(p); lim_free(oldlim); if (which == RLIMIT_STACK && /* * Skip calls from exec_new_vmspace(), done when stack is * not mapped yet. */ (td != curthread || (p->p_flag & P_INEXEC) == 0)) { /* * Stack is allocated to the max at exec time with only * "rlim_cur" bytes accessible. If stack limit is going * up make more accessible, if going down make inaccessible. */ if (limp->rlim_cur != oldssiz.rlim_cur) { vm_offset_t addr; vm_size_t size; vm_prot_t prot; if (limp->rlim_cur > oldssiz.rlim_cur) { prot = p->p_sysent->sv_stackprot; size = limp->rlim_cur - oldssiz.rlim_cur; addr = p->p_sysent->sv_usrstack - limp->rlim_cur; } else { prot = VM_PROT_NONE; size = oldssiz.rlim_cur - limp->rlim_cur; addr = p->p_sysent->sv_usrstack - oldssiz.rlim_cur; } addr = trunc_page(addr); size = round_page(size); (void)vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot, FALSE); } } return (0); } #ifndef _SYS_SYSPROTO_H_ struct __getrlimit_args { u_int which; struct rlimit *rlp; }; #endif /* ARGSUSED */ int sys_getrlimit(struct thread *td, register struct __getrlimit_args *uap) { struct rlimit rlim; int error; if (uap->which >= RLIM_NLIMITS) return (EINVAL); lim_rlimit(td, uap->which, &rlim); error = copyout(&rlim, uap->rlp, sizeof(struct rlimit)); return (error); } /* * Transform the running time and tick information for children of proc p * into user and system time usage. */ void calccru(struct proc *p, struct timeval *up, struct timeval *sp) { PROC_LOCK_ASSERT(p, MA_OWNED); calcru1(p, &p->p_crux, up, sp); } /* * Transform the running time and tick information in proc p into user * and system time usage. If appropriate, include the current time slice * on this CPU. 
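 * The current slice is folded in by sampling cpu_ticks() and charging the
 * delta since the last context switch to the thread, exactly as a context
 * switch would, before calcru1() converts the accumulated totals.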
*/ void calcru(struct proc *p, struct timeval *up, struct timeval *sp) { struct thread *td; uint64_t runtime, u; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_STATLOCK_ASSERT(p, MA_OWNED); /* * If we are getting stats for the current process, then add in the * stats that this thread has accumulated in its current time slice. * We reset the thread and CPU state as if we had performed a context * switch right here. */ td = curthread; if (td->td_proc == p) { u = cpu_ticks(); runtime = u - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, u); } /* Make sure the per-thread stats are current. */ FOREACH_THREAD_IN_PROC(p, td) { if (td->td_incruntime == 0) continue; ruxagg(p, td); } calcru1(p, &p->p_rux, up, sp); } /* Collect resource usage for a single thread. */ void rufetchtd(struct thread *td, struct rusage *ru) { struct proc *p; uint64_t runtime, u; p = td->td_proc; PROC_STATLOCK_ASSERT(p, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * If we are getting stats for the current thread, then add in the * stats that this thread has accumulated in its current time slice. * We reset the thread and CPU state as if we had performed a context * switch right here. */ if (td == curthread) { u = cpu_ticks(); runtime = u - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, u); } ruxagg(p, td); *ru = td->td_ru; calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime); } static void calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, struct timeval *sp) { /* {user, system, interrupt, total} {ticks, usec}: */ uint64_t ut, uu, st, su, it, tt, tu; ut = ruxp->rux_uticks; st = ruxp->rux_sticks; it = ruxp->rux_iticks; tt = ut + st + it; if (tt == 0) { /* Avoid divide by zero */ st = 1; tt = 1; } tu = cputick2usec(ruxp->rux_runtime); if ((int64_t)tu < 0) { /* XXX: this should be an assert /phk */ printf("calcru: negative runtime of %jd usec for pid %d (%s)\n", (intmax_t)tu, p->p_pid, p->p_comm); tu = ruxp->rux_tu; } if (tu >= ruxp->rux_tu) { /* * The normal case, time increased. * Enforce monotonicity of bucketed numbers. */ uu = (tu * ut) / tt; if (uu < ruxp->rux_uu) uu = ruxp->rux_uu; su = (tu * st) / tt; if (su < ruxp->rux_su) su = ruxp->rux_su; } else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) { /* * When we calibrate the cputicker, it is not uncommon to * see the presumably fixed frequency increase slightly over * time as a result of thermal stabilization and NTP * discipline (of the reference clock). We therefore ignore * a bit of backwards slop because we expect to catch up * shortly. We use a 3 microsecond limit to catch low * counts and a 1% limit for high counts. */ uu = ruxp->rux_uu; su = ruxp->rux_su; tu = ruxp->rux_tu; } else { /* tu < ruxp->rux_tu */ /* * What happened here was likely that a laptop, which ran at * a reduced clock frequency at boot, kicked into high gear. * The wisdom of spamming this message in that case is * dubious, but it might also be indicative of something * serious, so lets keep it and hope laptops can be made * more truthful about their CPU speed via ACPI. 
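	 * As in the normal case, the recomputation below apportions the
	 * total tu across the buckets by tick share: e.g. tt = 4 ticks of
	 * which ut = 3 were user ticks splits tu = 1000 usec into
	 * uu = 750 usec of user time and su = 250 usec of system time.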
*/ printf("calcru: runtime went backwards from %ju usec " "to %ju usec for pid %d (%s)\n", (uintmax_t)ruxp->rux_tu, (uintmax_t)tu, p->p_pid, p->p_comm); uu = (tu * ut) / tt; su = (tu * st) / tt; } ruxp->rux_uu = uu; ruxp->rux_su = su; ruxp->rux_tu = tu; up->tv_sec = uu / 1000000; up->tv_usec = uu % 1000000; sp->tv_sec = su / 1000000; sp->tv_usec = su % 1000000; } #ifndef _SYS_SYSPROTO_H_ struct getrusage_args { int who; struct rusage *rusage; }; #endif int sys_getrusage(register struct thread *td, register struct getrusage_args *uap) { struct rusage ru; int error; error = kern_getrusage(td, uap->who, &ru); if (error == 0) error = copyout(&ru, uap->rusage, sizeof(struct rusage)); return (error); } int kern_getrusage(struct thread *td, int who, struct rusage *rup) { struct proc *p; int error; error = 0; p = td->td_proc; PROC_LOCK(p); switch (who) { case RUSAGE_SELF: rufetchcalc(p, rup, &rup->ru_utime, &rup->ru_stime); break; case RUSAGE_CHILDREN: *rup = p->p_stats->p_cru; calccru(p, &rup->ru_utime, &rup->ru_stime); break; case RUSAGE_THREAD: PROC_STATLOCK(p); thread_lock(td); rufetchtd(td, rup); thread_unlock(td); PROC_STATUNLOCK(p); break; default: error = EINVAL; } PROC_UNLOCK(p); return (error); } void rucollect(struct rusage *ru, struct rusage *ru2) { long *ip, *ip2; int i; if (ru->ru_maxrss < ru2->ru_maxrss) ru->ru_maxrss = ru2->ru_maxrss; ip = &ru->ru_first; ip2 = &ru2->ru_first; for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) *ip++ += *ip2++; } void ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2, struct rusage_ext *rux2) { rux->rux_runtime += rux2->rux_runtime; rux->rux_uticks += rux2->rux_uticks; rux->rux_sticks += rux2->rux_sticks; rux->rux_iticks += rux2->rux_iticks; rux->rux_uu += rux2->rux_uu; rux->rux_su += rux2->rux_su; rux->rux_tu += rux2->rux_tu; rucollect(ru, ru2); } /* * Aggregate tick counts into the proc's rusage_ext. */ static void ruxagg_locked(struct rusage_ext *rux, struct thread *td) { THREAD_LOCK_ASSERT(td, MA_OWNED); PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED); rux->rux_runtime += td->td_incruntime; rux->rux_uticks += td->td_uticks; rux->rux_sticks += td->td_sticks; rux->rux_iticks += td->td_iticks; } void ruxagg(struct proc *p, struct thread *td) { thread_lock(td); ruxagg_locked(&p->p_rux, td); ruxagg_locked(&td->td_rux, td); td->td_incruntime = 0; td->td_uticks = 0; td->td_iticks = 0; td->td_sticks = 0; thread_unlock(td); } /* * Update the rusage_ext structure and fetch a valid aggregate rusage * for proc p if storage for one is supplied. */ void rufetch(struct proc *p, struct rusage *ru) { struct thread *td; PROC_STATLOCK_ASSERT(p, MA_OWNED); *ru = p->p_ru; if (p->p_numthreads > 0) { FOREACH_THREAD_IN_PROC(p, td) { ruxagg(p, td); rucollect(ru, &td->td_ru); } } } /* * Atomically perform a rufetch and a calcru together. * Consumers, can safely assume the calcru is executed only once * rufetch is completed. */ void rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up, struct timeval *sp) { PROC_STATLOCK(p); rufetch(p, ru); calcru(p, up, sp); PROC_STATUNLOCK(p); } /* * Allocate a new resource limits structure and initialize its * reference count and mutex pointer. 
*/ struct plimit * lim_alloc() { struct plimit *limp; limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK); refcount_init(&limp->pl_refcnt, 1); return (limp); } struct plimit * lim_hold(struct plimit *limp) { refcount_acquire(&limp->pl_refcnt); return (limp); } static __inline int lim_shared(struct plimit *limp) { return (limp->pl_refcnt > 1); } void lim_fork(struct proc *p1, struct proc *p2) { PROC_LOCK_ASSERT(p1, MA_OWNED); PROC_LOCK_ASSERT(p2, MA_OWNED); p2->p_limit = lim_hold(p1->p_limit); callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0); if (p1->p_cpulimit != RLIM_INFINITY) callout_reset_sbt(&p2->p_limco, SBT_1S, 0, lim_cb, p2, C_PREL(1)); } void lim_free(struct plimit *limp) { if (refcount_release(&limp->pl_refcnt)) free((void *)limp, M_PLIMIT); } /* * Make a copy of the plimit structure. * We share these structures copy-on-write after fork. */ void lim_copy(struct plimit *dst, struct plimit *src) { KASSERT(!lim_shared(dst), ("lim_copy to shared limit")); bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit)); } /* * Return the hard limit for a particular system resource. The * which parameter specifies the index into the rlimit array. */ rlim_t lim_max(struct thread *td, int which) { struct rlimit rl; lim_rlimit(td, which, &rl); return (rl.rlim_max); } rlim_t lim_max_proc(struct proc *p, int which) { struct rlimit rl; lim_rlimit_proc(p, which, &rl); return (rl.rlim_max); } /* * Return the current (soft) limit for a particular system resource. * The which parameter which specifies the index into the rlimit array */ rlim_t lim_cur(struct thread *td, int which) { struct rlimit rl; lim_rlimit(td, which, &rl); return (rl.rlim_cur); } rlim_t lim_cur_proc(struct proc *p, int which) { struct rlimit rl; lim_rlimit_proc(p, which, &rl); return (rl.rlim_cur); } /* * Return a copy of the entire rlimit structure for the system limit * specified by 'which' in the rlimit structure pointed to by 'rlp'. */ void lim_rlimit(struct thread *td, int which, struct rlimit *rlp) { struct proc *p = td->td_proc; MPASS(td == curthread); KASSERT(which >= 0 && which < RLIM_NLIMITS, ("request for invalid resource limit")); *rlp = td->td_limit->pl_rlimit[which]; if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(rlp, which); } void lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp) { PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(which >= 0 && which < RLIM_NLIMITS, ("request for invalid resource limit")); *rlp = p->p_limit->pl_rlimit[which]; if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(rlp, which); } void uihashinit() { uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash); rw_init(&uihashtbl_lock, "uidinfo hash"); } /* * Look up a uidinfo struct for the parameter uid. * uihashtbl_lock must be locked. * Increase refcount on uidinfo struct returned. */ static struct uidinfo * uilookup(uid_t uid) { struct uihashhead *uipp; struct uidinfo *uip; rw_assert(&uihashtbl_lock, RA_LOCKED); uipp = UIHASH(uid); LIST_FOREACH(uip, uipp, ui_hash) if (uip->ui_uid == uid) { uihold(uip); break; } return (uip); } /* * Find or allocate a struct uidinfo for a particular uid. * Returns with uidinfo struct referenced. * uifree() should be called on a struct uidinfo when released. 
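 * An illustrative (not verbatim) caller pairs the two around a per-uid
 * accounting update:
 *
 *	uip = uifind(uid);
 *	(void)chgproccnt(uip, 1, lim);
 *	...
 *	uifree(uip);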
*/ struct uidinfo * uifind(uid_t uid) { struct uidinfo *new_uip, *uip; rw_rlock(&uihashtbl_lock); uip = uilookup(uid); rw_runlock(&uihashtbl_lock); if (uip != NULL) return (uip); new_uip = malloc(sizeof(*new_uip), M_UIDINFO, M_WAITOK | M_ZERO); racct_create(&new_uip->ui_racct); refcount_init(&new_uip->ui_ref, 1); new_uip->ui_uid = uid; mtx_init(&new_uip->ui_vmsize_mtx, "ui_vmsize", NULL, MTX_DEF); rw_wlock(&uihashtbl_lock); /* * There's a chance someone created our uidinfo while we * were in malloc and not holding the lock, so we have to * make sure we don't insert a duplicate uidinfo. */ if ((uip = uilookup(uid)) == NULL) { LIST_INSERT_HEAD(UIHASH(uid), new_uip, ui_hash); rw_wunlock(&uihashtbl_lock); uip = new_uip; } else { rw_wunlock(&uihashtbl_lock); racct_destroy(&new_uip->ui_racct); mtx_destroy(&new_uip->ui_vmsize_mtx); free(new_uip, M_UIDINFO); } return (uip); } /* * Place another refcount on a uidinfo struct. */ void uihold(struct uidinfo *uip) { refcount_acquire(&uip->ui_ref); } /*- * Since uidinfo structs have a long lifetime, we use an * opportunistic refcounting scheme to avoid locking the lookup hash * for each release. * * If the refcount hits 0, we need to free the structure, * which means we need to lock the hash. * Optimal case: * After locking the struct and lowering the refcount, if we find * that we don't need to free, simply unlock and return. * Suboptimal case: * If refcount lowering results in need to free, bump the count * back up, lose the lock and acquire the locks in the proper * order to try again. */ void uifree(struct uidinfo *uip) { int old; /* Prepare for optimal case. */ old = uip->ui_ref; if (old > 1 && atomic_cmpset_int(&uip->ui_ref, old, old - 1)) return; /* Prepare for suboptimal case. */ rw_wlock(&uihashtbl_lock); if (refcount_release(&uip->ui_ref) == 0) { rw_wunlock(&uihashtbl_lock); return; } racct_destroy(&uip->ui_racct); LIST_REMOVE(uip, ui_hash); rw_wunlock(&uihashtbl_lock); if (uip->ui_sbsize != 0) printf("freeing uidinfo: uid = %d, sbsize = %ld\n", uip->ui_uid, uip->ui_sbsize); if (uip->ui_proccnt != 0) printf("freeing uidinfo: uid = %d, proccnt = %ld\n", uip->ui_uid, uip->ui_proccnt); if (uip->ui_vmsize != 0) printf("freeing uidinfo: uid = %d, swapuse = %lld\n", uip->ui_uid, (unsigned long long)uip->ui_vmsize); mtx_destroy(&uip->ui_vmsize_mtx); free(uip, M_UIDINFO); } #ifdef RACCT void ui_racct_foreach(void (*callback)(struct racct *racct, - void *arg2, void *arg3), void *arg2, void *arg3) + void *arg2, void *arg3), void (*pre)(void), void (*post)(void), + void *arg2, void *arg3) { struct uidinfo *uip; struct uihashhead *uih; rw_rlock(&uihashtbl_lock); + if (pre != NULL) + (pre)(); for (uih = &uihashtbl[uihash]; uih >= uihashtbl; uih--) { LIST_FOREACH(uip, uih, ui_hash) { (callback)(uip->ui_racct, arg2, arg3); } } + if (post != NULL) + (post)(); rw_runlock(&uihashtbl_lock); } #endif static inline int chglimit(struct uidinfo *uip, long *limit, int diff, rlim_t max, const char *name) { /* Don't allow them to exceed max, but allow subtraction. */ if (diff > 0 && max != 0) { if (atomic_fetchadd_long(limit, (long)diff) + diff > max) { atomic_subtract_long(limit, (long)diff); return (0); } } else { atomic_add_long(limit, (long)diff); if (*limit < 0) printf("negative %s for uid = %d\n", name, uip->ui_uid); } return (1); } /* * Change the count associated with number of processes * a given user is using. 
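 * It returns 1 on success and 0 when the increment would push the count
 * past 'max'; decrements always succeed.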
When 'max' is 0, don't enforce a limit */ int chgproccnt(struct uidinfo *uip, int diff, rlim_t max) { return (chglimit(uip, &uip->ui_proccnt, diff, max, "proccnt")); } /* * Change the total socket buffer size a user has used. */ int chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t max) { int diff, rv; diff = to - *hiwat; if (diff > 0 && max == 0) { rv = 0; } else { rv = chglimit(uip, &uip->ui_sbsize, diff, max, "sbsize"); if (rv != 0) *hiwat = to; } return (rv); } /* * Change the count associated with number of pseudo-terminals * a given user is using. When 'max' is 0, don't enforce a limit */ int chgptscnt(struct uidinfo *uip, int diff, rlim_t max) { return (chglimit(uip, &uip->ui_ptscnt, diff, max, "ptscnt")); } int chgkqcnt(struct uidinfo *uip, int diff, rlim_t max) { return (chglimit(uip, &uip->ui_kqcnt, diff, max, "kqcnt")); } Index: projects/powernv/mips/atheros/apb.c =================================================================== --- projects/powernv/mips/atheros/apb.c (revision 290990) +++ projects/powernv/mips/atheros/apb.c (revision 290991) @@ -1,541 +1,543 @@ /*- * Copyright (c) 2009, Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define APB_INTR_PMC 5 #undef APB_DEBUG #ifdef APB_DEBUG #define dprintf printf #else #define dprintf(x, arg...) 
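/*
 * With APB_DEBUG undefined, dprintf() expands to nothing and the debug
 * calls compile away.
 */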
#endif /* APB_DEBUG */ #define DEVTOAPB(dev) ((struct apb_ivar *) device_get_ivars(dev)) static int apb_activate_resource(device_t, device_t, int, int, struct resource *); static device_t apb_add_child(device_t, u_int, const char *, int); static struct resource * apb_alloc_resource(device_t, device_t, int, int *, u_long, u_long, u_long, u_int); static int apb_attach(device_t); static int apb_deactivate_resource(device_t, device_t, int, int, struct resource *); static struct resource_list * apb_get_resource_list(device_t, device_t); static void apb_hinted_child(device_t, const char *, int); static int apb_filter(void *); static int apb_probe(device_t); static int apb_release_resource(device_t, device_t, int, int, struct resource *); static int apb_setup_intr(device_t, device_t, struct resource *, int, driver_filter_t *, driver_intr_t *, void *, void **); static int apb_teardown_intr(device_t, device_t, struct resource *, void *); static void apb_mask_irq(void *source) { unsigned int irq = (unsigned int)source; uint32_t reg; reg = ATH_READ_REG(AR71XX_MISC_INTR_MASK); ATH_WRITE_REG(AR71XX_MISC_INTR_MASK, reg & ~(1 << irq)); } static void apb_unmask_irq(void *source) { uint32_t reg; unsigned int irq = (unsigned int)source; reg = ATH_READ_REG(AR71XX_MISC_INTR_MASK); ATH_WRITE_REG(AR71XX_MISC_INTR_MASK, reg | (1 << irq)); } static int apb_probe(device_t dev) { return (BUS_PROBE_NOWILDCARD); } static int apb_attach(device_t dev) { struct apb_softc *sc = device_get_softc(dev); int rid = 0; device_set_desc(dev, "APB Bus bridge"); sc->apb_mem_rman.rm_type = RMAN_ARRAY; sc->apb_mem_rman.rm_descr = "APB memory window"; if (rman_init(&sc->apb_mem_rman) != 0 || rman_manage_region(&sc->apb_mem_rman, AR71XX_APB_BASE, AR71XX_APB_BASE + AR71XX_APB_SIZE - 1) != 0) panic("apb_attach: failed to set up memory rman"); sc->apb_irq_rman.rm_type = RMAN_ARRAY; sc->apb_irq_rman.rm_descr = "APB IRQ"; if (rman_init(&sc->apb_irq_rman) != 0 || rman_manage_region(&sc->apb_irq_rman, APB_IRQ_BASE, APB_IRQ_END) != 0) panic("apb_attach: failed to set up IRQ rman"); if ((sc->sc_misc_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(dev, "unable to allocate IRQ resource\n"); return (ENXIO); } if ((bus_setup_intr(dev, sc->sc_misc_irq, INTR_TYPE_MISC, apb_filter, NULL, sc, &sc->sc_misc_ih))) { device_printf(dev, "WARNING: unable to register interrupt handler\n"); return (ENXIO); } bus_generic_probe(dev); bus_enumerate_hinted_children(dev); bus_generic_attach(dev); /* * Unmask performance counter IRQ */ apb_unmask_irq((void*)APB_INTR_PMC); sc->sc_intr_counter[APB_INTR_PMC] = mips_intrcnt_create("apb irq5: pmc"); return (0); } static struct resource * apb_alloc_resource(device_t bus, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct apb_softc *sc = device_get_softc(bus); struct apb_ivar *ivar = device_get_ivars(child); struct resource *rv; struct resource_list_entry *rle; struct rman *rm; int isdefault, needactivate, passthrough; isdefault = (start == 0UL && end == ~0UL); needactivate = flags & RF_ACTIVE; /* * Pass memory requests to nexus device */ passthrough = (device_get_parent(child) != bus); rle = NULL; dprintf("%s: entry (%p, %p, %d, %d, %p, %p, %ld, %d)\n", __func__, bus, child, type, *rid, (void *)(intptr_t)start, (void *)(intptr_t)end, count, flags); if (passthrough) return (BUS_ALLOC_RESOURCE(device_get_parent(bus), child, type, rid, start, end, count, flags)); /* * If this is an allocation of the "default" range for a 
given RID, * and we know what the resources for this device are (ie. they aren't * maintained by a child bus), then work out the start/end values. */ if (isdefault) { rle = resource_list_find(&ivar->resources, type, *rid); if (rle == NULL) { return (NULL); } if (rle->res != NULL) { panic("%s: resource entry is busy", __func__); } start = rle->start; end = rle->end; count = rle->count; dprintf("%s: default resource (%p, %p, %ld)\n", __func__, (void *)(intptr_t)start, (void *)(intptr_t)end, count); } switch (type) { case SYS_RES_IRQ: rm = &sc->apb_irq_rman; break; case SYS_RES_MEMORY: rm = &sc->apb_mem_rman; break; default: printf("%s: unknown resource type %d\n", __func__, type); return (0); } rv = rman_reserve_resource(rm, start, end, count, flags, child); if (rv == 0) { printf("%s: could not reserve resource\n", __func__); return (0); } rman_set_rid(rv, *rid); if (needactivate) { if (bus_activate_resource(child, type, *rid, rv)) { printf("%s: could not activate resource\n", __func__); rman_release_resource(rv); return (0); } } return (rv); } static int apb_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { /* XXX: should we mask/unmask IRQ here? */ return (BUS_ACTIVATE_RESOURCE(device_get_parent(bus), child, type, rid, r)); } static int apb_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { /* XXX: should we mask/unmask IRQ here? */ return (BUS_DEACTIVATE_RESOURCE(device_get_parent(bus), child, type, rid, r)); } static int apb_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { struct resource_list *rl; struct resource_list_entry *rle; rl = apb_get_resource_list(dev, child); if (rl == NULL) return (EINVAL); rle = resource_list_find(rl, type, rid); if (rle == NULL) return (EINVAL); rman_release_resource(r); rle->res = NULL; return (0); } static int apb_setup_intr(device_t bus, device_t child, struct resource *ires, int flags, driver_filter_t *filt, driver_intr_t *handler, void *arg, void **cookiep) { struct apb_softc *sc = device_get_softc(bus); struct intr_event *event; int irq, error; irq = rman_get_start(ires); if (irq > APB_IRQ_END) panic("%s: bad irq %d", __func__, irq); event = sc->sc_eventstab[irq]; if (event == NULL) { error = intr_event_create(&event, (void *)irq, 0, irq, apb_mask_irq, apb_unmask_irq, NULL, NULL, "apb intr%d:", irq); if (error == 0) { sc->sc_eventstab[irq] = event; sc->sc_intr_counter[irq] = mips_intrcnt_create(event->ie_name); } else return (error); } intr_event_add_handler(event, device_get_nameunit(child), filt, handler, arg, intr_priority(flags), flags, cookiep); mips_intrcnt_setname(sc->sc_intr_counter[irq], event->ie_fullname); apb_unmask_irq((void*)irq); return (0); } static int apb_teardown_intr(device_t dev, device_t child, struct resource *ires, void *cookie) { struct apb_softc *sc = device_get_softc(dev); int irq, result; irq = rman_get_start(ires); if (irq > APB_IRQ_END) panic("%s: bad irq %d", __func__, irq); if (sc->sc_eventstab[irq] == NULL) panic("Trying to teardown unoccupied IRQ"); apb_mask_irq((void*)irq); result = intr_event_remove_handler(cookie); if (!result) sc->sc_eventstab[irq] = NULL; return (result); } static int apb_filter(void *arg) { struct apb_softc *sc = arg; struct intr_event *event; uint32_t reg, irq; struct thread *td; struct trapframe *tf; reg = ATH_READ_REG(AR71XX_MISC_INTR_STATUS); for (irq = 0; irq < APB_NIRQS; irq++) { if (reg & (1 << irq)) { switch (ar71xx_soc) { case AR71XX_SOC_AR7240: case AR71XX_SOC_AR7241: 
case AR71XX_SOC_AR7242: case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: /* ACK/clear the given interrupt */ ATH_WRITE_REG(AR71XX_MISC_INTR_STATUS, (1 << irq)); break; default: /* fallthrough */ break; } event = sc->sc_eventstab[irq]; if (!event || TAILQ_EMPTY(&event->ie_handlers)) { if (irq == APB_INTR_PMC) { td = PCPU_GET(curthread); tf = td->td_intr_frame; if (pmc_intr) (*pmc_intr)(PCPU_GET(cpuid), tf); mips_intrcnt_inc(sc->sc_intr_counter[irq]); continue; } /* Ignore timer interrupts */ if (irq != 0 && irq != 8 && irq != 9 && irq != 10) printf("Stray APB IRQ %d\n", irq); continue; } intr_event_handle(event, PCPU_GET(curthread)->td_intr_frame); mips_intrcnt_inc(sc->sc_intr_counter[irq]); } } return (FILTER_HANDLED); } static void apb_hinted_child(device_t bus, const char *dname, int dunit) { device_t child; long maddr; int msize; int irq; int result; int mem_hints_count; child = BUS_ADD_CHILD(bus, 0, dname, dunit); /* * Set hard-wired resources for hinted child using * specific RIDs. */ mem_hints_count = 0; if (resource_long_value(dname, dunit, "maddr", &maddr) == 0) mem_hints_count++; if (resource_int_value(dname, dunit, "msize", &msize) == 0) mem_hints_count++; /* check if all info for mem resource has been provided */ if ((mem_hints_count > 0) && (mem_hints_count < 2)) { printf("Either maddr or msize hint is missing for %s%d\n", dname, dunit); } else if (mem_hints_count) { result = bus_set_resource(child, SYS_RES_MEMORY, 0, maddr, msize); if (result != 0) device_printf(bus, "warning: bus_set_resource() failed\n"); } if (resource_int_value(dname, dunit, "irq", &irq) == 0) { result = bus_set_resource(child, SYS_RES_IRQ, 0, irq, 1); if (result != 0) device_printf(bus, "warning: bus_set_resource() failed\n"); } } static device_t apb_add_child(device_t bus, u_int order, const char *name, int unit) { device_t child; struct apb_ivar *ivar; ivar = malloc(sizeof(struct apb_ivar), M_DEVBUF, M_WAITOK | M_ZERO); if (ivar == NULL) { printf("Failed to allocate ivar\n"); return (0); } resource_list_init(&ivar->resources); child = device_add_child_ordered(bus, order, name, unit); if (child == NULL) { printf("Can't add child %s%d ordered\n", name, unit); return (0); } device_set_ivars(child, ivar); return (child); } /* * Helper routine for bus_generic_rl_get_resource/bus_generic_rl_set_resource * Provides pointer to resource_list for these routines */ static struct resource_list * apb_get_resource_list(device_t dev, device_t child) { struct apb_ivar *ivar; ivar = device_get_ivars(child); return (&(ivar->resources)); } static int apb_print_all_resources(device_t dev) { struct apb_ivar *ndev = DEVTOAPB(dev); struct resource_list *rl = &ndev->resources; int retval = 0; if (STAILQ_FIRST(rl)) retval += printf(" at"); retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx"); retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld"); return (retval); } static int apb_print_child(device_t bus, device_t child) { int retval = 0; retval += bus_print_child_header(bus, child); retval += apb_print_all_resources(child); if (device_get_flags(child)) retval += printf(" flags %#x", device_get_flags(child)); retval += printf(" on %s\n", device_get_nameunit(bus)); return (retval); } static device_method_t apb_methods[] = { DEVMETHOD(bus_activate_resource, apb_activate_resource), DEVMETHOD(bus_add_child, apb_add_child), 
DEVMETHOD(bus_alloc_resource, apb_alloc_resource), DEVMETHOD(bus_deactivate_resource, apb_deactivate_resource), DEVMETHOD(bus_get_resource_list, apb_get_resource_list), DEVMETHOD(bus_hinted_child, apb_hinted_child), DEVMETHOD(bus_release_resource, apb_release_resource), DEVMETHOD(bus_setup_intr, apb_setup_intr), DEVMETHOD(bus_teardown_intr, apb_teardown_intr), DEVMETHOD(device_attach, apb_attach), DEVMETHOD(device_probe, apb_probe), DEVMETHOD(bus_get_resource, bus_generic_rl_get_resource), DEVMETHOD(bus_set_resource, bus_generic_rl_set_resource), DEVMETHOD(bus_print_child, apb_print_child), DEVMETHOD_END }; static driver_t apb_driver = { "apb", apb_methods, sizeof(struct apb_softc), }; static devclass_t apb_devclass; DRIVER_MODULE(apb, nexus, apb_driver, apb_devclass, 0, 0); Index: projects/powernv/mips/atheros/ar71xx_ehci.c =================================================================== --- projects/powernv/mips/atheros/ar71xx_ehci.c (revision 290990) +++ projects/powernv/mips/atheros/ar71xx_ehci.c (revision 290991) @@ -1,276 +1,278 @@ /*- * Copyright (c) 2008 Sam Leffler. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * AR71XX attachment driver for the USB Enhanced Host Controller. */ #include __FBSDID("$FreeBSD$"); #include "opt_bus.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for stuff in ar71xx_cpudef.h */ #include #include #define EHCI_HC_DEVSTR "AR71XX Integrated USB 2.0 controller" struct ar71xx_ehci_softc { ehci_softc_t base; /* storage for EHCI code */ }; static device_attach_t ar71xx_ehci_attach; static device_detach_t ar71xx_ehci_detach; bs_r_1_proto(reversed); bs_w_1_proto(reversed); static int ar71xx_ehci_probe(device_t self) { device_set_desc(self, EHCI_HC_DEVSTR); return (BUS_PROBE_NOWILDCARD); } static void ar71xx_ehci_intr(void *arg) { /* XXX TODO: should really see if this was our interrupt.. 
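 * (e.g. by reading EHCI_USBSTS and masking it against the interrupts we
 * enabled, before paying for the DDR flush)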
*/ ar71xx_device_flush_ddr(AR71XX_CPU_DDR_FLUSH_USB); ehci_interrupt(arg); } static int ar71xx_ehci_attach(device_t self) { struct ar71xx_ehci_softc *isc = device_get_softc(self); ehci_softc_t *sc = &isc->base; int err; int rid; /* initialise some bus fields */ sc->sc_bus.parent = self; sc->sc_bus.devices = sc->sc_devices; sc->sc_bus.devices_max = EHCI_MAX_DEVICES; sc->sc_bus.dma_bits = 32; /* get all DMA memory */ if (usb_bus_mem_alloc_all(&sc->sc_bus, USB_GET_DMA_TAG(self), &ehci_iterate_hw_softc)) { return (ENOMEM); } sc->sc_bus.usbrev = USB_REV_2_0; /* NB: hints fix the memory location and irq */ rid = 0; sc->sc_io_res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!sc->sc_io_res) { device_printf(self, "Could not map memory\n"); goto error; } /* * Craft special resource for bus space ops that handle * byte-alignment of non-word addresses. */ sc->sc_io_tag = ar71xx_bus_space_reversed; sc->sc_io_hdl = rman_get_bushandle(sc->sc_io_res); sc->sc_io_size = rman_get_size(sc->sc_io_res); rid = 0; sc->sc_irq_res = bus_alloc_resource_any(self, SYS_RES_IRQ, &rid, RF_ACTIVE | RF_SHAREABLE); if (sc->sc_irq_res == NULL) { device_printf(self, "Could not allocate irq\n"); goto error; } sc->sc_bus.bdev = device_add_child(self, "usbus", -1); if (!sc->sc_bus.bdev) { device_printf(self, "Could not add USB device\n"); goto error; } device_set_ivars(sc->sc_bus.bdev, &sc->sc_bus); device_set_desc(sc->sc_bus.bdev, EHCI_HC_DEVSTR); sprintf(sc->sc_vendor, "Atheros"); err = bus_setup_intr(self, sc->sc_irq_res, INTR_TYPE_BIO | INTR_MPSAFE, NULL, ar71xx_ehci_intr, sc, &sc->sc_intr_hdl); if (err) { device_printf(self, "Could not setup irq, %d\n", err); sc->sc_intr_hdl = NULL; goto error; } /* * Arrange to force Host mode, select big-endian byte alignment, * and arrange to not terminate reset operations (the adapter * will ignore it if we do but might as well save a reg write). * Also, the controller has an embedded Transaction Translator * which means port speed must be read from the Port Status * register following a port enable. */ sc->sc_flags = EHCI_SCFLG_SETMODE; switch (ar71xx_soc) { case AR71XX_SOC_AR7241: case AR71XX_SOC_AR7242: case AR71XX_SOC_AR9130: case AR71XX_SOC_AR9132: case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: sc->sc_flags |= EHCI_SCFLG_TT | EHCI_SCFLG_NORESTERM; break; default: /* fallthrough */ break; } /* * ehci_reset() needs the correct offset to access the host controller * registers. The AR724x/AR913x offsets aren't 0. */ sc->sc_offs = EHCI_CAPLENGTH(EREAD4(sc, EHCI_CAPLEN_HCIVERSION)); (void) ehci_reset(sc); err = ehci_init(sc); if (!err) { err = device_probe_and_attach(sc->sc_bus.bdev); } if (err) { device_printf(self, "USB init failed err=%d\n", err); goto error; } return (0); error: ar71xx_ehci_detach(self); return (ENXIO); } static int ar71xx_ehci_detach(device_t self) { struct ar71xx_ehci_softc *isc = device_get_softc(self); ehci_softc_t *sc = &isc->base; device_t bdev; int err; if (sc->sc_bus.bdev) { bdev = sc->sc_bus.bdev; device_detach(bdev); device_delete_child(self, bdev); } /* during module unload there are lots of children leftover */ device_delete_children(self); if (sc->sc_irq_res && sc->sc_intr_hdl) { /* * only call ehci_detach() after ehci_init() */ ehci_detach(sc); err = bus_teardown_intr(self, sc->sc_irq_res, sc->sc_intr_hdl); if (err) /* XXX or should we panic? 
*/ device_printf(self, "Could not tear down irq, %d\n", err); sc->sc_intr_hdl = NULL; } if (sc->sc_irq_res) { bus_release_resource(self, SYS_RES_IRQ, 0, sc->sc_irq_res); sc->sc_irq_res = NULL; } if (sc->sc_io_res) { bus_release_resource(self, SYS_RES_MEMORY, 0, sc->sc_io_res); sc->sc_io_res = NULL; } usb_bus_mem_free_all(&sc->sc_bus, &ehci_iterate_hw_softc); return (0); } static device_method_t ehci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ar71xx_ehci_probe), DEVMETHOD(device_attach, ar71xx_ehci_attach), DEVMETHOD(device_detach, ar71xx_ehci_detach), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD_END }; static driver_t ehci_driver = { .name = "ehci", .methods = ehci_methods, .size = sizeof(struct ar71xx_ehci_softc), }; static devclass_t ehci_devclass; DRIVER_MODULE(ehci, nexus, ehci_driver, ehci_devclass, 0, 0); DRIVER_MODULE(ehci, apb, ehci_driver, ehci_devclass, 0, 0); MODULE_DEPEND(ehci, usb, 1, 1, 1); Index: projects/powernv/mips/atheros/ar71xx_gpio.c =================================================================== --- projects/powernv/mips/atheros/ar71xx_gpio.c (revision 290990) +++ projects/powernv/mips/atheros/ar71xx_gpio.c (revision 290991) @@ -1,572 +1,589 @@ /*- * Copyright (c) 2009, Oleksandr Tymoshenko * Copyright (c) 2009, Luiz Otavio O Souza. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /* * GPIO driver for AR71xx */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include "gpio_if.h" #define DEFAULT_CAPS (GPIO_PIN_INPUT | GPIO_PIN_OUTPUT) /* * Helpers */ static void ar71xx_gpio_function_enable(struct ar71xx_gpio_softc *sc, uint32_t mask); static void ar71xx_gpio_function_disable(struct ar71xx_gpio_softc *sc, uint32_t mask); static void ar71xx_gpio_pin_configure(struct ar71xx_gpio_softc *sc, struct gpio_pin *pin, uint32_t flags); /* * Driver stuff */ static int ar71xx_gpio_probe(device_t dev); static int ar71xx_gpio_attach(device_t dev); static int ar71xx_gpio_detach(device_t dev); static int ar71xx_gpio_filter(void *arg); static void ar71xx_gpio_intr(void *arg); /* * GPIO interface */ static device_t ar71xx_gpio_get_bus(device_t); static int ar71xx_gpio_pin_max(device_t dev, int *maxpin); static int ar71xx_gpio_pin_getcaps(device_t dev, uint32_t pin, uint32_t *caps); static int ar71xx_gpio_pin_getflags(device_t dev, uint32_t pin, uint32_t *flags); static int ar71xx_gpio_pin_getname(device_t dev, uint32_t pin, char *name); static int ar71xx_gpio_pin_setflags(device_t dev, uint32_t pin, uint32_t flags); static int ar71xx_gpio_pin_set(device_t dev, uint32_t pin, unsigned int value); static int ar71xx_gpio_pin_get(device_t dev, uint32_t pin, unsigned int *val); static int ar71xx_gpio_pin_toggle(device_t dev, uint32_t pin); /* * Enable/disable the GPIO function control space. * * This is primarily for the AR71xx, which has SPI CS1/CS2, UART, SLIC, I2S * as GPIO pin options. */ static void ar71xx_gpio_function_enable(struct ar71xx_gpio_softc *sc, uint32_t mask) { + + /* + * XXX TODO: refactor this out into a per-chipset method. + */ if (ar71xx_soc == AR71XX_SOC_AR9341 || ar71xx_soc == AR71XX_SOC_AR9342 || ar71xx_soc == AR71XX_SOC_AR9344 || + ar71xx_soc == AR71XX_SOC_QCA9533 || + ar71xx_soc == AR71XX_SOC_QCA9533_V2 || ar71xx_soc == AR71XX_SOC_QCA9556 || ar71xx_soc == AR71XX_SOC_QCA9558) GPIO_SET_BITS(sc, AR934X_GPIO_REG_FUNC, mask); else GPIO_SET_BITS(sc, AR71XX_GPIO_FUNCTION, mask); } static void ar71xx_gpio_function_disable(struct ar71xx_gpio_softc *sc, uint32_t mask) { + + /* + * XXX TODO: refactor this out into a per-chipset method. 
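+	 * One option (suggestion only) would be to hang this off the
+	 * ar71xx_cpu_ops dispatch table that ar71xx_setup.c already
+	 * selects per SoC.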
+ */ if (ar71xx_soc == AR71XX_SOC_AR9341 || ar71xx_soc == AR71XX_SOC_AR9342 || ar71xx_soc == AR71XX_SOC_AR9344 || + ar71xx_soc == AR71XX_SOC_QCA9533 || + ar71xx_soc == AR71XX_SOC_QCA9533_V2 || ar71xx_soc == AR71XX_SOC_QCA9556 || ar71xx_soc == AR71XX_SOC_QCA9558) GPIO_CLEAR_BITS(sc, AR934X_GPIO_REG_FUNC, mask); else GPIO_CLEAR_BITS(sc, AR71XX_GPIO_FUNCTION, mask); } static void ar71xx_gpio_pin_configure(struct ar71xx_gpio_softc *sc, struct gpio_pin *pin, unsigned int flags) { uint32_t mask; mask = 1 << pin->gp_pin; /* * Manage input/output */ if (flags & (GPIO_PIN_INPUT|GPIO_PIN_OUTPUT)) { pin->gp_flags &= ~(GPIO_PIN_INPUT|GPIO_PIN_OUTPUT); if (flags & GPIO_PIN_OUTPUT) { pin->gp_flags |= GPIO_PIN_OUTPUT; GPIO_SET_BITS(sc, AR71XX_GPIO_OE, mask); } else { pin->gp_flags |= GPIO_PIN_INPUT; GPIO_CLEAR_BITS(sc, AR71XX_GPIO_OE, mask); } } } static device_t ar71xx_gpio_get_bus(device_t dev) { struct ar71xx_gpio_softc *sc; sc = device_get_softc(dev); return (sc->busdev); } static int ar71xx_gpio_pin_max(device_t dev, int *maxpin) { switch (ar71xx_soc) { case AR71XX_SOC_AR9130: case AR71XX_SOC_AR9132: *maxpin = AR91XX_GPIO_PINS - 1; break; case AR71XX_SOC_AR7240: case AR71XX_SOC_AR7241: case AR71XX_SOC_AR7242: *maxpin = AR724X_GPIO_PINS - 1; break; case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: *maxpin = AR933X_GPIO_COUNT - 1; break; case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: *maxpin = AR934X_GPIO_COUNT - 1; + break; + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: + *maxpin = QCA953X_GPIO_COUNT - 1; break; case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: *maxpin = QCA955X_GPIO_COUNT - 1; break; default: *maxpin = AR71XX_GPIO_PINS - 1; } return (0); } static int ar71xx_gpio_pin_getcaps(device_t dev, uint32_t pin, uint32_t *caps) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); GPIO_LOCK(sc); *caps = sc->gpio_pins[i].gp_caps; GPIO_UNLOCK(sc); return (0); } static int ar71xx_gpio_pin_getflags(device_t dev, uint32_t pin, uint32_t *flags) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); GPIO_LOCK(sc); *flags = sc->gpio_pins[i].gp_flags; GPIO_UNLOCK(sc); return (0); } static int ar71xx_gpio_pin_getname(device_t dev, uint32_t pin, char *name) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); GPIO_LOCK(sc); memcpy(name, sc->gpio_pins[i].gp_name, GPIOMAXNAME); GPIO_UNLOCK(sc); return (0); } static int ar71xx_gpio_pin_setflags(device_t dev, uint32_t pin, uint32_t flags) { int i; struct ar71xx_gpio_softc *sc = device_get_softc(dev); for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); ar71xx_gpio_pin_configure(sc, &sc->gpio_pins[i], flags); return (0); } static int ar71xx_gpio_pin_set(device_t dev, uint32_t pin, unsigned int value) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); if (value) GPIO_WRITE(sc, AR71XX_GPIO_SET, (1 << pin)); else GPIO_WRITE(sc, AR71XX_GPIO_CLEAR, (1 << pin)); return (0); } static int ar71xx_gpio_pin_get(device_t dev, uint32_t 
pin, unsigned int *val) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); *val = (GPIO_READ(sc, AR71XX_GPIO_IN) & (1 << pin)) ? 1 : 0; return (0); } static int ar71xx_gpio_pin_toggle(device_t dev, uint32_t pin) { int res, i; struct ar71xx_gpio_softc *sc = device_get_softc(dev); for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); res = (GPIO_READ(sc, AR71XX_GPIO_IN) & (1 << pin)) ? 1 : 0; if (res) GPIO_WRITE(sc, AR71XX_GPIO_CLEAR, (1 << pin)); else GPIO_WRITE(sc, AR71XX_GPIO_SET, (1 << pin)); return (0); } static int ar71xx_gpio_filter(void *arg) { /* TODO: something useful */ return (FILTER_STRAY); } static void ar71xx_gpio_intr(void *arg) { struct ar71xx_gpio_softc *sc = arg; GPIO_LOCK(sc); /* TODO: something useful */ GPIO_UNLOCK(sc); } static int ar71xx_gpio_probe(device_t dev) { device_set_desc(dev, "Atheros AR71XX GPIO driver"); return (0); } static int ar71xx_gpio_attach(device_t dev) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i, j, maxpin; int mask, pinon; uint32_t oe; KASSERT((device_get_unit(dev) == 0), ("ar71xx_gpio: Only one gpio module supported")); mtx_init(&sc->gpio_mtx, device_get_nameunit(dev), NULL, MTX_DEF); /* Map control/status registers. */ sc->gpio_mem_rid = 0; sc->gpio_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->gpio_mem_rid, RF_ACTIVE); if (sc->gpio_mem_res == NULL) { device_printf(dev, "couldn't map memory\n"); ar71xx_gpio_detach(dev); return (ENXIO); } if ((sc->gpio_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->gpio_irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(dev, "unable to allocate IRQ resource\n"); ar71xx_gpio_detach(dev); return (ENXIO); } if ((bus_setup_intr(dev, sc->gpio_irq_res, INTR_TYPE_MISC, ar71xx_gpio_filter, ar71xx_gpio_intr, sc, &sc->gpio_ih))) { device_printf(dev, "WARNING: unable to register interrupt handler\n"); ar71xx_gpio_detach(dev); return (ENXIO); } sc->dev = dev; /* Enable function bits that are required */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "function_set", &mask) == 0) { device_printf(dev, "function_set: 0x%x\n", mask); ar71xx_gpio_function_enable(sc, mask); } /* Disable function bits that are required */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "function_clear", &mask) == 0) { device_printf(dev, "function_clear: 0x%x\n", mask); ar71xx_gpio_function_disable(sc, mask); } /* Disable interrupts for all pins. */ GPIO_WRITE(sc, AR71XX_GPIO_INT_MASK, 0); /* Initialise all pins specified in the mask, up to the pin count */ (void) ar71xx_gpio_pin_max(dev, &maxpin); if (resource_int_value(device_get_name(dev), device_get_unit(dev), "pinmask", &mask) != 0) mask = 0; if (resource_int_value(device_get_name(dev), device_get_unit(dev), "pinon", &pinon) != 0) pinon = 0; device_printf(dev, "gpio pinmask=0x%x\n", mask); for (j = 0; j <= maxpin; j++) { if ((mask & (1 << j)) == 0) continue; sc->gpio_npins++; } /* Iniatilize the GPIO pins, keep the loader settings. 
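	 * The AR71XX_GPIO_OE read just below picks up whatever direction
	 * bits the boot loader left programmed.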
*/ oe = GPIO_READ(sc, AR71XX_GPIO_OE); sc->gpio_pins = malloc(sizeof(*sc->gpio_pins) * sc->gpio_npins, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0, j = 0; j <= maxpin; j++) { if ((mask & (1 << j)) == 0) continue; snprintf(sc->gpio_pins[i].gp_name, GPIOMAXNAME, "pin %d", j); sc->gpio_pins[i].gp_pin = j; sc->gpio_pins[i].gp_caps = DEFAULT_CAPS; if (oe & (1 << j)) sc->gpio_pins[i].gp_flags = GPIO_PIN_OUTPUT; else sc->gpio_pins[i].gp_flags = GPIO_PIN_INPUT; i++; } /* Turn on the hinted pins. */ for (i = 0; i < sc->gpio_npins; i++) { j = sc->gpio_pins[i].gp_pin; if ((pinon & (1 << j)) != 0) { ar71xx_gpio_pin_setflags(dev, j, GPIO_PIN_OUTPUT); ar71xx_gpio_pin_set(dev, j, 1); } } /* * Search through the function hints, in case there's some * overrides such as LNA control. * * hint.gpio.X.func..gpiofunc= * hint.gpio.X.func..gpiomode=1 (for output, default low) */ for (i = 0; i <= maxpin; i++) { char buf[32]; int gpiofunc, gpiomode; snprintf(buf, 32, "func.%d.gpiofunc", i); if (resource_int_value(device_get_name(dev), device_get_unit(dev), buf, &gpiofunc) != 0) continue; /* Get the mode too */ snprintf(buf, 32, "func.%d.gpiomode", i); if (resource_int_value(device_get_name(dev), device_get_unit(dev), buf, &gpiomode) != 0) continue; /* We only handle mode=1 for now */ if (gpiomode != 1) continue; device_printf(dev, "%s: GPIO %d: func=%d, mode=%d\n", __func__, i, gpiofunc, gpiomode); /* Set output (bit == 0) */ oe = GPIO_READ(sc, AR71XX_GPIO_OE); oe &= ~ (1 << i); GPIO_WRITE(sc, AR71XX_GPIO_OE, oe); /* Set pin value = 0, so it stays low by default */ oe = GPIO_READ(sc, AR71XX_GPIO_OUT); oe &= ~ (1 << i); GPIO_WRITE(sc, AR71XX_GPIO_OUT, oe); /* Finally: Set the output config */ ar71xx_gpio_ouput_configure(i, gpiofunc); } sc->busdev = gpiobus_attach_bus(dev); if (sc->busdev == NULL) { ar71xx_gpio_detach(dev); return (ENXIO); } return (0); } static int ar71xx_gpio_detach(device_t dev) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); KASSERT(mtx_initialized(&sc->gpio_mtx), ("gpio mutex not initialized")); gpiobus_detach_bus(dev); if (sc->gpio_ih) bus_teardown_intr(dev, sc->gpio_irq_res, sc->gpio_ih); if (sc->gpio_irq_res) bus_release_resource(dev, SYS_RES_IRQ, sc->gpio_irq_rid, sc->gpio_irq_res); if (sc->gpio_mem_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->gpio_mem_rid, sc->gpio_mem_res); if (sc->gpio_pins) free(sc->gpio_pins, M_DEVBUF); mtx_destroy(&sc->gpio_mtx); return(0); } static device_method_t ar71xx_gpio_methods[] = { DEVMETHOD(device_probe, ar71xx_gpio_probe), DEVMETHOD(device_attach, ar71xx_gpio_attach), DEVMETHOD(device_detach, ar71xx_gpio_detach), /* GPIO protocol */ DEVMETHOD(gpio_get_bus, ar71xx_gpio_get_bus), DEVMETHOD(gpio_pin_max, ar71xx_gpio_pin_max), DEVMETHOD(gpio_pin_getname, ar71xx_gpio_pin_getname), DEVMETHOD(gpio_pin_getflags, ar71xx_gpio_pin_getflags), DEVMETHOD(gpio_pin_getcaps, ar71xx_gpio_pin_getcaps), DEVMETHOD(gpio_pin_setflags, ar71xx_gpio_pin_setflags), DEVMETHOD(gpio_pin_get, ar71xx_gpio_pin_get), DEVMETHOD(gpio_pin_set, ar71xx_gpio_pin_set), DEVMETHOD(gpio_pin_toggle, ar71xx_gpio_pin_toggle), {0, 0}, }; static driver_t ar71xx_gpio_driver = { "gpio", ar71xx_gpio_methods, sizeof(struct ar71xx_gpio_softc), }; static devclass_t ar71xx_gpio_devclass; DRIVER_MODULE(ar71xx_gpio, apb, ar71xx_gpio_driver, ar71xx_gpio_devclass, 0, 0); Index: projects/powernv/mips/atheros/ar71xx_setup.c =================================================================== --- projects/powernv/mips/atheros/ar71xx_setup.c (revision 290990) +++ projects/powernv/mips/atheros/ar71xx_setup.c 
(revision 290991) @@ -1,217 +1,235 @@ /*- * Copyright (c) 2010 Adrian Chadd * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include +#include #include #define AR71XX_SYS_TYPE_LEN 128 static char ar71xx_sys_type[AR71XX_SYS_TYPE_LEN]; enum ar71xx_soc_type ar71xx_soc; struct ar71xx_cpu_def * ar71xx_cpu_ops = NULL; void ar71xx_detect_sys_type(void) { char *chip = "????"; uint32_t id; uint32_t major; uint32_t minor; uint32_t rev = 0; id = ATH_READ_REG(AR71XX_RST_RESET_REG_REV_ID); major = id & REV_ID_MAJOR_MASK; switch (major) { case REV_ID_MAJOR_AR71XX: minor = id & AR71XX_REV_ID_MINOR_MASK; rev = id >> AR71XX_REV_ID_REVISION_SHIFT; rev &= AR71XX_REV_ID_REVISION_MASK; ar71xx_cpu_ops = &ar71xx_chip_def; switch (minor) { case AR71XX_REV_ID_MINOR_AR7130: ar71xx_soc = AR71XX_SOC_AR7130; chip = "7130"; break; case AR71XX_REV_ID_MINOR_AR7141: ar71xx_soc = AR71XX_SOC_AR7141; chip = "7141"; break; case AR71XX_REV_ID_MINOR_AR7161: ar71xx_soc = AR71XX_SOC_AR7161; chip = "7161"; break; } break; case REV_ID_MAJOR_AR7240: ar71xx_soc = AR71XX_SOC_AR7240; chip = "7240"; ar71xx_cpu_ops = &ar724x_chip_def; rev = (id & AR724X_REV_ID_REVISION_MASK); break; case REV_ID_MAJOR_AR7241: ar71xx_soc = AR71XX_SOC_AR7241; chip = "7241"; ar71xx_cpu_ops = &ar724x_chip_def; rev = (id & AR724X_REV_ID_REVISION_MASK); break; case REV_ID_MAJOR_AR7242: ar71xx_soc = AR71XX_SOC_AR7242; chip = "7242"; ar71xx_cpu_ops = &ar724x_chip_def; rev = (id & AR724X_REV_ID_REVISION_MASK); break; case REV_ID_MAJOR_AR913X: minor = id & AR91XX_REV_ID_MINOR_MASK; rev = id >> AR91XX_REV_ID_REVISION_SHIFT; rev &= AR91XX_REV_ID_REVISION_MASK; ar71xx_cpu_ops = &ar91xx_chip_def; switch (minor) { case AR91XX_REV_ID_MINOR_AR9130: ar71xx_soc = AR71XX_SOC_AR9130; chip = "9130"; break; case AR91XX_REV_ID_MINOR_AR9132: ar71xx_soc = AR71XX_SOC_AR9132; chip = "9132"; break; } break; case REV_ID_MAJOR_AR9330: minor = 0; rev = (id & AR933X_REV_ID_REVISION_MASK); chip = "9330"; ar71xx_cpu_ops = &ar933x_chip_def; ar71xx_soc = AR71XX_SOC_AR9330; 
break; case REV_ID_MAJOR_AR9331: minor = 1; rev = (id & AR933X_REV_ID_REVISION_MASK); chip = "9331"; ar71xx_soc = AR71XX_SOC_AR9331; ar71xx_cpu_ops = &ar933x_chip_def; break; case REV_ID_MAJOR_AR9341: minor = 0; rev = (id & AR934X_REV_ID_REVISION_MASK); chip = "9341"; ar71xx_soc = AR71XX_SOC_AR9341; ar71xx_cpu_ops = &ar934x_chip_def; break; case REV_ID_MAJOR_AR9342: minor = 0; rev = (id & AR934X_REV_ID_REVISION_MASK); chip = "9342"; ar71xx_soc = AR71XX_SOC_AR9342; ar71xx_cpu_ops = &ar934x_chip_def; break; case REV_ID_MAJOR_AR9344: minor = 0; rev = (id & AR934X_REV_ID_REVISION_MASK); chip = "9344"; ar71xx_soc = AR71XX_SOC_AR9344; ar71xx_cpu_ops = &ar934x_chip_def; + break; + + case REV_ID_MAJOR_QCA9533: + minor = 0; + rev = (id & QCA953X_REV_ID_REVISION_MASK); + chip = "9533"; + ar71xx_soc = AR71XX_SOC_QCA9533; + ar71xx_cpu_ops = &qca953x_chip_def; + break; + + case REV_ID_MAJOR_QCA9533_V2: + minor = 0; + rev = (id & QCA953X_REV_ID_REVISION_MASK); + chip = "9533v2"; + ar71xx_soc = AR71XX_SOC_QCA9533_V2; + ar71xx_cpu_ops = &qca953x_chip_def; break; case REV_ID_MAJOR_QCA9556: minor = 0; rev = (id & QCA955X_REV_ID_REVISION_MASK); chip = "9556"; ar71xx_soc = AR71XX_SOC_QCA9556; ar71xx_cpu_ops = &qca955x_chip_def; break; case REV_ID_MAJOR_QCA9558: minor = 0; rev = (id & QCA955X_REV_ID_REVISION_MASK); chip = "9558"; ar71xx_soc = AR71XX_SOC_QCA9558; ar71xx_cpu_ops = &qca955x_chip_def; break; default: panic("ar71xx: unknown chip id:0x%08x\n", id); } sprintf(ar71xx_sys_type, "Atheros AR%s rev %u", chip, rev); } const char * ar71xx_get_system_type(void) { return ar71xx_sys_type; } Index: projects/powernv/mips/atheros/ar71xx_setup.h =================================================================== --- projects/powernv/mips/atheros/ar71xx_setup.h (revision 290990) +++ projects/powernv/mips/atheros/ar71xx_setup.h (revision 290991) @@ -1,55 +1,57 @@ /*- * Copyright (c) 2010 Adrian Chadd * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /* $FreeBSD$ */ #ifndef __AR71XX_SETUP_H__ #define __AR71XX_SETUP_H__ enum ar71xx_soc_type { AR71XX_SOC_UNKNOWN, AR71XX_SOC_AR7130, AR71XX_SOC_AR7141, AR71XX_SOC_AR7161, AR71XX_SOC_AR7240, AR71XX_SOC_AR7241, AR71XX_SOC_AR7242, AR71XX_SOC_AR9130, AR71XX_SOC_AR9132, AR71XX_SOC_AR9330, AR71XX_SOC_AR9331, AR71XX_SOC_AR9341, AR71XX_SOC_AR9342, AR71XX_SOC_AR9344, AR71XX_SOC_QCA9556, AR71XX_SOC_QCA9558, + AR71XX_SOC_QCA9533, + AR71XX_SOC_QCA9533_V2, }; extern enum ar71xx_soc_type ar71xx_soc; extern void ar71xx_detect_sys_type(void); extern const char *ar71xx_get_system_type(void); #endif Index: projects/powernv/mips/atheros/files.ar71xx =================================================================== --- projects/powernv/mips/atheros/files.ar71xx (revision 290990) +++ projects/powernv/mips/atheros/files.ar71xx (revision 290991) @@ -1,33 +1,34 @@ # $FreeBSD$ mips/atheros/apb.c optional ar71xx_apb mips/atheros/ar71xx_gpio.c optional gpio mips/atheros/ar71xx_machdep.c standard mips/atheros/ar71xx_ehci.c optional ehci mips/atheros/ar71xx_ohci.c optional ohci mips/atheros/ar71xx_pci.c optional ar71xx_pci pci mips/atheros/ar724x_pci.c optional ar724x_pci pci mips/atheros/ar71xx_pci_bus_space.c optional pci mips/atheros/ar71xx_spi.c optional ar71xx_spi mips/atheros/ar71xx_macaddr.c standard mips/atheros/pcf2123_rtc.c optional pcf2123_rtc ar71xx_spi mips/atheros/ar71xx_wdog.c optional ar71xx_wdog mips/atheros/if_arge.c optional arge mips/atheros/uart_bus_ar71xx.c optional uart_ar71xx mips/atheros/uart_cpu_ar71xx.c optional uart_ar71xx mips/atheros/uart_bus_ar933x.c optional uart_ar933x mips/atheros/uart_cpu_ar933x.c optional uart_ar933x mips/atheros/uart_dev_ar933x.c optional uart_ar933x mips/atheros/ar71xx_bus_space_reversed.c standard mips/mips/intr_machdep.c standard mips/mips/tick.c standard mips/atheros/ar71xx_setup.c standard mips/atheros/ar71xx_chip.c standard mips/atheros/ar724x_chip.c standard mips/atheros/ar91xx_chip.c standard mips/atheros/ar933x_chip.c standard mips/atheros/ar934x_chip.c standard +mips/atheros/qca953x_chip.c standard mips/atheros/qca955x_chip.c standard mips/atheros/ar71xx_fixup.c optional ar71xx_ath_eeprom mips/atheros/qca955x_apb.c optional qca955x_apb mips/atheros/qca955x_pci.c optional qca955x_pci pci Index: projects/powernv/mips/atheros/if_arge.c =================================================================== --- projects/powernv/mips/atheros/if_arge.c (revision 290990) +++ projects/powernv/mips/atheros/if_arge.c (revision 290991) @@ -1,2699 +1,2720 @@ /*- * Copyright (c) 2009, Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * AR71XX gigabit ethernet driver */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include "opt_arge.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_arge.h" #if defined(ARGE_MDIO) #include #include #include "mdio_if.h" #endif MODULE_DEPEND(arge, ether, 1, 1, 1); MODULE_DEPEND(arge, miibus, 1, 1, 1); MODULE_VERSION(arge, 1); #include "miibus_if.h" #include #include #include /* XXX tsk! */ +#include /* XXX tsk! */ #include /* XXX tsk! */ #include #include #include #include typedef enum { ARGE_DBG_MII = 0x00000001, ARGE_DBG_INTR = 0x00000002, ARGE_DBG_TX = 0x00000004, ARGE_DBG_RX = 0x00000008, ARGE_DBG_ERR = 0x00000010, ARGE_DBG_RESET = 0x00000020, ARGE_DBG_PLL = 0x00000040, } arge_debug_flags; static const char * arge_miicfg_str[] = { "NONE", "GMII", "MII", "RGMII", "RMII", "SGMII" }; #ifdef ARGE_DEBUG #define ARGEDEBUG(_sc, _m, ...) \ do { \ if ((_m) & (_sc)->arge_debug) \ device_printf((_sc)->arge_dev, __VA_ARGS__); \ } while (0) #else #define ARGEDEBUG(_sc, _m, ...) 
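/*
 * A typical (hypothetical) call site looks like:
 *
 *	ARGEDEBUG(sc, ARGE_DBG_INTR, "%s: status=0x%08x\n", __func__, status);
 *
 * which prints only when the kernel is built with ARGE_DEBUG and the
 * ARGE_DBG_INTR bit is set in the per-device 'debug' sysctl; in the
 * !ARGE_DEBUG case above it expands to nothing.
 */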
#endif static int arge_attach(device_t); static int arge_detach(device_t); static void arge_flush_ddr(struct arge_softc *); static int arge_ifmedia_upd(struct ifnet *); static void arge_ifmedia_sts(struct ifnet *, struct ifmediareq *); static int arge_ioctl(struct ifnet *, u_long, caddr_t); static void arge_init(void *); static void arge_init_locked(struct arge_softc *); static void arge_link_task(void *, int); static void arge_update_link_locked(struct arge_softc *sc); static void arge_set_pll(struct arge_softc *, int, int); static int arge_miibus_readreg(device_t, int, int); static void arge_miibus_statchg(device_t); static int arge_miibus_writereg(device_t, int, int, int); static int arge_probe(device_t); static void arge_reset_dma(struct arge_softc *); static int arge_resume(device_t); static int arge_rx_ring_init(struct arge_softc *); static void arge_rx_ring_free(struct arge_softc *sc); static int arge_tx_ring_init(struct arge_softc *); static void arge_tx_ring_free(struct arge_softc *); #ifdef DEVICE_POLLING static int arge_poll(struct ifnet *, enum poll_cmd, int); #endif static int arge_shutdown(device_t); static void arge_start(struct ifnet *); static void arge_start_locked(struct ifnet *); static void arge_stop(struct arge_softc *); static int arge_suspend(device_t); static int arge_rx_locked(struct arge_softc *); static void arge_tx_locked(struct arge_softc *); static void arge_intr(void *); static int arge_intr_filter(void *); static void arge_tick(void *); static void arge_hinted_child(device_t bus, const char *dname, int dunit); /* * ifmedia callbacks for multiPHY MAC */ void arge_multiphy_mediastatus(struct ifnet *, struct ifmediareq *); int arge_multiphy_mediachange(struct ifnet *); static void arge_dmamap_cb(void *, bus_dma_segment_t *, int, int); static int arge_dma_alloc(struct arge_softc *); static void arge_dma_free(struct arge_softc *); static int arge_newbuf(struct arge_softc *, int); static __inline void arge_fixup_rx(struct mbuf *); static device_method_t arge_methods[] = { /* Device interface */ DEVMETHOD(device_probe, arge_probe), DEVMETHOD(device_attach, arge_attach), DEVMETHOD(device_detach, arge_detach), DEVMETHOD(device_suspend, arge_suspend), DEVMETHOD(device_resume, arge_resume), DEVMETHOD(device_shutdown, arge_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, arge_miibus_readreg), DEVMETHOD(miibus_writereg, arge_miibus_writereg), DEVMETHOD(miibus_statchg, arge_miibus_statchg), /* bus interface */ DEVMETHOD(bus_add_child, device_add_child_ordered), DEVMETHOD(bus_hinted_child, arge_hinted_child), DEVMETHOD_END }; static driver_t arge_driver = { "arge", arge_methods, sizeof(struct arge_softc) }; static devclass_t arge_devclass; DRIVER_MODULE(arge, nexus, arge_driver, arge_devclass, 0, 0); DRIVER_MODULE(miibus, arge, miibus_driver, miibus_devclass, 0, 0); #if defined(ARGE_MDIO) static int argemdio_probe(device_t); static int argemdio_attach(device_t); static int argemdio_detach(device_t); /* * Declare an additional, separate driver for accessing the MDIO bus. 
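 * A sketch of the resulting device tree, per the DRIVER_MODULE lines
 * below (the PHY drivers themselves hang off the mdio bus):
 *
 *	nexus -> argemdio -> mdio
 *	arge  -> miiproxy (relays MII requests over to the mdio bus)
 *
 * Both paths end up in the same arge_miibus_readreg()/arge_miibus_writereg()
 * register accessors.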
*/ static device_method_t argemdio_methods[] = { /* Device interface */ DEVMETHOD(device_probe, argemdio_probe), DEVMETHOD(device_attach, argemdio_attach), DEVMETHOD(device_detach, argemdio_detach), /* bus interface */ DEVMETHOD(bus_add_child, device_add_child_ordered), /* MDIO access */ DEVMETHOD(mdio_readreg, arge_miibus_readreg), DEVMETHOD(mdio_writereg, arge_miibus_writereg), }; DEFINE_CLASS_0(argemdio, argemdio_driver, argemdio_methods, sizeof(struct arge_softc)); static devclass_t argemdio_devclass; DRIVER_MODULE(miiproxy, arge, miiproxy_driver, miiproxy_devclass, 0, 0); DRIVER_MODULE(argemdio, nexus, argemdio_driver, argemdio_devclass, 0, 0); DRIVER_MODULE(mdio, argemdio, mdio_driver, mdio_devclass, 0, 0); #endif static struct mtx miibus_mtx; MTX_SYSINIT(miibus_mtx, &miibus_mtx, "arge mii lock", MTX_DEF); /* * Flushes all * * XXX this needs to be done at interrupt time! Grr! */ static void arge_flush_ddr(struct arge_softc *sc) { switch (sc->arge_mac_unit) { case 0: ar71xx_device_flush_ddr(AR71XX_CPU_DDR_FLUSH_GE0); break; case 1: ar71xx_device_flush_ddr(AR71XX_CPU_DDR_FLUSH_GE1); break; default: device_printf(sc->arge_dev, "%s: unknown unit (%d)\n", __func__, sc->arge_mac_unit); break; } } static int arge_probe(device_t dev) { device_set_desc(dev, "Atheros AR71xx built-in ethernet interface"); return (BUS_PROBE_NOWILDCARD); } #ifdef ARGE_DEBUG static void arge_attach_intr_sysctl(device_t dev, struct sysctl_oid_list *parent) { struct arge_softc *sc = device_get_softc(dev); struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); char sn[8]; int i; tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "intr", CTLFLAG_RD, NULL, "Interrupt statistics"); child = SYSCTL_CHILDREN(tree); for (i = 0; i < 32; i++) { snprintf(sn, sizeof(sn), "%d", i); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, sn, CTLFLAG_RD, &sc->intr_stats.count[i], 0, ""); } } #endif static void arge_attach_sysctl(device_t dev) { struct arge_softc *sc = device_get_softc(dev); struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); #ifdef ARGE_DEBUG SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "debug", CTLFLAG_RW, &sc->arge_debug, 0, "arge interface debugging flags"); arge_attach_intr_sysctl(dev, SYSCTL_CHILDREN(tree)); #endif SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_pkts_aligned", CTLFLAG_RW, &sc->stats.tx_pkts_aligned, 0, "number of TX aligned packets"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_pkts_unaligned", CTLFLAG_RW, &sc->stats.tx_pkts_unaligned, 0, "number of TX unaligned packets"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_pkts_unaligned_start", CTLFLAG_RW, &sc->stats.tx_pkts_unaligned_start, 0, "number of TX unaligned packets (start)"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_pkts_unaligned_len", CTLFLAG_RW, &sc->stats.tx_pkts_unaligned_len, 0, "number of TX unaligned packets (len)"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_pkts_nosegs", CTLFLAG_RW, &sc->stats.tx_pkts_nosegs, 0, "number of TX packets fail with no ring slots avail"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "intr_stray_filter", CTLFLAG_RW, &sc->stats.intr_stray, 0, "number of stray interrupts (filter)"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "intr_stray_intr", CTLFLAG_RW, &sc->stats.intr_stray2, 0, "number of stray interrupts (intr)"); SYSCTL_ADD_UINT(ctx, 
SYSCTL_CHILDREN(tree), OID_AUTO, "intr_ok", CTLFLAG_RW, &sc->stats.intr_ok, 0, "number of OK interrupts"); #ifdef ARGE_DEBUG SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_prod", CTLFLAG_RW, &sc->arge_cdata.arge_tx_prod, 0, ""); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_cons", CTLFLAG_RW, &sc->arge_cdata.arge_tx_cons, 0, ""); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_cnt", CTLFLAG_RW, &sc->arge_cdata.arge_tx_cnt, 0, ""); #endif } static void arge_reset_mac(struct arge_softc *sc) { uint32_t reg; uint32_t reset_reg; ARGEDEBUG(sc, ARGE_DBG_RESET, "%s called\n", __func__); /* Step 1. Soft-reset MAC */ ARGE_SET_BITS(sc, AR71XX_MAC_CFG1, MAC_CFG1_SOFT_RESET); DELAY(20); /* Step 2. Punt the MAC core from the central reset register */ /* * XXX TODO: migrate this (and other) chip specific stuff into * a chipdef method. */ if (sc->arge_mac_unit == 0) { reset_reg = RST_RESET_GE0_MAC; } else { reset_reg = RST_RESET_GE1_MAC; } /* * AR934x (and later) also needs the MDIO block reset. * XXX should methodize this! */ if (ar71xx_soc == AR71XX_SOC_AR9341 || ar71xx_soc == AR71XX_SOC_AR9342 || ar71xx_soc == AR71XX_SOC_AR9344) { if (sc->arge_mac_unit == 0) { reset_reg |= AR934X_RESET_GE0_MDIO; } else { reset_reg |= AR934X_RESET_GE1_MDIO; } } if (ar71xx_soc == AR71XX_SOC_QCA9556 || ar71xx_soc == AR71XX_SOC_QCA9558) { if (sc->arge_mac_unit == 0) { reset_reg |= QCA955X_RESET_GE0_MDIO; } else { reset_reg |= QCA955X_RESET_GE1_MDIO; } } + + if (ar71xx_soc == AR71XX_SOC_QCA9533 || + ar71xx_soc == AR71XX_SOC_QCA9533_V2) { + if (sc->arge_mac_unit == 0) { + reset_reg |= QCA953X_RESET_GE0_MDIO; + } else { + reset_reg |= QCA953X_RESET_GE1_MDIO; + } + } + ar71xx_device_stop(reset_reg); DELAY(100); ar71xx_device_start(reset_reg); /* Step 3. Reconfigure MAC block */ ARGE_WRITE(sc, AR71XX_MAC_CFG1, MAC_CFG1_SYNC_RX | MAC_CFG1_RX_ENABLE | MAC_CFG1_SYNC_TX | MAC_CFG1_TX_ENABLE); reg = ARGE_READ(sc, AR71XX_MAC_CFG2); reg |= MAC_CFG2_ENABLE_PADCRC | MAC_CFG2_LENGTH_FIELD ; ARGE_WRITE(sc, AR71XX_MAC_CFG2, reg); ARGE_WRITE(sc, AR71XX_MAC_MAX_FRAME_LEN, 1536); } /* * These values map to the divisor values programmed into * AR71XX_MAC_MII_CFG. * * The index of each value corresponds to the divisor section * value in AR71XX_MAC_MII_CFG (ie, table[0] means '0' in * AR71XX_MAC_MII_CFG, table[1] means '1', etc.) */ static const uint32_t ar71xx_mdio_div_table[] = { 4, 4, 6, 8, 10, 14, 20, 28, }; static const uint32_t ar7240_mdio_div_table[] = { 2, 2, 4, 6, 8, 12, 18, 26, 32, 40, 48, 56, 62, 70, 78, 96, }; static const uint32_t ar933x_mdio_div_table[] = { 4, 4, 6, 8, 10, 14, 20, 28, 34, 42, 50, 58, 66, 74, 82, 98, }; /* * Lookup the divisor to use based on the given frequency. * * Returns the divisor to use, or -ve on error. */ static int arge_mdio_get_divider(struct arge_softc *sc, unsigned long mdio_clock) { unsigned long ref_clock, t; const uint32_t *table; int ndivs; int i; /* * This is the base MDIO frequency on the SoC. * The dividers .. well, divide. Duh. */ ref_clock = ar71xx_mdio_freq(); /* * If either clock is undefined, just tell the * caller to fall through to the defaults. */ if (ref_clock == 0 || mdio_clock == 0) return (-EINVAL); /* * Pick the correct table! 
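 * Worked example with illustrative clocks: for ref_clock = 100 MHz and a
 * target mdio_clock of 2 MHz on an AR933x-family SoC, the walk below hits
 * 100 MHz / 50 = 2 MHz at ar933x_mdio_div_table[10], so index 10 is
 * returned.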
*/ switch (ar71xx_soc) { case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: table = ar933x_mdio_div_table; ndivs = nitems(ar933x_mdio_div_table); break; case AR71XX_SOC_AR7240: case AR71XX_SOC_AR7241: case AR71XX_SOC_AR7242: table = ar7240_mdio_div_table; ndivs = nitems(ar7240_mdio_div_table); break; default: table = ar71xx_mdio_div_table; ndivs = nitems(ar71xx_mdio_div_table); } /* * Now, walk through the list and find the first divisor * that falls under the target MDIO frequency. * * The divisors go up, but the corresponding frequencies * are actually decreasing. */ for (i = 0; i < ndivs; i++) { t = ref_clock / table[i]; if (t <= mdio_clock) { return (i); } } ARGEDEBUG(sc, ARGE_DBG_RESET, "No divider found; MDIO=%lu Hz; target=%lu Hz\n", ref_clock, mdio_clock); return (-ENOENT); } /* * Fetch the MDIO bus clock rate. * * For now, the default is DIV_28 for everything * bar AR934x, which will be DIV_58. * * It will definitely need updating to take into account * the MDIO bus core clock rate and the target clock * rate for the chip. */ static uint32_t arge_fetch_mdiobus_clock_rate(struct arge_softc *sc) { int mdio_freq, div; /* * Is the MDIO frequency defined? If so, find a divisor that * makes reasonable sense. Don't overshoot the frequency. */ if (resource_int_value(device_get_name(sc->arge_dev), device_get_unit(sc->arge_dev), "mdio_freq", &mdio_freq) == 0) { sc->arge_mdiofreq = mdio_freq; div = arge_mdio_get_divider(sc, sc->arge_mdiofreq); if (bootverbose) device_printf(sc->arge_dev, "%s: mdio ref freq=%llu Hz, target freq=%llu Hz," " divisor index=%d\n", __func__, (unsigned long long) ar71xx_mdio_freq(), (unsigned long long) mdio_freq, div); if (div >= 0) return (div); } /* * Default value(s). * * XXX obviously these need .. fixing. * * From Linux/OpenWRT: * * + 7240? DIV_6 * + Builtin-switch port and not 934x? DIV_10 * + Not built-in switch port and 934x? DIV_58 * + .. else DIV_28. */ switch (ar71xx_soc) { case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: return (MAC_MII_CFG_CLOCK_DIV_58); break; default: return (MAC_MII_CFG_CLOCK_DIV_28); } } static void arge_reset_miibus(struct arge_softc *sc) { uint32_t mdio_div; mdio_div = arge_fetch_mdiobus_clock_rate(sc); /* * XXX AR934x and later; should we be also resetting the * MDIO block(s) using the reset register block? 
*/ /* Reset MII bus; program in the default divisor */ ARGE_WRITE(sc, AR71XX_MAC_MII_CFG, MAC_MII_CFG_RESET | mdio_div); DELAY(100); ARGE_WRITE(sc, AR71XX_MAC_MII_CFG, mdio_div); DELAY(100); } static void arge_fetch_pll_config(struct arge_softc *sc) { long int val; if (resource_long_value(device_get_name(sc->arge_dev), device_get_unit(sc->arge_dev), "pll_10", &val) == 0) { sc->arge_pllcfg.pll_10 = val; device_printf(sc->arge_dev, "%s: pll_10 = 0x%x\n", __func__, (int) val); } if (resource_long_value(device_get_name(sc->arge_dev), device_get_unit(sc->arge_dev), "pll_100", &val) == 0) { sc->arge_pllcfg.pll_100 = val; device_printf(sc->arge_dev, "%s: pll_100 = 0x%x\n", __func__, (int) val); } if (resource_long_value(device_get_name(sc->arge_dev), device_get_unit(sc->arge_dev), "pll_1000", &val) == 0) { sc->arge_pllcfg.pll_1000 = val; device_printf(sc->arge_dev, "%s: pll_1000 = 0x%x\n", __func__, (int) val); } } static int arge_attach(device_t dev) { struct ifnet *ifp; struct arge_softc *sc; int error = 0, rid, i; uint32_t hint; long eeprom_mac_addr = 0; int miicfg = 0; int readascii = 0; int local_mac = 0; uint8_t local_macaddr[ETHER_ADDR_LEN]; char * local_macstr; char devid_str[32]; int count; sc = device_get_softc(dev); sc->arge_dev = dev; sc->arge_mac_unit = device_get_unit(dev); /* * See if there's a "board" MAC address hint available for * this particular device. * * This is in the environment - it'd be nice to use the resource_*() * routines, but at the moment the system is booting, the resource hints * are set to the 'static' map so they're not pulling from kenv. */ snprintf(devid_str, 32, "hint.%s.%d.macaddr", device_get_name(dev), device_get_unit(dev)); if ((local_macstr = kern_getenv(devid_str)) != NULL) { uint32_t tmpmac[ETHER_ADDR_LEN]; /* Have a MAC address; should use it */ device_printf(dev, "Overriding MAC address from environment: '%s'\n", local_macstr); /* Extract out the MAC address */ /* XXX this should all be a generic method */ count = sscanf(local_macstr, "%x%*c%x%*c%x%*c%x%*c%x%*c%x", &tmpmac[0], &tmpmac[1], &tmpmac[2], &tmpmac[3], &tmpmac[4], &tmpmac[5]); if (count == 6) { /* Valid! */ local_mac = 1; for (i = 0; i < ETHER_ADDR_LEN; i++) local_macaddr[i] = tmpmac[i]; } /* Done! */ freeenv(local_macstr); local_macstr = NULL; } /* * Hardware workarounds. */ switch (ar71xx_soc) { case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: /* Arbitrary alignment */ sc->arge_hw_flags |= ARGE_HW_FLG_TX_DESC_ALIGN_1BYTE; sc->arge_hw_flags |= ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE; break; default: sc->arge_hw_flags |= ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE; sc->arge_hw_flags |= ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE; break; } /* * Some units (e.g. the TP-Link WR-1043ND) do not have a convenient * EEPROM location to read the ethernet MAC address from. * OpenWRT simply snaffles it from a fixed location. * * Since multiple units seem to use this feature, include * a method of setting the MAC address based on a flash location * in CPU address space. * * Some vendors have decided to store the mac address as a literal * string of 18 characters in xx:xx:xx:xx:xx:xx format instead of * an array of numbers.
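 * For example (hypothetical flash contents), the text "00:0c:42:01:02:03"
 * is decoded below three characters at a time, i.e. strtol(&mac[0], NULL,
 * 16), strtol(&mac[3], NULL, 16) and so on, one byte per iteration.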
Expose a hint to turn on this conversion * feature via strtol() */ if (local_mac == 0 && resource_long_value(device_get_name(dev), device_get_unit(dev), "eeprommac", &eeprom_mac_addr) == 0) { local_mac = 1; int i; const char *mac = (const char *) MIPS_PHYS_TO_KSEG1(eeprom_mac_addr); device_printf(dev, "Overriding MAC from EEPROM\n"); if (resource_int_value(device_get_name(dev), device_get_unit(dev), "readascii", &readascii) == 0) { device_printf(dev, "Vendor stores MAC in ASCII format\n"); for (i = 0; i < 6; i++) { local_macaddr[i] = strtol(&(mac[i*3]), NULL, 16); } } else { for (i = 0; i < 6; i++) { local_macaddr[i] = mac[i]; } } } KASSERT(((sc->arge_mac_unit == 0) || (sc->arge_mac_unit == 1)), ("if_arge: Only MAC0 and MAC1 supported")); /* * Fetch the PLL configuration. */ arge_fetch_pll_config(sc); /* * Get the MII configuration, if applicable. */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "miimode", &miicfg) == 0) { /* XXX bounds check? */ device_printf(dev, "%s: overriding MII mode to '%s'\n", __func__, arge_miicfg_str[miicfg]); sc->arge_miicfg = miicfg; } /* * Get which PHY of 5 available we should use for this unit */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "phymask", &sc->arge_phymask) != 0) { /* * Use port 4 (WAN) for GE0. For any other port use * its PHY the same as its unit number */ if (sc->arge_mac_unit == 0) sc->arge_phymask = (1 << 4); else /* Use all phys up to 4 */ sc->arge_phymask = (1 << 4) - 1; device_printf(dev, "No PHY specified, using mask %d\n", sc->arge_phymask); } /* * Get default/hard-coded media & duplex mode. */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "media", &hint) != 0) hint = 0; if (hint == 1000) sc->arge_media_type = IFM_1000_T; else if (hint == 100) sc->arge_media_type = IFM_100_TX; else if (hint == 10) sc->arge_media_type = IFM_10_T; else sc->arge_media_type = 0; if (resource_int_value(device_get_name(dev), device_get_unit(dev), "fduplex", &hint) != 0) hint = 1; if (hint) sc->arge_duplex_mode = IFM_FDX; else sc->arge_duplex_mode = 0; mtx_init(&sc->arge_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->arge_stat_callout, &sc->arge_mtx, 0); TASK_INIT(&sc->arge_link_task, 0, arge_link_task, sc); /* Map control/status registers. */ sc->arge_rid = 0; sc->arge_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->arge_rid, RF_ACTIVE | RF_SHAREABLE); if (sc->arge_res == NULL) { device_printf(dev, "couldn't map memory\n"); error = ENXIO; goto fail; } /* Allocate interrupts */ rid = 0; sc->arge_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->arge_irq == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } /* Allocate ifnet structure. */ ifp = sc->arge_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "couldn't allocate ifnet structure\n"); error = ENOSPC; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = arge_ioctl; ifp->if_start = arge_start; ifp->if_init = arge_init; sc->arge_if_flags = ifp->if_flags; /* XXX: add real size */ IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_snd.ifq_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); /* Tell the upper layer(s) we support long frames. 
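 * (IFCAP_VLAN_MTU advertises that full-sized frames plus the 4-byte
 * 802.1Q tag are accepted, so vlan(4) interfaces stacked on top do not
 * have to shrink their MTU.)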
*/ ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capenable = ifp->if_capabilities; #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* If there's a local mac defined, copy that in */ if (local_mac == 1) { (void) ar71xx_mac_addr_init(sc->arge_eaddr, local_macaddr, 0, 0); } else { /* * No MAC address configured. Generate a random one. */ if (bootverbose) device_printf(dev, "Generating random ethernet address.\n"); (void) ar71xx_mac_addr_random_init(sc->arge_eaddr); } if (arge_dma_alloc(sc) != 0) { error = ENXIO; goto fail; } /* * Don't do this for the MDIO bus case - it's already done * as part of the MDIO bus attachment. */ #if !defined(ARGE_MDIO) /* Initialize the MAC block */ arge_reset_mac(sc); arge_reset_miibus(sc); #endif /* Configure MII mode, just for convenience */ if (sc->arge_miicfg != 0) ar71xx_device_set_mii_if(sc->arge_mac_unit, sc->arge_miicfg); /* * Program the Ethernet station address registers from sc->arge_eaddr. */ ARGE_WRITE(sc, AR71XX_MAC_STA_ADDR1, (sc->arge_eaddr[2] << 24) | (sc->arge_eaddr[3] << 16) | (sc->arge_eaddr[4] << 8) | sc->arge_eaddr[5]); ARGE_WRITE(sc, AR71XX_MAC_STA_ADDR2, (sc->arge_eaddr[0] << 8) | sc->arge_eaddr[1]); ARGE_WRITE(sc, AR71XX_MAC_FIFO_CFG0, FIFO_CFG0_ALL << FIFO_CFG0_ENABLE_SHIFT); /* * SoC specific bits. */ switch (ar71xx_soc) { case AR71XX_SOC_AR7240: case AR71XX_SOC_AR7241: case AR71XX_SOC_AR7242: case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: ARGE_WRITE(sc, AR71XX_MAC_FIFO_CFG1, 0x0010ffff); ARGE_WRITE(sc, AR71XX_MAC_FIFO_CFG2, 0x015500aa); break; /* AR71xx, AR913x */ default: ARGE_WRITE(sc, AR71XX_MAC_FIFO_CFG1, 0x0fff0000); ARGE_WRITE(sc, AR71XX_MAC_FIFO_CFG2, 0x00001fff); } ARGE_WRITE(sc, AR71XX_MAC_FIFO_RX_FILTMATCH, FIFO_RX_FILTMATCH_DEFAULT); ARGE_WRITE(sc, AR71XX_MAC_FIFO_RX_FILTMASK, FIFO_RX_FILTMASK_DEFAULT); #if defined(ARGE_MDIO) sc->arge_miiproxy = mii_attach_proxy(sc->arge_dev); #endif device_printf(sc->arge_dev, "finishing attachment, phymask %04x" ", proxy %s \n", sc->arge_phymask, sc->arge_miiproxy == NULL ? "null" : "set"); for (i = 0; i < ARGE_NPHY; i++) { if (((1 << i) & sc->arge_phymask) != 0) { error = mii_attach(sc->arge_miiproxy != NULL ? sc->arge_miiproxy : sc->arge_dev, &sc->arge_miibus, sc->arge_ifp, arge_ifmedia_upd, arge_ifmedia_sts, BMSR_DEFCAPMASK, i, MII_OFFSET_ANY, 0); if (error != 0) { device_printf(sc->arge_dev, "unable to attach" " PHY %d: %d\n", i, error); goto fail; } } } if (sc->arge_miibus == NULL) { /* no PHY, so use hard-coded values */ ifmedia_init(&sc->arge_ifmedia, 0, arge_multiphy_mediachange, arge_multiphy_mediastatus); ifmedia_add(&sc->arge_ifmedia, IFM_ETHER | sc->arge_media_type | sc->arge_duplex_mode, 0, NULL); ifmedia_set(&sc->arge_ifmedia, IFM_ETHER | sc->arge_media_type | sc->arge_duplex_mode); arge_set_pll(sc, sc->arge_media_type, sc->arge_duplex_mode); } /* Call MI attach routine.
*/ ether_ifattach(sc->arge_ifp, sc->arge_eaddr); /* Hook interrupt last to avoid having to lock softc */ error = bus_setup_intr(sc->arge_dev, sc->arge_irq, INTR_TYPE_NET | INTR_MPSAFE, arge_intr_filter, arge_intr, sc, &sc->arge_intrhand); if (error) { device_printf(sc->arge_dev, "couldn't set up irq\n"); ether_ifdetach(sc->arge_ifp); goto fail; } /* setup sysctl variables */ arge_attach_sysctl(sc->arge_dev); fail: if (error) arge_detach(dev); return (error); } static int arge_detach(device_t dev) { struct arge_softc *sc = device_get_softc(dev); struct ifnet *ifp = sc->arge_ifp; KASSERT(mtx_initialized(&sc->arge_mtx), ("arge mutex not initialized")); /* These should only be active if attach succeeded */ if (device_is_attached(dev)) { ARGE_LOCK(sc); sc->arge_detach = 1; #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif arge_stop(sc); ARGE_UNLOCK(sc); taskqueue_drain(taskqueue_swi, &sc->arge_link_task); ether_ifdetach(ifp); } if (sc->arge_miibus) device_delete_child(dev, sc->arge_miibus); if (sc->arge_miiproxy) device_delete_child(dev, sc->arge_miiproxy); bus_generic_detach(dev); if (sc->arge_intrhand) bus_teardown_intr(dev, sc->arge_irq, sc->arge_intrhand); if (sc->arge_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->arge_rid, sc->arge_res); if (ifp) if_free(ifp); arge_dma_free(sc); mtx_destroy(&sc->arge_mtx); return (0); } static int arge_suspend(device_t dev) { panic("%s", __func__); return 0; } static int arge_resume(device_t dev) { panic("%s", __func__); return 0; } static int arge_shutdown(device_t dev) { struct arge_softc *sc; sc = device_get_softc(dev); ARGE_LOCK(sc); arge_stop(sc); ARGE_UNLOCK(sc); return (0); } static void arge_hinted_child(device_t bus, const char *dname, int dunit) { BUS_ADD_CHILD(bus, 0, dname, dunit); device_printf(bus, "hinted child %s%d\n", dname, dunit); } static int arge_mdio_busy(struct arge_softc *sc) { int i, result; for (i = 0; i < ARGE_MII_TIMEOUT; i++) { DELAY(5); ARGE_MDIO_BARRIER_READ(sc); result = ARGE_MDIO_READ(sc, AR71XX_MAC_MII_INDICATOR); if (! result) return (0); DELAY(5); } return (-1); } static int arge_miibus_readreg(device_t dev, int phy, int reg) { struct arge_softc * sc = device_get_softc(dev); int result; uint32_t addr = (phy << MAC_MII_PHY_ADDR_SHIFT) | (reg & MAC_MII_REG_MASK); mtx_lock(&miibus_mtx); ARGE_MDIO_BARRIER_RW(sc); ARGE_MDIO_WRITE(sc, AR71XX_MAC_MII_CMD, MAC_MII_CMD_WRITE); ARGE_MDIO_BARRIER_WRITE(sc); ARGE_MDIO_WRITE(sc, AR71XX_MAC_MII_ADDR, addr); ARGE_MDIO_BARRIER_WRITE(sc); ARGE_MDIO_WRITE(sc, AR71XX_MAC_MII_CMD, MAC_MII_CMD_READ); if (arge_mdio_busy(sc) != 0) { mtx_unlock(&miibus_mtx); ARGEDEBUG(sc, ARGE_DBG_MII, "%s timed out\n", __func__); /* XXX: return ERRNO instead?
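 * (A -1 return at least reads back as all-ones in the 16-bit MII
 * register space, which the PHY probe path typically interprets as
 * 'no device present'.)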
*/ return (-1); } ARGE_MDIO_BARRIER_READ(sc); result = ARGE_MDIO_READ(sc, AR71XX_MAC_MII_STATUS) & MAC_MII_STATUS_MASK; ARGE_MDIO_BARRIER_RW(sc); ARGE_MDIO_WRITE(sc, AR71XX_MAC_MII_CMD, MAC_MII_CMD_WRITE); mtx_unlock(&miibus_mtx); ARGEDEBUG(sc, ARGE_DBG_MII, "%s: phy=%d, reg=%02x, value[%08x]=%04x\n", __func__, phy, reg, addr, result); return (result); } static int arge_miibus_writereg(device_t dev, int phy, int reg, int data) { struct arge_softc * sc = device_get_softc(dev); uint32_t addr = (phy << MAC_MII_PHY_ADDR_SHIFT) | (reg & MAC_MII_REG_MASK); ARGEDEBUG(sc, ARGE_DBG_MII, "%s: phy=%d, reg=%02x, value=%04x\n", __func__, phy, reg, data); mtx_lock(&miibus_mtx); ARGE_MDIO_BARRIER_RW(sc); ARGE_MDIO_WRITE(sc, AR71XX_MAC_MII_ADDR, addr); ARGE_MDIO_BARRIER_WRITE(sc); ARGE_MDIO_WRITE(sc, AR71XX_MAC_MII_CONTROL, data); ARGE_MDIO_BARRIER_WRITE(sc); if (arge_mdio_busy(sc) != 0) { mtx_unlock(&miibus_mtx); ARGEDEBUG(sc, ARGE_DBG_MII, "%s timed out\n", __func__); /* XXX: return ERRNO instead? */ return (-1); } mtx_unlock(&miibus_mtx); return (0); } static void arge_miibus_statchg(device_t dev) { struct arge_softc *sc; sc = device_get_softc(dev); taskqueue_enqueue(taskqueue_swi, &sc->arge_link_task); } static void arge_link_task(void *arg, int pending) { struct arge_softc *sc; sc = (struct arge_softc *)arg; ARGE_LOCK(sc); arge_update_link_locked(sc); ARGE_UNLOCK(sc); } static void arge_update_link_locked(struct arge_softc *sc) { struct mii_data *mii; struct ifnet *ifp; uint32_t media, duplex; mii = device_get_softc(sc->arge_miibus); ifp = sc->arge_ifp; if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { return; } /* * If we have a static media type configured, then * use that. Some PHY configurations (e.g. QCA955x -> AR8327) * use a static speed/duplex between the SoC and switch, * even though the front-facing PHY speed changes. */ if (sc->arge_media_type != 0) { ARGEDEBUG(sc, ARGE_DBG_MII, "%s: fixed; media=%d, duplex=%d\n", __func__, sc->arge_media_type, sc->arge_duplex_mode); if (mii->mii_media_status & IFM_ACTIVE) { sc->arge_link_status = 1; } else { sc->arge_link_status = 0; } arge_set_pll(sc, sc->arge_media_type, sc->arge_duplex_mode); } if (mii->mii_media_status & IFM_ACTIVE) { media = IFM_SUBTYPE(mii->mii_media_active); if (media != IFM_NONE) { sc->arge_link_status = 1; duplex = mii->mii_media_active & IFM_GMASK; ARGEDEBUG(sc, ARGE_DBG_MII, "%s: media=%d, duplex=%d\n", __func__, media, duplex); arge_set_pll(sc, media, duplex); } } else { sc->arge_link_status = 0; } } static void arge_set_pll(struct arge_softc *sc, int media, int duplex) { uint32_t cfg, ifcontrol, rx_filtmask; uint32_t fifo_tx, pll; int if_speed; /* * XXX Verify - is this valid for all chips? * QCA955x (and likely some of the earlier chips!) define * this as nibble mode and byte mode, and those have to do * with the interface type (MII/SMII versus GMII/RGMII.) */ ARGEDEBUG(sc, ARGE_DBG_PLL, "set_pll(%04x, %s)\n", media, duplex == IFM_FDX ?
"full" : "half"); cfg = ARGE_READ(sc, AR71XX_MAC_CFG2); cfg &= ~(MAC_CFG2_IFACE_MODE_1000 | MAC_CFG2_IFACE_MODE_10_100 | MAC_CFG2_FULL_DUPLEX); if (duplex == IFM_FDX) cfg |= MAC_CFG2_FULL_DUPLEX; ifcontrol = ARGE_READ(sc, AR71XX_MAC_IFCONTROL); ifcontrol &= ~MAC_IFCONTROL_SPEED; rx_filtmask = ARGE_READ(sc, AR71XX_MAC_FIFO_RX_FILTMASK); rx_filtmask &= ~FIFO_RX_MASK_BYTE_MODE; switch(media) { case IFM_10_T: cfg |= MAC_CFG2_IFACE_MODE_10_100; if_speed = 10; break; case IFM_100_TX: cfg |= MAC_CFG2_IFACE_MODE_10_100; ifcontrol |= MAC_IFCONTROL_SPEED; if_speed = 100; break; case IFM_1000_T: case IFM_1000_SX: cfg |= MAC_CFG2_IFACE_MODE_1000; rx_filtmask |= FIFO_RX_MASK_BYTE_MODE; if_speed = 1000; break; default: if_speed = 100; device_printf(sc->arge_dev, "Unknown media %d\n", media); } ARGEDEBUG(sc, ARGE_DBG_PLL, "%s: if_speed=%d\n", __func__, if_speed); switch (ar71xx_soc) { case AR71XX_SOC_AR7240: case AR71XX_SOC_AR7241: case AR71XX_SOC_AR7242: case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: fifo_tx = 0x01f00140; break; case AR71XX_SOC_AR9130: case AR71XX_SOC_AR9132: fifo_tx = 0x00780fff; break; /* AR71xx */ default: fifo_tx = 0x008001ff; } ARGE_WRITE(sc, AR71XX_MAC_CFG2, cfg); ARGE_WRITE(sc, AR71XX_MAC_IFCONTROL, ifcontrol); ARGE_WRITE(sc, AR71XX_MAC_FIFO_RX_FILTMASK, rx_filtmask); ARGE_WRITE(sc, AR71XX_MAC_FIFO_TX_THRESHOLD, fifo_tx); /* fetch PLL registers */ pll = ar71xx_device_get_eth_pll(sc->arge_mac_unit, if_speed); ARGEDEBUG(sc, ARGE_DBG_PLL, "%s: pll=0x%x\n", __func__, pll); /* Override if required by platform data */ if (if_speed == 10 && sc->arge_pllcfg.pll_10 != 0) pll = sc->arge_pllcfg.pll_10; else if (if_speed == 100 && sc->arge_pllcfg.pll_100 != 0) pll = sc->arge_pllcfg.pll_100; else if (if_speed == 1000 && sc->arge_pllcfg.pll_1000 != 0) pll = sc->arge_pllcfg.pll_1000; ARGEDEBUG(sc, ARGE_DBG_PLL, "%s: final pll=0x%x\n", __func__, pll); /* XXX ensure pll != 0 */ ar71xx_device_set_pll_ge(sc->arge_mac_unit, if_speed, pll); /* set MII registers */ /* * This was introduced to match what the Linux ag71xx ethernet * driver does. For the AR71xx case, it does set the port * MII speed. However, if this is done, non-gigabit speeds * are not at all reliable when speaking via RGMII through * 'bridge' PHY port that's pretending to be a local PHY. * * Until that gets root caused, and until an AR71xx + normal * PHY board is tested, leave this disabled. 
*/ #if 0 ar71xx_device_set_mii_speed(sc->arge_mac_unit, if_speed); #endif } static void arge_reset_dma(struct arge_softc *sc) { ARGEDEBUG(sc, ARGE_DBG_RESET, "%s: called\n", __func__); ARGE_WRITE(sc, AR71XX_DMA_RX_CONTROL, 0); ARGE_WRITE(sc, AR71XX_DMA_TX_CONTROL, 0); ARGE_WRITE(sc, AR71XX_DMA_RX_DESC, 0); ARGE_WRITE(sc, AR71XX_DMA_TX_DESC, 0); /* Clear all possible RX interrupts */ while(ARGE_READ(sc, AR71XX_DMA_RX_STATUS) & DMA_RX_STATUS_PKT_RECVD) ARGE_WRITE(sc, AR71XX_DMA_RX_STATUS, DMA_RX_STATUS_PKT_RECVD); /* * Clear all possible TX interrupts */ while(ARGE_READ(sc, AR71XX_DMA_TX_STATUS) & DMA_TX_STATUS_PKT_SENT) ARGE_WRITE(sc, AR71XX_DMA_TX_STATUS, DMA_TX_STATUS_PKT_SENT); /* * Now Rx/Tx errors */ ARGE_WRITE(sc, AR71XX_DMA_RX_STATUS, DMA_RX_STATUS_BUS_ERROR | DMA_RX_STATUS_OVERFLOW); ARGE_WRITE(sc, AR71XX_DMA_TX_STATUS, DMA_TX_STATUS_BUS_ERROR | DMA_TX_STATUS_UNDERRUN); /* * Force a DDR flush so any pending data is properly * flushed to RAM before underlying buffers are freed. */ arge_flush_ddr(sc); } static void arge_init(void *xsc) { struct arge_softc *sc = xsc; ARGE_LOCK(sc); arge_init_locked(sc); ARGE_UNLOCK(sc); } static void arge_init_locked(struct arge_softc *sc) { struct ifnet *ifp = sc->arge_ifp; struct mii_data *mii; ARGE_LOCK_ASSERT(sc); if ((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) return; /* Init circular RX list. */ if (arge_rx_ring_init(sc) != 0) { device_printf(sc->arge_dev, "initialization failed: no memory for rx buffers\n"); arge_stop(sc); return; } /* Init tx descriptors. */ arge_tx_ring_init(sc); arge_reset_dma(sc); if (sc->arge_miibus) { mii = device_get_softc(sc->arge_miibus); mii_mediachg(mii); } else { /* * Sun always shines over multiPHY interface */ sc->arge_link_status = 1; } ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (sc->arge_miibus) { callout_reset(&sc->arge_stat_callout, hz, arge_tick, sc); arge_update_link_locked(sc); } ARGE_WRITE(sc, AR71XX_DMA_TX_DESC, ARGE_TX_RING_ADDR(sc, 0)); ARGE_WRITE(sc, AR71XX_DMA_RX_DESC, ARGE_RX_RING_ADDR(sc, 0)); /* Start listening */ ARGE_WRITE(sc, AR71XX_DMA_RX_CONTROL, DMA_RX_CONTROL_EN); /* Enable interrupts */ ARGE_WRITE(sc, AR71XX_DMA_INTR, DMA_INTR_ALL); } /* * Return whether the mbuf chain is correctly aligned * for the arge TX engine. * * All the MACs have a length requirement: any non-final * fragment (ie, descriptor with MORE bit set) needs to have * a length divisible by 4. * * The AR71xx, AR913x require the start address also be * DWORD aligned. The later MACs don't. */ static int arge_mbuf_chain_is_tx_aligned(struct arge_softc *sc, struct mbuf *m0) { struct mbuf *m; for (m = m0; m != NULL; m = m->m_next) { /* * Only do this for chips that require it. */ if ((sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE) && (mtod(m, intptr_t) & 3) != 0) { sc->stats.tx_pkts_unaligned_start++; return 0; } /* * All chips have this requirement for length. */ if ((m->m_next != NULL) && ((m->m_len & 0x03) != 0)) { sc->stats.tx_pkts_unaligned_len++; return 0; } } return 1; } /* * Encapsulate an mbuf chain in a descriptor by coupling the mbuf data * pointers to the fragment pointers. */ static int arge_encap(struct arge_softc *sc, struct mbuf **m_head) { struct arge_txdesc *txd; struct arge_desc *desc, *prev_desc; bus_dma_segment_t txsegs[ARGE_MAXFRAGS]; int error, i, nsegs, prod, prev_prod; struct mbuf *m; ARGE_LOCK_ASSERT(sc); /* * Fix mbuf chain based on hardware alignment constraints. */ m = *m_head; if (! 
arge_mbuf_chain_is_tx_aligned(sc, m)) { sc->stats.tx_pkts_unaligned++; m = m_defrag(*m_head, M_NOWAIT); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } *m_head = m; } else sc->stats.tx_pkts_aligned++; prod = sc->arge_cdata.arge_tx_prod; txd = &sc->arge_cdata.arge_txdesc[prod]; error = bus_dmamap_load_mbuf_sg(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap, *m_head, txsegs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { panic("EFBIG"); } else if (error != 0) return (error); if (nsegs == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } /* Check number of available descriptors. */ if (sc->arge_cdata.arge_tx_cnt + nsegs >= (ARGE_TX_RING_COUNT - 2)) { bus_dmamap_unload(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap); sc->stats.tx_pkts_nosegs++; return (ENOBUFS); } txd->tx_m = *m_head; bus_dmamap_sync(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap, BUS_DMASYNC_PREWRITE); /* * Make a list of descriptors for this packet. DMA controller will * walk through it while arge_link is not zero. * * Since we're in an endless circular buffer, ensure that * the first descriptor in a multi-descriptor ring is always * set to EMPTY, then undo it when we're done populating. */ prev_prod = prod; desc = prev_desc = NULL; for (i = 0; i < nsegs; i++) { uint32_t tmp; desc = &sc->arge_rdata.arge_tx_ring[prod]; /* * Set DESC_EMPTY so the hardware (hopefully) stops at this * point. We don't want it to start transmitting descriptors * before we've finished fleshing this out. */ tmp = ARGE_DMASIZE(txsegs[i].ds_len); if (i == 0) tmp |= ARGE_DESC_EMPTY; desc->packet_ctrl = tmp; /* XXX Note: only relevant for older MACs; but check length! */ if ((sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE) && (txsegs[i].ds_addr & 3)) panic("TX packet address unaligned\n"); desc->packet_addr = txsegs[i].ds_addr; /* link with previous descriptor */ if (prev_desc) prev_desc->packet_ctrl |= ARGE_DESC_MORE; sc->arge_cdata.arge_tx_cnt++; prev_desc = desc; ARGE_INC(prod, ARGE_TX_RING_COUNT); } /* Update producer index. */ sc->arge_cdata.arge_tx_prod = prod; /* * The descriptors are updated, so enable the first one. */ desc = &sc->arge_rdata.arge_tx_ring[prev_prod]; desc->packet_ctrl &= ~ ARGE_DESC_EMPTY; /* Sync descriptors. */ bus_dmamap_sync(sc->arge_cdata.arge_tx_ring_tag, sc->arge_cdata.arge_tx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Flush writes */ ARGE_BARRIER_WRITE(sc); /* Start transmitting */ ARGEDEBUG(sc, ARGE_DBG_TX, "%s: setting DMA_TX_CONTROL_EN\n", __func__); ARGE_WRITE(sc, AR71XX_DMA_TX_CONTROL, DMA_TX_CONTROL_EN); return (0); } static void arge_start(struct ifnet *ifp) { struct arge_softc *sc; sc = ifp->if_softc; ARGE_LOCK(sc); arge_start_locked(ifp); ARGE_UNLOCK(sc); } static void arge_start_locked(struct ifnet *ifp) { struct arge_softc *sc; struct mbuf *m_head; int enq = 0; sc = ifp->if_softc; ARGE_LOCK_ASSERT(sc); ARGEDEBUG(sc, ARGE_DBG_TX, "%s: beginning\n", __func__); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || sc->arge_link_status == 0 ) return; /* * Before we go any further, check whether we're already full. * The below check errors out immediately if the ring is full * and never gets a chance to set this flag. Although it's * likely never needed, this at least avoids an unexpected * situation.
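 * Concrete (illustrative) numbers: with ARGE_TX_RING_COUNT at 128,
 * transmission pauses once 126 descriptors are outstanding, mirroring
 * the 'arge_tx_cnt + nsegs >= ARGE_TX_RING_COUNT - 2' test in
 * arge_encap() above.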
*/ if (sc->arge_cdata.arge_tx_cnt >= ARGE_TX_RING_COUNT - 2) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; ARGEDEBUG(sc, ARGE_DBG_ERR, "%s: tx_cnt %d >= max %d; setting IFF_DRV_OACTIVE\n", __func__, sc->arge_cdata.arge_tx_cnt, ARGE_TX_RING_COUNT - 2); return; } arge_flush_ddr(sc); for (enq = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->arge_cdata.arge_tx_cnt < ARGE_TX_RING_COUNT - 2; ) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* * Pack the data into the transmit ring. */ if (arge_encap(sc, &m_head)) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } enq++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ ETHER_BPF_MTAP(ifp, m_head); } ARGEDEBUG(sc, ARGE_DBG_TX, "%s: finished; queued %d packets\n", __func__, enq); } static void arge_stop(struct arge_softc *sc) { struct ifnet *ifp; ARGE_LOCK_ASSERT(sc); ifp = sc->arge_ifp; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if (sc->arge_miibus) callout_stop(&sc->arge_stat_callout); /* mask out interrupts */ ARGE_WRITE(sc, AR71XX_DMA_INTR, 0); arge_reset_dma(sc); /* Flush FIFO and free any existing mbufs */ arge_flush_ddr(sc); arge_rx_ring_free(sc); arge_tx_ring_free(sc); } static int arge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct arge_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; struct mii_data *mii; int error; #ifdef DEVICE_POLLING int mask; #endif switch (command) { case SIOCSIFFLAGS: ARGE_LOCK(sc); if ((ifp->if_flags & IFF_UP) != 0) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if (((ifp->if_flags ^ sc->arge_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) { /* XXX: handle promisc & multi flags */ } } else { if (!sc->arge_detach) arge_init_locked(sc); } } else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; arge_stop(sc); } sc->arge_if_flags = ifp->if_flags; ARGE_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: /* XXX: implement SIOCDELMULTI */ error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: if (sc->arge_miibus) { mii = device_get_softc(sc->arge_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); } else error = ifmedia_ioctl(ifp, ifr, &sc->arge_ifmedia, command); break; case SIOCSIFCAP: /* XXX: Check other capabilities */ #ifdef DEVICE_POLLING mask = ifp->if_capenable ^ ifr->ifr_reqcap; if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { ARGE_WRITE(sc, AR71XX_DMA_INTR, 0); error = ether_poll_register(arge_poll, ifp); if (error) return error; ARGE_LOCK(sc); ifp->if_capenable |= IFCAP_POLLING; ARGE_UNLOCK(sc); } else { ARGE_WRITE(sc, AR71XX_DMA_INTR, DMA_INTR_ALL); error = ether_poll_deregister(ifp); ARGE_LOCK(sc); ifp->if_capenable &= ~IFCAP_POLLING; ARGE_UNLOCK(sc); } } error = 0; break; #endif default: error = ether_ioctl(ifp, command, data); break; } return (error); } /* * Set media options. */ static int arge_ifmedia_upd(struct ifnet *ifp) { struct arge_softc *sc; struct mii_data *mii; struct mii_softc *miisc; int error; sc = ifp->if_softc; ARGE_LOCK(sc); mii = device_get_softc(sc->arge_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); error = mii_mediachg(mii); ARGE_UNLOCK(sc); return (error); } /* * Report current media status. 
*/ static void arge_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct arge_softc *sc = ifp->if_softc; struct mii_data *mii; mii = device_get_softc(sc->arge_miibus); ARGE_LOCK(sc); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; ARGE_UNLOCK(sc); } struct arge_dmamap_arg { bus_addr_t arge_busaddr; }; static void arge_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct arge_dmamap_arg *ctx; if (error != 0) return; ctx = arg; ctx->arge_busaddr = segs[0].ds_addr; } static int arge_dma_alloc(struct arge_softc *sc) { struct arge_dmamap_arg ctx; struct arge_txdesc *txd; struct arge_rxdesc *rxd; int error, i; int arge_tx_align, arge_rx_align; /* Assume 4 byte alignment by default */ arge_tx_align = 4; arge_rx_align = 4; if (sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_1BYTE) arge_tx_align = 1; if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE) arge_rx_align = 1; /* Create parent DMA tag. */ error = bus_dma_tag_create( bus_get_dma_tag(sc->arge_dev), /* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT, /* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->arge_cdata.arge_parent_tag); if (error != 0) { device_printf(sc->arge_dev, "failed to create parent DMA tag\n"); goto fail; } /* Create tag for Tx ring. */ error = bus_dma_tag_create( sc->arge_cdata.arge_parent_tag, /* parent */ ARGE_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ARGE_TX_DMA_SIZE, /* maxsize */ 1, /* nsegments */ ARGE_TX_DMA_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->arge_cdata.arge_tx_ring_tag); if (error != 0) { device_printf(sc->arge_dev, "failed to create Tx ring DMA tag\n"); goto fail; } /* Create tag for Rx ring. */ error = bus_dma_tag_create( sc->arge_cdata.arge_parent_tag, /* parent */ ARGE_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ARGE_RX_DMA_SIZE, /* maxsize */ 1, /* nsegments */ ARGE_RX_DMA_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->arge_cdata.arge_rx_ring_tag); if (error != 0) { device_printf(sc->arge_dev, "failed to create Rx ring DMA tag\n"); goto fail; } /* Create tag for Tx buffers. */ error = bus_dma_tag_create( sc->arge_cdata.arge_parent_tag, /* parent */ arge_tx_align, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES * ARGE_MAXFRAGS, /* maxsize */ ARGE_MAXFRAGS, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->arge_cdata.arge_tx_tag); if (error != 0) { device_printf(sc->arge_dev, "failed to create Tx DMA tag\n"); goto fail; } /* Create tag for Rx buffers. 
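 * As on the Tx side: the parent tag spans the 32-bit DMA space, the ring
 * tags add ARGE_RING_ALIGN alignment, and the buffer tags use 1- or
 * 4-byte alignment depending on which ARGE_HW_FLG_*_DESC_ALIGN_* quirk
 * arge_attach() selected.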
*/ error = bus_dma_tag_create( sc->arge_cdata.arge_parent_tag, /* parent */ arge_rx_align, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES, /* maxsize */ ARGE_MAXFRAGS, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->arge_cdata.arge_rx_tag); if (error != 0) { device_printf(sc->arge_dev, "failed to create Rx DMA tag\n"); goto fail; } /* Allocate DMA'able memory and load the DMA map for Tx ring. */ error = bus_dmamem_alloc(sc->arge_cdata.arge_tx_ring_tag, (void **)&sc->arge_rdata.arge_tx_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->arge_cdata.arge_tx_ring_map); if (error != 0) { device_printf(sc->arge_dev, "failed to allocate DMA'able memory for Tx ring\n"); goto fail; } ctx.arge_busaddr = 0; error = bus_dmamap_load(sc->arge_cdata.arge_tx_ring_tag, sc->arge_cdata.arge_tx_ring_map, sc->arge_rdata.arge_tx_ring, ARGE_TX_DMA_SIZE, arge_dmamap_cb, &ctx, 0); if (error != 0 || ctx.arge_busaddr == 0) { device_printf(sc->arge_dev, "failed to load DMA'able memory for Tx ring\n"); goto fail; } sc->arge_rdata.arge_tx_ring_paddr = ctx.arge_busaddr; /* Allocate DMA'able memory and load the DMA map for Rx ring. */ error = bus_dmamem_alloc(sc->arge_cdata.arge_rx_ring_tag, (void **)&sc->arge_rdata.arge_rx_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->arge_cdata.arge_rx_ring_map); if (error != 0) { device_printf(sc->arge_dev, "failed to allocate DMA'able memory for Rx ring\n"); goto fail; } ctx.arge_busaddr = 0; error = bus_dmamap_load(sc->arge_cdata.arge_rx_ring_tag, sc->arge_cdata.arge_rx_ring_map, sc->arge_rdata.arge_rx_ring, ARGE_RX_DMA_SIZE, arge_dmamap_cb, &ctx, 0); if (error != 0 || ctx.arge_busaddr == 0) { device_printf(sc->arge_dev, "failed to load DMA'able memory for Rx ring\n"); goto fail; } sc->arge_rdata.arge_rx_ring_paddr = ctx.arge_busaddr; /* Create DMA maps for Tx buffers. */ for (i = 0; i < ARGE_TX_RING_COUNT; i++) { txd = &sc->arge_cdata.arge_txdesc[i]; txd->tx_m = NULL; txd->tx_dmamap = NULL; error = bus_dmamap_create(sc->arge_cdata.arge_tx_tag, 0, &txd->tx_dmamap); if (error != 0) { device_printf(sc->arge_dev, "failed to create Tx dmamap\n"); goto fail; } } /* Create DMA maps for Rx buffers. */ if ((error = bus_dmamap_create(sc->arge_cdata.arge_rx_tag, 0, &sc->arge_cdata.arge_rx_sparemap)) != 0) { device_printf(sc->arge_dev, "failed to create spare Rx dmamap\n"); goto fail; } for (i = 0; i < ARGE_RX_RING_COUNT; i++) { rxd = &sc->arge_cdata.arge_rxdesc[i]; rxd->rx_m = NULL; rxd->rx_dmamap = NULL; error = bus_dmamap_create(sc->arge_cdata.arge_rx_tag, 0, &rxd->rx_dmamap); if (error != 0) { device_printf(sc->arge_dev, "failed to create Rx dmamap\n"); goto fail; } } fail: return (error); } static void arge_dma_free(struct arge_softc *sc) { struct arge_txdesc *txd; struct arge_rxdesc *rxd; int i; /* Tx ring. */ if (sc->arge_cdata.arge_tx_ring_tag) { if (sc->arge_rdata.arge_tx_ring_paddr) bus_dmamap_unload(sc->arge_cdata.arge_tx_ring_tag, sc->arge_cdata.arge_tx_ring_map); if (sc->arge_rdata.arge_tx_ring) bus_dmamem_free(sc->arge_cdata.arge_tx_ring_tag, sc->arge_rdata.arge_tx_ring, sc->arge_cdata.arge_tx_ring_map); sc->arge_rdata.arge_tx_ring = NULL; sc->arge_rdata.arge_tx_ring_paddr = 0; bus_dma_tag_destroy(sc->arge_cdata.arge_tx_ring_tag); sc->arge_cdata.arge_tx_ring_tag = NULL; } /* Rx ring. 
*/ if (sc->arge_cdata.arge_rx_ring_tag) { if (sc->arge_rdata.arge_rx_ring_paddr) bus_dmamap_unload(sc->arge_cdata.arge_rx_ring_tag, sc->arge_cdata.arge_rx_ring_map); if (sc->arge_rdata.arge_rx_ring) bus_dmamem_free(sc->arge_cdata.arge_rx_ring_tag, sc->arge_rdata.arge_rx_ring, sc->arge_cdata.arge_rx_ring_map); sc->arge_rdata.arge_rx_ring = NULL; sc->arge_rdata.arge_rx_ring_paddr = 0; bus_dma_tag_destroy(sc->arge_cdata.arge_rx_ring_tag); sc->arge_cdata.arge_rx_ring_tag = NULL; } /* Tx buffers. */ if (sc->arge_cdata.arge_tx_tag) { for (i = 0; i < ARGE_TX_RING_COUNT; i++) { txd = &sc->arge_cdata.arge_txdesc[i]; if (txd->tx_dmamap) { bus_dmamap_destroy(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap); txd->tx_dmamap = NULL; } } bus_dma_tag_destroy(sc->arge_cdata.arge_tx_tag); sc->arge_cdata.arge_tx_tag = NULL; } /* Rx buffers. */ if (sc->arge_cdata.arge_rx_tag) { for (i = 0; i < ARGE_RX_RING_COUNT; i++) { rxd = &sc->arge_cdata.arge_rxdesc[i]; if (rxd->rx_dmamap) { bus_dmamap_destroy(sc->arge_cdata.arge_rx_tag, rxd->rx_dmamap); rxd->rx_dmamap = NULL; } } if (sc->arge_cdata.arge_rx_sparemap) { bus_dmamap_destroy(sc->arge_cdata.arge_rx_tag, sc->arge_cdata.arge_rx_sparemap); sc->arge_cdata.arge_rx_sparemap = 0; } bus_dma_tag_destroy(sc->arge_cdata.arge_rx_tag); sc->arge_cdata.arge_rx_tag = NULL; } if (sc->arge_cdata.arge_parent_tag) { bus_dma_tag_destroy(sc->arge_cdata.arge_parent_tag); sc->arge_cdata.arge_parent_tag = NULL; } } /* * Initialize the transmit descriptors. */ static int arge_tx_ring_init(struct arge_softc *sc) { struct arge_ring_data *rd; struct arge_txdesc *txd; bus_addr_t addr; int i; sc->arge_cdata.arge_tx_prod = 0; sc->arge_cdata.arge_tx_cons = 0; sc->arge_cdata.arge_tx_cnt = 0; rd = &sc->arge_rdata; bzero(rd->arge_tx_ring, sizeof(rd->arge_tx_ring)); for (i = 0; i < ARGE_TX_RING_COUNT; i++) { if (i == ARGE_TX_RING_COUNT - 1) addr = ARGE_TX_RING_ADDR(sc, 0); else addr = ARGE_TX_RING_ADDR(sc, i + 1); rd->arge_tx_ring[i].packet_ctrl = ARGE_DESC_EMPTY; rd->arge_tx_ring[i].next_desc = addr; txd = &sc->arge_cdata.arge_txdesc[i]; txd->tx_m = NULL; } bus_dmamap_sync(sc->arge_cdata.arge_tx_ring_tag, sc->arge_cdata.arge_tx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /* * Free the Tx ring, unload any pending dma transaction and free the mbuf. */ static void arge_tx_ring_free(struct arge_softc *sc) { struct arge_txdesc *txd; int i; /* Free the Tx buffers. */ for (i = 0; i < ARGE_TX_RING_COUNT; i++) { txd = &sc->arge_cdata.arge_txdesc[i]; if (txd->tx_dmamap) { bus_dmamap_sync(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap); } if (txd->tx_m) m_freem(txd->tx_m); txd->tx_m = NULL; } } /* * Initialize the RX descriptors and allocate mbufs for them. Note that * we arrange the descriptors in a closed ring, so that the last descriptor * points back to the first. 
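 *
 * (Concretely, with a hypothetical 4-entry ring the next_desc chain is
 * 0 -> 1 -> 2 -> 3 -> 0, i.e. ring[i].next_desc =
 * ARGE_RX_RING_ADDR(sc, (i + 1) % ARGE_RX_RING_COUNT); the loop below
 * computes the same thing without the modulo by special-casing the
 * last entry.)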
*/ static int arge_rx_ring_init(struct arge_softc *sc) { struct arge_ring_data *rd; struct arge_rxdesc *rxd; bus_addr_t addr; int i; sc->arge_cdata.arge_rx_cons = 0; rd = &sc->arge_rdata; bzero(rd->arge_rx_ring, sizeof(rd->arge_rx_ring)); for (i = 0; i < ARGE_RX_RING_COUNT; i++) { rxd = &sc->arge_cdata.arge_rxdesc[i]; if (rxd->rx_m != NULL) { device_printf(sc->arge_dev, "%s: ring[%d] rx_m wasn't free?\n", __func__, i); } rxd->rx_m = NULL; rxd->desc = &rd->arge_rx_ring[i]; if (i == ARGE_RX_RING_COUNT - 1) addr = ARGE_RX_RING_ADDR(sc, 0); else addr = ARGE_RX_RING_ADDR(sc, i + 1); rd->arge_rx_ring[i].next_desc = addr; if (arge_newbuf(sc, i) != 0) { return (ENOBUFS); } } bus_dmamap_sync(sc->arge_cdata.arge_rx_ring_tag, sc->arge_cdata.arge_rx_ring_map, BUS_DMASYNC_PREWRITE); return (0); } /* * Free all the buffers in the RX ring. * * TODO: ensure that DMA is disabled and no pending DMA * is lurking in the FIFO. */ static void arge_rx_ring_free(struct arge_softc *sc) { int i; struct arge_rxdesc *rxd; ARGE_LOCK_ASSERT(sc); for (i = 0; i < ARGE_RX_RING_COUNT; i++) { rxd = &sc->arge_cdata.arge_rxdesc[i]; /* Unmap the mbuf */ if (rxd->rx_m != NULL) { bus_dmamap_unload(sc->arge_cdata.arge_rx_tag, rxd->rx_dmamap); m_free(rxd->rx_m); rxd->rx_m = NULL; } } } /* * Initialize an RX descriptor and attach an MBUF cluster. */ static int arge_newbuf(struct arge_softc *sc, int idx) { struct arge_desc *desc; struct arge_rxdesc *rxd; struct mbuf *m; bus_dma_segment_t segs[1]; bus_dmamap_t map; int nsegs; /* XXX TODO: should just allocate an explicit 2KiB buffer */ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; /* * Add extra space to "adjust" (copy) the packet back to be aligned * for purposes of IPv4/IPv6 header contents. */ if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE) m_adj(m, sizeof(uint64_t)); /* * If it's a 1-byte aligned buffer, then just offset it two bytes * and that will give us a hopefully correctly DWORD aligned * L3 payload - and we won't have to undo it afterwards. */ else if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE) m_adj(m, sizeof(uint16_t)); if (bus_dmamap_load_mbuf_sg(sc->arge_cdata.arge_rx_tag, sc->arge_cdata.arge_rx_sparemap, m, segs, &nsegs, 0) != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); rxd = &sc->arge_cdata.arge_rxdesc[idx]; if (rxd->rx_m != NULL) { bus_dmamap_unload(sc->arge_cdata.arge_rx_tag, rxd->rx_dmamap); } map = rxd->rx_dmamap; rxd->rx_dmamap = sc->arge_cdata.arge_rx_sparemap; sc->arge_cdata.arge_rx_sparemap = map; rxd->rx_m = m; desc = rxd->desc; if ((sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE) && segs[0].ds_addr & 3) panic("RX packet address unaligned"); desc->packet_addr = segs[0].ds_addr; desc->packet_ctrl = ARGE_DESC_EMPTY | ARGE_DMASIZE(segs[0].ds_len); bus_dmamap_sync(sc->arge_cdata.arge_rx_ring_tag, sc->arge_cdata.arge_rx_ring_map, BUS_DMASYNC_PREWRITE); return (0); } /* * Move the data backwards 16 bits to (hopefully!) ensure the * IPv4/IPv6 payload is aligned. * * This is required for earlier hardware where the RX path * requires DWORD aligned buffers. 
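 *
 * (Worked example with hypothetical addresses: on this path
 * arge_newbuf() did m_adj(m, sizeof(uint64_t)), so a cluster at 0x1000
 * has m_data = 0x1008; after the 14-byte Ethernet header the IPv4/IPv6
 * header sits at 0x1016, which is only 2-byte aligned.  Copying the
 * frame back by one uint16_t and subtracting ETHER_ALIGN gives
 * m_data = 0x1006 and a header at 0x1014, a DWORD boundary.)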
*/ static __inline void arge_fixup_rx(struct mbuf *m) { int i; uint16_t *src, *dst; src = mtod(m, uint16_t *); dst = src - 1; for (i = 0; i < m->m_len / sizeof(uint16_t); i++) { *dst++ = *src++; } if (m->m_len % sizeof(uint16_t)) *(uint8_t *)dst = *(uint8_t *)src; m->m_data -= ETHER_ALIGN; } #ifdef DEVICE_POLLING static int arge_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct arge_softc *sc = ifp->if_softc; int rx_npkts = 0; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ARGE_LOCK(sc); arge_tx_locked(sc); rx_npkts = arge_rx_locked(sc); ARGE_UNLOCK(sc); } return (rx_npkts); } #endif /* DEVICE_POLLING */ static void arge_tx_locked(struct arge_softc *sc) { struct arge_txdesc *txd; struct arge_desc *cur_tx; struct ifnet *ifp; uint32_t ctrl; int cons, prod; ARGE_LOCK_ASSERT(sc); cons = sc->arge_cdata.arge_tx_cons; prod = sc->arge_cdata.arge_tx_prod; ARGEDEBUG(sc, ARGE_DBG_TX, "%s: cons=%d, prod=%d\n", __func__, cons, prod); if (cons == prod) return; bus_dmamap_sync(sc->arge_cdata.arge_tx_ring_tag, sc->arge_cdata.arge_tx_ring_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); ifp = sc->arge_ifp; /* * Go through our tx list and free mbufs for those * frames that have been transmitted. */ for (; cons != prod; ARGE_INC(cons, ARGE_TX_RING_COUNT)) { cur_tx = &sc->arge_rdata.arge_tx_ring[cons]; ctrl = cur_tx->packet_ctrl; /* Check if descriptor has "finished" flag */ if ((ctrl & ARGE_DESC_EMPTY) == 0) break; ARGE_WRITE(sc, AR71XX_DMA_TX_STATUS, DMA_TX_STATUS_PKT_SENT); sc->arge_cdata.arge_tx_cnt--; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; txd = &sc->arge_cdata.arge_txdesc[cons]; if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); bus_dmamap_sync(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap); /* Free only if it's first descriptor in list */ if (txd->tx_m) m_freem(txd->tx_m); txd->tx_m = NULL; /* reset descriptor */ cur_tx->packet_addr = 0; } sc->arge_cdata.arge_tx_cons = cons; bus_dmamap_sync(sc->arge_cdata.arge_tx_ring_tag, sc->arge_cdata.arge_tx_ring_map, BUS_DMASYNC_PREWRITE); } static int arge_rx_locked(struct arge_softc *sc) { struct arge_rxdesc *rxd; struct ifnet *ifp = sc->arge_ifp; int cons, prog, packet_len, i; struct arge_desc *cur_rx; struct mbuf *m; int rx_npkts = 0; ARGE_LOCK_ASSERT(sc); cons = sc->arge_cdata.arge_rx_cons; bus_dmamap_sync(sc->arge_cdata.arge_rx_ring_tag, sc->arge_cdata.arge_rx_ring_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (prog = 0; prog < ARGE_RX_RING_COUNT; ARGE_INC(cons, ARGE_RX_RING_COUNT)) { cur_rx = &sc->arge_rdata.arge_rx_ring[cons]; rxd = &sc->arge_cdata.arge_rxdesc[cons]; m = rxd->rx_m; if ((cur_rx->packet_ctrl & ARGE_DESC_EMPTY) != 0) break; ARGE_WRITE(sc, AR71XX_DMA_RX_STATUS, DMA_RX_STATUS_PKT_RECVD); prog++; packet_len = ARGE_DMASIZE(cur_rx->packet_ctrl); bus_dmamap_sync(sc->arge_cdata.arge_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); m = rxd->rx_m; /* * If the MAC requires 4 byte alignment then the RX setup * routine will have pre-offset things; so un-offset it here. 
*/ if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE) arge_fixup_rx(m); m->m_pkthdr.rcvif = ifp; /* Skip 4 bytes of CRC */ m->m_pkthdr.len = m->m_len = packet_len - ETHER_CRC_LEN; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); rx_npkts++; ARGE_UNLOCK(sc); (*ifp->if_input)(ifp, m); ARGE_LOCK(sc); cur_rx->packet_addr = 0; } if (prog > 0) { i = sc->arge_cdata.arge_rx_cons; for (; prog > 0 ; prog--) { if (arge_newbuf(sc, i) != 0) { device_printf(sc->arge_dev, "Failed to allocate buffer\n"); break; } ARGE_INC(i, ARGE_RX_RING_COUNT); } bus_dmamap_sync(sc->arge_cdata.arge_rx_ring_tag, sc->arge_cdata.arge_rx_ring_map, BUS_DMASYNC_PREWRITE); sc->arge_cdata.arge_rx_cons = cons; } return (rx_npkts); } static int arge_intr_filter(void *arg) { struct arge_softc *sc = arg; uint32_t status, ints; status = ARGE_READ(sc, AR71XX_DMA_INTR_STATUS); ints = ARGE_READ(sc, AR71XX_DMA_INTR); ARGEDEBUG(sc, ARGE_DBG_INTR, "int mask(filter) = %b\n", ints, "\20\10RX_BUS_ERROR\7RX_OVERFLOW\5RX_PKT_RCVD" "\4TX_BUS_ERROR\2TX_UNDERRUN\1TX_PKT_SENT"); ARGEDEBUG(sc, ARGE_DBG_INTR, "status(filter) = %b\n", status, "\20\10RX_BUS_ERROR\7RX_OVERFLOW\5RX_PKT_RCVD" "\4TX_BUS_ERROR\2TX_UNDERRUN\1TX_PKT_SENT"); if (status & DMA_INTR_ALL) { sc->arge_intr_status |= status; ARGE_WRITE(sc, AR71XX_DMA_INTR, 0); sc->stats.intr_ok++; return (FILTER_SCHEDULE_THREAD); } sc->arge_intr_status = 0; sc->stats.intr_stray++; return (FILTER_STRAY); } static void arge_intr(void *arg) { struct arge_softc *sc = arg; uint32_t status; struct ifnet *ifp = sc->arge_ifp; #ifdef ARGE_DEBUG int i; #endif status = ARGE_READ(sc, AR71XX_DMA_INTR_STATUS); status |= sc->arge_intr_status; ARGEDEBUG(sc, ARGE_DBG_INTR, "int status(intr) = %b\n", status, "\20\10\7RX_OVERFLOW\5RX_PKT_RCVD" "\4TX_BUS_ERROR\2TX_UNDERRUN\1TX_PKT_SENT"); /* * Is it our interrupt at all? */ if (status == 0) { sc->stats.intr_stray2++; return; } #ifdef ARGE_DEBUG for (i = 0; i < 32; i++) { if (status & (1U << i)) { sc->intr_stats.count[i]++; } } #endif if (status & DMA_INTR_RX_BUS_ERROR) { ARGE_WRITE(sc, AR71XX_DMA_RX_STATUS, DMA_RX_STATUS_BUS_ERROR); device_printf(sc->arge_dev, "RX bus error"); return; } if (status & DMA_INTR_TX_BUS_ERROR) { ARGE_WRITE(sc, AR71XX_DMA_TX_STATUS, DMA_TX_STATUS_BUS_ERROR); device_printf(sc->arge_dev, "TX bus error"); return; } ARGE_LOCK(sc); arge_flush_ddr(sc); if (status & DMA_INTR_RX_PKT_RCVD) arge_rx_locked(sc); /* * RX overrun disables the receiver. * Clear indication and re-enable rx. */ if ( status & DMA_INTR_RX_OVERFLOW) { ARGE_WRITE(sc, AR71XX_DMA_RX_STATUS, DMA_RX_STATUS_OVERFLOW); ARGE_WRITE(sc, AR71XX_DMA_RX_CONTROL, DMA_RX_CONTROL_EN); sc->stats.rx_overflow++; } if (status & DMA_INTR_TX_PKT_SENT) arge_tx_locked(sc); /* * Underrun turns off TX. Clear underrun indication. * If there's anything left in the ring, reactivate the tx. */ if (status & DMA_INTR_TX_UNDERRUN) { ARGE_WRITE(sc, AR71XX_DMA_TX_STATUS, DMA_TX_STATUS_UNDERRUN); sc->stats.tx_underflow++; ARGEDEBUG(sc, ARGE_DBG_TX, "%s: TX underrun; tx_cnt=%d\n", __func__, sc->arge_cdata.arge_tx_cnt); if (sc->arge_cdata.arge_tx_cnt > 0 ) { ARGE_WRITE(sc, AR71XX_DMA_TX_CONTROL, DMA_TX_CONTROL_EN); } } /* * If we've finished TXing and there's space for more packets * to be queued for TX, do so. 
Otherwise we may end up in a * situation where the interface send queue was filled * whilst the hardware queue was full; the hardware * queue was then drained, but the interface send queue wasn't, * and thus if_start() is never called to kick-start * the send process (and all subsequent packets are simply * discarded). * * XXX TODO: make sure that the hardware deals nicely * with the possibility of the queue being enabled above * after a TX underrun, then having the hardware queue added * to below. */ if (status & (DMA_INTR_TX_PKT_SENT | DMA_INTR_TX_UNDERRUN) && (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { if (!IFQ_IS_EMPTY(&ifp->if_snd)) arge_start_locked(ifp); } /* * We handled all bits, clear status */ sc->arge_intr_status = 0; ARGE_UNLOCK(sc); /* * re-enable all interrupts */ ARGE_WRITE(sc, AR71XX_DMA_INTR, DMA_INTR_ALL); } static void arge_tick(void *xsc) { struct arge_softc *sc = xsc; struct mii_data *mii; ARGE_LOCK_ASSERT(sc); if (sc->arge_miibus) { mii = device_get_softc(sc->arge_miibus); mii_tick(mii); callout_reset(&sc->arge_stat_callout, hz, arge_tick, sc); } } int arge_multiphy_mediachange(struct ifnet *ifp) { struct arge_softc *sc = ifp->if_softc; struct ifmedia *ifm = &sc->arge_ifmedia; struct ifmedia_entry *ife = ifm->ifm_cur; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if (IFM_SUBTYPE(ife->ifm_media) == IFM_AUTO) { device_printf(sc->arge_dev, "AUTO is not supported for multiphy MAC\n"); return (EINVAL); } /* * Ignore everything else. */ return (0); } void arge_multiphy_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) { struct arge_softc *sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER | sc->arge_media_type | sc->arge_duplex_mode; } #if defined(ARGE_MDIO) static int argemdio_probe(device_t dev) { device_set_desc(dev, "Atheros AR71xx built-in ethernet interface, MDIO controller"); return (0); } static int argemdio_attach(device_t dev) { struct arge_softc *sc; int error = 0; sc = device_get_softc(dev); sc->arge_dev = dev; sc->arge_mac_unit = device_get_unit(dev); sc->arge_rid = 0; sc->arge_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->arge_rid, RF_ACTIVE | RF_SHAREABLE); if (sc->arge_res == NULL) { device_printf(dev, "couldn't map memory\n"); error = ENXIO; goto fail; } /* Reset MAC - required for AR71xx MDIO to successfully occur */ arge_reset_mac(sc); /* Reset MII bus */ arge_reset_miibus(sc); bus_generic_probe(dev); bus_enumerate_hinted_children(dev); error = bus_generic_attach(dev); fail: return (error); } static int argemdio_detach(device_t dev) { return (0); } #endif Index: projects/powernv/mips/atheros/qca953x_chip.c =================================================================== --- projects/powernv/mips/atheros/qca953x_chip.c (nonexistent) +++ projects/powernv/mips/atheros/qca953x_chip.c (revision 290991) @@ -0,0 +1,393 @@ +/*- + * Copyright (c) 2015 Adrian Chadd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_ddb.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include + +#include + +static void +qca953x_chip_detect_mem_size(void) +{ +} + +static void +qca953x_chip_detect_sys_frequency(void) +{ + unsigned long ref_rate; + unsigned long cpu_rate; + unsigned long ddr_rate; + unsigned long ahb_rate; + uint32_t pll, out_div, ref_div, nint, frac, clk_ctrl, postdiv; + uint32_t cpu_pll, ddr_pll; + uint32_t bootstrap; + + bootstrap = ATH_READ_REG(QCA953X_RESET_REG_BOOTSTRAP); + if (bootstrap & QCA953X_BOOTSTRAP_REF_CLK_40) + ref_rate = 40 * 1000 * 1000; + else + ref_rate = 25 * 1000 * 1000; + + pll = ATH_READ_REG(QCA953X_PLL_CPU_CONFIG_REG); + out_div = (pll >> QCA953X_PLL_CPU_CONFIG_OUTDIV_SHIFT) & + QCA953X_PLL_CPU_CONFIG_OUTDIV_MASK; + ref_div = (pll >> QCA953X_PLL_CPU_CONFIG_REFDIV_SHIFT) & + QCA953X_PLL_CPU_CONFIG_REFDIV_MASK; + nint = (pll >> QCA953X_PLL_CPU_CONFIG_NINT_SHIFT) & + QCA953X_PLL_CPU_CONFIG_NINT_MASK; + frac = (pll >> QCA953X_PLL_CPU_CONFIG_NFRAC_SHIFT) & + QCA953X_PLL_CPU_CONFIG_NFRAC_MASK; + + cpu_pll = nint * ref_rate / ref_div; + cpu_pll += frac * (ref_rate >> 6) / ref_div; + cpu_pll /= (1 << out_div); + + pll = ATH_READ_REG(QCA953X_PLL_DDR_CONFIG_REG); + out_div = (pll >> QCA953X_PLL_DDR_CONFIG_OUTDIV_SHIFT) & + QCA953X_PLL_DDR_CONFIG_OUTDIV_MASK; + ref_div = (pll >> QCA953X_PLL_DDR_CONFIG_REFDIV_SHIFT) & + QCA953X_PLL_DDR_CONFIG_REFDIV_MASK; + nint = (pll >> QCA953X_PLL_DDR_CONFIG_NINT_SHIFT) & + QCA953X_PLL_DDR_CONFIG_NINT_MASK; + frac = (pll >> QCA953X_PLL_DDR_CONFIG_NFRAC_SHIFT) & + QCA953X_PLL_DDR_CONFIG_NFRAC_MASK; + + ddr_pll = nint * ref_rate / ref_div; + ddr_pll += frac * (ref_rate >> 6) / (ref_div << 4); + ddr_pll /= (1 << out_div); + + clk_ctrl = ATH_READ_REG(QCA953X_PLL_CLK_CTRL_REG); + + postdiv = (clk_ctrl >> QCA953X_PLL_CLK_CTRL_CPU_POST_DIV_SHIFT) & + QCA953X_PLL_CLK_CTRL_CPU_POST_DIV_MASK; + + if (clk_ctrl & QCA953X_PLL_CLK_CTRL_CPU_PLL_BYPASS) + cpu_rate = ref_rate; + else if (clk_ctrl & QCA953X_PLL_CLK_CTRL_CPUCLK_FROM_CPUPLL) + cpu_rate = cpu_pll / (postdiv + 1); + else + cpu_rate = ddr_pll / (postdiv + 1); + + postdiv = (clk_ctrl >> QCA953X_PLL_CLK_CTRL_DDR_POST_DIV_SHIFT) & + QCA953X_PLL_CLK_CTRL_DDR_POST_DIV_MASK; + + if (clk_ctrl & QCA953X_PLL_CLK_CTRL_DDR_PLL_BYPASS) + ddr_rate = ref_rate; + else if (clk_ctrl & QCA953X_PLL_CLK_CTRL_DDRCLK_FROM_DDRPLL) + ddr_rate = ddr_pll / (postdiv + 1); + else + ddr_rate = cpu_pll / (postdiv + 1); + + postdiv = (clk_ctrl >> QCA953X_PLL_CLK_CTRL_AHB_POST_DIV_SHIFT) & + 
QCA953X_PLL_CLK_CTRL_AHB_POST_DIV_MASK; + + if (clk_ctrl & QCA953X_PLL_CLK_CTRL_AHB_PLL_BYPASS) + ahb_rate = ref_rate; + else if (clk_ctrl & QCA953X_PLL_CLK_CTRL_AHBCLK_FROM_DDRPLL) + ahb_rate = ddr_pll / (postdiv + 1); + else + ahb_rate = cpu_pll / (postdiv + 1); + + u_ar71xx_ddr_freq = ddr_rate; + u_ar71xx_cpu_freq = cpu_rate; + u_ar71xx_ahb_freq = ahb_rate; + + u_ar71xx_wdt_freq = ref_rate; + u_ar71xx_uart_freq = ref_rate; + u_ar71xx_mdio_freq = ref_rate; + u_ar71xx_refclk = ref_rate; +} + +static void +qca953x_chip_device_stop(uint32_t mask) +{ + uint32_t reg; + + reg = ATH_READ_REG(QCA953X_RESET_REG_RESET_MODULE); + ATH_WRITE_REG(QCA953X_RESET_REG_RESET_MODULE, reg | mask); +} + +static void +qca953x_chip_device_start(uint32_t mask) +{ + uint32_t reg; + + reg = ATH_READ_REG(QCA953X_RESET_REG_RESET_MODULE); + ATH_WRITE_REG(QCA953X_RESET_REG_RESET_MODULE, reg & ~mask); +} + +static int +qca953x_chip_device_stopped(uint32_t mask) +{ + uint32_t reg; + + reg = ATH_READ_REG(QCA953X_RESET_REG_RESET_MODULE); + return ((reg & mask) == mask); +} + +static void +qca953x_chip_set_mii_speed(uint32_t unit, uint32_t speed) +{ + + /* XXX TODO */ + return; +} + +static void +qca953x_chip_set_pll_ge(int unit, int speed, uint32_t pll) +{ + switch (unit) { + case 0: + ATH_WRITE_REG(QCA953X_PLL_ETH_XMII_CONTROL_REG, pll); + break; + case 1: + ATH_WRITE_REG(QCA953X_PLL_ETH_SGMII_CONTROL_REG, pll); + break; + default: + printf("%s: invalid PLL set for arge unit: %d\n", + __func__, unit); + return; + } +} + +static void +qca953x_chip_ddr_flush(ar71xx_flush_ddr_id_t id) +{ + + switch (id) { + case AR71XX_CPU_DDR_FLUSH_GE0: + ar71xx_ddr_flush(QCA953X_DDR_REG_FLUSH_GE0); + break; + case AR71XX_CPU_DDR_FLUSH_GE1: + ar71xx_ddr_flush(QCA953X_DDR_REG_FLUSH_GE1); + break; + case AR71XX_CPU_DDR_FLUSH_USB: + ar71xx_ddr_flush(QCA953X_DDR_REG_FLUSH_USB); + break; + case AR71XX_CPU_DDR_FLUSH_PCIE: + ar71xx_ddr_flush(QCA953X_DDR_REG_FLUSH_PCIE); + break; + case AR71XX_CPU_DDR_FLUSH_WMAC: + ar71xx_ddr_flush(QCA953X_DDR_REG_FLUSH_WMAC); + break; + default: + printf("%s: invalid flush (%d)\n", __func__, id); + } +} + +static uint32_t +qca953x_chip_get_eth_pll(unsigned int mac, int speed) +{ + uint32_t pll; + + switch (speed) { + case 10: + pll = QCA953X_PLL_VAL_10; + break; + case 100: + pll = QCA953X_PLL_VAL_100; + break; + case 1000: + pll = QCA953X_PLL_VAL_1000; + break; + default: + printf("%s%d: invalid speed %d\n", __func__, mac, speed); + pll = 0; + } + return (pll); +} + +static void +qca953x_chip_reset_ethernet_switch(void) +{ +} + +static void +qca953x_configure_gmac(uint32_t gmac_cfg) +{ + uint32_t reg; + + reg = ATH_READ_REG(QCA953X_GMAC_REG_ETH_CFG); + printf("%s: ETH_CFG=0x%08x\n", __func__, reg); + reg &= ~(QCA953X_ETH_CFG_SW_ONLY_MODE | + QCA953X_ETH_CFG_SW_PHY_SWAP | + QCA953X_ETH_CFG_SW_APB_ACCESS | + QCA953X_ETH_CFG_SW_ACC_MSB_FIRST); + + reg |= gmac_cfg; + ATH_WRITE_REG(QCA953X_GMAC_REG_ETH_CFG, reg); +} + +static void +qca953x_chip_init_usb_peripheral(void) +{ + uint32_t bootstrap; + + bootstrap = ATH_READ_REG(QCA953X_RESET_REG_BOOTSTRAP); + + ar71xx_device_stop(QCA953X_RESET_USBSUS_OVERRIDE); + DELAY(1000); + + ar71xx_device_start(QCA953X_RESET_USB_PHY); + DELAY(1000); + + ar71xx_device_start(QCA953X_RESET_USB_PHY_ANALOG); + DELAY(1000); + + ar71xx_device_start(QCA953X_RESET_USB_HOST); + DELAY(1000); +} + +static void +qca953x_chip_set_mii_if(uint32_t unit, uint32_t mii_mode) +{ + + /* + * XXX ! 
+ * + * Nothing to see here; although gmac0 can have its + * MII configuration changed, the register values + * are slightly different. + */ +} + +/* + * XXX TODO: fetch default MII divider configuration + */ + +static void +qca953x_chip_reset_wmac(void) +{ + + /* XXX TODO */ +} + +static void +qca953x_chip_init_gmac(void) +{ + long gmac_cfg; + + if (resource_long_value("qca953x_gmac", 0, "gmac_cfg", + &gmac_cfg) == 0) { + printf("%s: gmac_cfg=0x%08lx\n", + __func__, + (long) gmac_cfg); + qca953x_configure_gmac((uint32_t) gmac_cfg); + } +} + +/* + * Reset the NAND Flash Controller. + * + * + active=1 means "make it active". + * + active=0 means "make it inactive". + */ +static void +qca953x_chip_reset_nfc(int active) +{ +} + +/* + * Configure the GPIO output mux setup. + * + * The QCA953x has an output mux which allows + * certain functions to be configured on any pin. + * Specifically, the switch PHY link LEDs and + * WMAC external RX LNA switches are not limited to + * a specific GPIO pin. + * + * For example, qca953x_chip_gpio_output_configure(13, + * QCA953X_GPIO_OUT_MUX_LED_LINK1) would route switch link + * LED 1 to (hypothetical) GPIO pin 13. + */ +static void +qca953x_chip_gpio_output_configure(int gpio, uint8_t func) +{ + uint32_t reg, s; + uint32_t t; + + /* Valid pins are 0 .. QCA953X_GPIO_COUNT - 1. */ + if (gpio >= QCA953X_GPIO_COUNT) + return; + + reg = QCA953X_GPIO_REG_OUT_FUNC0 + 4 * (gpio / 4); + s = 8 * (gpio % 4); + + /* read-modify-write */ + t = ATH_READ_REG(AR71XX_GPIO_BASE + reg); + t &= ~(0xff << s); + t |= func << s; + ATH_WRITE_REG(AR71XX_GPIO_BASE + reg, t); + + /* flush write */ + ATH_READ_REG(AR71XX_GPIO_BASE + reg); +} + +struct ar71xx_cpu_def qca953x_chip_def = { + &qca953x_chip_detect_mem_size, + &qca953x_chip_detect_sys_frequency, + &qca953x_chip_device_stop, + &qca953x_chip_device_start, + &qca953x_chip_device_stopped, + &qca953x_chip_set_pll_ge, + &qca953x_chip_set_mii_speed, + &qca953x_chip_set_mii_if, + &qca953x_chip_get_eth_pll, + &qca953x_chip_ddr_flush, + &qca953x_chip_init_usb_peripheral, + &qca953x_chip_reset_ethernet_switch, + &qca953x_chip_reset_wmac, + &qca953x_chip_init_gmac, + &qca953x_chip_reset_nfc, + &qca953x_chip_gpio_output_configure, +}; Property changes on: projects/powernv/mips/atheros/qca953x_chip.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/powernv/mips/atheros/qca953x_chip.h =================================================================== --- projects/powernv/mips/atheros/qca953x_chip.h (nonexistent) +++ projects/powernv/mips/atheros/qca953x_chip.h (revision 290991) @@ -0,0 +1,34 @@ +/*- + * Copyright (c) 2015 Adrian Chadd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* $FreeBSD$ */ + +#ifndef __QCA953X_CHIP_H__ +#define __QCA953X_CHIP_H__ + +extern struct ar71xx_cpu_def qca953x_chip_def; + +#endif Property changes on: projects/powernv/mips/atheros/qca953x_chip.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/powernv/mips/atheros/qca953xreg.h =================================================================== --- projects/powernv/mips/atheros/qca953xreg.h (nonexistent) +++ projects/powernv/mips/atheros/qca953xreg.h (revision 290991) @@ -0,0 +1,195 @@ +/*- + * Copyright (c) 2015 Adrian Chadd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +#ifndef __QCA953XREG_H__ +#define __QCA953XREG_H__ + +#define BIT(x) (1 << (x)) + +/* Revision ID information */ +#define REV_ID_MAJOR_QCA9533 0x0140 +#define REV_ID_MAJOR_QCA9533_V2 0x0160 +#define QCA953X_REV_ID_REVISION_MASK 0xf + +/* Big enough to cover APB and SPI, and most peripherals */ +/* + * it needs to cover SPI because right now the if_ath_ahb + * code uses rman to map in the SPI address into memory + * to read data instead of us squirreling it away at early + * boot-time and using the firmware interface. + * + * if_ath_ahb.c should use the same firmware interface + * that if_ath_pci.c uses. 
+ */ +#define QCA953X_GMAC_BASE (AR71XX_APB_BASE + 0x00070000) +#define QCA953X_GMAC_SIZE 0x14 +#define QCA953X_WMAC_BASE (AR71XX_APB_BASE + 0x00100000) +#define QCA953X_WMAC_SIZE 0x20000 +#define QCA953X_EHCI_BASE 0x1b000000 +#define QCA953X_EHCI_SIZE 0x200 +#define QCA953X_SRIF_BASE (AR71XX_APB_BASE + 0x00116000) +#define QCA953X_SRIF_SIZE 0x1000 + +#define QCA953X_PCI_CFG_BASE0 0x14000000 +#define QCA953X_PCI_CTRL_BASE0 (AR71XX_APB_BASE + 0x000f0000) +#define QCA953X_PCI_CRP_BASE0 (AR71XX_APB_BASE + 0x000c0000) +#define QCA953X_PCI_MEM_BASE0 0x10000000 +#define QCA953X_PCI_MEM_SIZE 0x02000000 + +/* PLL Block */ +#define QCA953X_PLL_CPU_CONFIG_REG (AR71XX_PLL_CPU_BASE + 0x00) +#define QCA953X_PLL_DDR_CONFIG_REG (AR71XX_PLL_CPU_BASE + 0x04) +#define QCA953X_PLL_CLK_CTRL_REG (AR71XX_PLL_CPU_BASE + 0x08) + +#define QCA953X_PLL_ETH_XMII_CONTROL_REG (AR71XX_PLL_CPU_BASE + 0x2c) +#define QCA953X_PLL_ETH_SGMII_CONTROL_REG (AR71XX_PLL_CPU_BASE + 0x48) + +#define QCA953X_PLL_CPU_CONFIG_NFRAC_SHIFT 0 +#define QCA953X_PLL_CPU_CONFIG_NFRAC_MASK 0x3f +#define QCA953X_PLL_CPU_CONFIG_NINT_SHIFT 6 +#define QCA953X_PLL_CPU_CONFIG_NINT_MASK 0x3f +#define QCA953X_PLL_CPU_CONFIG_REFDIV_SHIFT 12 +#define QCA953X_PLL_CPU_CONFIG_REFDIV_MASK 0x1f +#define QCA953X_PLL_CPU_CONFIG_OUTDIV_SHIFT 19 +#define QCA953X_PLL_CPU_CONFIG_OUTDIV_MASK 0x3 + +#define QCA953X_PLL_DDR_CONFIG_NFRAC_SHIFT 0 +#define QCA953X_PLL_DDR_CONFIG_NFRAC_MASK 0x3ff +#define QCA953X_PLL_DDR_CONFIG_NINT_SHIFT 10 +#define QCA953X_PLL_DDR_CONFIG_NINT_MASK 0x3f +#define QCA953X_PLL_DDR_CONFIG_REFDIV_SHIFT 16 +#define QCA953X_PLL_DDR_CONFIG_REFDIV_MASK 0x1f +#define QCA953X_PLL_DDR_CONFIG_OUTDIV_SHIFT 23 +#define QCA953X_PLL_DDR_CONFIG_OUTDIV_MASK 0x7 + +#define QCA953X_PLL_CLK_CTRL_CPU_PLL_BYPASS BIT(2) +#define QCA953X_PLL_CLK_CTRL_DDR_PLL_BYPASS BIT(3) +#define QCA953X_PLL_CLK_CTRL_AHB_PLL_BYPASS BIT(4) +#define QCA953X_PLL_CLK_CTRL_CPU_POST_DIV_SHIFT 5 +#define QCA953X_PLL_CLK_CTRL_CPU_POST_DIV_MASK 0x1f +#define QCA953X_PLL_CLK_CTRL_DDR_POST_DIV_SHIFT 10 +#define QCA953X_PLL_CLK_CTRL_DDR_POST_DIV_MASK 0x1f +#define QCA953X_PLL_CLK_CTRL_AHB_POST_DIV_SHIFT 15 +#define QCA953X_PLL_CLK_CTRL_AHB_POST_DIV_MASK 0x1f +#define QCA953X_PLL_CLK_CTRL_CPUCLK_FROM_CPUPLL BIT(20) +#define QCA953X_PLL_CLK_CTRL_DDRCLK_FROM_DDRPLL BIT(21) +#define QCA953X_PLL_CLK_CTRL_AHBCLK_FROM_DDRPLL BIT(24) + +#define QCA953X_PLL_VAL_1000 0x16000000 +#define QCA953X_PLL_VAL_100 0x00000101 +#define QCA953X_PLL_VAL_10 0x00001616 + +/* Reset block */ + +#define QCA953X_RESET_REG_RESET_MODULE (AR71XX_RST_BLOCK_BASE + 0x1c) +#define QCA953X_RESET_USB_EXT_PWR BIT(29) +#define QCA953X_RESET_EXTERNAL BIT(28) +#define QCA953X_RESET_RTC BIT(27) +#define QCA953X_RESET_FULL_CHIP BIT(24) +#define QCA953X_RESET_GE1_MDIO BIT(23) +#define QCA953X_RESET_GE0_MDIO BIT(22) +#define QCA953X_RESET_CPU_NMI BIT(21) +#define QCA953X_RESET_CPU_COLD BIT(20) +#define QCA953X_RESET_DDR BIT(16) +#define QCA953X_RESET_USB_PHY_PLL_PWD_EXT BIT(15) +#define QCA953X_RESET_GE1_MAC BIT(13) +#define QCA953X_RESET_ETH_SWITCH_ANALOG BIT(12) +#define QCA953X_RESET_USB_PHY_ANALOG BIT(11) +#define QCA953X_RESET_GE0_MAC BIT(9) +#define QCA953X_RESET_ETH_SWITCH BIT(8) +#define QCA953X_RESET_PCIE_PHY BIT(7) +#define QCA953X_RESET_PCIE BIT(6) +#define QCA953X_RESET_USB_HOST BIT(5) +#define QCA953X_RESET_USB_PHY BIT(4) +#define QCA953X_RESET_USBSUS_OVERRIDE BIT(3) + +#define QCA953X_RESET_REG_BOOTSTRAP (AR71XX_RST_BLOCK_BASE + 0xb0) +#define QCA953X_BOOTSTRAP_SW_OPTION2 BIT(12) +#define QCA953X_BOOTSTRAP_SW_OPTION1 
BIT(11) +#define QCA953X_BOOTSTRAP_EJTAG_MODE BIT(5) +#define QCA953X_BOOTSTRAP_REF_CLK_40 BIT(4) +#define QCA953X_BOOTSTRAP_SDRAM_DISABLED BIT(1) +#define QCA953X_BOOTSTRAP_DDR1 BIT(0) + +#define QCA953X_RESET_REG_EXT_INT_STATUS (AR71XX_RST_BLOCK_BASE + 0xac) + +#define QCA953X_DDR_REG_FLUSH_GE0 (AR71XX_APB_BASE + 0x9c) +#define QCA953X_DDR_REG_FLUSH_GE1 (AR71XX_APB_BASE + 0xa0) +#define QCA953X_DDR_REG_FLUSH_USB (AR71XX_APB_BASE + 0xa4) +#define QCA953X_DDR_REG_FLUSH_PCIE (AR71XX_APB_BASE + 0xa8) +#define QCA953X_DDR_REG_FLUSH_WMAC (AR71XX_APB_BASE + 0xac) + +/* GPIO block */ +#define QCA953X_GPIO_REG_OUT_FUNC0 0x2c +#define QCA953X_GPIO_REG_OUT_FUNC1 0x30 +#define QCA953X_GPIO_REG_OUT_FUNC2 0x34 +#define QCA953X_GPIO_REG_OUT_FUNC3 0x38 +#define QCA953X_GPIO_REG_OUT_FUNC4 0x3c +#define QCA953X_GPIO_REG_IN_ENABLE0 0x44 +#define QCA953X_GPIO_REG_FUNC 0x6c + +#define QCA953X_GPIO_OUT_MUX_SPI_CS1 10 +#define QCA953X_GPIO_OUT_MUX_SPI_CS2 11 +#define QCA953X_GPIO_OUT_MUX_SPI_CS0 9 +#define QCA953X_GPIO_OUT_MUX_SPI_CLK 8 +#define QCA953X_GPIO_OUT_MUX_SPI_MOSI 12 +#define QCA953X_GPIO_OUT_MUX_LED_LINK1 41 +#define QCA953X_GPIO_OUT_MUX_LED_LINK2 42 +#define QCA953X_GPIO_OUT_MUX_LED_LINK3 43 +#define QCA953X_GPIO_OUT_MUX_LED_LINK4 44 +#define QCA953X_GPIO_OUT_MUX_LED_LINK5 45 + +#define QCA953X_GPIO_COUNT 18 + +/* GMAC block */ +#define QCA953X_GMAC_REG_ETH_CFG (QCA953X_GMAC_BASE + 0x00) + +#define QCA953X_ETH_CFG_SW_ONLY_MODE BIT(6) +#define QCA953X_ETH_CFG_SW_PHY_SWAP BIT(7) +#define QCA953X_ETH_CFG_SW_APB_ACCESS BIT(9) +#define QCA953X_ETH_CFG_SW_ACC_MSB_FIRST BIT(13) + +/* SRIF block */ +#define QCA953X_SRIF_CPU_DPLL1_REG 0x1c0 +#define QCA953X_SRIF_CPU_DPLL2_REG 0x1c4 +#define QCA953X_SRIF_CPU_DPLL3_REG 0x1c8 + +#define QCA953X_SRIF_DDR_DPLL1_REG 0x240 +#define QCA953X_SRIF_DDR_DPLL2_REG 0x244 +#define QCA953X_SRIF_DDR_DPLL3_REG 0x248 + +#define QCA953X_SRIF_DPLL1_REFDIV_SHIFT 27 +#define QCA953X_SRIF_DPLL1_REFDIV_MASK 0x1f +#define QCA953X_SRIF_DPLL1_NINT_SHIFT 18 +#define QCA953X_SRIF_DPLL1_NINT_MASK 0x1ff +#define QCA953X_SRIF_DPLL1_NFRAC_MASK 0x0003ffff + +#define QCA953X_SRIF_DPLL2_LOCAL_PLL BIT(30) +#define QCA953X_SRIF_DPLL2_OUTDIV_SHIFT 13 +#define QCA953X_SRIF_DPLL2_OUTDIV_MASK 0x7 + +#endif /* __QCA953XREG_H__ */ Property changes on: projects/powernv/mips/atheros/qca953xreg.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/powernv/net/pfkeyv2.h =================================================================== --- projects/powernv/net/pfkeyv2.h (revision 290990) +++ projects/powernv/net/pfkeyv2.h (revision 290991) @@ -1,441 +1,441 @@ /* $FreeBSD$ */ /* $KAME: pfkeyv2.h,v 1.37 2003/09/06 05:15:43 itojun Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This file has been derived rfc 2367, * And added some flags of SADB_KEY_FLAGS_ as SADB_X_EXT_. * sakane@ydc.co.jp */ #ifndef _NET_PFKEYV2_H_ #define _NET_PFKEYV2_H_ /* This file defines structures and symbols for the PF_KEY Version 2 key management interface. It was written at the U.S. Naval Research Laboratory. This file is in the public domain. The authors ask that you leave this credit intact on any copies of this file. */ #ifndef __PFKEY_V2_H #define __PFKEY_V2_H 1 #define PF_KEY_V2 2 #define PFKEYV2_REVISION 199806L #define SADB_RESERVED 0 #define SADB_GETSPI 1 #define SADB_UPDATE 2 #define SADB_ADD 3 #define SADB_DELETE 4 #define SADB_GET 5 #define SADB_ACQUIRE 6 #define SADB_REGISTER 7 #define SADB_EXPIRE 8 #define SADB_FLUSH 9 #define SADB_DUMP 10 #define SADB_X_PROMISC 11 #define SADB_X_PCHANGE 12 #define SADB_X_SPDUPDATE 13 #define SADB_X_SPDADD 14 #define SADB_X_SPDDELETE 15 /* by policy index */ #define SADB_X_SPDGET 16 #define SADB_X_SPDACQUIRE 17 #define SADB_X_SPDDUMP 18 #define SADB_X_SPDFLUSH 19 #define SADB_X_SPDSETIDX 20 #define SADB_X_SPDEXPIRE 21 #define SADB_X_SPDDELETE2 22 /* by policy id */ #define SADB_MAX 22 struct sadb_msg { u_int8_t sadb_msg_version; u_int8_t sadb_msg_type; u_int8_t sadb_msg_errno; u_int8_t sadb_msg_satype; u_int16_t sadb_msg_len; u_int16_t sadb_msg_reserved; u_int32_t sadb_msg_seq; u_int32_t sadb_msg_pid; }; struct sadb_ext { u_int16_t sadb_ext_len; u_int16_t sadb_ext_type; }; struct sadb_sa { u_int16_t sadb_sa_len; u_int16_t sadb_sa_exttype; u_int32_t sadb_sa_spi; u_int8_t sadb_sa_replay; u_int8_t sadb_sa_state; u_int8_t sadb_sa_auth; u_int8_t sadb_sa_encrypt; u_int32_t sadb_sa_flags; }; struct sadb_lifetime { u_int16_t sadb_lifetime_len; u_int16_t sadb_lifetime_exttype; u_int32_t sadb_lifetime_allocations; u_int64_t sadb_lifetime_bytes; u_int64_t sadb_lifetime_addtime; u_int64_t sadb_lifetime_usetime; }; struct sadb_address { u_int16_t sadb_address_len; u_int16_t sadb_address_exttype; u_int8_t sadb_address_proto; u_int8_t sadb_address_prefixlen; u_int16_t sadb_address_reserved; }; struct sadb_key { u_int16_t sadb_key_len; u_int16_t sadb_key_exttype; u_int16_t sadb_key_bits; u_int16_t sadb_key_reserved; }; struct sadb_ident { u_int16_t sadb_ident_len; u_int16_t sadb_ident_exttype; u_int16_t sadb_ident_type; u_int16_t sadb_ident_reserved; u_int64_t sadb_ident_id; }; struct sadb_sens { u_int16_t sadb_sens_len; u_int16_t sadb_sens_exttype; u_int32_t sadb_sens_dpd; u_int8_t sadb_sens_sens_level; u_int8_t sadb_sens_sens_len; u_int8_t sadb_sens_integ_level; u_int8_t sadb_sens_integ_len; u_int32_t 
sadb_sens_reserved; }; struct sadb_prop { u_int16_t sadb_prop_len; u_int16_t sadb_prop_exttype; u_int8_t sadb_prop_replay; u_int8_t sadb_prop_reserved[3]; }; struct sadb_comb { u_int8_t sadb_comb_auth; u_int8_t sadb_comb_encrypt; u_int16_t sadb_comb_flags; u_int16_t sadb_comb_auth_minbits; u_int16_t sadb_comb_auth_maxbits; u_int16_t sadb_comb_encrypt_minbits; u_int16_t sadb_comb_encrypt_maxbits; u_int32_t sadb_comb_reserved; u_int32_t sadb_comb_soft_allocations; u_int32_t sadb_comb_hard_allocations; u_int64_t sadb_comb_soft_bytes; u_int64_t sadb_comb_hard_bytes; u_int64_t sadb_comb_soft_addtime; u_int64_t sadb_comb_hard_addtime; u_int64_t sadb_comb_soft_usetime; u_int64_t sadb_comb_hard_usetime; }; struct sadb_supported { u_int16_t sadb_supported_len; u_int16_t sadb_supported_exttype; u_int32_t sadb_supported_reserved; }; struct sadb_alg { u_int8_t sadb_alg_id; u_int8_t sadb_alg_ivlen; u_int16_t sadb_alg_minbits; u_int16_t sadb_alg_maxbits; u_int16_t sadb_alg_reserved; }; struct sadb_spirange { u_int16_t sadb_spirange_len; u_int16_t sadb_spirange_exttype; u_int32_t sadb_spirange_min; u_int32_t sadb_spirange_max; u_int32_t sadb_spirange_reserved; }; struct sadb_x_kmprivate { u_int16_t sadb_x_kmprivate_len; u_int16_t sadb_x_kmprivate_exttype; u_int32_t sadb_x_kmprivate_reserved; }; /* * XXX Additional SA Extension. * mode: tunnel or transport * reqid: to make SA unique nevertheless the address pair of SA are same. * Mainly it's for VPN. */ struct sadb_x_sa2 { u_int16_t sadb_x_sa2_len; u_int16_t sadb_x_sa2_exttype; u_int8_t sadb_x_sa2_mode; u_int8_t sadb_x_sa2_reserved1; u_int16_t sadb_x_sa2_reserved2; u_int32_t sadb_x_sa2_sequence; /* lowermost 32bit of sequence number */ u_int32_t sadb_x_sa2_reqid; }; /* XXX Policy Extension */ struct sadb_x_policy { u_int16_t sadb_x_policy_len; u_int16_t sadb_x_policy_exttype; u_int16_t sadb_x_policy_type; /* See policy type of ipsec.h */ u_int8_t sadb_x_policy_dir; /* direction, see ipsec.h */ u_int8_t sadb_x_policy_reserved; u_int32_t sadb_x_policy_id; - u_int32_t sadb_x_policy_reserved2; + u_int32_t sadb_x_policy_priority; }; _Static_assert(sizeof(struct sadb_x_policy) == 16, "struct size mismatch"); /* * When policy_type == IPSEC, it is followed by some of * the ipsec policy request. * [total length of ipsec policy requests] * = (sadb_x_policy_len * sizeof(uint64_t) - sizeof(struct sadb_x_policy)) */ /* XXX IPsec Policy Request Extension */ /* * This structure is aligned 8 bytes. */ struct sadb_x_ipsecrequest { u_int16_t sadb_x_ipsecrequest_len; /* structure length in 64 bits. */ u_int16_t sadb_x_ipsecrequest_proto; /* See ipsec.h */ u_int8_t sadb_x_ipsecrequest_mode; /* See IPSEC_MODE_XX in ipsec.h. */ u_int8_t sadb_x_ipsecrequest_level; /* See IPSEC_LEVEL_XX in ipsec.h */ u_int16_t sadb_x_ipsecrequest_reqid; /* See ipsec.h */ /* * followed by source IP address of SA, and immediately followed by * destination IP address of SA. These encoded into two of sockaddr * structure without any padding. Must set each sa_len exactly. * Each of length of the sockaddr structure are not aligned to 64bits, * but sum of x_request and addresses is aligned to 64bits. */ }; /* NAT-Traversal type, see RFC 3948 (and drafts). */ struct sadb_x_nat_t_type { u_int16_t sadb_x_nat_t_type_len; u_int16_t sadb_x_nat_t_type_exttype; u_int8_t sadb_x_nat_t_type_type; u_int8_t sadb_x_nat_t_type_reserved[3]; }; _Static_assert(sizeof(struct sadb_x_nat_t_type) == 8, "struct size mismatch"); /* NAT-Traversal source or destination port. 
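 *
 * (Like every PF_KEY extension header, the len field below counts
 * 8-byte units.  For this 8-byte structure a sender would set, e.g.,
 *
 *	p.sadb_x_nat_t_port_len = PFKEY_UNIT64(sizeof(p));
 *
 * which evaluates to 1; PFKEY_UNUNIT64() converts back to bytes.
 * Both macros are defined in the Utilities section at the end of
 * this header.)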
*/ struct sadb_x_nat_t_port { u_int16_t sadb_x_nat_t_port_len; u_int16_t sadb_x_nat_t_port_exttype; u_int16_t sadb_x_nat_t_port_port; u_int16_t sadb_x_nat_t_port_reserved; }; _Static_assert(sizeof(struct sadb_x_nat_t_port) == 8, "struct size mismatch"); /* ESP fragmentation size. */ struct sadb_x_nat_t_frag { u_int16_t sadb_x_nat_t_frag_len; u_int16_t sadb_x_nat_t_frag_exttype; u_int16_t sadb_x_nat_t_frag_fraglen; u_int16_t sadb_x_nat_t_frag_reserved; }; _Static_assert(sizeof(struct sadb_x_nat_t_frag) == 8, "struct size mismatch"); #define SADB_EXT_RESERVED 0 #define SADB_EXT_SA 1 #define SADB_EXT_LIFETIME_CURRENT 2 #define SADB_EXT_LIFETIME_HARD 3 #define SADB_EXT_LIFETIME_SOFT 4 #define SADB_EXT_ADDRESS_SRC 5 #define SADB_EXT_ADDRESS_DST 6 #define SADB_EXT_ADDRESS_PROXY 7 #define SADB_EXT_KEY_AUTH 8 #define SADB_EXT_KEY_ENCRYPT 9 #define SADB_EXT_IDENTITY_SRC 10 #define SADB_EXT_IDENTITY_DST 11 #define SADB_EXT_SENSITIVITY 12 #define SADB_EXT_PROPOSAL 13 #define SADB_EXT_SUPPORTED_AUTH 14 #define SADB_EXT_SUPPORTED_ENCRYPT 15 #define SADB_EXT_SPIRANGE 16 #define SADB_X_EXT_KMPRIVATE 17 #define SADB_X_EXT_POLICY 18 #define SADB_X_EXT_SA2 19 #define SADB_X_EXT_NAT_T_TYPE 20 #define SADB_X_EXT_NAT_T_SPORT 21 #define SADB_X_EXT_NAT_T_DPORT 22 #define SADB_X_EXT_NAT_T_OA 23 /* Deprecated. */ #define SADB_X_EXT_NAT_T_OAI 23 /* Peer's NAT_OA for src of SA. */ #define SADB_X_EXT_NAT_T_OAR 24 /* Peer's NAT_OA for dst of SA. */ #define SADB_X_EXT_NAT_T_FRAG 25 /* Manual MTU override. */ #define SADB_EXT_MAX 25 #define SADB_SATYPE_UNSPEC 0 #define SADB_SATYPE_AH 2 #define SADB_SATYPE_ESP 3 #define SADB_SATYPE_RSVP 5 #define SADB_SATYPE_OSPFV2 6 #define SADB_SATYPE_RIPV2 7 #define SADB_SATYPE_MIP 8 #define SADB_X_SATYPE_IPCOMP 9 /*#define SADB_X_SATYPE_POLICY 10 obsolete, do not reuse */ #define SADB_X_SATYPE_TCPSIGNATURE 11 #define SADB_SATYPE_MAX 12 #define SADB_SASTATE_LARVAL 0 #define SADB_SASTATE_MATURE 1 #define SADB_SASTATE_DYING 2 #define SADB_SASTATE_DEAD 3 #define SADB_SASTATE_MAX 3 #define SADB_SAFLAGS_PFS 1 /* * Though some of these numbers (both _AALG and _EALG) appear to be * IKEv2 numbers and others original IKE numbers, they have no meaning. * These are constants that the various IKE daemons use to tell the kernel * what cipher to use. * * Do not use these constants directly to decide which Transformation ID * to send. You are responsible for mapping them yourself. */ #define SADB_AALG_NONE 0 #define SADB_AALG_MD5HMAC 2 #define SADB_AALG_SHA1HMAC 3 #define SADB_AALG_MAX 252 #define SADB_X_AALG_SHA2_256 5 #define SADB_X_AALG_SHA2_384 6 #define SADB_X_AALG_SHA2_512 7 #define SADB_X_AALG_RIPEMD160HMAC 8 #define SADB_X_AALG_AES_XCBC_MAC 9 /* RFC3566 */ #define SADB_X_AALG_AES128GMAC 11 /* RFC4543 + Errata1821 */ #define SADB_X_AALG_AES192GMAC 12 #define SADB_X_AALG_AES256GMAC 13 #define SADB_X_AALG_MD5 249 /* Keyed MD5 */ #define SADB_X_AALG_SHA 250 /* Keyed SHA */ #define SADB_X_AALG_NULL 251 /* null authentication */ #define SADB_X_AALG_TCP_MD5 252 /* Keyed TCP-MD5 (RFC2385) */ #define SADB_EALG_NONE 0 #define SADB_EALG_DESCBC 2 #define SADB_EALG_3DESCBC 3 #define SADB_X_EALG_CAST128CBC 6 #define SADB_X_EALG_BLOWFISHCBC 7 #define SADB_EALG_NULL 11 #define SADB_X_EALG_RIJNDAELCBC 12 #define SADB_X_EALG_AES 12 #define SADB_X_EALG_AESCTR 13 #define SADB_X_EALG_AESGCM8 18 /* RFC4106 */ #define SADB_X_EALG_AESGCM12 19 #define SADB_X_EALG_AESGCM16 20 #define SADB_X_EALG_CAMELLIACBC 22 #define SADB_X_EALG_AESGMAC 23 /* RFC4543 + Errata1821 */ #define SADB_EALG_MAX 23 /* !!! keep updated !!! 
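 *
 * (That is, SADB_EALG_MAX must track the highest encryption
 * algorithm number above; it is 23 because SADB_X_EALG_AESGMAC is
 * currently the largest assigned value.)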
*/ /* private allocations - based on RFC2407/IANA assignment */ #define SADB_X_CALG_NONE 0 #define SADB_X_CALG_OUI 1 #define SADB_X_CALG_DEFLATE 2 #define SADB_X_CALG_LZS 3 #define SADB_X_CALG_MAX 4 #define SADB_IDENTTYPE_RESERVED 0 #define SADB_IDENTTYPE_PREFIX 1 #define SADB_IDENTTYPE_FQDN 2 #define SADB_IDENTTYPE_USERFQDN 3 #define SADB_X_IDENTTYPE_ADDR 4 #define SADB_IDENTTYPE_MAX 4 /* `flags' in sadb_sa structure holds followings */ #define SADB_X_EXT_NONE 0x0000 /* i.e. new format. */ #define SADB_X_EXT_OLD 0x0001 /* old format. */ #define SADB_X_EXT_IV4B 0x0010 /* IV length of 4 bytes in use */ #define SADB_X_EXT_DERIV 0x0020 /* DES derived */ #define SADB_X_EXT_CYCSEQ 0x0040 /* allowing to cyclic sequence. */ /* three of followings are exclusive flags each them */ #define SADB_X_EXT_PSEQ 0x0000 /* sequencial padding for ESP */ #define SADB_X_EXT_PRAND 0x0100 /* random padding for ESP */ #define SADB_X_EXT_PZERO 0x0200 /* zero padding for ESP */ #define SADB_X_EXT_PMASK 0x0300 /* mask for padding flag */ #if 1 #define SADB_X_EXT_RAWCPI 0x0080 /* use well known CPI (IPComp) */ #endif #define SADB_KEY_FLAGS_MAX 0x0fff /* SPI size for PF_KEYv2 */ #define PFKEY_SPI_SIZE sizeof(u_int32_t) /* Identifier for menber of lifetime structure */ #define SADB_X_LIFETIME_ALLOCATIONS 0 #define SADB_X_LIFETIME_BYTES 1 #define SADB_X_LIFETIME_ADDTIME 2 #define SADB_X_LIFETIME_USETIME 3 /* The rate for SOFT lifetime against HARD one. */ #define PFKEY_SOFT_LIFETIME_RATE 80 /* Utilities */ #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) #define PFKEY_EXTLEN(msg) \ PFKEY_UNUNIT64(((struct sadb_ext *)(msg))->sadb_ext_len) #define PFKEY_ADDR_PREFIX(ext) \ (((struct sadb_address *)(ext))->sadb_address_prefixlen) #define PFKEY_ADDR_PROTO(ext) \ (((struct sadb_address *)(ext))->sadb_address_proto) #define PFKEY_ADDR_SADDR(ext) \ ((struct sockaddr *)((caddr_t)(ext) + sizeof(struct sadb_address))) /* in 64bits */ #define PFKEY_UNUNIT64(a) ((a) << 3) #define PFKEY_UNIT64(a) ((a) >> 3) #endif /* __PFKEY_V2_H */ #endif /* _NET_PFKEYV2_H_ */ Index: projects/powernv/netinet6/ip6_forward.c =================================================================== --- projects/powernv/netinet6/ip6_forward.c (revision 290990) +++ projects/powernv/netinet6/ip6_forward.c (revision 290991) @@ -1,627 +1,627 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_forward.c,v 1.69 2001/05/17 03:48:30 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipfw.h" #include "opt_ipsec.h" #include "opt_ipstealth.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #include #include #endif /* IPSEC */ /* * Forward a packet. If some error occurs return the sender * an icmp packet. Note we can't always generate a meaningful * icmp message because icmp doesn't have a large enough repertoire * of codes and types. * * If not forwarding, just drop the packet. This could be confusing * if ipforwarding was zero but some routing protocol was advancing * us as a gateway to somewhere. However, we must let the routing * protocol deal with that. * */ void ip6_forward(struct mbuf *m, int srcrt) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct sockaddr_in6 *dst = NULL; struct rtentry *rt = NULL; struct route_in6 rin6; int error, type = 0, code = 0; struct mbuf *mcopy = NULL; struct ifnet *origifp; /* maybe unnecessary */ u_int32_t inzone, outzone; struct in6_addr src_in6, dst_in6, odst; #ifdef IPSEC struct secpolicy *sp = NULL; #endif #ifdef SCTP int sw_csum; #endif struct m_tag *fwd_tag; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; /* * Do not forward packets to multicast destination (should be handled * by ip6_mforward(). * Do not forward packets with unspecified source. It was discussed * in July 2000, on the ipngwg mailing list. */ if ((m->m_flags & (M_BCAST|M_MCAST)) != 0 || IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { IP6STAT_INC(ip6s_cantforward); /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ if (V_ip6_log_time + V_ip6_log_interval < time_uptime) { V_ip6_log_time = time_uptime; log(LOG_DEBUG, "cannot forward " "from %s to %s nxt %d received on %s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif)); } m_freem(m); return; } #ifdef IPSEC /* * Check if this packet has an active SA and needs to be dropped * instead of forwarded. */ if (ip6_ipsec_fwd(m) != 0) { IP6STAT_INC(ip6s_cantforward); m_freem(m); return; } #endif /* IPSEC */ #ifdef IPSTEALTH if (!V_ip6stealth) { #endif if (ip6->ip6_hlim <= IPV6_HLIMDEC) { /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ icmp6_error(m, ICMP6_TIME_EXCEEDED, ICMP6_TIME_EXCEED_TRANSIT, 0); return; } ip6->ip6_hlim -= IPV6_HLIMDEC; #ifdef IPSTEALTH } #endif /* * Save at most ICMPV6_PLD_MAXLEN (= the min IPv6 MTU - * size of IPv6 + ICMPv6 headers) bytes of the packet in case * we need to generate an ICMP6 message to the src. * Thanks to M_EXT, in most cases copy will not occur. * * It is important to save it before IPsec processing as IPsec * processing may modify the mbuf. 
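 *
 * (ICMPV6_PLD_MAXLEN works out to 1232 bytes: the 1280-byte IPv6
 * minimum MTU less the 40-byte IPv6 header and the 8-byte ICMPv6
 * header, so the resulting error packet never itself needs
 * fragmentation.)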
*/ mcopy = m_copy(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN)); #ifdef IPSEC /* get a security policy for this packet */ sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, &error); if (sp == NULL) { IPSEC6STAT_INC(ips_out_inval); IP6STAT_INC(ip6s_cantforward); if (mcopy) { #if 0 /* XXX: what icmp ? */ #else m_freem(mcopy); #endif } m_freem(m); return; } error = 0; /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: /* * This packet is just discarded. */ IPSEC6STAT_INC(ips_out_polvio); IP6STAT_INC(ip6s_cantforward); KEY_FREESP(&sp); if (mcopy) { #if 0 /* XXX: what icmp ? */ #else m_freem(mcopy); #endif } m_freem(m); return; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: /* no need to do IPsec. */ KEY_FREESP(&sp); goto skip_ipsec; case IPSEC_POLICY_IPSEC: if (sp->req == NULL) { /* XXX should be panic ? */ printf("ip6_forward: No IPsec request specified.\n"); IP6STAT_INC(ip6s_cantforward); KEY_FREESP(&sp); if (mcopy) { #if 0 /* XXX: what icmp ? */ #else m_freem(mcopy); #endif } m_freem(m); return; } /* do IPsec */ break; case IPSEC_POLICY_ENTRUST: default: /* should be panic ?? */ printf("ip6_forward: Invalid policy found. %d\n", sp->policy); KEY_FREESP(&sp); goto skip_ipsec; } { struct ipsecrequest *isr = NULL; /* * When the kernel forwards a packet, it is not proper to apply * IPsec transport mode to it; this check avoids that. * At present, if there is even a transport mode SA request in the * security policy, the kernel does not apply IPsec to the packet. * This check is not enough, because the following case is valid: * ipsec esp/tunnel/xxx-xxx/require esp/transport//require; */ for (isr = sp->req; isr; isr = isr->next) { if (isr->saidx.mode == IPSEC_MODE_ANY) goto doipsectunnel; if (isr->saidx.mode == IPSEC_MODE_TUNNEL) goto doipsectunnel; } /* * If there's no need for tunnel mode IPsec, skip. */ if (!isr) goto skip_ipsec; doipsectunnel: /* * All the extension headers will become inaccessible * (since they can be encrypted). * Don't panic, we need no more updates to extension headers * on the inner IPv6 packet (since they are now encapsulated). * * IPv6 [ESP|AH] IPv6 [extension headers] payload */ /* * If we need to encapsulate the packet, do it here; * ipsec6_process_packet() will send the packet using ip6_output(). */ error = ipsec6_process_packet(m, sp->req); /* Release the SP if an error occurred */ if (error != 0) KEY_FREESP(&sp); if (error == EJUSTRETURN) { /* * We had an SP with a level of 'use' and no SA. We * will just continue to process the packet without * IPsec processing. */ error = 0; goto skip_ipsec; } if (error) { /* mbuf is already reclaimed in ipsec6_process_packet. */ switch (error) { case EHOSTUNREACH: case ENETUNREACH: case EMSGSIZE: case ENOBUFS: case ENOMEM: break; default: printf("ip6_output (ipsec): error code %d\n", error); /* FALLTHROUGH */ case ENOENT: /* don't show these error codes to the user */ break; } IP6STAT_INC(ip6s_cantforward); if (mcopy) { #if 0 /* XXX: what icmp ? */ #else m_freem(mcopy); #endif } return; } else { /* * In the FAST IPSec case we have already * re-injected the packet and it has been freed * by the ipsec_done() function. So, just clean * up after ourselves.
*/ m = NULL; goto freecopy; } } skip_ipsec: #endif again: bzero(&rin6, sizeof(struct route_in6)); dst = (struct sockaddr_in6 *)&rin6.ro_dst; dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; dst->sin6_addr = ip6->ip6_dst; again2: rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m)); if (rin6.ro_rt != NULL) RT_UNLOCK(rin6.ro_rt); else { IP6STAT_INC(ip6s_noroute); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute); if (mcopy) { icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOROUTE, 0); } goto bad; } rt = rin6.ro_rt; /* * Source scope check: if a packet can't be delivered to its * destination for the reason that the destination is beyond the scope * of the source address, discard the packet and return an icmp6 * destination unreachable error with Code 2 (beyond scope of source * address). We use a local copy of ip6_src, since in6_setscope() * will possibly modify its first argument. * [draft-ietf-ipngwg-icmp-v3-04.txt, Section 3.1] */ src_in6 = ip6->ip6_src; if (in6_setscope(&src_in6, rt->rt_ifp, &outzone)) { /* XXX: this should not happen */ IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); goto bad; } if (in6_setscope(&src_in6, m->m_pkthdr.rcvif, &inzone)) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); goto bad; } if (inzone != outzone) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard); if (V_ip6_log_time + V_ip6_log_interval < time_uptime) { V_ip6_log_time = time_uptime; log(LOG_DEBUG, "cannot forward " "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp)); } if (mcopy) icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_BEYONDSCOPE, 0); goto bad; } /* * Destination scope check: if a packet is going to break the scope * zone of packet's destination address, discard it. This case should * usually be prevented by appropriately-configured routing table, but * we need an explicit check because we may mistakenly forward the * packet to a different zone by (e.g.) a default route. */ dst_in6 = ip6->ip6_dst; if (in6_setscope(&dst_in6, m->m_pkthdr.rcvif, &inzone) != 0 || in6_setscope(&dst_in6, rt->rt_ifp, &outzone) != 0 || inzone != outzone) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); goto bad; } if (rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in6 *)rt->rt_gateway; /* * If we are to forward the packet using the same interface * as one we got the packet from, perhaps we should send a redirect * to sender to shortcut a hop. * Only send redirect if source is sending directly to us, * and if packet was not source routed (or has any options). * Also, don't send redirect if forwarding using a route * modified by a redirect. */ if (V_ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) { if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) { /* * If the incoming interface is equal to the outgoing * one, and the link attached to the interface is * point-to-point, then it will be highly probable * that a routing loop occurs. Thus, we immediately * drop the packet and send an ICMPv6 error message. * * type/code is based on suggestion by Rich Draves. * not sure if it is the best pick. */ icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0); goto bad; } type = ND_REDIRECT; } /* * Fake scoped addresses. 
Note that even link-local source or * destinaion can appear, if the originating node just sends the * packet to us (without address resolution for the destination). * Since both icmp6_error and icmp6_redirect_output fill the embedded * link identifiers, we can do this stuff after making a copy for * returning an error. */ if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { /* * See corresponding comments in ip6_output. * XXX: but is it possible that ip6_forward() sends a packet * to a loopback interface? I don't think so, and thus * I bark here. (jinmei@kame.net) * XXX: it is common to route invalid packets to loopback. * also, the codepath will be visited on use of ::1 in * rthdr. (itojun) */ #if 1 if (0) #else if ((rt->rt_flags & (RTF_BLACKHOLE|RTF_REJECT)) == 0) #endif { printf("ip6_forward: outgoing interface is loopback. " "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp)); } /* we can just use rcvif in forwarding. */ origifp = m->m_pkthdr.rcvif; } else origifp = rt->rt_ifp; /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet6_pfil_hook)) goto pass; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ error = pfil_run_hooks(&V_inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT, NULL); if (error != 0 || m == NULL) goto freecopy; /* consumed by filter */ ip6 = mtod(m, struct ip6_hdr *); /* See if destination IP address was changed by packet filter. */ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip6_input(). */ if (in6_localip(&ip6->ip6_dst)) m->m_flags |= M_FASTFWD_OURS; else goto again; /* Redo the routing table lookup. */ } /* See if local, if yes, send it to netisr. */ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto out; } /* Or forward to some other address? */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { dst = (struct sockaddr_in6 *)&rin6.ro_dst; bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in6)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); goto again2; } pass: /* See if the size was changed by the packet filter. */ if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) { in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig); if (mcopy) { u_long mtu; #ifdef IPSEC size_t ipsechdrsiz; #endif /* IPSEC */ mtu = IN6_LINKMTU(rt->rt_ifp); #ifdef IPSEC /* * When we do IPsec tunnel ingress, we need to play * with the link value (decrement IPsec header size * from mtu value). The code is much simpler than v4 * case, as we have the outgoing interface for * encapsulated packet as "rt->rt_ifp". */ ipsechdrsiz = ipsec_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND, NULL); if (ipsechdrsiz < mtu) mtu -= ipsechdrsiz; /* * if mtu becomes less than minimum MTU, * tell minimum MTU (and I'll need to fragment it). 
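 *
 * [Editor's example, not part of the diff; the overhead figure is
 * illustrative] On a 1500-byte link with tunnel-mode ESP costing
 * roughly 70 bytes (outer IPv6 header plus ESP framing), the MTU
 * advertised in the Packet Too Big message drops to about 1430, and
 * the clamp below keeps it from ever going under IPV6_MMTU (1280).
 *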
*/ if (mtu < IPV6_MMTU) mtu = IPV6_MMTU; #endif /* IPSEC */ icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu); } goto bad; } - error = nd6_output_ifp(rt->rt_ifp, origifp, m, dst); + error = nd6_output_ifp(rt->rt_ifp, origifp, m, dst, NULL); if (error) { in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard); IP6STAT_INC(ip6s_cantforward); } else { IP6STAT_INC(ip6s_forward); in6_ifstat_inc(rt->rt_ifp, ifs6_out_forward); if (type) IP6STAT_INC(ip6s_redirectsent); else { if (mcopy) goto freecopy; } } if (mcopy == NULL) goto out; switch (error) { case 0: if (type == ND_REDIRECT) { icmp6_redirect_output(mcopy, rt); goto out; } goto freecopy; case EMSGSIZE: /* xxx MTU is constant in PPP? */ goto freecopy; case ENOBUFS: /* Tell source to slow down like source quench in IP? */ goto freecopy; case ENETUNREACH: /* shouldn't happen, checked above */ case EHOSTUNREACH: case ENETDOWN: case EHOSTDOWN: default: type = ICMP6_DST_UNREACH; code = ICMP6_DST_UNREACH_ADDR; break; } icmp6_error(mcopy, type, code, 0); goto out; freecopy: m_freem(mcopy); goto out; bad: m_freem(m); out: if (rt != NULL) RTFREE(rt); } Index: projects/powernv/netinet6/ip6_output.c =================================================================== --- projects/powernv/netinet6/ip6_output.c (revision 290990) +++ projects/powernv/netinet6/ip6_output.c (revision 290991) @@ -1,2997 +1,2997 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipfw.h" #include "opt_ipsec.h" #include "opt_sctp.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #include #include #endif /* IPSEC */ #ifdef SCTP #include #include #endif #include #include #ifdef FLOWTABLE #include #endif extern int in6_mcast_loop; struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; struct mbuf *ip6e_dest1; struct mbuf *ip6e_rthdr; struct mbuf *ip6e_dest2; }; static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, struct ucred *, int); static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *); static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *); static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, struct ucred *, int, int, int); static int ip6_copyexthdr(struct mbuf **, caddr_t, int); static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, struct ip6_frag **); static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); static int ip6_getpmtu(struct route_in6 *, struct route_in6 *, struct ifnet *, struct in6_addr *, u_long *, int *, u_int); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); /* * Make an extension header from option data. hp is the source, and * mp is the destination. */ #define MAKE_EXTHDR(hp, mp) \ do { \ if (hp) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ error = ip6_copyexthdr((mp), (caddr_t)(hp), \ ((eh)->ip6e_len + 1) << 3); \ if (error) \ goto freehdrs; \ } \ } while (/*CONSTCOND*/ 0) /* * Form a chain of extension headers. * m is the extension header mbuf * mp is the previous mbuf in the chain * p is the next header * i is the type of option. 
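 *
 * [Editor's example, not part of the diff] MAKE_CHAIN() splices a
 * header in by swapping next-header values; for hop-by-hop options
 * in front of TCP:
 *
 *	before:	ip6->ip6_nxt == IPPROTO_TCP
 *	after:	ip6->ip6_nxt == IPPROTO_HOPOPTS and the first byte
 *		of the hbh mbuf (its next-header field) == IPPROTO_TCP
 *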
*/ #define MAKE_CHAIN(m, mp, p, i)\ do {\ if (m) {\ if (!hdrsplit) \ panic("assumption failed: hdr not split"); \ *mtod((m), u_char *) = *(p);\ *(p) = (i);\ p = mtod((m), u_char *);\ (m)->m_next = (mp)->m_next;\ (mp)->m_next = (m);\ (mp) = (m);\ }\ } while (/*CONSTCOND*/ 0) void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) { u_short csum; csum = in_cksum_skip(m, offset + plen, offset); if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0) csum = 0xffff; offset += m->m_pkthdr.csum_data; /* checksum offset */ if (offset + sizeof(u_short) > m->m_len) { printf("%s: delayed m_pullup, m->len: %d plen %u off %u " "csum_flags=%b\n", __func__, m->m_len, plen, offset, (int)m->m_pkthdr.csum_flags, CSUM_BITS); /* * XXX this should not happen, but if it does, the correct * behavior may be to insert the checksum in the appropriate * next mbuf in the chain. */ return; } *(u_short *)(m->m_data + offset) = csum; } int ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto, int mtu, uint32_t id) { struct mbuf *m, **mnext, *m_frgpart; struct ip6_hdr *ip6, *mhip6; struct ip6_frag *ip6f; int off; int error; int tlen = m0->m_pkthdr.len; m = m0; ip6 = mtod(m, struct ip6_hdr *); mnext = &m->m_nextpkt; for (off = hlen; off < tlen; off += mtu) { m = m_gethdr(M_NOWAIT, MT_DATA); if (!m) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m->m_flags = m0->m_flags & M_COPYFLAGS; *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *ip6; m->m_len = sizeof(*mhip6); error = ip6_insertfraghdr(m0, m, hlen, &ip6f); if (error) { IP6STAT_INC(ip6s_odropped); return (error); } ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); if (off + mtu >= tlen) mtu = tlen - off; else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_short)(mtu + hlen + sizeof(*ip6f) - sizeof(struct ip6_hdr))); if ((m_frgpart = m_copy(m0, off, mtu)) == 0) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m_cat(m, m_frgpart); m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f); m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum; m->m_pkthdr.rcvif = NULL; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; IP6STAT_INC(ip6s_ofragments); in6_ifstat_inc(ifp, ifs6_out_fragcreat); } return (0); } /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * If route_in6 ro is present and has ro_rt initialized, route lookup would be * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, * then result of route lookup is stored in ro->ro_rt. * * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, * which is rt_mtu. * * ifpp - XXX: just for statistics */ /* * XXX TODO: no flowid is assigned for outbound flows? 
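 *
 * [Editor's note, not part of the diff] When an inpcb is supplied,
 * the flowid is in fact stamped near the top of the function (unless
 * the caller passes IP_NODEFAULTFLOWID):
 *
 *	m->m_pkthdr.flowid = inp->inp_flowid;
 *	M_HASHTYPE_SET(m, inp->inp_flowtype);
 *
 * so the TODO above concerns inp-less callers, ICMPv6 for example,
 * whose packets reach the driver without a flowid.
 *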
*/ int ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, struct inpcb *inp) { struct ip6_hdr *ip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; struct mbuf *mprev = NULL; int hlen, tlen, len; struct route_in6 ip6route; struct rtentry *rt = NULL; struct sockaddr_in6 *dst, src_sa, dst_sa; struct in6_addr odst; int error = 0; struct in6_ifaddr *ia = NULL; u_long mtu; int alwaysfrag, dontfrag; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr finaldst, src0, dst0; u_int32_t zone; struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int sw_csum, tso; int needfiblookup; uint32_t fibnum; struct m_tag *fwd_tag = NULL; uint32_t id; ip6 = mtod(m, struct ip6_hdr *); if (ip6 == NULL) { printf ("ip6 is NULL"); goto bad; } if (inp != NULL) { M_SETFIB(m, inp->inp_inc.inc_fibnum); if ((flags & IP_NODEFAULTFLOWID) == 0) { /* unconditionally set flowid */ m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } } finaldst = ip6->ip6_dst; bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); /* Destination options header(1st part) */ if (opt->ip6po_rthdr) { /* * Destination options header(1st part) * This only makes sense with a routing header. * See Section 9.2 of RFC 3542. * Disabling this part just for MIP6 convenience is * a bad idea. We need to think carefully about a * way to make the advanced API coexist with MIP6 * options, which might automatically be inserted in * the kernel. */ MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); } /* Routing header */ MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); /* Destination options header(2nd part) */ MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } #ifdef IPSEC /* * IPSec checking which handles several cases. * FAST IPSEC: We re-injected the packet. * XXX: need scope argument. */ switch(ip6_ipsec_output(&m, inp, &error)) { case 1: /* Bad packet */ goto freehdrs; case -1: /* IPSec done */ goto done; case 0: /* No IPSec */ default: break; } #endif /* IPSEC */ /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. */ optlen = 0; if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; unfragpartlen = optlen + sizeof(struct ip6_hdr); /* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */ if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; /* * If there is at least one extension header, * separate IP6 header from the payload. */ if (optlen && !hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf packet header length */ m->m_pkthdr.len += optlen; plen = m->m_pkthdr.len - sizeof(*ip6); /* If this is a jumbo payload, insert a jumbo payload option. */ if (plen > IPV6_MAXPACKET) { if (!hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) goto freehdrs; ip6->ip6_plen = 0; } else ip6->ip6_plen = htons(plen); /* * Concatenate headers and fill in next header fields. 
* Here we have, on "m" * IPv6 payload * and we insert headers accordingly. Finally, we should be getting: * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] * * during the header composing process, "m" points to IPv6 header. * "mprev" points to an extension header prior to esp. */ u_char *nexthdrp = &ip6->ip6_nxt; mprev = m; /* * we treat dest2 specially. this makes IPsec processing * much easier. the goal here is to make mprev point the * mbuf prior to dest2. * * result: IPv6 dest2 payload * m and mprev will point to IPv6 header. */ if (exthdrs.ip6e_dest2) { if (!hdrsplit) panic("assumption failed: hdr not split"); exthdrs.ip6e_dest2->m_next = m->m_next; m->m_next = exthdrs.ip6e_dest2; *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_DSTOPTS; } /* * result: IPv6 hbh dest1 rthdr dest2 payload * m will point to IPv6 header. mprev will point to the * extension header prior to dest2 (rthdr in the above case). */ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); /* * If there is a routing header, discard the packet. */ if (exthdrs.ip6e_rthdr) { error = EINVAL; goto bad; } /* Source address validation */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && (flags & IPV6_UNSPECSRC) == 0) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } IP6STAT_INC(ip6s_localout); /* * Route packet. */ if (ro == 0) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); } ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; #ifdef FLOWTABLE if (ro->ro_rt == NULL) (void )flowtable_lookup(AF_INET6, m, (struct route *)ro); #endif fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); again: /* * if specified, try to fill in the traffic class field. * do not override if a non-zero value is already set. * we check the diffserv field and the ecn field separately. */ if (opt && opt->ip6po_tclass >= 0) { int mask = 0; if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) mask |= 0xfc; if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) mask |= 0x03; if (mask != 0) ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); } /* fill in or override the hop limit field, if necessary. */ if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (im6o != NULL) ip6->ip6_hlim = im6o->im6o_multicast_hlim; else ip6->ip6_hlim = V_ip6_defmcasthlim; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if (ro->ro_rt && fwd_tag == NULL) { rt = ro->ro_rt; ifp = ro->ro_rt->rt_ifp; } else { if (fwd_tag == NULL) { bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; } error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp, &rt, fibnum); if (error != 0) { if (ifp != NULL) in6_ifstat_inc(ifp, ifs6_out_discard); goto bad; } } if (rt == NULL) { /* * If in6_selectroute() does not return a route entry, * dst may not have been updated. */ *dst = dst_sa; /* XXX */ } /* * then rt (for unicast) and ifp must be non-NULL valid values. */ if ((flags & IPV6_FORWARDING) == 0) { /* XXX: the FORWARDING flag can be set for mrouting. 
*/ in6_ifstat_inc(ifp, ifs6_out_request); } if (rt != NULL) { ia = (struct in6_ifaddr *)(rt->rt_ifa); counter_u64_add(rt->rt_pksent, 1); } /* * The outgoing interface must be in the zone of source and * destination addresses. */ origifp = ifp; src0 = ip6->ip6_src; if (in6_setscope(&src0, origifp, &zone)) goto badscope; bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id) goto badscope; dst0 = ip6->ip6_dst; if (in6_setscope(&dst0, origifp, &zone)) goto badscope; /* re-initialize to be sure */ bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) { goto badscope; } /* We should use ia_ifp to support the case of * sending packets to an address of our own. */ if (ia != NULL && ia->ia_ifp) ifp = ia->ia_ifp; /* scope check is done. */ goto routefound; badscope: IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(origifp, ifs6_out_discard); if (error == 0) error = EHOSTUNREACH; /* XXX */ goto bad; routefound: if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (opt && opt->ip6po_nextroute.ro_rt) { /* * The nexthop is explicitly specified by the * application. We assume the next hop is an IPv6 * address. */ dst = (struct sockaddr_in6 *)opt->ip6po_nexthop; } else if ((rt->rt_flags & RTF_GATEWAY)) dst = (struct sockaddr_in6 *)rt->rt_gateway; } if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ } else { m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; in6_ifstat_inc(ifp, ifs6_out_mcast); /* * Confirm that the outgoing interface supports multicast. */ if (!(ifp->if_flags & IFF_MULTICAST)) { IP6STAT_INC(ip6s_noroute); in6_ifstat_inc(ifp, ifs6_out_discard); error = ENETUNREACH; goto bad; } if ((im6o == NULL && in6_mcast_loop) || (im6o && im6o->im6o_multicast_loop)) { /* * Loop back multicast datagram if not expressly * forbidden to do so, even if we have not joined * the address; protocols will filter it later, * thus deferring a hash lookup and lock acquisition * at the expense of an m_copym(). */ ip6_mloopback(ifp, m); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IPV6_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip6_mloopback(), * above, will be forwarded by the ip6_input() routine, * if necessary. */ if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { /* * XXX: ip6_mforward expects that rcvif is NULL * when it is called from the originating path. * However, it may not always be the case. */ m->m_pkthdr.rcvif = NULL; if (ip6_mforward(ip6, ifp, m) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a hoplimit of zero may be looped back, * above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip6_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. */ if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) { m_freem(m); goto done; } } /* * Fill the outgoing inteface to tell the upper layer * to increment per-interface statistics. 
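 *
 * [Editor's note, not part of the diff] Callers that pass a non-NULL
 * ifpp (ICMPv6, for instance) use the interface returned just below
 * to charge their own per-interface output statistics.
 *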
*/ if (ifpp) *ifpp = ifp; /* Determine path MTU. */ if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu, &alwaysfrag, fibnum)) != 0) goto bad; /* * The caller of this function may specify to use the minimum MTU * in some cases. * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU * setting. The logic is a bit complicated; by default, unicast * packets will follow path MTU while multicast packets will be sent at * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets * including unicast ones will be sent at the minimum MTU. Multicast * packets will always be sent at the minimum MTU unless * IP6PO_MINMTU_DISABLE is explicitly specified. * See RFC 3542 for more details. */ if (mtu > IPV6_MMTU) { if ((flags & IPV6_MINMTU)) mtu = IPV6_MMTU; else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) mtu = IPV6_MMTU; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && (opt == NULL || opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { mtu = IPV6_MMTU; } } /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. * (RFC 2460, section 4.) */ if (exthdrs.ip6e_hbh) { struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); u_int32_t dummy; /* XXX unused */ u_int32_t plen = 0; /* XXX: ip6_process will check the value */ #ifdef DIAGNOSTIC if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) panic("ip6e_hbh is not contiguous"); #endif /* * XXX: if we have to send an ICMPv6 error to the sender, * we need the M_LOOP flag since icmp6_error() expects * the IPv6 and the hop-by-hop options header are * contiguous unless the flag is set. */ m->m_flags |= M_LOOP; m->m_pkthdr.rcvif = ifp; if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), &dummy, &plen) < 0) { /* m was already freed at this point */ error = EINVAL;/* better error? */ goto done; } m->m_flags &= ~M_LOOP; /* XXX */ m->m_pkthdr.rcvif = NULL; } /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet6_pfil_hook)) goto passout; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp); if (error != 0 || m == NULL) goto done; ip6 = mtod(m, struct ip6_hdr *); needfiblookup = 0; /* See if destination IP address was changed by packet filter. */ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip6_input(). */ if (in6_localip(&ip6->ip6_dst)) { m->m_flags |= M_FASTFWD_OURS; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } else needfiblookup = 1; /* Redo the routing table lookup. */ } /* See if fib was changed by packet filter. */ if (fibnum != M_GETFIB(m)) { m->m_flags |= M_SKIP_FIREWALL; fibnum = M_GETFIB(m); RO_RTFREE(ro); needfiblookup = 1; } if (needfiblookup) goto again; /* See if local, if yes, send it to netisr. 
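 *
 * [Editor's note, not part of the diff] The checksum handling in the
 * block below exists because a filter-redirected local packet never
 * crosses the wire: pending delayed checksums are declared already
 * verified (csum_data = 0xffff) before the mbuf is requeued to
 * NETISR_IPV6, so the transport layer skips recomputation.
 *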
*/ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } /* Or forward to some other address? */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { dst = (struct sockaddr_in6 *)&ro->ro_dst; bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); goto again; } passout: /* * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. * * the logic here is rather complex: * 1: normal case (dontfrag == 0, alwaysfrag == 0) * 1-a: send as is if tlen <= path mtu * 1-b: fragment if tlen > path mtu * * 2: if user asks us not to fragment (dontfrag == 1) * 2-a: send as is if tlen <= interface mtu * 2-b: error if tlen > interface mtu * * 3: if we always need to attach fragment header (alwaysfrag == 1) * always fragment * * 4: if dontfrag == 1 && alwaysfrag == 1 * error, as we cannot handle this conflicting request */ sw_csum = m->m_pkthdr.csum_flags; if (!hdrsplit) { tso = ((sw_csum & ifp->if_hwassist & CSUM_TSO) != 0) ? 1 : 0; sw_csum &= ~ifp->if_hwassist; } else tso = 0; /* * If we added extension headers, we will not do TSO and calculate the * checksums ourselves for now. * XXX-BZ Need a framework to know when the NIC can handle it, even * with ext. hdrs. */ if (sw_csum & CSUM_DELAY_DATA_IPV6) { sw_csum &= ~CSUM_DELAY_DATA_IPV6; in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr)); } #ifdef SCTP if (sw_csum & CSUM_SCTP_IPV6) { sw_csum &= ~CSUM_SCTP_IPV6; sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); } #endif m->m_pkthdr.csum_flags &= ifp->if_hwassist; tlen = m->m_pkthdr.len; if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso) dontfrag = 1; else dontfrag = 0; if (dontfrag && alwaysfrag) { /* case 4 */ /* conflicting request - can't transmit */ error = EMSGSIZE; goto bad; } if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) { /* case 2-b */ /* * Even if the DONTFRAG option is specified, we cannot send the * packet when the data length is larger than the MTU of the * outgoing interface. * Notify the error by sending IPV6_PATHMTU ancillary data if * application wanted to know the MTU value. Also return an * error code (this is not described in the API spec). */ if (inp != NULL) ip6_notify_pmtu(inp, &dst_sa, (u_int32_t)mtu); error = EMSGSIZE; goto bad; } /* * transmit packet without fragmentation */ if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */ struct in6_ifaddr *ia6; ip6 = mtod(m, struct ip6_hdr *); ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); if (ia6) { /* Record statistics for this interface address. */ counter_u64_add(ia6->ia_ifa.ifa_opackets, 1); counter_u64_add(ia6->ia_ifa.ifa_obytes, m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } - error = nd6_output_ifp(ifp, origifp, m, dst); + error = nd6_output_ifp(ifp, origifp, m, dst, NULL); goto done; } /* * try to fragment the packet. 
case 1-b and 3 */ if (mtu < IPV6_MMTU) { /* path MTU cannot be less than IPV6_MMTU */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { u_char nextproto; /* * Too large for the destination or interface; * fragment if possible. * Must be able to put at least 8 bytes per fragment. */ hlen = unfragpartlen; if (mtu > IPV6_MAXPACKET) mtu = IPV6_MAXPACKET; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; if (len < 8) { error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } /* * If the interface will not calculate checksums on * fragmented packets, then do it here. * XXX-BZ handle the hw offloading case. Need flags. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { in6_delayed_cksum(m, plen, hlen); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { sctp_delayed_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; } #endif /* * Change the next header field of the last header in the * unfragmentable part. */ if (exthdrs.ip6e_rthdr) { nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_dest1) { nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_hbh) { nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { nextproto = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_FRAGMENT; } /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto * chain. */ m0 = m; id = htonl(ip6_randomid()); if ((error = ip6_fragment(ifp, m, hlen, nextproto, len, id))) goto sendorfree; in6_ifstat_inc(ifp, ifs6_out_fragok); } /* * Remove leading garbages. */ sendorfree: m = m0->m_nextpkt; m0->m_nextpkt = 0; m_freem(m0); for (m0 = m; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address. */ if (ia) { counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } - error = nd6_output_ifp(ifp, origifp, m, dst); + error = nd6_output_ifp(ifp, origifp, m, dst, NULL); } else m_freem(m); } if (error == 0) IP6STAT_INC(ip6s_fragmented); done: if (ro == &ip6route) RO_RTFREE(ro); if (ro_pmtu == &ip6route) RO_RTFREE(ro_pmtu); return (error); freehdrs: m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ m_freem(exthdrs.ip6e_dest1); m_freem(exthdrs.ip6e_rthdr); m_freem(exthdrs.ip6e_dest2); /* FALLTHROUGH */ bad: if (m) m_freem(m); goto done; } static int ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen) { struct mbuf *m; if (hlen > MCLBYTES) return (ENOBUFS); /* XXX */ if (hlen > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_len = hlen; if (hdr) bcopy(hdr, mtod(m, caddr_t), hlen); *mp = m; return (0); } /* * Insert jumbo payload option. */ static int ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) { struct mbuf *mopt; u_char *optbuf; u_int32_t v; #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ /* * If there is no hop-by-hop options header, allocate new one. * If there is one but it doesn't have enough space to store the * jumbo payload option, allocate a cluster to store the whole options. 
* Otherwise, use it to store the options. */ if (exthdrs->ip6e_hbh == 0) { mopt = m_get(M_NOWAIT, MT_DATA); if (mopt == NULL) return (ENOBUFS); mopt->m_len = JUMBOOPTLEN; optbuf = mtod(mopt, u_char *); optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ exthdrs->ip6e_hbh = mopt; } else { struct ip6_hbh *hbh; mopt = exthdrs->ip6e_hbh; if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { /* * XXX assumption: * - exthdrs->ip6e_hbh is not referenced from places * other than exthdrs. * - exthdrs->ip6e_hbh is not an mbuf chain. */ int oldoptlen = mopt->m_len; struct mbuf *n; /* * XXX: give up if the whole (new) hbh header does * not fit even in an mbuf cluster. */ if (oldoptlen + JUMBOOPTLEN > MCLBYTES) return (ENOBUFS); /* * As a consequence, we must always prepare a cluster * at this point. */ n = m_getcl(M_NOWAIT, MT_DATA, 0); if (n == NULL) return (ENOBUFS); n->m_len = oldoptlen + JUMBOOPTLEN; bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), oldoptlen); optbuf = mtod(n, caddr_t) + oldoptlen; m_freem(mopt); mopt = exthdrs->ip6e_hbh = n; } else { optbuf = mtod(mopt, u_char *) + mopt->m_len; mopt->m_len += JUMBOOPTLEN; } optbuf[0] = IP6OPT_PADN; optbuf[1] = 1; /* * Adjust the header length according to the pad and * the jumbo payload option. */ hbh = mtod(mopt, struct ip6_hbh *); hbh->ip6h_len += (JUMBOOPTLEN >> 3); } /* fill in the option. */ optbuf[2] = IP6OPT_JUMBO; optbuf[3] = 4; v = (u_int32_t)htonl(plen + JUMBOOPTLEN); bcopy(&v, &optbuf[4], sizeof(u_int32_t)); /* finally, adjust the packet header length */ exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; return (0); #undef JUMBOOPTLEN } /* * Insert fragment header and copy unfragmentable header portions. */ static int ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, struct ip6_frag **frghdrp) { struct mbuf *n, *mlast; if (hlen > sizeof(struct ip6_hdr)) { n = m_copym(m0, sizeof(struct ip6_hdr), hlen - sizeof(struct ip6_hdr), M_NOWAIT); if (n == 0) return (ENOBUFS); m->m_next = n; } else n = m; /* Search for the last mbuf of unfragmentable part. */ for (mlast = n; mlast->m_next; mlast = mlast->m_next) ; if (M_WRITABLE(mlast) && M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { /* use the trailing space of the last mbuf for the fragment hdr */ *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len); mlast->m_len += sizeof(struct ip6_frag); m->m_pkthdr.len += sizeof(struct ip6_frag); } else { /* allocate a new mbuf for the fragment header */ struct mbuf *mfrg; mfrg = m_get(M_NOWAIT, MT_DATA); if (mfrg == NULL) return (ENOBUFS); mfrg->m_len = sizeof(struct ip6_frag); *frghdrp = mtod(mfrg, struct ip6_frag *); mlast->m_next = mfrg; } return (0); } static int ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, struct ifnet *ifp, struct in6_addr *dst, u_long *mtup, int *alwaysfragp, u_int fibnum) { u_int32_t mtu = 0; int alwaysfrag = 0; int error = 0; if (ro_pmtu != ro) { /* The first hop and the final destination may differ. 
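 *
 * [Editor's summary, not part of the diff] Effective PMTU selection
 * in the body below, in order of preference:
 *
 *	1. the TCP hostcache value, clamped by the route's rt_mtu;
 *	2. rt_mtu itself, else the link MTU IN6_LINKMTU(ifp);
 *	3. anything under IPV6_MMTU is raised back to 1280 with
 *	   alwaysfrag set, so a fragment header is still attached for
 *	   translators (RFC 2460, section 5).
 *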
*/ struct sockaddr_in6 *sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; if (ro_pmtu->ro_rt && ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 || !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) { RTFREE(ro_pmtu->ro_rt); ro_pmtu->ro_rt = (struct rtentry *)NULL; } if (ro_pmtu->ro_rt == NULL) { bzero(sa6_dst, sizeof(*sa6_dst)); sa6_dst->sin6_family = AF_INET6; sa6_dst->sin6_len = sizeof(struct sockaddr_in6); sa6_dst->sin6_addr = *dst; in6_rtalloc(ro_pmtu, fibnum); } } if (ro_pmtu->ro_rt) { u_int32_t ifmtu; struct in_conninfo inc; bzero(&inc, sizeof(inc)); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; if (ifp == NULL) ifp = ro_pmtu->ro_rt->rt_ifp; ifmtu = IN6_LINKMTU(ifp); mtu = tcp_hc_getmtu(&inc); if (mtu) mtu = min(mtu, ro_pmtu->ro_rt->rt_mtu); else mtu = ro_pmtu->ro_rt->rt_mtu; if (mtu == 0) mtu = ifmtu; else if (mtu < IPV6_MMTU) { /* * RFC2460 section 5, last paragraph: * if we record ICMPv6 too big message with * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU * or smaller, with framgent header attached. * (fragment header is needed regardless from the * packet size, for translators to identify packets) */ alwaysfrag = 1; mtu = IPV6_MMTU; } } else if (ifp) { mtu = IN6_LINKMTU(ifp); } else error = EHOSTUNREACH; /* XXX */ *mtup = mtu; if (alwaysfragp) *alwaysfragp = alwaysfrag; return (error); } /* * IP6 socket option processing. */ int ip6_ctloutput(struct socket *so, struct sockopt *sopt) { int optdatalen, uproto; void *optdata; struct inpcb *in6p = sotoinpcb(so); int error, optval; int level, op, optname; int optlen; struct thread *td; #ifdef RSS uint32_t rss_bucket; int retval; #endif level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; td = sopt->sopt_td; error = 0; optval = 0; uproto = (int)so->so_proto->pr_protocol; if (level != IPPROTO_IPV6) { error = EINVAL; if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_dir == SOPT_SET) { switch (sopt->sopt_name) { case SO_REUSEADDR: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEADDR) != 0) in6p->inp_flags2 |= INP_REUSEADDR; else in6p->inp_flags2 &= ~INP_REUSEADDR; INP_WUNLOCK(in6p); error = 0; break; case SO_REUSEPORT: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEPORT) != 0) in6p->inp_flags2 |= INP_REUSEPORT; else in6p->inp_flags2 &= ~INP_REUSEPORT; INP_WUNLOCK(in6p); error = 0; break; case SO_SETFIB: INP_WLOCK(in6p); in6p->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(in6p); error = 0; break; default: break; } } } else { /* level == IPPROTO_IPV6 */ switch (op) { case SOPT_SET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif { struct mbuf *m; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; error = ip6_pcbopts(&in6p->in6p_outputopts, m, so, sopt); m_freem(m); /* XXX */ break; } /* * Use of some Hop-by-Hop options or some * Destination options, might require special * privilege. That is, normal applications * (without special privilege) might be forbidden * from setting certain options in outgoing packets, * and might never see certain options in received * packets. [RFC 2292 Section 6] * KAME specific note: * KAME prevents non-privileged users from sending or * receiving ANY hbh/dst options in order to avoid * overhead of parsing options in the kernel. 
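 *
 * [Editor's example, not part of the diff] In userland this surfaces
 * as EPERM for an unprivileged process:
 *
 *	int on = 1;
 *	if (setsockopt(s, IPPROTO_IPV6, IPV6_RECVHOPOPTS,
 *	    &on, sizeof(on)) == -1)
 *		err(1, "IPV6_RECVHOPOPTS");
 *
 * where err(3) reports EPERM unless the caller holds
 * PRIV_NETINET_SETHDROPTS.
 *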
*/ case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) break; } /* FALLTHROUGH */ case IPV6_UNICAST_HOPS: case IPV6_HOPLIMIT: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_RECVTCLASS: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RECVRSSBUCKETID: #endif case IPV6_V6ONLY: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_BINDMULTI: #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: #endif if (optname == IPV6_BINDANY && td != NULL) { error = priv_check(td, PRIV_NETINET_BINDANY); if (error) break; } if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_UNICAST_HOPS: if (optval < -1 || optval >= 256) error = EINVAL; else { /* -1 = kernel default */ in6p->in6p_hops = optval; if ((in6p->inp_vflag & INP_IPV4) != 0) in6p->inp_ip_ttl = optval; } break; #define OPTSET(bit) \ do { \ INP_WLOCK(in6p); \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTSET2292(bit) \ do { \ INP_WLOCK(in6p); \ in6p->inp_flags |= IN6P_RFC2292; \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0) #define OPTSET2(bit, val) do { \ INP_WLOCK(in6p); \ if (val) \ in6p->inp_flags2 |= bit; \ else \ in6p->inp_flags2 &= ~bit; \ INP_WUNLOCK(in6p); \ } while (0) #define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0) case IPV6_RECVPKTINFO: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: { struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } optp = &in6p->in6p_outputopts; error = ip6_pcbopt(IPV6_HOPLIMIT, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_RECVHOPLIMIT: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPLIMIT); break; case IPV6_RECVHOPOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDRDSTOPTS); break; case IPV6_RECVRTHDR: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: /* * We ignore this option for TCP * sockets. * (RFC3542 leaves this case * unspecified.) */ if (uproto != IPPROTO_TCP) OPTSET(IN6P_MTU); break; case IPV6_RECVFLOWID: OPTSET2(INP_RECVFLOWID, optval); break; #ifdef RSS case IPV6_RECVRSSBUCKETID: OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif case IPV6_V6ONLY: /* * make setsockopt(IPV6_V6ONLY) * available only prior to bind(2). * see ipng mailing list, Jun 22 2001. 
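 *
 * [Editor's example, not part of the diff] Because the option flips
 * the INP_IPV4 vflag, it only has effect before the socket is bound:
 *
 *	int off = 0;
 *	setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off));
 *	bind(s, (struct sockaddr *)&sin6, sizeof(sin6));
 *
 * after which the socket can also reach v4-mapped (::ffff:a.b.c.d)
 * peers; issued after bind(2), the call fails with EINVAL as below.
 *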
*/ if (in6p->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { error = EINVAL; break; } OPTSET(IN6P_IPV6_V6ONLY); if (optval) in6p->inp_vflag &= ~INP_IPV4; else in6p->inp_vflag |= INP_IPV4; break; case IPV6_RECVTCLASS: /* cannot mix with RFC2292 XXX */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: OPTSET(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: OPTSET(INP_BINDANY); break; case IPV6_BINDMULTI: OPTSET2(INP_BINDMULTI, optval); break; #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: if ((optval >= 0) && (optval < rss_getnumbuckets())) { in6p->inp_rss_listen_bucket = optval; OPTSET2(INP_RSS_BUCKET_SET, 1); } else { error = EINVAL; } break; #endif } break; case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: if (optlen != sizeof(optval)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; { struct ip6_pktopts **optp; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: /* RFC 2292 */ if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_2292PKTINFO: OPTSET2292(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: OPTSET2292(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: /* * Check super-user privilege. * See comments for IPV6_RECVHOPOPTS. */ if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_HOPOPTS); break; case IPV6_2292DSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ break; case IPV6_2292RTHDR: OPTSET2292(IN6P_RTHDR); break; } break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: { /* new advanced API (RFC3542) */ u_char *optbuf; u_char optbuf_storage[MCLBYTES]; int optlen; struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } /* * We only ensure valsize is not too large * here. Further validation will be done * later. */ error = sooptcopyin(sopt, optbuf_storage, sizeof(optbuf_storage), 0); if (error) break; optlen = sopt->sopt_valsize; optbuf = optbuf_storage; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, optbuf, optlen, optp, (td != NULL) ? 
td->td_ucred : NULL, uproto); break; } #undef OPTSET case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: case IPV6_MSFILTER: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: error = ip6_setmoptions(in6p, sopt); break; case IPV6_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; INP_WLOCK(in6p); switch (optval) { case IPV6_PORTRANGE_DEFAULT: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags &= ~(INP_HIGHPORT); break; case IPV6_PORTRANGE_HIGH: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags |= INP_HIGHPORT; break; case IPV6_PORTRANGE_LOW: in6p->inp_flags &= ~(INP_HIGHPORT); in6p->inp_flags |= INP_LOWPORT; break; default: error = EINVAL; break; } INP_WUNLOCK(in6p); break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req; struct mbuf *m; if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; req = mtod(m, caddr_t); error = ipsec_set_policy(in6p, optname, req, m->m_len, (sopt->sopt_td != NULL) ? sopt->sopt_td->td_ucred : NULL); m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif /* * RFC3542 (effectively) deprecated the * semantics of the 2292-style pktoptions. * Since it was not reliable in nature (i.e., * applications had to expect the lack of some * information after all), it would make sense * to simplify this part by always returning * empty data. */ sopt->sopt_valsize = 0; break; case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: case IPV6_UNICAST_HOPS: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_V6ONLY: case IPV6_PORTRANGE: case IPV6_RECVTCLASS: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_FLOWID: case IPV6_FLOWTYPE: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RSSBUCKETID: case IPV6_RECVRSSBUCKETID: #endif switch (optname) { case IPV6_RECVHOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: optval = OPTBIT(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: optval = OPTBIT(IN6P_RTHDRDSTOPTS); break; case IPV6_UNICAST_HOPS: optval = in6p->in6p_hops; break; case IPV6_RECVPKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_RECVHOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_RECVRTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: optval = OPTBIT(IN6P_MTU); break; case IPV6_V6ONLY: optval = OPTBIT(IN6P_IPV6_V6ONLY); break; case IPV6_PORTRANGE: { int flags; flags = in6p->inp_flags; if (flags & INP_HIGHPORT) optval = IPV6_PORTRANGE_HIGH; else if (flags & INP_LOWPORT) optval = IPV6_PORTRANGE_LOW; else optval = 0; break; } case IPV6_RECVTCLASS: optval = OPTBIT(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: optval = OPTBIT(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: optval = OPTBIT(INP_BINDANY); break; case IPV6_FLOWID: optval = in6p->inp_flowid; break; case IPV6_FLOWTYPE: optval = in6p->inp_flowtype; break; case IPV6_RECVFLOWID: optval = OPTBIT2(INP_RECVFLOWID); break; #ifdef RSS case IPV6_RSSBUCKETID: retval = rss_hash2bucket(in6p->inp_flowid, in6p->inp_flowtype, &rss_bucket); if (retval == 0) optval = rss_bucket; else error = EINVAL; break; case IPV6_RECVRSSBUCKETID: optval = OPTBIT2(INP_RECVRSSBUCKETID); break; #endif case 
IPV6_BINDMULTI: optval = OPTBIT2(INP_BINDMULTI); break; } if (error) break; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PATHMTU: { u_long pmtu = 0; struct ip6_mtuinfo mtuinfo; struct route_in6 sro; bzero(&sro, sizeof(sro)); if (!(so->so_state & SS_ISCONNECTED)) return (ENOTCONN); /* * XXX: we dot not consider the case of source * routing, or optional information to specify * the outgoing interface. */ error = ip6_getpmtu(&sro, NULL, NULL, &in6p->in6p_faddr, &pmtu, NULL, so->so_fibnum); if (sro.ro_rt) RTFREE(sro.ro_rt); if (error) break; if (pmtu > IPV6_MAXPACKET) pmtu = IPV6_MAXPACKET; bzero(&mtuinfo, sizeof(mtuinfo)); mtuinfo.ip6m_mtu = (u_int32_t)pmtu; optdata = (void *)&mtuinfo; optdatalen = sizeof(mtuinfo); error = sooptcopyout(sopt, optdata, optdatalen); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292RTHDR: case IPV6_2292DSTOPTS: switch (optname) { case IPV6_2292PKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_2292RTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_2292DSTOPTS: optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: error = ip6_getpcbopt(in6p->in6p_outputopts, optname, sopt); break; case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_MSFILTER: error = ip6_getmoptions(in6p, sopt); break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m = NULL; struct mbuf **mp = &m; size_t ovalsize = sopt->sopt_valsize; caddr_t oval = (caddr_t)sopt->sopt_val; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; sopt->sopt_valsize = ovalsize; sopt->sopt_val = oval; if (m) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec_get_policy(in6p, req, len, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /* XXX */ if (error == 0 && m) m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; } } return (error); } int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0, optval, optlen; const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum); struct inpcb *in6p = sotoinpcb(so); int level, op, optname; level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; if (level != IPPROTO_IPV6) { return (EINVAL); } switch (optname) { case IPV6_CHECKSUM: /* * For ICMPv6 sockets, no modification allowed for checksum * offset, permit "no change" values to help existing apps. * * RFC3542 says: "An attempt to set IPV6_CHECKSUM * for an ICMPv6 socket will fail." * The current behavior does not meet RFC3542. 
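 *
 * [Editor's example, not part of the diff; my_hdr is a hypothetical
 * application header] For a non-ICMPv6 raw socket the kernel fills
 * the 16-bit checksum at the given even payload offset:
 *
 *	int off = offsetof(struct my_hdr, cksum);
 *	setsockopt(s, IPPROTO_IPV6, IPV6_CHECKSUM, &off, sizeof(off));
 *
 * An ICMPv6 socket only accepts the fixed icmp6_cksum offset, the
 * "no change" escape hatch noted above.
 *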
*/ switch (op) { case SOPT_SET: if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) break; if ((optval % 2) != 0) { /* the API assumes even offset values */ error = EINVAL; } else if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (optval != icmp6off) error = EINVAL; } else in6p->in6p_cksum = optval; break; case SOPT_GET: if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) optval = icmp6off; else optval = in6p->in6p_cksum; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = EINVAL; break; } break; default: error = ENOPROTOOPT; break; } return (error); } /* * Set up IP6 options in pcb for insertion in output packets or * specifying behavior of outgoing packets. */ static int ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so, struct sockopt *sopt) { struct ip6_pktopts *opt = *pktopt; int error = 0; struct thread *td = sopt->sopt_td; /* turn off any old options. */ if (opt) { #ifdef DIAGNOSTIC if (opt->ip6po_pktinfo || opt->ip6po_nexthop || opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || opt->ip6po_rhinfo.ip6po_rhi_rthdr) printf("ip6_pcbopts: all specified options are cleared.\n"); #endif ip6_clearpktopts(opt, -1); } else opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); *pktopt = NULL; if (!m || m->m_len == 0) { /* * Only turning off any previous options, regardless of * whether the opt is just created or given. */ free(opt, M_IP6OPT); return (0); } /* set options specified by user. */ if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ? td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) { ip6_clearpktopts(opt, -1); /* XXX: discard all options */ free(opt, M_IP6OPT); return (error); } *pktopt = opt; return (0); } /* * initialize ip6_pktopts. beware that there are non-zero default values in * the struct. */ void ip6_initpktopts(struct ip6_pktopts *opt) { bzero(opt, sizeof(*opt)); opt->ip6po_hlim = -1; /* -1 means default hop limit */ opt->ip6po_tclass = -1; /* -1 means default traffic class */ opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM; } static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, struct ucred *cred, int uproto) { struct ip6_pktopts *opt; if (*pktopt == NULL) { *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT, M_WAITOK); ip6_initpktopts(*pktopt); } opt = *pktopt; return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto)); } static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) { void *optdata = NULL; int optdatalen = 0; struct ip6_ext *ip6e; int error = 0; struct in6_pktinfo null_pktinfo; int deftclass = 0, on; int defminmtu = IP6PO_MINMTU_MCASTONLY; int defpreftemp = IP6PO_TEMPADDR_SYSTEM; switch (optname) { case IPV6_PKTINFO: optdata = (void *)&null_pktinfo; if (pktopt && pktopt->ip6po_pktinfo) { bcopy(pktopt->ip6po_pktinfo, &null_pktinfo, sizeof(null_pktinfo)); in6_clearscope(&null_pktinfo.ipi6_addr); } else { /* XXX: we don't have to do this every time... 
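 *
 * [Editor's summary, not part of the diff] The cases below fall back
 * to these defaults when no sticky option is set, matching
 * ip6_initpktopts() above:
 *
 *	IPV6_TCLASS		0
 *	IPV6_USE_MIN_MTU	IP6PO_MINMTU_MCASTONLY
 *	IPV6_PREFER_TEMPADDR	IP6PO_TEMPADDR_SYSTEM
 *	IPV6_DONTFRAG		0 (off)
 *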
*/ bzero(&null_pktinfo, sizeof(null_pktinfo)); } optdatalen = sizeof(struct in6_pktinfo); break; case IPV6_TCLASS: if (pktopt && pktopt->ip6po_tclass >= 0) optdata = (void *)&pktopt->ip6po_tclass; else optdata = (void *)&deftclass; optdatalen = sizeof(int); break; case IPV6_HOPOPTS: if (pktopt && pktopt->ip6po_hbh) { optdata = (void *)pktopt->ip6po_hbh; ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDR: if (pktopt && pktopt->ip6po_rthdr) { optdata = (void *)pktopt->ip6po_rthdr; ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDRDSTOPTS: if (pktopt && pktopt->ip6po_dest1) { optdata = (void *)pktopt->ip6po_dest1; ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_DSTOPTS: if (pktopt && pktopt->ip6po_dest2) { optdata = (void *)pktopt->ip6po_dest2; ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_NEXTHOP: if (pktopt && pktopt->ip6po_nexthop) { optdata = (void *)pktopt->ip6po_nexthop; optdatalen = pktopt->ip6po_nexthop->sa_len; } break; case IPV6_USE_MIN_MTU: if (pktopt) optdata = (void *)&pktopt->ip6po_minmtu; else optdata = (void *)&defminmtu; optdatalen = sizeof(int); break; case IPV6_DONTFRAG: if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) on = 1; else on = 0; optdata = (void *)&on; optdatalen = sizeof(on); break; case IPV6_PREFER_TEMPADDR: if (pktopt) optdata = (void *)&pktopt->ip6po_prefer_tempaddr; else optdata = (void *)&defpreftemp; optdatalen = sizeof(int); break; default: /* should not happen */ #ifdef DIAGNOSTIC panic("ip6_getpcbopt: unexpected option\n"); #endif return (ENOPROTOOPT); } error = sooptcopyout(sopt, optdata, optdatalen); return (error); } void ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) { if (pktopt == NULL) return; if (optname == -1 || optname == IPV6_PKTINFO) { if (pktopt->ip6po_pktinfo) free(pktopt->ip6po_pktinfo, M_IP6OPT); pktopt->ip6po_pktinfo = NULL; } if (optname == -1 || optname == IPV6_HOPLIMIT) pktopt->ip6po_hlim = -1; if (optname == -1 || optname == IPV6_TCLASS) pktopt->ip6po_tclass = -1; if (optname == -1 || optname == IPV6_NEXTHOP) { if (pktopt->ip6po_nextroute.ro_rt) { RTFREE(pktopt->ip6po_nextroute.ro_rt); pktopt->ip6po_nextroute.ro_rt = NULL; } if (pktopt->ip6po_nexthop) free(pktopt->ip6po_nexthop, M_IP6OPT); pktopt->ip6po_nexthop = NULL; } if (optname == -1 || optname == IPV6_HOPOPTS) { if (pktopt->ip6po_hbh) free(pktopt->ip6po_hbh, M_IP6OPT); pktopt->ip6po_hbh = NULL; } if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) { if (pktopt->ip6po_dest1) free(pktopt->ip6po_dest1, M_IP6OPT); pktopt->ip6po_dest1 = NULL; } if (optname == -1 || optname == IPV6_RTHDR) { if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; if (pktopt->ip6po_route.ro_rt) { RTFREE(pktopt->ip6po_route.ro_rt); pktopt->ip6po_route.ro_rt = NULL; } } if (optname == -1 || optname == IPV6_DSTOPTS) { if (pktopt->ip6po_dest2) free(pktopt->ip6po_dest2, M_IP6OPT); pktopt->ip6po_dest2 = NULL; } } #define PKTOPT_EXTHDRCPY(type) \ do {\ if (src->type) {\ int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ dst->type = malloc(hlen, M_IP6OPT, canwait);\ if (dst->type == NULL && canwait == M_NOWAIT)\ goto bad;\ bcopy(src->type, dst->type, hlen);\ }\ } while (/*CONSTCOND*/ 0) static int copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int 
canwait) { if (dst == NULL || src == NULL) { printf("copypktopts: invalid argument\n"); return (EINVAL); } dst->ip6po_hlim = src->ip6po_hlim; dst->ip6po_tclass = src->ip6po_tclass; dst->ip6po_flags = src->ip6po_flags; dst->ip6po_minmtu = src->ip6po_minmtu; dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr; if (src->ip6po_pktinfo) { dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo), M_IP6OPT, canwait); if (dst->ip6po_pktinfo == NULL) goto bad; *dst->ip6po_pktinfo = *src->ip6po_pktinfo; } if (src->ip6po_nexthop) { dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len, M_IP6OPT, canwait); if (dst->ip6po_nexthop == NULL) goto bad; bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, src->ip6po_nexthop->sa_len); } PKTOPT_EXTHDRCPY(ip6po_hbh); PKTOPT_EXTHDRCPY(ip6po_dest1); PKTOPT_EXTHDRCPY(ip6po_dest2); PKTOPT_EXTHDRCPY(ip6po_rthdr); /* do not copy the cached route */ return (0); bad: ip6_clearpktopts(dst, -1); return (ENOBUFS); } #undef PKTOPT_EXTHDRCPY struct ip6_pktopts * ip6_copypktopts(struct ip6_pktopts *src, int canwait) { int error; struct ip6_pktopts *dst; dst = malloc(sizeof(*dst), M_IP6OPT, canwait); if (dst == NULL) return (NULL); ip6_initpktopts(dst); if ((error = copypktopts(dst, src, canwait)) != 0) { free(dst, M_IP6OPT); return (NULL); } return (dst); } void ip6_freepcbopts(struct ip6_pktopts *pktopt) { if (pktopt == NULL) return; ip6_clearpktopts(pktopt, -1); free(pktopt, M_IP6OPT); } /* * Set IPv6 outgoing packet options based on advanced API. */ int ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto) { struct cmsghdr *cm = 0; if (control == NULL || opt == NULL) return (EINVAL); ip6_initpktopts(opt); if (stickyopt) { int error; /* * If stickyopt is provided, make a local copy of the options * for this particular packet, then override them with ancillary * objects. * XXX: copypktopts() does not copy the cached route to a next * hop (if any). This is not very good in terms of efficiency, * but we can allow this since this option should be rarely * used. */ if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) return (error); } /* * XXX: Currently, we assume all the optional information is stored * in a single mbuf. */ if (control->m_next) return (EINVAL); for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { int error; if (control->m_len < CMSG_LEN(0)) return (EINVAL); cm = mtod(control, struct cmsghdr *); if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) return (EINVAL); if (cm->cmsg_level != IPPROTO_IPV6) continue; error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm), cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto); if (error) return (error); } return (0); } /* * Set a particular packet option, as a sticky option or an ancillary data * item. "len" can be 0 only when it's a sticky option. * We have 4 cases of combination of "sticky" and "cmsg": * "sticky=0, cmsg=0": impossible * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data * "sticky=1, cmsg=0": RFC3542 socket option * "sticky=1, cmsg=1": RFC2292 socket option */ static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, struct ucred *cred, int sticky, int cmsg, int uproto) { int minmtupolicy, preftemp; int error; if (!sticky && !cmsg) { #ifdef DIAGNOSTIC printf("ip6_setpktopt: impossible case\n"); #endif return (EINVAL); } /* * IPV6_2292xxx is for backward compatibility to RFC2292, and should * not be specified in the context of RFC3542.
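The sticky/cmsg matrix above corresponds to two userland idioms: setsockopt(2) installs a sticky option that applies to every subsequent packet, while ancillary data passed to sendmsg(2) overrides it for one packet only. A hedged sketch of both paths for IPV6_TCLASS; the socket `s`, destination `dst` (a sockaddr_in6), and `payload`/`paylen` are assumed to exist already.

#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <string.h>

	int tclass = 0x20;	/* illustrative value */

	/* Sticky: recorded in ip6po_tclass, used for all later packets. */
	setsockopt(s, IPPROTO_IPV6, IPV6_TCLASS, &tclass, sizeof(tclass));

	/* Ancillary: overrides the sticky value for this sendmsg() only. */
	char cbuf[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = payload, .iov_len = paylen };
	struct msghdr msg = {
		.msg_name = &dst, .msg_namelen = sizeof(dst),
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
	cm->cmsg_level = IPPROTO_IPV6;
	cm->cmsg_type = IPV6_TCLASS;
	cm->cmsg_len = CMSG_LEN(sizeof(int));
	tclass = 0;
	memcpy(CMSG_DATA(cm), &tclass, sizeof(int));
	sendmsg(s, &msg, 0);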
Conversely, * RFC3542 types should not be specified in the context of RFC2292. */ if (!cmsg) { switch (optname) { case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292NEXTHOP: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: case IPV6_2292PKTOPTIONS: return (ENOPROTOOPT); } } if (sticky && cmsg) { switch (optname) { case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_NEXTHOP: case IPV6_HOPOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_USE_MIN_MTU: case IPV6_DONTFRAG: case IPV6_TCLASS: case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */ return (ENOPROTOOPT); } } switch (optname) { case IPV6_2292PKTINFO: case IPV6_PKTINFO: { struct ifnet *ifp = NULL; struct in6_pktinfo *pktinfo; if (len != sizeof(struct in6_pktinfo)) return (EINVAL); pktinfo = (struct in6_pktinfo *)buf; /* * An application can clear any sticky IPV6_PKTINFO option by * doing a "regular" setsockopt with ipi6_addr being * in6addr_any and ipi6_ifindex being zero. * [RFC 3542, Section 6] */ if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo && pktinfo->ipi6_ifindex == 0 && IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { ip6_clearpktopts(opt, optname); break; } if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO && sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { return (EINVAL); } if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr)) return (EINVAL); /* validate the interface index if specified. */ if (pktinfo->ipi6_ifindex > V_if_index) return (ENXIO); if (pktinfo->ipi6_ifindex) { ifp = ifnet_byindex(pktinfo->ipi6_ifindex); if (ifp == NULL) return (ENXIO); } if (ifp != NULL && ( ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) return (ENETDOWN); if (ifp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { struct in6_ifaddr *ia; in6_setscope(&pktinfo->ipi6_addr, ifp, NULL); ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr); if (ia == NULL) return (EADDRNOTAVAIL); ifa_free(&ia->ia_ifa); } /* * We store the address anyway, and let in6_selectsrc() * validate the specified address. This is because ipi6_addr * may not have enough information about its scope zone, and * we may need additional information (such as outgoing * interface or the scope zone of a destination address) to * disambiguate the scope. * XXX: the delay of the validation may confuse the * application when it is used as a sticky option. */ if (opt->ip6po_pktinfo == NULL) { opt->ip6po_pktinfo = malloc(sizeof(*pktinfo), M_IP6OPT, M_NOWAIT); if (opt->ip6po_pktinfo == NULL) return (ENOBUFS); } bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); break; } case IPV6_2292HOPLIMIT: case IPV6_HOPLIMIT: { int *hlimp; /* * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT * to simplify the ordering among hoplimit options. 
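Worth restating from the IPV6_PKTINFO case above: RFC 3542 defines no separate "remove" operation for sticky packet info; an application clears it by storing the unspecified address and a zero interface index. A minimal sketch, assuming `s` is an existing IPv6 socket and <netinet/in.h>, <string.h>, <stdio.h> are included:

	struct in6_pktinfo pi;

	/* in6addr_any plus ipi6_ifindex == 0 clears the sticky option. */
	memset(&pi, 0, sizeof(pi));
	if (setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO, &pi, sizeof(pi)) == -1)
		perror("clearing sticky IPV6_PKTINFO");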
*/ if (optname == IPV6_HOPLIMIT && sticky) return (ENOPROTOOPT); if (len != sizeof(int)) return (EINVAL); hlimp = (int *)buf; if (*hlimp < -1 || *hlimp > 255) return (EINVAL); opt->ip6po_hlim = *hlimp; break; } case IPV6_TCLASS: { int tclass; if (len != sizeof(int)) return (EINVAL); tclass = *(int *)buf; if (tclass < -1 || tclass > 255) return (EINVAL); opt->ip6po_tclass = tclass; break; } case IPV6_2292NEXTHOP: case IPV6_NEXTHOP: if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { /* just remove the option */ ip6_clearpktopts(opt, IPV6_NEXTHOP); break; } /* check if cmsg_len is large enough for sa_len */ if (len < sizeof(struct sockaddr) || len < *buf) return (EINVAL); switch (((struct sockaddr *)buf)->sa_family) { case AF_INET6: { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf; int error; if (sa6->sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) || IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) { return (EINVAL); } if ((error = sa6_embedscope(sa6, V_ip6_use_defzone)) != 0) { return (error); } break; } case AF_LINK: /* should eventually be supported */ default: return (EAFNOSUPPORT); } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_NEXTHOP); opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT); if (opt->ip6po_nexthop == NULL) return (ENOBUFS); bcopy(buf, opt->ip6po_nexthop, *buf); break; case IPV6_2292HOPOPTS: case IPV6_HOPOPTS: { struct ip6_hbh *hbh; int hbhlen; /* * XXX: We don't allow a non-privileged user to set ANY HbH * options, since per-option restriction has too much * overhead. */ if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, IPV6_HOPOPTS); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_hbh)) return (EINVAL); hbh = (struct ip6_hbh *)buf; hbhlen = (hbh->ip6h_len + 1) << 3; if (len != hbhlen) return (EINVAL); /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_HOPOPTS); opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_hbh == NULL) return (ENOBUFS); bcopy(hbh, opt->ip6po_hbh, hbhlen); break; } case IPV6_2292DSTOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: { struct ip6_dest *dest, **newdest = NULL; int destlen; if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */ error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, optname); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_dest)) return (EINVAL); dest = (struct ip6_dest *)buf; destlen = (dest->ip6d_len + 1) << 3; if (len != destlen) return (EINVAL); /* * Determine the position at which the destination options header * should be inserted: before or after the routing header. */ switch (optname) { case IPV6_2292DSTOPTS: /* * The old advanced API is ambiguous on this point. * Our approach is to determine the position * according to the existence of a routing header. * Note, however, that this depends on the order of the * extension headers in the ancillary data; the 1st * part of the destination options header must appear * before the routing header in the ancillary data, * too. * RFC3542 solved the ambiguity by introducing * separate ancillary data or option types.
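The dest1/dest2 split that this switch resolves follows the RFC 2460 extension-header order, reproduced here for reference, together with a minimal valid destination-options buffer (8 bytes, padded with a PadN option) of the kind the length check above would accept:

/*
 * Wire order assembled from struct ip6_pktopts:
 *   IPv6 header
 *   hop-by-hop options     ip6po_hbh
 *   destination options 1  ip6po_dest1 (before the routing header)
 *   routing header         ip6po_rthdr
 *   destination options 2  ip6po_dest2 (after the routing header)
 *   upper-layer header
 */
uint8_t dstopts[8] = {
	0,		/* ip6d_nxt: rewritten by the kernel */
	0,		/* ip6d_len: (0 + 1) << 3 == 8 bytes total */
	1, 4,		/* PadN option: type 1, 4 bytes of padding */
	0, 0, 0, 0	/* the padding itself */
};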
*/ if (opt->ip6po_rthdr == NULL) newdest = &opt->ip6po_dest1; else newdest = &opt->ip6po_dest2; break; case IPV6_RTHDRDSTOPTS: newdest = &opt->ip6po_dest1; break; case IPV6_DSTOPTS: newdest = &opt->ip6po_dest2; break; } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, optname); *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT); if (*newdest == NULL) return (ENOBUFS); bcopy(dest, *newdest, destlen); break; } case IPV6_2292RTHDR: case IPV6_RTHDR: { struct ip6_rthdr *rth; int rthlen; if (len == 0) { ip6_clearpktopts(opt, IPV6_RTHDR); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_rthdr)) return (EINVAL); rth = (struct ip6_rthdr *)buf; rthlen = (rth->ip6r_len + 1) << 3; if (len != rthlen) return (EINVAL); switch (rth->ip6r_type) { case IPV6_RTHDR_TYPE_0: if (rth->ip6r_len == 0) /* must contain one addr */ return (EINVAL); if (rth->ip6r_len % 2) /* length must be even */ return (EINVAL); if (rth->ip6r_len / 2 != rth->ip6r_segleft) return (EINVAL); break; default: return (EINVAL); /* not supported */ } /* turn off the previous option */ ip6_clearpktopts(opt, IPV6_RTHDR); opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_rthdr == NULL) return (ENOBUFS); bcopy(rth, opt->ip6po_rthdr, rthlen); break; } case IPV6_USE_MIN_MTU: if (len != sizeof(int)) return (EINVAL); minmtupolicy = *(int *)buf; if (minmtupolicy != IP6PO_MINMTU_MCASTONLY && minmtupolicy != IP6PO_MINMTU_DISABLE && minmtupolicy != IP6PO_MINMTU_ALL) { return (EINVAL); } opt->ip6po_minmtu = minmtupolicy; break; case IPV6_DONTFRAG: if (len != sizeof(int)) return (EINVAL); if (uproto == IPPROTO_TCP || *(int *)buf == 0) { /* * we ignore this option for TCP sockets. * (RFC3542 leaves this case unspecified.) */ opt->ip6po_flags &= ~IP6PO_DONTFRAG; } else opt->ip6po_flags |= IP6PO_DONTFRAG; break; case IPV6_PREFER_TEMPADDR: if (len != sizeof(int)) return (EINVAL); preftemp = *(int *)buf; if (preftemp != IP6PO_TEMPADDR_SYSTEM && preftemp != IP6PO_TEMPADDR_NOTPREFER && preftemp != IP6PO_TEMPADDR_PREFER) { return (EINVAL); } opt->ip6po_prefer_tempaddr = preftemp; break; default: return (ENOPROTOOPT); } /* end of switch */ return (0); } /* * Routine called from ip6_output() to loop back a copy of an IP6 multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be &loif -- easier than replicating that code here. */ void ip6_mloopback(struct ifnet *ifp, const struct mbuf *m) { struct mbuf *copym; struct ip6_hdr *ip6; copym = m_copy(m, 0, M_COPYALL); if (copym == NULL) return; /* * Make sure to deep-copy IPv6 header portion in case the data * is in an mbuf cluster, so that we can safely override the IPv6 * header portion later. */ if (!M_WRITABLE(copym) || copym->m_len < sizeof(struct ip6_hdr)) { copym = m_pullup(copym, sizeof(struct ip6_hdr)); if (copym == NULL) return; } ip6 = mtod(copym, struct ip6_hdr *); /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; copym->m_pkthdr.csum_data = 0xffff; } if_simloop(ifp, copym, AF_INET6, 0); } /* * Chop IPv6 header off from the payload. 
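The type-0 routing-header validation earlier in this case reduces to one size invariant: every embedded IPv6 address occupies two 8-octet units, so ip6r_len must be non-zero and even, and ip6r_segleft must equal ip6r_len / 2 while no hops have been visited. A worked check with hypothetical values:

#include <assert.h>
#include <netinet/ip6.h>

	struct ip6_rthdr0 rh = { .ip6r0_len = 4, .ip6r0_segleft = 2 };
	size_t rthlen = (rh.ip6r0_len + 1) << 3; /* (4 + 1) * 8 = 40 bytes */
	int naddrs = rh.ip6r0_len / 2;           /* 2 embedded addresses   */

	/* The same three checks the kernel applies above. */
	assert(rh.ip6r0_len != 0);
	assert(rh.ip6r0_len % 2 == 0);
	assert(rh.ip6r0_segleft == naddrs);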
*/ static int ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) { struct mbuf *mh; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len > sizeof(*ip6)) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) { m_freem(m); return ENOBUFS; } m_move_pkthdr(mh, m); M_ALIGN(mh, sizeof(*ip6)); m->m_len -= sizeof(*ip6); m->m_data += sizeof(*ip6); mh->m_next = m; m = mh; m->m_len = sizeof(*ip6); bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); } exthdrs->ip6e_ip6 = m; return 0; } /* * Compute IPv6 extension header length. */ int ip6_optlen(struct inpcb *in6p) { int len; if (!in6p->in6p_outputopts) return 0; len = 0; #define elen(x) \ (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) len += elen(in6p->in6p_outputopts->ip6po_hbh); if (in6p->in6p_outputopts->ip6po_rthdr) /* dest1 is valid with rthdr only */ len += elen(in6p->in6p_outputopts->ip6po_dest1); len += elen(in6p->in6p_outputopts->ip6po_rthdr); len += elen(in6p->in6p_outputopts->ip6po_dest2); return len; #undef elen } Index: projects/powernv/netinet6/nd6.c =================================================================== --- projects/powernv/netinet6/nd6.c (revision 290990) +++ projects/powernv/netinet6/nd6.c (revision 290991) @@ -1,2431 +1,2431 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ #define SIN6(s) ((const struct sockaddr_in6 *)(s)) /* timer values */ VNET_DEFINE(int, nd6_prune) = 1; /* walk list every 1 seconds */ VNET_DEFINE(int, nd6_delay) = 5; /* delay first probe time 5 second */ VNET_DEFINE(int, nd6_umaxtries) = 3; /* maximum unicast query */ VNET_DEFINE(int, nd6_mmaxtries) = 3; /* maximum multicast query */ VNET_DEFINE(int, nd6_useloopback) = 1; /* use loopback interface for * local traffic */ VNET_DEFINE(int, nd6_gctimer) = (60 * 60 * 24); /* 1 day: garbage * collection timer */ /* preventing too many loops in ND option parsing */ static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */ VNET_DEFINE(int, nd6_maxnudhint) = 0; /* max # of subsequent upper * layer hints */ static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved * ND entries */ #define V_nd6_maxndopt VNET(nd6_maxndopt) #define V_nd6_maxqueuelen VNET(nd6_maxqueuelen) #ifdef ND6_DEBUG VNET_DEFINE(int, nd6_debug) = 1; #else VNET_DEFINE(int, nd6_debug) = 0; #endif static eventhandler_tag lle_event_eh; /* for debugging? */ #if 0 static int nd6_inuse, nd6_allocated; #endif VNET_DEFINE(struct nd_drhead, nd_defrouter); VNET_DEFINE(struct nd_prhead, nd_prefix); VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL; #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) int (*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int); static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *, struct ifnet *); static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *); static void nd6_slowtimo(void *); static int regen_tmpaddr(struct in6_ifaddr *); static void nd6_free(struct llentry *, int); static void nd6_free_redirect(const struct llentry *); static void nd6_llinfo_timer(void *); static void nd6_llinfo_settimer_locked(struct llentry *, long); static void clear_llinfo_pqueue(struct llentry *); static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static int nd6_resolve_slow(struct ifnet *, struct mbuf *, const struct sockaddr_in6 *, u_char *, uint32_t *); static int nd6_need_cache(struct ifnet *); static VNET_DEFINE(struct callout, nd6_slowtimo_ch); #define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch) VNET_DEFINE(struct callout, nd6_timer_ch); static void nd6_lle_event(void *arg __unused, struct llentry *lle, int evt) { struct rt_addrinfo rtinfo; struct sockaddr_in6 dst; struct sockaddr_dl gw; struct ifnet *ifp; int type; LLE_WLOCK_ASSERT(lle); if (lltable_get_af(lle->lle_tbl) != AF_INET6) return; switch (evt) { case LLENTRY_RESOLVED: type = RTM_ADD; KASSERT(lle->la_flags & LLE_VALID, ("%s: %p resolved but not valid?", __func__, lle)); break; case LLENTRY_EXPIRED: type = RTM_DELETE; break; default: return; } ifp = lltable_get_ifp(lle->lle_tbl); bzero(&dst, sizeof(dst)); bzero(&gw, sizeof(gw)); bzero(&rtinfo, sizeof(rtinfo)); lltable_fill_sa_entry(lle, (struct sockaddr *)&dst); dst.sin6_scope_id = 
in6_getscopezone(ifp, in6_addrscope(&dst.sin6_addr)); gw.sdl_len = sizeof(struct sockaddr_dl); gw.sdl_family = AF_LINK; gw.sdl_alen = ifp->if_addrlen; gw.sdl_index = ifp->if_index; gw.sdl_type = ifp->if_type; if (evt == LLENTRY_RESOLVED) bcopy(&lle->ll_addr, gw.sdl_data, ifp->if_addrlen); rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst; rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw; rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY; rt_missmsg_fib(type, &rtinfo, RTF_HOST | RTF_LLDATA | ( type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB); } void nd6_init(void) { LIST_INIT(&V_nd_prefix); /* initialization of the default router list */ TAILQ_INIT(&V_nd_defrouter); /* start timer */ callout_init(&V_nd6_slowtimo_ch, 0); callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); nd6_dad_init(); if (IS_DEFAULT_VNET(curvnet)) lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event, NULL, EVENTHANDLER_PRI_ANY); } #ifdef VIMAGE void nd6_destroy() { callout_drain(&V_nd6_slowtimo_ch); callout_drain(&V_nd6_timer_ch); if (IS_DEFAULT_VNET(curvnet)) EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); } #endif struct nd_ifinfo * nd6_ifattach(struct ifnet *ifp) { struct nd_ifinfo *nd; nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK|M_ZERO); nd->initialized = 1; nd->chlim = IPV6_DEFHLIM; nd->basereachable = REACHABLE_TIME; nd->reachable = ND_COMPUTE_RTIME(nd->basereachable); nd->retrans = RETRANS_TIMER; nd->flags = ND6_IFF_PERFORMNUD; /* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL. * XXXHRS: Clear ND6_IFF_AUTO_LINKLOCAL on an IFT_BRIDGE interface by * default regardless of the V_ip6_auto_linklocal configuration to * give a reasonable default behavior. */ if ((V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) || (ifp->if_flags & IFF_LOOPBACK)) nd->flags |= ND6_IFF_AUTO_LINKLOCAL; /* * A loopback interface does not need to accept RTADV. * XXXHRS: Clear ND6_IFF_ACCEPT_RTADV on an IFT_BRIDGE interface by * default regardless of the V_ip6_accept_rtadv configuration to * prevent the interface from accepting RA messages arrived * on one of the member interfaces with ND6_IFF_ACCEPT_RTADV. */ if (V_ip6_accept_rtadv && !(ifp->if_flags & IFF_LOOPBACK) && (ifp->if_type != IFT_BRIDGE)) nd->flags |= ND6_IFF_ACCEPT_RTADV; if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK)) nd->flags |= ND6_IFF_NO_RADR; /* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */ nd6_setmtu0(ifp, nd); return nd; } void nd6_ifdetach(struct nd_ifinfo *nd) { free(nd, M_IP6NDP); } /* * Reset ND level link MTU. This function is called when the physical MTU * changes, which means we might have to adjust the ND level MTU. */ void nd6_setmtu(struct ifnet *ifp) { nd6_setmtu0(ifp, ND_IFINFO(ifp)); } /* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */ void nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi) { u_int32_t omaxmtu; omaxmtu = ndi->maxmtu; switch (ifp->if_type) { case IFT_ARCNET: ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */ break; case IFT_FDDI: ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */ break; case IFT_ISO88025: ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu); break; default: ndi->maxmtu = ifp->if_mtu; break; } /* * Decreasing the interface MTU under IPV6 minimum MTU may cause * undesirable situation. We thus notify the operator of the change * explicitly. The check for omaxmtu is necessary to restrict the * log to the case of changing the MTU, not initializing it. 
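nd6_setmtu0() above amounts to clamping the ND-level MTU to the smaller of the media-specific ceiling and the configured if_mtu, and the warning fires only when a change drags an MTU that used to satisfy the IPv6 minimum below IPV6_MMTU (1280). A condensed sketch of that rule; the helper name is hypothetical:

static uint32_t
nd6_clamp_mtu(uint32_t omaxmtu, uint32_t media_max, uint32_t if_mtu)
{
	uint32_t maxmtu = (media_max < if_mtu) ? media_max : if_mtu;

	/* Warn only when the change crosses the IPv6 minimum MTU. */
	if (omaxmtu >= IPV6_MMTU && maxmtu < IPV6_MMTU)
		printf("new link MTU (%u) is too small for IPv6\n", maxmtu);
	return (maxmtu);
}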
*/ if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) { log(LOG_NOTICE, "nd6_setmtu0: " "new link MTU on %s (%lu) is too small for IPv6\n", if_name(ifp), (unsigned long)ndi->maxmtu); } if (ndi->maxmtu > V_in6_maxmtu) in6_setmaxmtu(); /* check all interfaces just in case */ } void nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts) { bzero(ndopts, sizeof(*ndopts)); ndopts->nd_opts_search = (struct nd_opt_hdr *)opt; ndopts->nd_opts_last = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len); if (icmp6len == 0) { ndopts->nd_opts_done = 1; ndopts->nd_opts_search = NULL; } } /* * Take one ND option. */ struct nd_opt_hdr * nd6_option(union nd_opts *ndopts) { struct nd_opt_hdr *nd_opt; int olen; KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__)); KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts", __func__)); if (ndopts->nd_opts_search == NULL) return NULL; if (ndopts->nd_opts_done) return NULL; nd_opt = ndopts->nd_opts_search; /* make sure nd_opt_len is inside the buffer */ if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) { bzero(ndopts, sizeof(*ndopts)); return NULL; } olen = nd_opt->nd_opt_len << 3; if (olen == 0) { /* * Message validation requires that all included * options have a length that is greater than zero. */ bzero(ndopts, sizeof(*ndopts)); return NULL; } ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen); if (ndopts->nd_opts_search > ndopts->nd_opts_last) { /* option overruns the end of buffer, invalid */ bzero(ndopts, sizeof(*ndopts)); return NULL; } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) { /* reached the end of options chain */ ndopts->nd_opts_done = 1; ndopts->nd_opts_search = NULL; } return nd_opt; } /* * Parse multiple ND options. * This function is much easier to use for ND routines that do not need * multiple options of the same type. */ int nd6_options(union nd_opts *ndopts) { struct nd_opt_hdr *nd_opt; int i = 0; KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__)); KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts", __func__)); if (ndopts->nd_opts_search == NULL) return 0; while (1) { nd_opt = nd6_option(ndopts); if (nd_opt == NULL && ndopts->nd_opts_last == NULL) { /* * Message validation requires that all included * options have a length that is greater than zero. */ ICMP6STAT_INC(icp6s_nd_badopt); bzero(ndopts, sizeof(*ndopts)); return -1; } if (nd_opt == NULL) goto skip1; switch (nd_opt->nd_opt_type) { case ND_OPT_SOURCE_LINKADDR: case ND_OPT_TARGET_LINKADDR: case ND_OPT_MTU: case ND_OPT_REDIRECTED_HEADER: case ND_OPT_NONCE: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { nd6log((LOG_INFO, "duplicated ND6 option found (type=%d)\n", nd_opt->nd_opt_type)); /* XXX bark? */ } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } break; case ND_OPT_PREFIX_INFORMATION: if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } ndopts->nd_opts_pi_end = (struct nd_opt_prefix_info *)nd_opt; break; /* What about ND_OPT_ROUTE_INFO? RFC 4191 */ case ND_OPT_RDNSS: /* RFC 6106 */ case ND_OPT_DNSSL: /* RFC 6106 */ /* * Silently ignore options we know and do not care about * in the kernel. */ break; default: /* * Unknown options must be silently ignored, * to accommodate future extension to the protocol.
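nd6_option() and nd6_options() above implement the RFC 4861 option walk: advance by (nd_opt_len << 3) bytes per option and abandon the whole message on a zero length or an overrun. The same loop over a flat buffer, as a hypothetical standalone helper:

#include <stddef.h>
#include <stdint.h>

/*
 * Walk ND options in buf[0..len); 0 on a clean parse, -1 if malformed.
 * Layout per option: type (1 byte), length in 8-octet units (1 byte),
 * then payload.
 */
static int
walk_nd_options(const uint8_t *buf, size_t len)
{
	size_t off = 0;

	while (off + 2 <= len) {
		size_t olen = (size_t)buf[off + 1] << 3;

		if (olen == 0 || olen > len - off)
			return (-1);	/* zero length or overrun */
		/* buf[off] holds the type; a real parser dispatches here. */
		off += olen;
	}
	return (off == len ? 0 : -1);
}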
*/ nd6log((LOG_DEBUG, "nd6_options: unsupported option %d - " "option ignored\n", nd_opt->nd_opt_type)); } skip1: i++; if (i > V_nd6_maxndopt) { ICMP6STAT_INC(icp6s_nd_toomanyopt); nd6log((LOG_INFO, "too many loops in nd opt\n")); break; } if (ndopts->nd_opts_done) break; } return 0; } /* * ND6 timer routine to handle ND6 entries */ static void nd6_llinfo_settimer_locked(struct llentry *ln, long tick) { int canceled; LLE_WLOCK_ASSERT(ln); if (tick < 0) { ln->la_expire = 0; ln->ln_ntick = 0; canceled = callout_stop(&ln->lle_timer); } else { ln->la_expire = time_uptime + tick / hz; LLE_ADDREF(ln); if (tick > INT_MAX) { ln->ln_ntick = tick - INT_MAX; canceled = callout_reset(&ln->lle_timer, INT_MAX, nd6_llinfo_timer, ln); } else { ln->ln_ntick = 0; canceled = callout_reset(&ln->lle_timer, tick, nd6_llinfo_timer, ln); } } if (canceled > 0) LLE_REMREF(ln); } /* * Gets source address of the first packet in hold queue * and stores it in @src. * Returns pointer to @src (if hold queue is not empty) or NULL. * * Set noinline to be dtrace-friendly */ static __noinline struct in6_addr * nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src) { struct ip6_hdr hdr; struct mbuf *m; if (ln->la_hold == NULL) return (NULL); /* * assume every packet in la_hold has the same IP header */ m = ln->la_hold; if (sizeof(hdr) > m->m_len) return (NULL); m_copydata(m, 0, sizeof(hdr), (caddr_t)&hdr); *src = hdr.ip6_src; return (src); } /* * Switch @lle state to new state optionally arming timers. * * Set noinline to be dtrace-friendly */ __noinline void nd6_llinfo_setstate(struct llentry *lle, int newstate) { struct ifnet *ifp; long delay; delay = 0; switch (newstate) { case ND6_LLINFO_INCOMPLETE: ifp = lle->lle_tbl->llt_ifp; delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000; break; case ND6_LLINFO_REACHABLE: if (!ND6_LLINFO_PERMANENT(lle)) { ifp = lle->lle_tbl->llt_ifp; delay = (long)ND_IFINFO(ifp)->reachable * hz; } break; case ND6_LLINFO_STALE: delay = (long)V_nd6_gctimer * hz; break; case ND6_LLINFO_DELAY: lle->la_asked = 0; delay = (long)V_nd6_delay * hz; break; } if (delay > 0) nd6_llinfo_settimer_locked(lle, delay); lle->ln_state = newstate; } /* * Timer-dependent part of nd state machine. * * Set noinline to be dtrace-friendly */ static __noinline void nd6_llinfo_timer(void *arg) { struct llentry *ln; struct in6_addr *dst, *pdst, *psrc, src; struct ifnet *ifp; struct nd_ifinfo *ndi = NULL; int send_ns; KASSERT(arg != NULL, ("%s: arg NULL", __func__)); ln = (struct llentry *)arg; LLE_WLOCK(ln); if (callout_pending(&ln->lle_timer)) { /* * We are a bit odd here in the treatment of * active/pending. If the pending bit is set, it got * rescheduled before I ran. The active * bit we ignore, since if it was stopped * in ll_tablefree() and was currently running * it would have returned 0 so the code would * not have deleted it since the callout could * not be stopped so we want to go through * with the delete here now. If the callout * was restarted, the pending bit will be back on and * we just want to bail since the callout_reset would * return 1 and our reference would have been removed * by nd6_llinfo_settimer_locked above since canceled * would have been 1.
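Because callout_reset(9) takes an int tick count, nd6_llinfo_settimer_locked() parks any excess in ln_ntick and arms at most INT_MAX ticks at a time; nd6_llinfo_timer() then re-arms until the remainder drains. The chunking in isolation, as a hypothetical condensation with the reference counting elided:

static void
arm_long_timer(struct llentry *ln, long ticks)
{
	if (ticks > INT_MAX) {
		/* Remainder for the handler to re-arm on expiry. */
		ln->ln_ntick = ticks - INT_MAX;
		callout_reset(&ln->lle_timer, INT_MAX, nd6_llinfo_timer, ln);
	} else {
		ln->ln_ntick = 0;
		callout_reset(&ln->lle_timer, (int)ticks,
		    nd6_llinfo_timer, ln);
	}
}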
*/ LLE_WUNLOCK(ln); return; } ifp = ln->lle_tbl->llt_ifp; CURVNET_SET(ifp->if_vnet); ndi = ND_IFINFO(ifp); send_ns = 0; dst = &ln->r_l3addr.addr6; pdst = dst; if (ln->ln_ntick > 0) { if (ln->ln_ntick > INT_MAX) { ln->ln_ntick -= INT_MAX; nd6_llinfo_settimer_locked(ln, INT_MAX); } else { long ntick = ln->ln_ntick; ln->ln_ntick = 0; /* arm the remaining ticks, not the just-cleared counter */ nd6_llinfo_settimer_locked(ln, ntick); } goto done; } if (ln->la_flags & LLE_STATIC) { goto done; } if (ln->la_flags & LLE_DELETED) { nd6_free(ln, 0); ln = NULL; goto done; } switch (ln->ln_state) { case ND6_LLINFO_INCOMPLETE: if (ln->la_asked < V_nd6_mmaxtries) { ln->la_asked++; send_ns = 1; /* Send NS to multicast address */ pdst = NULL; } else { struct mbuf *m = ln->la_hold; if (m) { struct mbuf *m0; /* * assuming every packet in la_hold has the * same IP header. Send error after unlock. */ m0 = m->m_nextpkt; m->m_nextpkt = NULL; ln->la_hold = m0; clear_llinfo_pqueue(ln); } EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_TIMEDOUT); nd6_free(ln, 0); ln = NULL; if (m != NULL) icmp6_error2(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0, ifp); } break; case ND6_LLINFO_REACHABLE: if (!ND6_LLINFO_PERMANENT(ln)) nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); break; case ND6_LLINFO_STALE: /* Garbage Collection (RFC 2461 5.3) */ if (!ND6_LLINFO_PERMANENT(ln)) { EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); nd6_free(ln, 1); ln = NULL; } break; case ND6_LLINFO_DELAY: if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { /* We need NUD */ ln->la_asked = 1; nd6_llinfo_setstate(ln, ND6_LLINFO_PROBE); send_ns = 1; } else nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); /* XXX */ break; case ND6_LLINFO_PROBE: if (ln->la_asked < V_nd6_umaxtries) { ln->la_asked++; send_ns = 1; } else { EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); nd6_free(ln, 0); ln = NULL; } break; default: panic("%s: paths in a dark night can be confusing: %d", __func__, ln->ln_state); } done: if (send_ns != 0) { nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); psrc = nd6_llinfo_get_holdsrc(ln, &src); LLE_FREE_LOCKED(ln); ln = NULL; nd6_ns_output(ifp, psrc, pdst, dst, NULL); } if (ln != NULL) LLE_FREE_LOCKED(ln); CURVNET_RESTORE(); } /* * ND6 timer routine to expire default route list and prefix list */ void nd6_timer(void *arg) { CURVNET_SET((struct vnet *) arg); struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; struct in6_ifaddr *ia6, *nia6; callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz, nd6_timer, curvnet); /* expire default router list */ TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (dr->expire && dr->expire < time_uptime) defrtrlist_del(dr); } /* * expire interface addresses. * in the past the loop was inside prefix expiry processing. * However, from a stricter spec-conformance standpoint, we should * rather separate address lifetimes and prefix lifetimes. * * XXXRW: in6_ifaddrhead locking. */ addrloop: TAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) { /* check address lifetime */ if (IFA6_IS_INVALID(ia6)) { int regen = 0; /* * If the expiring address is temporary, try * regenerating a new one. This would be useful when * we suspended a laptop PC, then turned it on after a * period that could invalidate all temporary * addresses. Although we may have to restart the * loop (see below), it must be after purging the * address. Otherwise, we'd see an infinite loop of * regeneration.
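Taken together, nd6_llinfo_setstate() and nd6_llinfo_timer() above implement the RFC 4861 NUD state machine. The timers and expiry actions, using the defaults declared earlier in this file, summarize as:

state        timer armed                 action on expiry
INCOMPLETE   retrans (per-ifp, ms)       resend multicast NS, up to
                                         nd6_mmaxtries, then ICMP error
REACHABLE    reachable (per-ifp, s)      demote to STALE
STALE        nd6_gctimer (1 day)         entry garbage-collected
DELAY        nd6_delay (5 s)             promote to PROBE, send unicast NS
PROBE        retrans (per-ifp, ms)       resend NS, up to nd6_umaxtries,
                                         then free the entry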
*/ if (V_ip6_use_tempaddr && (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { if (regen_tmpaddr(ia6) == 0) regen = 1; } in6_purgeaddr(&ia6->ia_ifa); if (regen) goto addrloop; /* XXX: see below */ } else if (IFA6_IS_DEPRECATED(ia6)) { int oldflags = ia6->ia6_flags; ia6->ia6_flags |= IN6_IFF_DEPRECATED; /* * If a temporary address has just become deprecated, * regenerate a new one if possible. */ if (V_ip6_use_tempaddr && (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (oldflags & IN6_IFF_DEPRECATED) == 0) { if (regen_tmpaddr(ia6) == 0) { /* * A new temporary address is * generated. * XXX: this means the address chain * has changed while we are still in * the loop. Although the change * would not cause disaster (because * it's not a deletion, but an * addition,) we'd rather restart the * loop just for safety. Or does this * significantly reduce performance?? */ goto addrloop; } } } else if ((ia6->ia6_flags & IN6_IFF_TENTATIVE) != 0) { /* * Schedule DAD for a tentative address. This happens * if the interface was down or not running * when the address was configured. */ int delay; delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); nd6_dad_start((struct ifaddr *)ia6, delay); } else { /* * Check status of the interface. If it is down, * mark the address as tentative for future DAD. */ if ((ia6->ia_ifp->if_flags & IFF_UP) == 0 || (ia6->ia_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (ND_IFINFO(ia6->ia_ifp)->flags & ND6_IFF_IFDISABLED) != 0) { ia6->ia6_flags &= ~IN6_IFF_DUPLICATED; ia6->ia6_flags |= IN6_IFF_TENTATIVE; } /* * A new RA might have made a deprecated address * preferred. */ ia6->ia6_flags &= ~IN6_IFF_DEPRECATED; } } /* expire prefix list */ LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { /* * check prefix lifetime. * since pltime is just for autoconf, pltime processing for * prefix is not necessary. */ if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME && time_uptime - pr->ndpr_lastupdate > pr->ndpr_vltime) { /* * address expiration and prefix expiration are * separate. NEVER perform in6_purgeaddr here. */ prelist_remove(pr); } } CURVNET_RESTORE(); } /* * ia6 - deprecated/invalidated temporary address */ static int regen_tmpaddr(struct in6_ifaddr *ia6) { struct ifaddr *ifa; struct ifnet *ifp; struct in6_ifaddr *public_ifa6 = NULL; ifp = ia6->ia_ifa.ifa_ifp; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in6_ifaddr *it6; if (ifa->ifa_addr->sa_family != AF_INET6) continue; it6 = (struct in6_ifaddr *)ifa; /* ignore no autoconf addresses. */ if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; /* ignore autoconf addresses with different prefixes. */ if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr) continue; /* * Now we are looking at an autoconf address with the same * prefix as ours. If the address is temporary and is still * preferred, do not create another one. It would be rare, but * could happen, for example, when we resume a laptop PC after * a long period. */ if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && !IFA6_IS_DEPRECATED(it6)) { public_ifa6 = NULL; break; } /* * This is a public autoconf address that has the same prefix * as ours. If it is preferred, keep it. We can't break the * loop here, because there may be a still-preferred temporary * address with the prefix. 
*/ if (!IFA6_IS_DEPRECATED(it6)) public_ifa6 = it6; } if (public_ifa6 != NULL) ifa_ref(&public_ifa6->ia_ifa); IF_ADDR_RUNLOCK(ifp); if (public_ifa6 != NULL) { int e; if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) { ifa_free(&public_ifa6->ia_ifa); log(LOG_NOTICE, "regen_tmpaddr: failed to create a new" " tmp addr,errno=%d\n", e); return (-1); } ifa_free(&public_ifa6->ia_ifa); return (0); } return (-1); } /* * Nuke neighbor cache/prefix/default router management table, right before * ifp goes away. */ void nd6_purge(struct ifnet *ifp) { struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; /* * Nuke default router list entries toward ifp. * We defer removal of default router list entries that is installed * in the routing table, in order to keep additional side effects as * small as possible. */ TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (dr->installed) continue; if (dr->ifp == ifp) defrtrlist_del(dr); } TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (!dr->installed) continue; if (dr->ifp == ifp) defrtrlist_del(dr); } /* Nuke prefix list entries toward ifp */ LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { if (pr->ndpr_ifp == ifp) { /* * Because if_detach() does *not* release prefixes * while purging addresses the reference count will * still be above zero. We therefore reset it to * make sure that the prefix really gets purged. */ pr->ndpr_refcnt = 0; /* * Previously, pr->ndpr_addr is removed as well, * but I strongly believe we don't have to do it. * nd6_purge() is only called from in6_ifdetach(), * which removes all the associated interface addresses * by itself. * (jinmei@kame.net 20010129) */ prelist_remove(pr); } } /* cancel default outgoing interface setting */ if (V_nd6_defifindex == ifp->if_index) nd6_setdefaultiface(0); if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* Refresh default router list. */ defrouter_select(); } /* XXXXX * We do not nuke the neighbor cache entries here any more * because the neighbor cache is kept in if_afdata[AF_INET6]. * nd6_purge() is invoked by in6_ifdetach() which is called * from if_detach() where everything gets purged. So let * in6_domifdetach() do the actual L2 table purging work. */ } /* * the caller acquires and releases the lock on the lltbls * Returns the llentry locked */ struct llentry * nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp) { struct sockaddr_in6 sin6; struct llentry *ln; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr6; IF_AFDATA_LOCK_ASSERT(ifp); ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6); return (ln); } struct llentry * nd6_alloc(const struct in6_addr *addr6, int flags, struct ifnet *ifp) { struct sockaddr_in6 sin6; struct llentry *ln; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr6; ln = lltable_alloc_entry(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6); if (ln != NULL) ln->ln_state = ND6_LLINFO_NOSTATE; return (ln); } /* * Test whether a given IPv6 address is a neighbor or not, ignoring * the actual neighbor cache. The neighbor cache is ignored in order * to not reenter the routing code from within itself. */ static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) { struct nd_prefix *pr; struct ifaddr *dstaddr; /* * A link-local address is always a neighbor. * XXX: a link does not necessarily specify a single interface. 
*/ if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) { struct sockaddr_in6 sin6_copy; u_int32_t zone; /* * We need sin6_copy since sa6_recoverscope() may modify the * content (XXX). */ sin6_copy = *addr; if (sa6_recoverscope(&sin6_copy)) return (0); /* XXX: should be impossible */ if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone)) return (0); if (sin6_copy.sin6_scope_id == zone) return (1); else return (0); } /* * If the address matches one of our addresses, * it should be a neighbor. * If the address matches one of our on-link prefixes, it should be a * neighbor. */ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { if (pr->ndpr_ifp != ifp) continue; if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) { struct rtentry *rt; /* Always use the default FIB here. */ rt = in6_rtalloc1((struct sockaddr *)&pr->ndpr_prefix, 0, 0, RT_DEFAULT_FIB); if (rt == NULL) continue; /* * This is the case where multiple interfaces * have the same prefix, but only one is installed * into the routing table and that prefix entry * is not the one being examined here. In the case * where RADIX_MPATH is enabled, multiple route * entries (of the same rt_key value) will be * installed because the interface addresses all * differ. */ if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr)) { RTFREE_LOCKED(rt); continue; } RTFREE_LOCKED(rt); } if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, &addr->sin6_addr, &pr->ndpr_mask)) return (1); } /* * If the address is assigned on the node of the other side of * a p2p interface, the address should be a neighbor. */ dstaddr = ifa_ifwithdstaddr((const struct sockaddr *)addr, RT_ALL_FIBS); if (dstaddr != NULL) { if (dstaddr->ifa_ifp == ifp) { ifa_free(dstaddr); return (1); } ifa_free(dstaddr); } /* * If the default router list is empty, all addresses are regarded * as on-link, and thus, as a neighbor. */ if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && TAILQ_EMPTY(&V_nd_defrouter) && V_nd6_defifindex == ifp->if_index) { return (1); } return (0); } /* * Detect if a given IPv6 address identifies a neighbor on a given link. * XXX: should take care of the destination of a p2p link? */ int nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) { struct llentry *lle; int rc = 0; IF_AFDATA_UNLOCK_ASSERT(ifp); if (nd6_is_new_addr_neighbor(addr, ifp)) return (1); /* * Even if the address matches none of our addresses, it might be * in the neighbor cache. */ IF_AFDATA_RLOCK(ifp); if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) { LLE_RUNLOCK(lle); rc = 1; } IF_AFDATA_RUNLOCK(ifp); return (rc); } /* * Free an nd6 llinfo entry. * Since the function would cause significant changes in the kernel, DO NOT * make it global, unless you have a strong reason for the change, and are sure * that the change is safe. * * Set noinline to be dtrace-friendly */ static __noinline void nd6_free(struct llentry *ln, int gc) { struct nd_defrouter *dr; struct ifnet *ifp; LLE_WLOCK_ASSERT(ln); /* * we used to have pfctlinput(PRC_HOSTDEAD) here. * even though it is not harmful, it was not really necessary. */ /* cancel timer */ nd6_llinfo_settimer_locked(ln, -1); ifp = ln->lle_tbl->llt_ifp; if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { dr = defrouter_lookup(&ln->r_l3addr.addr6, ifp); if (dr != NULL && dr->expire && ln->ln_state == ND6_LLINFO_STALE && gc) { /* * If the reason for the deletion is just garbage * collection, and the neighbor is an active default * router, do not delete it. Instead, reset the GC * timer using the router's lifetime. 
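nd6_is_new_addr_neighbor() above decides on-link status without touching the neighbor cache, in a fixed order; nd6_is_addr_neighbor() then adds the cache lookup as a final step. A condensed sketch of that order follows; all helper names are hypothetical stand-ins for the inline logic shown above.

static int
is_on_link(const struct sockaddr_in6 *dst, struct ifnet *ifp)
{
	/* 1. Link-local: a neighbor when the scope zone matches ifp. */
	if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr))
		return (zone_matches(dst, ifp));
	/* 2. Covered by one of our on-link prefixes on this interface. */
	if (matches_onlink_prefix(dst, ifp))
		return (1);
	/* 3. The remote address of a point-to-point interface. */
	if (is_p2p_peer(dst, ifp))
		return (1);
	/* 4. Empty default-router list: everything is treated as on-link
	 *    when this interface is the default (cf. RFC 4861, 5.2). */
	return (accepts_rtadv(ifp) && no_default_routers() &&
	    is_default_iface(ifp));
}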
* Simply deleting the entry would affect default * router selection, which is not necessarily a good * thing, especially when we're using router preference * values. * XXX: the check for ln_state would be redundant, * but we intentionally keep it just in case. */ if (dr->expire > time_uptime) nd6_llinfo_settimer_locked(ln, (dr->expire - time_uptime) * hz); else nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); LLE_REMREF(ln); LLE_WUNLOCK(ln); return; } if (dr) { /* * Unreachability of a router might affect the default * router selection and on-link detection of advertised * prefixes. */ /* * Temporarily fake the state to choose a new default * router and to perform on-link determination of * prefixes correctly. * Below the state will be set correctly, * or the entry itself will be deleted. */ ln->ln_state = ND6_LLINFO_INCOMPLETE; } if (ln->ln_router || dr) { /* * We need to unlock to avoid a LOR with rt6_flush() with the * rnh and for the calls to pfxlist_onlink_check() and * defrouter_select() in the block further down for calls * into nd6_lookup(). We still hold a ref. */ LLE_WUNLOCK(ln); /* * rt6_flush must be called whether or not the neighbor * is in the Default Router List. * See a corresponding comment in nd6_na_input(). */ rt6_flush(&ln->r_l3addr.addr6, ifp); } if (dr) { /* * Since defrouter_select() does not affect the * on-link determination and MIP6 needs the check * before the default router selection, we perform * the check now. */ pfxlist_onlink_check(); /* * Refresh default router list. */ defrouter_select(); } /* * If this entry was added by an on-link redirect, remove the * corresponding host route. */ if (ln->la_flags & LLE_REDIRECT) nd6_free_redirect(ln); if (ln->ln_router || dr) LLE_WLOCK(ln); } /* * Safe to unlock. We still hold an extra reference and will not * free(9) in llentry_free() if someone else holds one as well. */ LLE_WUNLOCK(ln); IF_AFDATA_LOCK(ifp); LLE_WLOCK(ln); /* Guard against race with other llentry_free(). */ if (ln->la_flags & LLE_LINKED) { /* Remove callout reference */ LLE_REMREF(ln); lltable_unlink_entry(ln->lle_tbl, ln); } IF_AFDATA_UNLOCK(ifp); llentry_free(ln); } /* * Remove the rtentry for the given llentry, * both of which were installed by a redirect. */ static void nd6_free_redirect(const struct llentry *ln) { int fibnum; struct rtentry *rt; struct radix_node_head *rnh; struct sockaddr_in6 sin6; lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6); for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { rnh = rt_tables_get_rnh(fibnum, AF_INET6); if (rnh == NULL) continue; RADIX_NODE_HEAD_LOCK(rnh); rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, RTF_RNH_LOCKED, fibnum); if (rt) { if (rt->rt_flags == (RTF_UP | RTF_HOST | RTF_DYNAMIC)) rt_expunge(rnh, rt); RTFREE_LOCKED(rt); } RADIX_NODE_HEAD_UNLOCK(rnh); } } /* * Rejuvenate this function for processing related to routing * operations. */ void nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) { struct sockaddr_in6 *gateway; struct nd_defrouter *dr; struct ifnet *ifp; gateway = (struct sockaddr_in6 *)rt->rt_gateway; ifp = rt->rt_ifp; switch (req) { case RTM_ADD: break; case RTM_DELETE: if (!ifp) return; /* * Only indirect routes are interesting.
*/ if ((rt->rt_flags & RTF_GATEWAY) == 0) return; /* * check for default route */ if (IN6_ARE_ADDR_EQUAL(&in6addr_any, &SIN6(rt_key(rt))->sin6_addr)) { dr = defrouter_lookup(&gateway->sin6_addr, ifp); if (dr != NULL) dr->installed = 0; } break; } } int nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) { struct in6_ndireq *ndi = (struct in6_ndireq *)data; struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data; struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; int error = 0; if (ifp->if_afdata[AF_INET6] == NULL) return (EPFNOSUPPORT); switch (cmd) { case OSIOCGIFINFO_IN6: #define ND ndi->ndi /* XXX: old ndp(8) assumes a positive value for linkmtu. */ bzero(&ND, sizeof(ND)); ND.linkmtu = IN6_LINKMTU(ifp); ND.maxmtu = ND_IFINFO(ifp)->maxmtu; ND.basereachable = ND_IFINFO(ifp)->basereachable; ND.reachable = ND_IFINFO(ifp)->reachable; ND.retrans = ND_IFINFO(ifp)->retrans; ND.flags = ND_IFINFO(ifp)->flags; ND.recalctm = ND_IFINFO(ifp)->recalctm; ND.chlim = ND_IFINFO(ifp)->chlim; break; case SIOCGIFINFO_IN6: ND = *ND_IFINFO(ifp); break; case SIOCSIFINFO_IN6: /* * used to change host variables from userland. * intended for use on a router to reflect RA configurations. */ /* 0 means 'unspecified' */ if (ND.linkmtu != 0) { if (ND.linkmtu < IPV6_MMTU || ND.linkmtu > IN6_LINKMTU(ifp)) { error = EINVAL; break; } ND_IFINFO(ifp)->linkmtu = ND.linkmtu; } if (ND.basereachable != 0) { int obasereachable = ND_IFINFO(ifp)->basereachable; ND_IFINFO(ifp)->basereachable = ND.basereachable; if (ND.basereachable != obasereachable) ND_IFINFO(ifp)->reachable = ND_COMPUTE_RTIME(ND.basereachable); } if (ND.retrans != 0) ND_IFINFO(ifp)->retrans = ND.retrans; if (ND.chlim != 0) ND_IFINFO(ifp)->chlim = ND.chlim; /* FALLTHROUGH */ case SIOCSIFINFO_FLAGS: { struct ifaddr *ifa; struct in6_ifaddr *ia; if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && !(ND.flags & ND6_IFF_IFDISABLED)) { /* ifdisabled 1->0 transition */ /* * If the interface is marked as ND6_IFF_IFDISABLED and * has a link-local address with IN6_IFF_DUPLICATED, * do not clear ND6_IFF_IFDISABLED. * See RFC 4862, Section 5.4.5. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if ((ia->ia6_flags & IN6_IFF_DUPLICATED) && IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; } IF_ADDR_RUNLOCK(ifp); if (ifa != NULL) { /* LLA is duplicated. */ ND.flags |= ND6_IFF_IFDISABLED; log(LOG_ERR, "Cannot enable an interface" " with a link-local address marked" " duplicate.\n"); } else { ND_IFINFO(ifp)->flags &= ~ND6_IFF_IFDISABLED; if (ifp->if_flags & IFF_UP) in6_if_up(ifp); } } else if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && (ND.flags & ND6_IFF_IFDISABLED)) { /* ifdisabled 0->1 transition */ /* Mark all IPv6 addresses as tentative.
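The ifdisabled transitions handled here are what ndp(8) drives through SIOCSIFINFO_FLAGS; as the code shows, the kernel vetoes the 1->0 transition while a duplicated link-local address is present. A userland sketch of reading and rewriting the flags; `s` is assumed to be an AF_INET6 datagram socket, `ifname` a valid interface name, and error checks are elided:

#include <sys/ioctl.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet6/nd6.h>
#include <string.h>

	struct in6_ndireq nd;

	memset(&nd, 0, sizeof(nd));
	strlcpy(nd.ifname, ifname, sizeof(nd.ifname));
	ioctl(s, SIOCGIFINFO_IN6, &nd);		/* fetch current nd_ifinfo */
	nd.ndi.flags &= ~ND6_IFF_IFDISABLED;	/* request 1->0 transition */
	ioctl(s, SIOCSIFINFO_FLAGS, &nd);	/* may be refused, see above */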
*/ ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; if (V_ip6_dad_count > 0 && (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; ia->ia6_flags |= IN6_IFF_TENTATIVE; } IF_ADDR_RUNLOCK(ifp); } } if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) { if (!(ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL)) { /* auto_linklocal 0->1 transition */ /* If no link-local address on ifp, configure */ ND_IFINFO(ifp)->flags |= ND6_IFF_AUTO_LINKLOCAL; in6_ifattach(ifp, NULL); } else if (!(ND.flags & ND6_IFF_IFDISABLED) && ifp->if_flags & IFF_UP) { /* * When the IF already has * ND6_IFF_AUTO_LINKLOCAL, no link-local * address is assigned, and IFF_UP, try to * assign one. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; } IF_ADDR_RUNLOCK(ifp); if (ifa != NULL) /* No LLA is configured. */ in6_ifattach(ifp, NULL); } } } ND_IFINFO(ifp)->flags = ND.flags; break; #undef ND case SIOCSNDFLUSH_IN6: /* XXX: the ioctl name is confusing... */ /* sync kernel routing table with the default router list */ defrouter_reset(); defrouter_select(); break; case SIOCSPFXFLUSH_IN6: { /* flush all the prefixes advertised by routers */ struct nd_prefix *pr, *next; LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) { struct in6_ifaddr *ia, *ia_next; if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) continue; /* XXX */ /* do we really have to remove addresses as well? */ /* XXXRW: in6_ifaddrhead locking. */ TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link, ia_next) { if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; if (ia->ia6_ndpr == pr) in6_purgeaddr(&ia->ia_ifa); } prelist_remove(pr); } break; } case SIOCSRTRFLUSH_IN6: { /* flush all the default routers */ struct nd_defrouter *dr, *next; defrouter_reset(); TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) { defrtrlist_del(dr); } defrouter_select(); break; } case SIOCGNBRINFO_IN6: { struct llentry *ln; struct in6_addr nb_addr = nbi->addr; /* make local for safety */ if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0) return (error); IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(&nb_addr, 0, ifp); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) { error = EINVAL; break; } nbi->state = ln->ln_state; nbi->asked = ln->la_asked; nbi->isrouter = ln->ln_router; if (ln->la_expire == 0) nbi->expire = 0; else nbi->expire = ln->la_expire + (time_second - time_uptime); LLE_RUNLOCK(ln); break; } case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ ndif->ifindex = V_nd6_defifindex; break; case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ return (nd6_setdefaultiface(ndif->ifindex)); } return (error); } /* * Calculates new isRouter value based on provided parameters and * returns it. */ static int nd6_is_router(int type, int code, int is_new, int old_addr, int new_addr, int ln_router) { /* * ICMP6 type dependent behavior. * * NS: clear IsRouter if new entry * RS: clear IsRouter * RA: set IsRouter if there's lladdr * redir: clear IsRouter if new entry * * RA case, (1): * The spec says that we must set IsRouter in the following cases: * - If lladdr exists, set IsRouter. This means (1-5). * - If it is an old entry (!newentry), set IsRouter. This means (7). * So, based on the spec, in (1-5) and (7) cases we must set IsRouter. * A question arises for (1) case.
(1) case has no lladdr in the * neighbor cache, this is similar to (6). * This case is rare but we figured that we MUST NOT set IsRouter. * * is_new old_addr new_addr NS RS RA redir * D R * 0 n n (1) c ? s * 0 y n (2) c s s * 0 n y (3) c s s * 0 y y (4) c s s * 0 y y (5) c s s * 1 -- n (6) c c c s * 1 -- y (7) c c s c s * * (c=clear s=set) */ switch (type & 0xff) { case ND_NEIGHBOR_SOLICIT: /* * New entry must have is_router flag cleared. */ if (is_new) /* (6-7) */ ln_router = 0; break; case ND_REDIRECT: /* * If the icmp is a redirect to a better router, always set the * is_router flag. Otherwise, if the entry is newly created, * clear the flag. [RFC 2461, sec 8.3] */ if (code == ND_REDIRECT_ROUTER) ln_router = 1; else { if (is_new) /* (6-7) */ ln_router = 0; } break; case ND_ROUTER_SOLICIT: /* * is_router flag must always be cleared. */ ln_router = 0; break; case ND_ROUTER_ADVERT: /* * Mark an entry with lladdr as a router. */ if ((!is_new && (old_addr || new_addr)) || /* (2-5) */ (is_new && new_addr)) { /* (7) */ ln_router = 1; } break; } return (ln_router); } /* * Create neighbor cache entry and cache link-layer address, * on reception of inbound ND6 packets. (RS/RA/NS/redirect) * * type - ICMP6 type * code - type dependent information * */ void nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, int lladdrlen, int type, int code) { struct llentry *ln = NULL, *ln_tmp; int is_newentry; int do_update; int olladdr; int llchange; int flags; uint16_t router = 0; struct sockaddr_in6 sin6; struct mbuf *chain = NULL; IF_AFDATA_UNLOCK_ASSERT(ifp); KASSERT(ifp != NULL, ("%s: ifp == NULL", __func__)); KASSERT(from != NULL, ("%s: from == NULL", __func__)); /* nothing must be updated for unspecified address */ if (IN6_IS_ADDR_UNSPECIFIED(from)) return; /* * Validation of ifp->if_addrlen and lladdrlen must be done by * the caller. * * XXX If the link does not have a link-layer address, what should * we do? (ifp->if_addrlen == 0) * Spec says nothing in sections for RA, RS and NA. There's a small * description of it in the NS section (RFC 2461 7.2.3). */ flags = lladdr ? LLE_EXCLUSIVE : 0; IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(from, flags, ifp); IF_AFDATA_RUNLOCK(ifp); is_newentry = 0; if (ln == NULL) { flags |= LLE_EXCLUSIVE; ln = nd6_alloc(from, 0, ifp); if (ln == NULL) return; /* * Since we already know all the data for the new entry, * fill it before insertion. */ if (lladdr != NULL) lltable_set_entry_addr(ifp, ln, lladdr); IF_AFDATA_WLOCK(ifp); LLE_WLOCK(ln); /* Prefer any existing lle over newly-created one */ ln_tmp = nd6_lookup(from, LLE_EXCLUSIVE, ifp); if (ln_tmp == NULL) lltable_link_entry(LLTABLE6(ifp), ln); IF_AFDATA_WUNLOCK(ifp); if (ln_tmp == NULL) { /* No existing lle, mark as new entry (6,7) */ is_newentry = 1; nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); if (lladdr != NULL) /* (7) */ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); } else { lltable_free_entry(LLTABLE6(ifp), ln); ln = ln_tmp; ln_tmp = NULL; } } /* do nothing if static ndp is set */ if ((ln->la_flags & LLE_STATIC)) { if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(ln); else LLE_RUNLOCK(ln); return; } olladdr = (ln->la_flags & LLE_VALID) ?
1 : 0; if (olladdr && lladdr) { llchange = bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen); } else if (!olladdr && lladdr) llchange = 1; else llchange = 0; /* * newentry olladdr lladdr llchange (*=record) * 0 n n -- (1) * 0 y n -- (2) * 0 n y y (3) * STALE * 0 y y n (4) * * 0 y y y (5) * STALE * 1 -- n -- (6) NOSTATE(= PASSIVE) * 1 -- y -- (7) * STALE */ do_update = 0; if (is_newentry == 0 && llchange != 0) { do_update = 1; /* (3,5) */ /* * Record source link-layer address * XXX is it dependent on ifp->if_type? */ lltable_set_entry_addr(ifp, ln, lladdr); nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); if (ln->la_hold != NULL) nd6_grab_holdchain(ln, &chain, &sin6); } /* Calculate the new router status */ router = nd6_is_router(type, code, is_newentry, olladdr, lladdr != NULL ? 1 : 0, ln->ln_router); ln->ln_router = router; /* Mark non-router redirects with special flag */ if ((type & 0xFF) == ND_REDIRECT && code != ND_REDIRECT_ROUTER) ln->la_flags |= LLE_REDIRECT; if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(ln); else LLE_RUNLOCK(ln); if (chain != NULL) nd6_flush_holdchain(ifp, ifp, chain, &sin6); /* * When the link-layer address of a router changes, select the * best router again. In particular, when the neighbor entry is newly * created, it might affect the selection policy. * Question: can we restrict the first condition to the "is_newentry" * case? * XXX: when we hear an RA from a new router with the link-layer * address option, defrouter_select() is called twice, since * defrtrlist_update called the function as well. However, I believe * we can compromise the overhead, since it only happens the first * time. * XXX: although defrouter_select() should not have a bad effect * on hosts that are not autoconfigured, we explicitly avoid such * cases for safety. */ if ((do_update || is_newentry) && router && ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* * guaranteed recursion */ defrouter_select(); } } static void nd6_slowtimo(void *arg) { CURVNET_SET((struct vnet *) arg); struct nd_ifinfo *nd6if; struct ifnet *ifp; callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (ifp->if_afdata[AF_INET6] == NULL) continue; nd6if = ND_IFINFO(ifp); if (nd6if->basereachable && /* already initialized */ (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) { /* * Since the reachable time rarely changes via router * advertisements, we SHOULD ensure that a new random * value gets recomputed at least once every few hours. * (RFC 2461, 6.3.4) */ nd6if->recalctm = V_nd6_recalc_reachtm_interval; nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable); } } IFNET_RUNLOCK_NOSLEEP(); CURVNET_RESTORE(); } void nd6_grab_holdchain(struct llentry *ln, struct mbuf **chain, struct sockaddr_in6 *sin6) { LLE_WLOCK_ASSERT(ln); *chain = ln->la_hold; ln->la_hold = NULL; lltable_fill_sa_entry(ln, (struct sockaddr *)sin6); if (ln->ln_state == ND6_LLINFO_STALE) { /* * The first time we send a packet to a * neighbor whose entry is STALE, we have * to change the state to DELAY and set * a timer to expire in DELAY_FIRST_PROBE_TIME * seconds so that neighbor unreachability * detection runs on expiration.
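 *
 * (An illustrative aside: the recomputation in nd6_slowtimo() above
 * uses ND_COMPUTE_RTIME(). Written out, with base in milliseconds:
 *
 *	uint32_t unit = base >> 10;	// base / 1024
 *	uint32_t reachable =
 *	    (512 * unit + (arc4random() & (1024 * unit))) / 1000;
 *
 * i.e. roughly [0.5, 1.5) * base, scaled down to seconds by the final
 * division. Note that the kernel masks arc4random() rather than taking
 * a modulus, so the spread is only approximately uniform.)
 *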
* (RFC 2461 7.3.3) */ nd6_llinfo_setstate(ln, ND6_LLINFO_DELAY); } } int nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m, - struct sockaddr_in6 *dst) + struct sockaddr_in6 *dst, struct route *ro) { int error; int ip6len; struct ip6_hdr *ip6; struct m_tag *mtag; #ifdef MAC mac_netinet6_nd6_send(ifp, m); #endif /* * If called from nd6_ns_output() (NS), nd6_na_output() (NA), * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA * as handled by rtsol and rtadvd), mbufs will be tagged for SeND * to be diverted to user space. When re-injected into the kernel, * send_output() will directly dispatch them to the outgoing interface. */ if (send_sendso_input_hook != NULL) { mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL); if (mtag != NULL) { ip6 = mtod(m, struct ip6_hdr *); ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); /* Use the SEND socket */ error = send_sendso_input_hook(m, ifp, SND_OUT, ip6len); /* -1 == no app on SEND socket */ if (error == 0 || error != -1) return (error); } } m_clrprotoflags(m); /* Avoid confusing lower layers. */ IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL, mtod(m, struct ip6_hdr *)); if ((ifp->if_flags & IFF_LOOPBACK) == 0) origifp = ifp; - error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, NULL); + error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, ro); return (error); } /* * Do L2 address resolution for the @sa_dst address. Stores the found * address in the @desten buffer. A copy of the lle ln_flags can also be * saved in @pflags if @pflags is non-NULL. * * If the destination LLE does not exist, or lle state modification * is required, call the "slow" version. * * Return values: * - 0 on success (address copied to buffer). * - EWOULDBLOCK (no local error, but address is still unresolved) * - other errors (alloc failure, etc) */ int nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m, const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags) { struct llentry *ln = NULL; const struct sockaddr_in6 *dst6; if (pflags != NULL) *pflags = 0; dst6 = (const struct sockaddr_in6 *)sa_dst; /* discard the packet if IPv6 operation is disabled on the interface */ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) { m_freem(m); return (ENETDOWN); /* better error? */ } if (m != NULL && m->m_flags & M_MCAST) { switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: case IFT_L2VLAN: case IFT_IEEE80211: case IFT_BRIDGE: case IFT_ISO88025: ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr, desten); return (0); default: m_freem(m); return (EAFNOSUPPORT); } } IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(&dst6->sin6_addr, 0, ifp); IF_AFDATA_RUNLOCK(ifp); /* * Perform the fast path for the following cases: * 1) lle state is REACHABLE * 2) lle state is DELAY (NS message sent) * * Every other case involves lle modification, so we handle * them separately. */ if (ln == NULL || (ln->ln_state != ND6_LLINFO_REACHABLE && ln->ln_state != ND6_LLINFO_DELAY)) { /* Fall back to the slow processing path */ if (ln != NULL) LLE_RUNLOCK(ln); return (nd6_resolve_slow(ifp, m, dst6, desten, pflags)); } bcopy(&ln->ll_addr, desten, ifp->if_addrlen); if (pflags != NULL) *pflags = ln->la_flags; LLE_RUNLOCK(ln); return (0); } /* * Do L2 address resolution for the @sa_dst address. Stores the found * address in the @desten buffer. A copy of the lle ln_flags can also be * saved in @pflags if @pflags is non-NULL. * * Heavy version. * The function assumes that the destination LLE does not exist, * is invalid or stale, so the LLE_EXCLUSIVE lock needs to be acquired.
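 *
 * Before the heavy path, a hypothetical caller of nd6_resolve() for
 * reference, modeled loosely on ether_output(); example_transmit() is
 * illustrative and not part of this file. The key point: EWOULDBLOCK
 * means the mbuf was queued on the llentry pending the NS/NA exchange
 * and must not be treated as an error.
 */
static int
example_transmit(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *dst, int is_gw)
{
	u_char edst[ETHER_ADDR_LEN];
	uint32_t pflags;
	int error;

	error = nd6_resolve(ifp, is_gw, m, dst, edst, &pflags);
	if (error == EWOULDBLOCK)
		return (0);	/* held on the llentry; sent on resolution */
	if (error != 0)
		return (error);	/* mbuf already freed by nd6_resolve() */
	/* ... fill the L2 header from edst and hand m to if_output ... */
	return (0);
}
/*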
* * Set noinline to be dtrace-friendly */ static __noinline int nd6_resolve_slow(struct ifnet *ifp, struct mbuf *m, const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags) { struct llentry *lle = NULL, *lle_tmp; struct in6_addr *psrc, src; int send_ns; /* * Address resolution or Neighbor Unreachability Detection * for the next hop. * At this point, the destination of the packet must be a unicast * or an anycast address (i.e., not a multicast). */ if (lle == NULL) { IF_AFDATA_RLOCK(ifp); lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); IF_AFDATA_RUNLOCK(ifp); if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp)) { /* * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), * the condition below is not very efficient. But we believe * it is tolerable, because this should be a rare case. */ lle = nd6_alloc(&dst->sin6_addr, 0, ifp); if (lle == NULL) { char ip6buf[INET6_ADDRSTRLEN]; log(LOG_DEBUG, "nd6_output: can't allocate llinfo for %s " "(ln=%p)\n", ip6_sprintf(ip6buf, &dst->sin6_addr), lle); m_freem(m); return (ENOBUFS); } IF_AFDATA_WLOCK(ifp); LLE_WLOCK(lle); /* Prefer any existing entry over the newly-created one */ lle_tmp = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); if (lle_tmp == NULL) lltable_link_entry(LLTABLE6(ifp), lle); IF_AFDATA_WUNLOCK(ifp); if (lle_tmp != NULL) { lltable_free_entry(LLTABLE6(ifp), lle); lle = lle_tmp; lle_tmp = NULL; } } } if (lle == NULL) { if (!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) { m_freem(m); return (ENOBUFS); } if (m != NULL) m_freem(m); return (ENOBUFS); } LLE_WLOCK_ASSERT(lle); /* * The first time we send a packet to a neighbor whose entry is * STALE, we have to change the state to DELAY and set a timer to * expire in DELAY_FIRST_PROBE_TIME seconds so that * neighbor unreachability detection runs on expiration. * (RFC 2461 7.3.3) */ if (lle->ln_state == ND6_LLINFO_STALE) nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY); /* * If the neighbor cache entry has a state other than INCOMPLETE * (i.e. its link-layer address is already resolved), just * send the packet. */ if (lle->ln_state > ND6_LLINFO_INCOMPLETE) { bcopy(&lle->ll_addr, desten, ifp->if_addrlen); if (pflags != NULL) *pflags = lle->la_flags; LLE_WUNLOCK(lle); return (0); } /* * There is a neighbor cache entry, but no ethernet address * response yet. Append this latest packet to the end of the * packet queue held on the entry, as long as the number of queued * packets does not exceed nd6_maxqueuelen; when it does, * the oldest packet in the queue is removed. */ if (lle->la_hold != NULL) { struct mbuf *m_hold; int i; i = 0; for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){ i++; if (m_hold->m_nextpkt == NULL) { m_hold->m_nextpkt = m; break; } } while (i >= V_nd6_maxqueuelen) { m_hold = lle->la_hold; lle->la_hold = lle->la_hold->m_nextpkt; m_freem(m_hold); i--; } } else { lle->la_hold = m; } /* * If there has been no NS for the neighbor after entering the * INCOMPLETE state, send the first solicitation. * Note that for a newly-created lle la_asked will be 0, * so we will transition from the ND6_LLINFO_NOSTATE to the * ND6_LLINFO_INCOMPLETE state here.
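 *
 * (A worked example of the queueing above, assuming
 * V_nd6_maxqueuelen == 3: with la_hold == A->B->C, queueing packet D
 * first appends it, the for loop having counted i == 3 existing
 * packets; the while loop then frees A, leaving B->C->D. The oldest
 * packet is always the one dropped.)
 *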
*/ psrc = NULL; send_ns = 0; if (lle->la_asked == 0) { lle->la_asked++; send_ns = 1; psrc = nd6_llinfo_get_holdsrc(lle, &src); nd6_llinfo_setstate(lle, ND6_LLINFO_INCOMPLETE); } LLE_WUNLOCK(lle); if (send_ns != 0) nd6_ns_output(ifp, psrc, NULL, &dst->sin6_addr, NULL); return (EWOULDBLOCK); } int nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, struct sockaddr_in6 *dst) { struct mbuf *m, *m_head; struct ifnet *outifp; int error = 0; m_head = chain; if ((ifp->if_flags & IFF_LOOPBACK) != 0) outifp = origifp; else outifp = ifp; while (m_head) { m = m_head; m_head = m_head->m_nextpkt; - error = nd6_output_ifp(ifp, origifp, m, dst); + error = nd6_output_ifp(ifp, origifp, m, dst, NULL); } /* * XXX * note that intermediate errors are blindly ignored */ return (error); } static int nd6_need_cache(struct ifnet *ifp) { /* * XXX: we currently do not maintain a neighbor cache on any interface * other than ARCnet, Ethernet, FDDI and GIF. * * RFC2893 says: * - unidirectional tunnels need no ND */ switch (ifp->if_type) { case IFT_ARCNET: case IFT_ETHER: case IFT_FDDI: case IFT_IEEE1394: case IFT_L2VLAN: case IFT_IEEE80211: case IFT_INFINIBAND: case IFT_BRIDGE: case IFT_PROPVIRTUAL: return (1); default: return (0); } } /* * Add a permanent ND6 link-layer record for the given * interface address. * * Very similar to IPv4 arp_ifinit(), but: * 1) IPv6 DAD is performed in a different place * 2) It is called by the IPv6 protocol stack, in contrast to * arp_ifinit(), which is typically called from the SIOCSIFADDR * driver ioctl handler. * */ int nd6_add_ifa_lle(struct in6_ifaddr *ia) { struct ifnet *ifp; struct llentry *ln, *ln_tmp; struct sockaddr *dst; ifp = ia->ia_ifa.ifa_ifp; if (nd6_need_cache(ifp) == 0) return (0); ia->ia_ifa.ifa_rtrequest = nd6_rtrequest; dst = (struct sockaddr *)&ia->ia_addr; ln = lltable_alloc_entry(LLTABLE6(ifp), LLE_IFADDR, dst); if (ln == NULL) return (ENOBUFS); IF_AFDATA_WLOCK(ifp); LLE_WLOCK(ln); /* Unlink any existing entry */ ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_EXCLUSIVE, dst); if (ln_tmp != NULL) lltable_unlink_entry(LLTABLE6(ifp), ln_tmp); lltable_link_entry(LLTABLE6(ifp), ln); IF_AFDATA_WUNLOCK(ifp); if (ln_tmp != NULL) EVENTHANDLER_INVOKE(lle_event, ln_tmp, LLENTRY_EXPIRED); EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); LLE_WUNLOCK(ln); if (ln_tmp != NULL) llentry_free(ln_tmp); return (0); } /* * Removes either all lle entries for the given @ia, or the lle * corresponding to the @ia address.
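 *
 * A hypothetical caller showing the intended pairing with
 * nd6_add_ifa_lle() above, modeled on the in6_update_ifa() and
 * in6_purgeaddr() paths; example_ifa_lle_lifecycle() is illustrative
 * and not part of this file.
 */
static int
example_ifa_lle_lifecycle(struct in6_ifaddr *ia)
{
	int error;

	/* Install the LLE_IFADDR record when the address is configured. */
	error = nd6_add_ifa_lle(ia);
	if (error != 0)
		return (error);
	/* ... address in use ... */
	/* On deletion, remove only this address's entry (all == 0). */
	nd6_rem_ifa_lle(ia, 0);
	return (0);
}
/*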
*/ void nd6_rem_ifa_lle(struct in6_ifaddr *ia, int all) { struct sockaddr_in6 mask, addr; struct sockaddr *saddr, *smask; struct ifnet *ifp; ifp = ia->ia_ifa.ifa_ifp; memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); saddr = (struct sockaddr *)&addr; smask = (struct sockaddr *)&mask; if (all != 0) lltable_prefix_free(AF_INET6, saddr, smask, LLE_STATIC); else lltable_delete_addr(LLTABLE6(ifp), LLE_IFADDR, saddr); } static void clear_llinfo_pqueue(struct llentry *ln) { struct mbuf *m_hold, *m_hold_next; for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_freem(m_hold); } ln->la_hold = NULL; return; } static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS); static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS); #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet6_icmp6); #endif SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist, CTLFLAG_RD, nd6_sysctl_drlist, ""); SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist, CTLFLAG_RD, nd6_sysctl_prlist, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, ""); SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), ""); static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS) { struct in6_defrouter d; struct nd_defrouter *dr; int error; if (req->newptr) return (EPERM); bzero(&d, sizeof(d)); d.rtaddr.sin6_family = AF_INET6; d.rtaddr.sin6_len = sizeof(d.rtaddr); /* * XXX locking */ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { d.rtaddr.sin6_addr = dr->rtaddr; error = sa6_recoverscope(&d.rtaddr); if (error != 0) return (error); d.flags = dr->flags; d.rtlifetime = dr->rtlifetime; d.expire = dr->expire + (time_second - time_uptime); d.if_index = dr->ifp->if_index; error = SYSCTL_OUT(req, &d, sizeof(d)); if (error != 0) return (error); } return (0); } static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) { struct in6_prefix p; struct sockaddr_in6 s6; struct nd_prefix *pr; struct nd_pfxrouter *pfr; time_t maxexpire; int error; char ip6buf[INET6_ADDRSTRLEN]; if (req->newptr) return (EPERM); bzero(&p, sizeof(p)); p.origin = PR_ORIG_RA; bzero(&s6, sizeof(s6)); s6.sin6_family = AF_INET6; s6.sin6_len = sizeof(s6); /* * XXX locking */ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { p.prefix = pr->ndpr_prefix; if (sa6_recoverscope(&p.prefix)) { log(LOG_ERR, "scope error in prefix list (%s)\n", ip6_sprintf(ip6buf, &p.prefix.sin6_addr)); /* XXX: press on... */ } p.raflags = pr->ndpr_raf; p.prefixlen = pr->ndpr_plen; p.vltime = pr->ndpr_vltime; p.pltime = pr->ndpr_pltime; p.if_index = pr->ndpr_ifp->if_index; if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME) p.expire = 0; else { /* XXX: we assume time_t is signed. 
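 *
 * (Written out: the next statement builds the largest positive value a
 * signed time_t can hold, all bits set except the sign bit, e.g.
 * 0x7fffffffffffffff for a 64-bit time_t. The comparison that follows
 * then adds ndpr_vltime to ndpr_lastupdate only when the sum cannot
 * exceed that maximum, so the addition never overflows:
 *
 *	if (vltime < maxexpire - lastupdate)
 *		expire = lastupdate + vltime;	// + wall-clock offset
 *	else
 *		expire = maxexpire;
 * )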
*/ maxexpire = (-1) & ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate) p.expire = pr->ndpr_lastupdate + pr->ndpr_vltime + (time_second - time_uptime); else p.expire = maxexpire; } p.refcnt = pr->ndpr_refcnt; p.flags = pr->ndpr_stateflags; p.advrtrs = 0; LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) p.advrtrs++; error = SYSCTL_OUT(req, &p, sizeof(p)); if (error != 0) return (error); LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) { s6.sin6_addr = pfr->router->rtaddr; if (sa6_recoverscope(&s6)) log(LOG_ERR, "scope error in prefix list (%s)\n", ip6_sprintf(ip6buf, &pfr->router->rtaddr)); error = SYSCTL_OUT(req, &s6, sizeof(s6)); if (error != 0) return (error); } } return (0); } Index: projects/powernv/netinet6/nd6.h =================================================================== --- projects/powernv/netinet6/nd6.h (revision 290990) +++ projects/powernv/netinet6/nd6.h (revision 290991) @@ -1,458 +1,458 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: nd6.h,v 1.76 2001/12/18 02:10:31 itojun Exp $ * $FreeBSD$ */ #ifndef _NETINET6_ND6_H_ #define _NETINET6_ND6_H_ /* see net/route.h, or net/if_inarp.h */ #ifndef RTF_ANNOUNCE #define RTF_ANNOUNCE RTF_PROTO2 #endif #include #include struct llentry; #define ND6_LLINFO_NOSTATE -2 /* * We don't need the WAITDELETE state any more, but we keep the definition * in a comment line instead of removing it. This is necessary to avoid * unintentionally reusing the value for another purpose, which might * affect backward compatibility with old applications. 
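 *
 * (Note that the remaining ND6_LLINFO_* values below are deliberately
 * ordered: every "resolved" state compares greater than INCOMPLETE,
 * which is what ND6_IS_LLINFO_PROBREACH() and ND6_LLINFO_PERMANENT()
 * rely on. Illustration, with ln a hypothetical struct llentry *:
 *
 *	int resolved = (ln->ln_state > ND6_LLINFO_INCOMPLETE);
 *	int permanent = (resolved && ln->la_expire == 0);
 * )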
* (20000711 jinmei@kame.net) */ /* #define ND6_LLINFO_WAITDELETE -1 */ #define ND6_LLINFO_INCOMPLETE 0 #define ND6_LLINFO_REACHABLE 1 #define ND6_LLINFO_STALE 2 #define ND6_LLINFO_DELAY 3 #define ND6_LLINFO_PROBE 4 #define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE) #define ND6_LLINFO_PERMANENT(n) (((n)->la_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE)) struct nd_ifinfo { u_int32_t linkmtu; /* LinkMTU */ u_int32_t maxmtu; /* Upper bound of LinkMTU */ u_int32_t basereachable; /* BaseReachableTime */ u_int32_t reachable; /* Reachable Time */ u_int32_t retrans; /* Retrans Timer */ u_int32_t flags; /* Flags */ int recalctm; /* BaseReacable re-calculation timer */ u_int8_t chlim; /* CurHopLimit */ u_int8_t initialized; /* Flag to see the entry is initialized */ /* the following 3 members are for privacy extension for addrconf */ u_int8_t randomseed0[8]; /* upper 64 bits of MD5 digest */ u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */ u_int8_t randomid[8]; /* current random ID */ }; #define ND6_IFF_PERFORMNUD 0x1 #define ND6_IFF_ACCEPT_RTADV 0x2 #define ND6_IFF_PREFER_SOURCE 0x4 /* Not used in FreeBSD. */ #define ND6_IFF_IFDISABLED 0x8 /* IPv6 operation is disabled due to * DAD failure. (XXX: not ND-specific) */ #define ND6_IFF_DONT_SET_IFROUTE 0x10 #define ND6_IFF_AUTO_LINKLOCAL 0x20 #define ND6_IFF_NO_RADR 0x40 #define ND6_IFF_NO_PREFER_IFACE 0x80 /* XXX: not related to ND. */ #define ND6_IFF_NO_DAD 0x100 #ifdef _KERNEL #define ND_IFINFO(ifp) \ (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->nd_ifinfo) #define IN6_LINKMTU(ifp) \ ((ND_IFINFO(ifp)->linkmtu && ND_IFINFO(ifp)->linkmtu < (ifp)->if_mtu) \ ? ND_IFINFO(ifp)->linkmtu \ : ((ND_IFINFO(ifp)->maxmtu && ND_IFINFO(ifp)->maxmtu < (ifp)->if_mtu) \ ? ND_IFINFO(ifp)->maxmtu : (ifp)->if_mtu)) #endif struct in6_nbrinfo { char ifname[IFNAMSIZ]; /* if name, e.g. 
"en0" */ struct in6_addr addr; /* IPv6 address of the neighbor */ long asked; /* number of queries already sent for this addr */ int isrouter; /* if it acts as a router */ int state; /* reachability state */ int expire; /* lifetime for NDP state transition */ }; #define DRLSTSIZ 10 #define PRLSTSIZ 10 struct in6_drlist { char ifname[IFNAMSIZ]; struct { struct in6_addr rtaddr; u_char flags; u_short rtlifetime; u_long expire; u_short if_index; } defrouter[DRLSTSIZ]; }; struct in6_defrouter { struct sockaddr_in6 rtaddr; u_char flags; u_short rtlifetime; u_long expire; u_short if_index; }; #ifdef _KERNEL struct in6_oprlist { char ifname[IFNAMSIZ]; struct { struct in6_addr prefix; struct prf_ra raflags; u_char prefixlen; u_char origin; u_long vltime; u_long pltime; u_long expire; u_short if_index; u_short advrtrs; /* number of advertisement routers */ struct in6_addr advrtr[DRLSTSIZ]; /* XXX: explicit limit */ } prefix[PRLSTSIZ]; }; #endif struct in6_prlist { char ifname[IFNAMSIZ]; struct { struct in6_addr prefix; struct prf_ra raflags; u_char prefixlen; u_char origin; u_int32_t vltime; u_int32_t pltime; time_t expire; u_short if_index; u_short advrtrs; /* number of advertisement routers */ struct in6_addr advrtr[DRLSTSIZ]; /* XXX: explicit limit */ } prefix[PRLSTSIZ]; }; struct in6_prefix { struct sockaddr_in6 prefix; struct prf_ra raflags; u_char prefixlen; u_char origin; u_int32_t vltime; u_int32_t pltime; time_t expire; u_int32_t flags; int refcnt; u_short if_index; u_short advrtrs; /* number of advertisement routers */ /* struct sockaddr_in6 advrtr[] */ }; #ifdef _KERNEL struct in6_ondireq { char ifname[IFNAMSIZ]; struct { u_int32_t linkmtu; /* LinkMTU */ u_int32_t maxmtu; /* Upper bound of LinkMTU */ u_int32_t basereachable; /* BaseReachableTime */ u_int32_t reachable; /* Reachable Time */ u_int32_t retrans; /* Retrans Timer */ u_int32_t flags; /* Flags */ int recalctm; /* BaseReacable re-calculation timer */ u_int8_t chlim; /* CurHopLimit */ u_int8_t receivedra; } ndi; }; #endif struct in6_ndireq { char ifname[IFNAMSIZ]; struct nd_ifinfo ndi; }; struct in6_ndifreq { char ifname[IFNAMSIZ]; u_long ifindex; }; /* Prefix status */ #define NDPRF_ONLINK 0x1 #define NDPRF_DETACHED 0x2 /* protocol constants */ #define MAX_RTR_SOLICITATION_DELAY 1 /* 1sec */ #define RTR_SOLICITATION_INTERVAL 4 /* 4sec */ #define MAX_RTR_SOLICITATIONS 3 #define ND6_INFINITE_LIFETIME 0xffffffff #ifdef _KERNEL /* node constants */ #define MAX_REACHABLE_TIME 3600000 /* msec */ #define REACHABLE_TIME 30000 /* msec */ #define RETRANS_TIMER 1000 /* msec */ #define MIN_RANDOM_FACTOR 512 /* 1024 * 0.5 */ #define MAX_RANDOM_FACTOR 1536 /* 1024 * 1.5 */ #define DEF_TEMP_VALID_LIFETIME 604800 /* 1 week */ #define DEF_TEMP_PREFERRED_LIFETIME 86400 /* 1 day */ #define TEMPADDR_REGEN_ADVANCE 5 /* sec */ #define MAX_TEMP_DESYNC_FACTOR 600 /* 10 min */ #define ND_COMPUTE_RTIME(x) \ (((MIN_RANDOM_FACTOR * (x >> 10)) + (arc4random() & \ ((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10)))) /1000) TAILQ_HEAD(nd_drhead, nd_defrouter); struct nd_defrouter { TAILQ_ENTRY(nd_defrouter) dr_entry; struct in6_addr rtaddr; u_char flags; /* flags on RA message */ u_short rtlifetime; u_long expire; struct ifnet *ifp; int installed; /* is installed into kernel routing table */ }; struct nd_prefixctl { struct ifnet *ndpr_ifp; /* prefix */ struct sockaddr_in6 ndpr_prefix; u_char ndpr_plen; u_int32_t ndpr_vltime; /* advertised valid lifetime */ u_int32_t ndpr_pltime; /* advertised preferred lifetime */ struct prf_ra ndpr_flags; }; struct 
nd_prefix { struct ifnet *ndpr_ifp; LIST_ENTRY(nd_prefix) ndpr_entry; struct sockaddr_in6 ndpr_prefix; /* prefix */ struct in6_addr ndpr_mask; /* netmask derived from the prefix */ u_int32_t ndpr_vltime; /* advertised valid lifetime */ u_int32_t ndpr_pltime; /* advertised preferred lifetime */ time_t ndpr_expire; /* expiration time of the prefix */ time_t ndpr_preferred; /* preferred time of the prefix */ time_t ndpr_lastupdate; /* reception time of last advertisement */ struct prf_ra ndpr_flags; u_int32_t ndpr_stateflags; /* actual state flags */ /* list of routers that advertise the prefix: */ LIST_HEAD(pr_rtrhead, nd_pfxrouter) ndpr_advrtrs; u_char ndpr_plen; int ndpr_refcnt; /* reference couter from addresses */ }; #define ndpr_raf ndpr_flags #define ndpr_raf_onlink ndpr_flags.onlink #define ndpr_raf_auto ndpr_flags.autonomous #define ndpr_raf_router ndpr_flags.router /* * Message format for use in obtaining information about prefixes * from inet6 sysctl function */ struct inet6_ndpr_msghdr { u_short inpm_msglen; /* to skip over non-understood messages */ u_char inpm_version; /* future binary compatibility */ u_char inpm_type; /* message type */ struct in6_addr inpm_prefix; u_long prm_vltim; u_long prm_pltime; u_long prm_expire; u_long prm_preferred; struct in6_prflags prm_flags; u_short prm_index; /* index for associated ifp */ u_char prm_plen; /* length of prefix in bits */ }; #define prm_raf_onlink prm_flags.prf_ra.onlink #define prm_raf_auto prm_flags.prf_ra.autonomous #define prm_statef_onlink prm_flags.prf_state.onlink #define prm_rrf_decrvalid prm_flags.prf_rr.decrvalid #define prm_rrf_decrprefd prm_flags.prf_rr.decrprefd struct nd_pfxrouter { LIST_ENTRY(nd_pfxrouter) pfr_entry; struct nd_defrouter *router; }; LIST_HEAD(nd_prhead, nd_prefix); /* nd6.c */ VNET_DECLARE(int, nd6_prune); VNET_DECLARE(int, nd6_delay); VNET_DECLARE(int, nd6_umaxtries); VNET_DECLARE(int, nd6_mmaxtries); VNET_DECLARE(int, nd6_useloopback); VNET_DECLARE(int, nd6_maxnudhint); VNET_DECLARE(int, nd6_gctimer); VNET_DECLARE(struct nd_drhead, nd_defrouter); VNET_DECLARE(struct nd_prhead, nd_prefix); VNET_DECLARE(int, nd6_debug); VNET_DECLARE(int, nd6_onlink_ns_rfc4861); #define V_nd6_prune VNET(nd6_prune) #define V_nd6_delay VNET(nd6_delay) #define V_nd6_umaxtries VNET(nd6_umaxtries) #define V_nd6_mmaxtries VNET(nd6_mmaxtries) #define V_nd6_useloopback VNET(nd6_useloopback) #define V_nd6_maxnudhint VNET(nd6_maxnudhint) #define V_nd6_gctimer VNET(nd6_gctimer) #define V_nd_defrouter VNET(nd_defrouter) #define V_nd_prefix VNET(nd_prefix) #define V_nd6_debug VNET(nd6_debug) #define V_nd6_onlink_ns_rfc4861 VNET(nd6_onlink_ns_rfc4861) #define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0) VNET_DECLARE(struct callout, nd6_timer_ch); #define V_nd6_timer_ch VNET(nd6_timer_ch) /* nd6_rtr.c */ VNET_DECLARE(int, nd6_defifindex); VNET_DECLARE(int, ip6_desync_factor); /* seconds */ VNET_DECLARE(u_int32_t, ip6_temp_preferred_lifetime); /* seconds */ VNET_DECLARE(u_int32_t, ip6_temp_valid_lifetime); /* seconds */ VNET_DECLARE(int, ip6_temp_regen_advance); /* seconds */ #define V_nd6_defifindex VNET(nd6_defifindex) #define V_ip6_desync_factor VNET(ip6_desync_factor) #define V_ip6_temp_preferred_lifetime VNET(ip6_temp_preferred_lifetime) #define V_ip6_temp_valid_lifetime VNET(ip6_temp_valid_lifetime) #define V_ip6_temp_regen_advance VNET(ip6_temp_regen_advance) union nd_opts { struct nd_opt_hdr *nd_opt_array[16]; /* max = ND_OPT_NONCE */ struct { struct nd_opt_hdr *zero; struct nd_opt_hdr *src_lladdr; struct 
nd_opt_hdr *tgt_lladdr; struct nd_opt_prefix_info *pi_beg; /* multiple opts, start */ struct nd_opt_rd_hdr *rh; struct nd_opt_mtu *mtu; struct nd_opt_hdr *__res6; struct nd_opt_hdr *__res7; struct nd_opt_hdr *__res8; struct nd_opt_hdr *__res9; struct nd_opt_hdr *__res10; struct nd_opt_hdr *__res11; struct nd_opt_hdr *__res12; struct nd_opt_hdr *__res13; struct nd_opt_nonce *nonce; struct nd_opt_hdr *__res15; struct nd_opt_hdr *search; /* multiple opts */ struct nd_opt_hdr *last; /* multiple opts */ int done; struct nd_opt_prefix_info *pi_end;/* multiple opts, end */ } nd_opt_each; }; #define nd_opts_src_lladdr nd_opt_each.src_lladdr #define nd_opts_tgt_lladdr nd_opt_each.tgt_lladdr #define nd_opts_pi nd_opt_each.pi_beg #define nd_opts_pi_end nd_opt_each.pi_end #define nd_opts_rh nd_opt_each.rh #define nd_opts_mtu nd_opt_each.mtu #define nd_opts_nonce nd_opt_each.nonce #define nd_opts_search nd_opt_each.search #define nd_opts_last nd_opt_each.last #define nd_opts_done nd_opt_each.done /* XXX: need nd6_var.h?? */ /* nd6.c */ void nd6_init(void); #ifdef VIMAGE void nd6_destroy(void); #endif struct nd_ifinfo *nd6_ifattach(struct ifnet *); void nd6_ifdetach(struct nd_ifinfo *); int nd6_is_addr_neighbor(const struct sockaddr_in6 *, struct ifnet *); void nd6_option_init(void *, int, union nd_opts *); struct nd_opt_hdr *nd6_option(union nd_opts *); int nd6_options(union nd_opts *); struct llentry *nd6_lookup(const struct in6_addr *, int, struct ifnet *); struct llentry *nd6_alloc(const struct in6_addr *, int, struct ifnet *); void nd6_setmtu(struct ifnet *); void nd6_llinfo_setstate(struct llentry *lle, int newstate); void nd6_timer(void *); void nd6_purge(struct ifnet *); int nd6_resolve(struct ifnet *, int, struct mbuf *, const struct sockaddr *, u_char *, uint32_t *); int nd6_ioctl(u_long, caddr_t, struct ifnet *); void nd6_cache_lladdr(struct ifnet *, struct in6_addr *, char *, int, int, int); void nd6_grab_holdchain(struct llentry *, struct mbuf **, struct sockaddr_in6 *); int nd6_flush_holdchain(struct ifnet *, struct ifnet *, struct mbuf *, struct sockaddr_in6 *); int nd6_add_ifa_lle(struct in6_ifaddr *); void nd6_rem_ifa_lle(struct in6_ifaddr *, int); int nd6_output_ifp(struct ifnet *, struct ifnet *, struct mbuf *, - struct sockaddr_in6 *); + struct sockaddr_in6 *, struct route *); /* nd6_nbr.c */ void nd6_na_input(struct mbuf *, int, int); void nd6_na_output(struct ifnet *, const struct in6_addr *, const struct in6_addr *, u_long, int, struct sockaddr *); void nd6_ns_input(struct mbuf *, int, int); void nd6_ns_output(struct ifnet *, const struct in6_addr *, const struct in6_addr *, const struct in6_addr *, uint8_t *); caddr_t nd6_ifptomac(struct ifnet *); void nd6_dad_init(void); void nd6_dad_start(struct ifaddr *, int); void nd6_dad_stop(struct ifaddr *); /* nd6_rtr.c */ void nd6_rs_input(struct mbuf *, int, int); void nd6_ra_input(struct mbuf *, int, int); void prelist_del(struct nd_prefix *); void defrouter_reset(void); void defrouter_select(void); void defrtrlist_del(struct nd_defrouter *); void prelist_remove(struct nd_prefix *); int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *, struct nd_prefix **); void pfxlist_onlink_check(void); struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *); struct nd_prefix *nd6_prefix_lookup(struct nd_prefixctl *); void rt6_flush(struct in6_addr *, struct ifnet *); int nd6_setdefaultiface(int); int in6_tmpifadd(const struct in6_ifaddr *, int, int); #endif /* _KERNEL */ #endif /* _NETINET6_ND6_H_ */ Index: 
projects/powernv/netipsec/ipsec.h =================================================================== --- projects/powernv/netipsec/ipsec.h (revision 290990) +++ projects/powernv/netipsec/ipsec.h (revision 290991) @@ -1,372 +1,373 @@ /* $FreeBSD$ */ /* $KAME: ipsec.h,v 1.53 2001/11/20 08:32:38 itojun Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * IPsec controller part. */ #ifndef _NETIPSEC_IPSEC_H_ #define _NETIPSEC_IPSEC_H_ #if defined(_KERNEL) && !defined(_LKM) && !defined(KLD_MODULE) #include "opt_inet.h" #include "opt_ipsec.h" #endif #include #include #ifdef _KERNEL #include #include #include #define IPSEC_ASSERT(_c,_m) KASSERT(_c, _m) #define IPSEC_IS_PRIVILEGED_SO(_so) \ ((_so)->so_cred != NULL && \ priv_check_cred((_so)->so_cred, PRIV_NETINET_IPSEC, 0) \ == 0) /* * Security Policy Index * Ensure that both address families in the "src" and "dst" are same. * When the value of the ul_proto is ICMPv6, the port field in "src" * specifies ICMPv6 type, and the port field in "dst" specifies ICMPv6 code. */ struct secpolicyindex { u_int8_t dir; /* direction of packet flow, see below */ union sockaddr_union src; /* IP src address for SP */ union sockaddr_union dst; /* IP dst address for SP */ u_int8_t prefs; /* prefix length in bits for src */ u_int8_t prefd; /* prefix length in bits for dst */ u_int16_t ul_proto; /* upper layer Protocol */ #ifdef notyet uid_t uids; uid_t uidd; gid_t gids; gid_t gidd; #endif }; /* Security Policy Data Base */ struct secpolicy { TAILQ_ENTRY(secpolicy) chain; struct secpolicyindex spidx; /* selector */ struct ipsecrequest *req; /* pointer to the ipsec request tree, */ /* if policy == IPSEC else this value == NULL.*/ u_int refcnt; /* reference count */ u_int policy; /* policy_type per pfkeyv2.h */ u_int state; #define IPSEC_SPSTATE_DEAD 0 #define IPSEC_SPSTATE_ALIVE 1 + u_int32_t priority; /* priority of this policy */ u_int32_t id; /* It's unique number on the system. */ /* * lifetime handler. * the policy can be used without limitiation if both lifetime and * validtime are zero. 
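 *
 * (Restated as code, a hedged sketch of that rule; the timer code in
 * key.c is the authoritative implementation:
 *
 *	if ((sp->lifetime != 0 && now - sp->created > sp->lifetime) ||
 *	    (sp->validtime != 0 && now - sp->lastused > sp->validtime))
 *		;	// the policy has expired
 * )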
* "lifetime" is passed by sadb_lifetime.sadb_lifetime_addtime. * "validtime" is passed by sadb_lifetime.sadb_lifetime_usetime. */ time_t created; /* time created the policy */ time_t lastused; /* updated every when kernel sends a packet */ long lifetime; /* duration of the lifetime of this policy */ long validtime; /* duration this policy is valid without use */ }; /* Request for IPsec */ struct ipsecrequest { struct ipsecrequest *next; /* pointer to next structure */ /* If NULL, it means the end of chain. */ struct secasindex saidx;/* hint for search proper SA */ /* if __ss_len == 0 then no address specified.*/ u_int level; /* IPsec level defined below. */ struct secasvar *sav; /* place holder of SA for use */ struct secpolicy *sp; /* back pointer to SP */ struct rwlock lock; /* to interlock updates */ }; /* * Need recursion for when crypto callbacks happen directly, * as in the case of software crypto. Need to look at how * hard it is to remove this... */ #define IPSECREQUEST_LOCK_INIT(_isr) \ rw_init_flags(&(_isr)->lock, "ipsec request", RW_RECURSE) #define IPSECREQUEST_LOCK(_isr) rw_rlock(&(_isr)->lock) #define IPSECREQUEST_UNLOCK(_isr) rw_runlock(&(_isr)->lock) #define IPSECREQUEST_WLOCK(_isr) rw_wlock(&(_isr)->lock) #define IPSECREQUEST_WUNLOCK(_isr) rw_wunlock(&(_isr)->lock) #define IPSECREQUEST_UPGRADE(_isr) rw_try_upgrade(&(_isr)->lock) #define IPSECREQUEST_DOWNGRADE(_isr) rw_downgrade(&(_isr)->lock) #define IPSECREQUEST_LOCK_DESTROY(_isr) rw_destroy(&(_isr)->lock) #define IPSECREQUEST_LOCK_ASSERT(_isr) rw_assert(&(_isr)->lock, RA_LOCKED) /* security policy in PCB */ struct inpcbpolicy { struct secpolicy *sp_in; struct secpolicy *sp_out; int priv; /* privileged socket ? */ }; /* SP acquiring list table. */ struct secspacq { LIST_ENTRY(secspacq) chain; struct secpolicyindex spidx; time_t created; /* for lifetime */ int count; /* for lifetime */ /* XXX: here is mbuf place holder to be sent ? */ }; #endif /* _KERNEL */ /* according to IANA assignment, port 0x0000 and proto 0xff are reserved. */ #define IPSEC_PORT_ANY 0 #define IPSEC_ULPROTO_ANY 255 #define IPSEC_PROTO_ANY 255 /* mode of security protocol */ /* NOTE: DON'T use IPSEC_MODE_ANY at SPD. It's only use in SAD */ #define IPSEC_MODE_ANY 0 /* i.e. wildcard. */ #define IPSEC_MODE_TRANSPORT 1 #define IPSEC_MODE_TUNNEL 2 #define IPSEC_MODE_TCPMD5 3 /* TCP MD5 mode */ /* * Direction of security policy. * NOTE: Since INVALID is used just as flag. * The other are used for loop counter too. */ #define IPSEC_DIR_ANY 0 #define IPSEC_DIR_INBOUND 1 #define IPSEC_DIR_OUTBOUND 2 #define IPSEC_DIR_MAX 3 #define IPSEC_DIR_INVALID 4 /* Policy level */ /* * IPSEC, ENTRUST and BYPASS are allowed for setsockopt() in PCB, * DISCARD, IPSEC and NONE are allowed for setkey() in SPD. * DISCARD and NONE are allowed for system default. */ #define IPSEC_POLICY_DISCARD 0 /* discarding packet */ #define IPSEC_POLICY_NONE 1 /* through IPsec engine */ #define IPSEC_POLICY_IPSEC 2 /* do IPsec */ #define IPSEC_POLICY_ENTRUST 3 /* consulting SPD if present. */ #define IPSEC_POLICY_BYPASS 4 /* only for privileged socket. */ /* Security protocol level */ #define IPSEC_LEVEL_DEFAULT 0 /* reference to system default */ #define IPSEC_LEVEL_USE 1 /* use SA if present. */ #define IPSEC_LEVEL_REQUIRE 2 /* require SA. */ #define IPSEC_LEVEL_UNIQUE 3 /* unique SA. */ #define IPSEC_MANUAL_REQID_MAX 0x3fff /* * if security policy level == unique, this id * indicate to a relative SA for use, else is * zero. * 1 - 0x3fff are reserved for manual keying. 
* 0 are reserved for above reason. Others is * for kernel use. * Note that this id doesn't identify SA * by only itself. */ #define IPSEC_REPLAYWSIZE 32 /* statistics for ipsec processing */ struct ipsecstat { uint64_t ips_in_polvio; /* input: sec policy violation */ uint64_t ips_in_nomem; /* input: no memory available */ uint64_t ips_in_inval; /* input: generic error */ uint64_t ips_out_polvio; /* output: sec policy violation */ uint64_t ips_out_nosa; /* output: SA unavailable */ uint64_t ips_out_nomem; /* output: no memory available */ uint64_t ips_out_noroute; /* output: no route available */ uint64_t ips_out_inval; /* output: generic error */ uint64_t ips_out_bundlesa; /* output: bundled SA processed */ uint64_t ips_mbcoalesced; /* mbufs coalesced during clone */ uint64_t ips_clcoalesced; /* clusters coalesced during clone */ uint64_t ips_clcopied; /* clusters copied during clone */ uint64_t ips_mbinserted; /* mbufs inserted during makespace */ /* * Temporary statistics for performance analysis. */ /* See where ESP/AH/IPCOMP header land in mbuf on input */ uint64_t ips_input_front; uint64_t ips_input_middle; uint64_t ips_input_end; }; /* * Definitions for IPsec & Key sysctl operations. */ #define IPSECCTL_STATS 1 /* stats */ #define IPSECCTL_DEF_POLICY 2 #define IPSECCTL_DEF_ESP_TRANSLEV 3 /* int; ESP transport mode */ #define IPSECCTL_DEF_ESP_NETLEV 4 /* int; ESP tunnel mode */ #define IPSECCTL_DEF_AH_TRANSLEV 5 /* int; AH transport mode */ #define IPSECCTL_DEF_AH_NETLEV 6 /* int; AH tunnel mode */ #if 0 /* obsolete, do not reuse */ #define IPSECCTL_INBOUND_CALL_IKE 7 #endif #define IPSECCTL_AH_CLEARTOS 8 #define IPSECCTL_AH_OFFSETMASK 9 #define IPSECCTL_DFBIT 10 #define IPSECCTL_ECN 11 #define IPSECCTL_DEBUG 12 #define IPSECCTL_ESP_RANDPAD 13 #ifdef _KERNEL #include VNET_DECLARE(int, ipsec_debug); #define V_ipsec_debug VNET(ipsec_debug) #ifdef REGRESSION VNET_DECLARE(int, ipsec_replay); VNET_DECLARE(int, ipsec_integrity); #define V_ipsec_replay VNET(ipsec_replay) #define V_ipsec_integrity VNET(ipsec_integrity) #endif VNET_PCPUSTAT_DECLARE(struct ipsecstat, ipsec4stat); VNET_DECLARE(int, ip4_esp_trans_deflev); VNET_DECLARE(int, ip4_esp_net_deflev); VNET_DECLARE(int, ip4_ah_trans_deflev); VNET_DECLARE(int, ip4_ah_net_deflev); VNET_DECLARE(int, ip4_ah_offsetmask); VNET_DECLARE(int, ip4_ipsec_dfbit); VNET_DECLARE(int, ip4_ipsec_ecn); VNET_DECLARE(int, ip4_esp_randpad); VNET_DECLARE(int, crypto_support); #define IPSECSTAT_INC(name) \ VNET_PCPUSTAT_ADD(struct ipsecstat, ipsec4stat, name, 1) #define V_ip4_esp_trans_deflev VNET(ip4_esp_trans_deflev) #define V_ip4_esp_net_deflev VNET(ip4_esp_net_deflev) #define V_ip4_ah_trans_deflev VNET(ip4_ah_trans_deflev) #define V_ip4_ah_net_deflev VNET(ip4_ah_net_deflev) #define V_ip4_ah_offsetmask VNET(ip4_ah_offsetmask) #define V_ip4_ipsec_dfbit VNET(ip4_ipsec_dfbit) #define V_ip4_ipsec_ecn VNET(ip4_ipsec_ecn) #define V_ip4_esp_randpad VNET(ip4_esp_randpad) #define V_crypto_support VNET(crypto_support) #define ipseclog(x) do { if (V_ipsec_debug) log x; } while (0) /* for openbsd compatibility */ #define DPRINTF(x) do { if (V_ipsec_debug) printf x; } while (0) extern struct ipsecrequest *ipsec_newisr(void); extern void ipsec_delisr(struct ipsecrequest *); struct tdb_ident; extern struct secpolicy *ipsec_getpolicy(struct tdb_ident*, u_int); struct inpcb; extern struct secpolicy *ipsec4_checkpolicy(struct mbuf *, u_int, int *, struct inpcb *); extern struct secpolicy * ipsec_getpolicybyaddr(struct mbuf *, u_int, int *); struct inpcb; extern int 
ipsec_init_policy(struct socket *so, struct inpcbpolicy **); extern int ipsec_copy_policy(struct inpcbpolicy *, struct inpcbpolicy *); extern u_int ipsec_get_reqlevel(struct ipsecrequest *); extern int ipsec_set_policy(struct inpcb *inp, int optname, caddr_t request, size_t len, struct ucred *cred); extern int ipsec_get_policy(struct inpcb *inpcb, caddr_t request, size_t len, struct mbuf **mp); extern int ipsec_delete_pcbpolicy(struct inpcb *); extern int ipsec4_in_reject(struct mbuf *, struct inpcb *); struct secas; struct tcpcb; extern int ipsec_chkreplay(u_int32_t, struct secasvar *); extern int ipsec_updatereplay(u_int32_t, struct secasvar *); extern size_t ipsec_hdrsiz(struct mbuf *, u_int, struct inpcb *); extern size_t ipsec_hdrsiz_tcp(struct tcpcb *); union sockaddr_union; extern char *ipsec_address(union sockaddr_union *, char *, socklen_t); extern char *ipsec_logsastr(struct secasvar *, char *, size_t); extern void ipsec_dumpmbuf(struct mbuf *); struct m_tag; extern int ah4_input(struct mbuf **mp, int *offp, int proto); extern void ah4_ctlinput(int cmd, struct sockaddr *sa, void *); extern int esp4_input(struct mbuf **mp, int *offp, int proto); extern void esp4_ctlinput(int cmd, struct sockaddr *sa, void *); extern int ipcomp4_input(struct mbuf **mp, int *offp, int proto); extern int ipsec_common_input(struct mbuf *m, int, int, int, int); extern int ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int protoff); extern int ipsec4_process_packet(struct mbuf *, struct ipsecrequest *); extern int ipsec_process_done(struct mbuf *, struct ipsecrequest *); extern struct mbuf *ipsec_copypkt(struct mbuf *); extern void m_checkalignment(const char* where, struct mbuf *m0, int off, int len); extern struct mbuf *m_makespace(struct mbuf *m0, int skip, int hlen, int *off); extern caddr_t m_pad(struct mbuf *m, int n); extern int m_striphdr(struct mbuf *m, int skip, int hlen); #ifdef DEV_ENC #define ENC_BEFORE 0x0001 #define ENC_AFTER 0x0002 #define ENC_IN 0x0100 #define ENC_OUT 0x0200 extern int ipsec_filter(struct mbuf **, int, int); extern void ipsec_bpf(struct mbuf *, struct secasvar *, int, int); #endif #endif /* _KERNEL */ #ifndef _KERNEL extern caddr_t ipsec_set_policy(char *, int); extern int ipsec_get_policylen(caddr_t); extern char *ipsec_dump_policy(caddr_t, char *); extern const char *ipsec_strerror(void); #endif /* ! KERNEL */ #endif /* _NETIPSEC_IPSEC_H_ */ Index: projects/powernv/netipsec/key.c =================================================================== --- projects/powernv/netipsec/key.c (revision 290990) +++ projects/powernv/netipsec/key.c (revision 290991) @@ -1,7847 +1,7870 @@ /* $FreeBSD$ */ /* $KAME: key.c,v 1.191 2001/06/27 10:46:49 sakane Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This code refers to RFC 2367. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif /* INET6 */ #if defined(INET) || defined(INET6) #include #endif #ifdef INET6 #include #endif /* INET6 */ #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include /* randomness */ #include #define FULLMASK 0xff #define _BITS(bytes) ((bytes) << 3) /* * Note on SA reference counting: * - SAs that are not in DEAD state hold (total external references + 1) * in the reference count field; they cannot be freed and are * referenced from the SA header. * - SAs that are in DEAD state hold (total external references) * in the reference count field; they are ready to be freed. The reference * from the SA header is removed in key_delsav(), when the reference count * field hits 0 (= no external reference other than from the SA header).
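 *
 * A toy model of this invariant, for reference; the struct and
 * function names here are hypothetical and not part of this file.
 */
struct sa_refmodel {
	u_int refcnt;		/* external refs, +1 while not DEAD */
	u_int state;		/* SADB_SASTATE_* */
};

static void
sa_refmodel_kill(struct sa_refmodel *sav)
{
	/* Entering DEAD gives up the SA header's implicit reference, */
	sav->state = SADB_SASTATE_DEAD;
	sav->refcnt--;
	/* so the SA is freed when the last external reference is
	 * dropped and refcnt reaches 0, as key_delsav() does. */
}
/*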
*/ VNET_DEFINE(u_int32_t, key_debug_level) = 0; static VNET_DEFINE(u_int, key_spi_trycnt) = 1000; static VNET_DEFINE(u_int32_t, key_spi_minval) = 0x100; static VNET_DEFINE(u_int32_t, key_spi_maxval) = 0x0fffffff; /* XXX */ static VNET_DEFINE(u_int32_t, policy_id) = 0; /*interval to initialize randseed,1(m)*/ static VNET_DEFINE(u_int, key_int_random) = 60; /* interval to expire acquiring, 30(s)*/ static VNET_DEFINE(u_int, key_larval_lifetime) = 30; /* counter for blocking SADB_ACQUIRE.*/ static VNET_DEFINE(int, key_blockacq_count) = 10; /* lifetime for blocking SADB_ACQUIRE.*/ static VNET_DEFINE(int, key_blockacq_lifetime) = 20; /* preferred old sa rather than new sa.*/ static VNET_DEFINE(int, key_preferred_oldsa) = 1; #define V_key_spi_trycnt VNET(key_spi_trycnt) #define V_key_spi_minval VNET(key_spi_minval) #define V_key_spi_maxval VNET(key_spi_maxval) #define V_policy_id VNET(policy_id) #define V_key_int_random VNET(key_int_random) #define V_key_larval_lifetime VNET(key_larval_lifetime) #define V_key_blockacq_count VNET(key_blockacq_count) #define V_key_blockacq_lifetime VNET(key_blockacq_lifetime) #define V_key_preferred_oldsa VNET(key_preferred_oldsa) static VNET_DEFINE(u_int32_t, acq_seq) = 0; #define V_acq_seq VNET(acq_seq) /* SPD */ static VNET_DEFINE(TAILQ_HEAD(_sptree, secpolicy), sptree[IPSEC_DIR_MAX]); static struct rmlock sptree_lock; #define V_sptree VNET(sptree) #define SPTREE_LOCK_INIT() rm_init(&sptree_lock, "sptree") #define SPTREE_LOCK_DESTROY() rm_destroy(&sptree_lock) #define SPTREE_RLOCK_TRACKER struct rm_priotracker sptree_tracker #define SPTREE_RLOCK() rm_rlock(&sptree_lock, &sptree_tracker) #define SPTREE_RUNLOCK() rm_runlock(&sptree_lock, &sptree_tracker) #define SPTREE_RLOCK_ASSERT() rm_assert(&sptree_lock, RA_RLOCKED) #define SPTREE_WLOCK() rm_wlock(&sptree_lock) #define SPTREE_WUNLOCK() rm_wunlock(&sptree_lock) #define SPTREE_WLOCK_ASSERT() rm_assert(&sptree_lock, RA_WLOCKED) #define SPTREE_UNLOCK_ASSERT() rm_assert(&sptree_lock, RA_UNLOCKED) static VNET_DEFINE(LIST_HEAD(_sahtree, secashead), sahtree); /* SAD */ #define V_sahtree VNET(sahtree) static struct mtx sahtree_lock; #define SAHTREE_LOCK_INIT() \ mtx_init(&sahtree_lock, "sahtree", \ "fast ipsec security association database", MTX_DEF) #define SAHTREE_LOCK_DESTROY() mtx_destroy(&sahtree_lock) #define SAHTREE_LOCK() mtx_lock(&sahtree_lock) #define SAHTREE_UNLOCK() mtx_unlock(&sahtree_lock) #define SAHTREE_LOCK_ASSERT() mtx_assert(&sahtree_lock, MA_OWNED) /* registed list */ static VNET_DEFINE(LIST_HEAD(_regtree, secreg), regtree[SADB_SATYPE_MAX + 1]); #define V_regtree VNET(regtree) static struct mtx regtree_lock; #define REGTREE_LOCK_INIT() \ mtx_init(®tree_lock, "regtree", "fast ipsec regtree", MTX_DEF) #define REGTREE_LOCK_DESTROY() mtx_destroy(®tree_lock) #define REGTREE_LOCK() mtx_lock(®tree_lock) #define REGTREE_UNLOCK() mtx_unlock(®tree_lock) #define REGTREE_LOCK_ASSERT() mtx_assert(®tree_lock, MA_OWNED) static VNET_DEFINE(LIST_HEAD(_acqtree, secacq), acqtree); /* acquiring list */ #define V_acqtree VNET(acqtree) static struct mtx acq_lock; #define ACQ_LOCK_INIT() \ mtx_init(&acq_lock, "acqtree", "fast ipsec acquire list", MTX_DEF) #define ACQ_LOCK_DESTROY() mtx_destroy(&acq_lock) #define ACQ_LOCK() mtx_lock(&acq_lock) #define ACQ_UNLOCK() mtx_unlock(&acq_lock) #define ACQ_LOCK_ASSERT() mtx_assert(&acq_lock, MA_OWNED) /* SP acquiring list */ static VNET_DEFINE(LIST_HEAD(_spacqtree, secspacq), spacqtree); #define V_spacqtree VNET(spacqtree) static struct mtx spacq_lock; #define SPACQ_LOCK_INIT() \ 
mtx_init(&spacq_lock, "spacqtree", \ "fast ipsec security policy acquire list", MTX_DEF) #define SPACQ_LOCK_DESTROY() mtx_destroy(&spacq_lock) #define SPACQ_LOCK() mtx_lock(&spacq_lock) #define SPACQ_UNLOCK() mtx_unlock(&spacq_lock) #define SPACQ_LOCK_ASSERT() mtx_assert(&spacq_lock, MA_OWNED) /* search order for SAs */ static const u_int saorder_state_valid_prefer_old[] = { SADB_SASTATE_DYING, SADB_SASTATE_MATURE, }; static const u_int saorder_state_valid_prefer_new[] = { SADB_SASTATE_MATURE, SADB_SASTATE_DYING, }; static const u_int saorder_state_alive[] = { /* except DEAD */ SADB_SASTATE_MATURE, SADB_SASTATE_DYING, SADB_SASTATE_LARVAL }; static const u_int saorder_state_any[] = { SADB_SASTATE_MATURE, SADB_SASTATE_DYING, SADB_SASTATE_LARVAL, SADB_SASTATE_DEAD }; static const int minsize[] = { sizeof(struct sadb_msg), /* SADB_EXT_RESERVED */ sizeof(struct sadb_sa), /* SADB_EXT_SA */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_CURRENT */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_HARD */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_SOFT */ sizeof(struct sadb_address), /* SADB_EXT_ADDRESS_SRC */ sizeof(struct sadb_address), /* SADB_EXT_ADDRESS_DST */ sizeof(struct sadb_address), /* SADB_EXT_ADDRESS_PROXY */ sizeof(struct sadb_key), /* SADB_EXT_KEY_AUTH */ sizeof(struct sadb_key), /* SADB_EXT_KEY_ENCRYPT */ sizeof(struct sadb_ident), /* SADB_EXT_IDENTITY_SRC */ sizeof(struct sadb_ident), /* SADB_EXT_IDENTITY_DST */ sizeof(struct sadb_sens), /* SADB_EXT_SENSITIVITY */ sizeof(struct sadb_prop), /* SADB_EXT_PROPOSAL */ sizeof(struct sadb_supported), /* SADB_EXT_SUPPORTED_AUTH */ sizeof(struct sadb_supported), /* SADB_EXT_SUPPORTED_ENCRYPT */ sizeof(struct sadb_spirange), /* SADB_EXT_SPIRANGE */ 0, /* SADB_X_EXT_KMPRIVATE */ sizeof(struct sadb_x_policy), /* SADB_X_EXT_POLICY */ sizeof(struct sadb_x_sa2), /* SADB_X_SA2 */ sizeof(struct sadb_x_nat_t_type),/* SADB_X_EXT_NAT_T_TYPE */ sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_SPORT */ sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_DPORT */ sizeof(struct sadb_address), /* SADB_X_EXT_NAT_T_OAI */ sizeof(struct sadb_address), /* SADB_X_EXT_NAT_T_OAR */ sizeof(struct sadb_x_nat_t_frag),/* SADB_X_EXT_NAT_T_FRAG */ }; static const int maxsize[] = { sizeof(struct sadb_msg), /* SADB_EXT_RESERVED */ sizeof(struct sadb_sa), /* SADB_EXT_SA */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_CURRENT */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_HARD */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_SOFT */ 0, /* SADB_EXT_ADDRESS_SRC */ 0, /* SADB_EXT_ADDRESS_DST */ 0, /* SADB_EXT_ADDRESS_PROXY */ 0, /* SADB_EXT_KEY_AUTH */ 0, /* SADB_EXT_KEY_ENCRYPT */ 0, /* SADB_EXT_IDENTITY_SRC */ 0, /* SADB_EXT_IDENTITY_DST */ 0, /* SADB_EXT_SENSITIVITY */ 0, /* SADB_EXT_PROPOSAL */ 0, /* SADB_EXT_SUPPORTED_AUTH */ 0, /* SADB_EXT_SUPPORTED_ENCRYPT */ sizeof(struct sadb_spirange), /* SADB_EXT_SPIRANGE */ 0, /* SADB_X_EXT_KMPRIVATE */ 0, /* SADB_X_EXT_POLICY */ sizeof(struct sadb_x_sa2), /* SADB_X_SA2 */ sizeof(struct sadb_x_nat_t_type),/* SADB_X_EXT_NAT_T_TYPE */ sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_SPORT */ sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_DPORT */ 0, /* SADB_X_EXT_NAT_T_OAI */ 0, /* SADB_X_EXT_NAT_T_OAR */ sizeof(struct sadb_x_nat_t_frag),/* SADB_X_EXT_NAT_T_FRAG */ }; static VNET_DEFINE(int, ipsec_esp_keymin) = 256; static VNET_DEFINE(int, ipsec_esp_auth) = 0; static VNET_DEFINE(int, ipsec_ah_keymin) = 128; #define V_ipsec_esp_keymin VNET(ipsec_esp_keymin) #define V_ipsec_esp_auth 
VNET(ipsec_esp_auth) #define V_ipsec_ah_keymin VNET(ipsec_ah_keymin) #ifdef SYSCTL_DECL SYSCTL_DECL(_net_key); #endif SYSCTL_INT(_net_key, KEYCTL_DEBUG_LEVEL, debug, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_debug_level), 0, ""); /* max count of trial for the decision of spi value */ SYSCTL_INT(_net_key, KEYCTL_SPI_TRY, spi_trycnt, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_spi_trycnt), 0, ""); /* minimum spi value to allocate automatically. */ SYSCTL_INT(_net_key, KEYCTL_SPI_MIN_VALUE, spi_minval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_spi_minval), 0, ""); /* maximun spi value to allocate automatically. */ SYSCTL_INT(_net_key, KEYCTL_SPI_MAX_VALUE, spi_maxval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_spi_maxval), 0, ""); /* interval to initialize randseed */ SYSCTL_INT(_net_key, KEYCTL_RANDOM_INT, int_random, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_int_random), 0, ""); /* lifetime for larval SA */ SYSCTL_INT(_net_key, KEYCTL_LARVAL_LIFETIME, larval_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_larval_lifetime), 0, ""); /* counter for blocking to send SADB_ACQUIRE to IKEd */ SYSCTL_INT(_net_key, KEYCTL_BLOCKACQ_COUNT, blockacq_count, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_blockacq_count), 0, ""); /* lifetime for blocking to send SADB_ACQUIRE to IKEd */ SYSCTL_INT(_net_key, KEYCTL_BLOCKACQ_LIFETIME, blockacq_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_blockacq_lifetime), 0, ""); /* ESP auth */ SYSCTL_INT(_net_key, KEYCTL_ESP_AUTH, esp_auth, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_esp_auth), 0, ""); /* minimum ESP key length */ SYSCTL_INT(_net_key, KEYCTL_ESP_KEYMIN, esp_keymin, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_esp_keymin), 0, ""); /* minimum AH key length */ SYSCTL_INT(_net_key, KEYCTL_AH_KEYMIN, ah_keymin, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_ah_keymin), 0, ""); /* perfered old SA rather than new SA */ SYSCTL_INT(_net_key, KEYCTL_PREFERED_OLDSA, preferred_oldsa, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_preferred_oldsa), 0, ""); #define __LIST_CHAINED(elm) \ (!((elm)->chain.le_next == NULL && (elm)->chain.le_prev == NULL)) #define LIST_INSERT_TAIL(head, elm, type, field) \ do {\ struct type *curelm = LIST_FIRST(head); \ if (curelm == NULL) {\ LIST_INSERT_HEAD(head, elm, field); \ } else { \ while (LIST_NEXT(curelm, field)) \ curelm = LIST_NEXT(curelm, field);\ LIST_INSERT_AFTER(curelm, elm, field);\ }\ } while (0) #define KEY_CHKSASTATE(head, sav, name) \ do { \ if ((head) != (sav)) { \ ipseclog((LOG_DEBUG, "%s: state mismatched (TREE=%d SA=%d)\n", \ (name), (head), (sav))); \ continue; \ } \ } while (0) #define KEY_CHKSPDIR(head, sp, name) \ do { \ if ((head) != (sp)) { \ ipseclog((LOG_DEBUG, "%s: direction mismatched (TREE=%d SP=%d), " \ "anyway continue.\n", \ (name), (head), (sp))); \ } \ } while (0) MALLOC_DEFINE(M_IPSEC_SA, "secasvar", "ipsec security association"); MALLOC_DEFINE(M_IPSEC_SAH, "sahead", "ipsec sa head"); MALLOC_DEFINE(M_IPSEC_SP, "ipsecpolicy", "ipsec security policy"); MALLOC_DEFINE(M_IPSEC_SR, "ipsecrequest", "ipsec security request"); MALLOC_DEFINE(M_IPSEC_MISC, "ipsec-misc", "ipsec miscellaneous"); MALLOC_DEFINE(M_IPSEC_SAQ, "ipsec-saq", "ipsec sa acquire"); MALLOC_DEFINE(M_IPSEC_SAR, "ipsec-reg", "ipsec sa acquire"); /* * set parameters into secpolicyindex buffer. * Must allocate secpolicyindex buffer passed to this function. 
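 *
 * For reference, what the macro below does, written long-hand for one
 * concrete outbound selector; example_fill_spidx() is hypothetical and
 * not part of this file.
 */
static void
example_fill_spidx(struct secpolicyindex *idx,
    const struct sockaddr *src, const struct sockaddr *dst)
{
	bzero(idx, sizeof(struct secpolicyindex));
	idx->dir = IPSEC_DIR_OUTBOUND;
	idx->prefs = 128;		/* host-to-host: full-length prefixes */
	idx->prefd = 128;
	idx->ul_proto = IPSEC_ULPROTO_ANY;
	bcopy(src, &idx->src, src->sa_len);	/* sa_len bounds the copy */
	bcopy(dst, &idx->dst, dst->sa_len);
}
/*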
*/ #define KEY_SETSECSPIDX(_dir, s, d, ps, pd, ulp, idx) \ do { \ bzero((idx), sizeof(struct secpolicyindex)); \ (idx)->dir = (_dir); \ (idx)->prefs = (ps); \ (idx)->prefd = (pd); \ (idx)->ul_proto = (ulp); \ bcopy((s), &(idx)->src, ((const struct sockaddr *)(s))->sa_len); \ bcopy((d), &(idx)->dst, ((const struct sockaddr *)(d))->sa_len); \ } while (0) /* * set parameters into a secasindex buffer. * The secasindex buffer must be allocated by the caller before calling this function. */ #define KEY_SETSECASIDX(p, m, r, s, d, idx) \ do { \ bzero((idx), sizeof(struct secasindex)); \ (idx)->proto = (p); \ (idx)->mode = (m); \ (idx)->reqid = (r); \ bcopy((s), &(idx)->src, ((const struct sockaddr *)(s))->sa_len); \ bcopy((d), &(idx)->dst, ((const struct sockaddr *)(d))->sa_len); \ } while (0) /* key statistics */ struct _keystat { u_long getspi_count; /* the average number of attempts needed to get a new SPI */ } keystat; struct sadb_msghdr { struct sadb_msg *msg; struct sadb_ext *ext[SADB_EXT_MAX + 1]; int extoff[SADB_EXT_MAX + 1]; int extlen[SADB_EXT_MAX + 1]; }; #ifndef IPSEC_DEBUG2 static struct callout key_timer; #endif static struct secasvar *key_allocsa_policy(const struct secasindex *); static void key_freesp_so(struct secpolicy **); static struct secasvar *key_do_allocsa_policy(struct secashead *, u_int); static void key_unlink(struct secpolicy *); static struct secpolicy *key_getsp(struct secpolicyindex *); static struct secpolicy *key_getspbyid(u_int32_t); static u_int32_t key_newreqid(void); static struct mbuf *key_gather_mbuf(struct mbuf *, const struct sadb_msghdr *, int, int, ...); static int key_spdadd(struct socket *, struct mbuf *, const struct sadb_msghdr *); static u_int32_t key_getnewspid(void); static int key_spddelete(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_spddelete2(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_spdget(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_spdflush(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_spddump(struct socket *, struct mbuf *, const struct sadb_msghdr *); static struct mbuf *key_setdumpsp(struct secpolicy *, u_int8_t, u_int32_t, u_int32_t); static u_int key_getspreqmsglen(struct secpolicy *); static int key_spdexpire(struct secpolicy *); static struct secashead *key_newsah(struct secasindex *); static void key_delsah(struct secashead *); static struct secasvar *key_newsav(struct mbuf *, const struct sadb_msghdr *, struct secashead *, int *, const char*, int); #define KEY_NEWSAV(m, sadb, sah, e) \ key_newsav(m, sadb, sah, e, __FILE__, __LINE__) static void key_delsav(struct secasvar *); static struct secashead *key_getsah(struct secasindex *); static struct secasvar *key_checkspidup(struct secasindex *, u_int32_t); static struct secasvar *key_getsavbyspi(struct secashead *, u_int32_t); static int key_setsaval(struct secasvar *, struct mbuf *, const struct sadb_msghdr *); static int key_mature(struct secasvar *); static struct mbuf *key_setdumpsa(struct secasvar *, u_int8_t, u_int8_t, u_int32_t, u_int32_t); static struct mbuf *key_setsadbmsg(u_int8_t, u_int16_t, u_int8_t, u_int32_t, pid_t, u_int16_t); static struct mbuf *key_setsadbsa(struct secasvar *); static struct mbuf *key_setsadbaddr(u_int16_t, const struct sockaddr *, u_int8_t, u_int16_t); #ifdef IPSEC_NAT_T static struct mbuf *key_setsadbxport(u_int16_t, u_int16_t); static struct mbuf *key_setsadbxtype(u_int16_t); #endif static void key_porttosaddr(struct sockaddr *, u_int16_t); #define 
KEY_PORTTOSADDR(saddr, port) \ key_porttosaddr((struct sockaddr *)(saddr), (port)) static struct mbuf *key_setsadbxsa2(u_int8_t, u_int32_t, u_int32_t); static struct mbuf *key_setsadbxpolicy(u_int16_t, u_int8_t, - u_int32_t); + u_int32_t, u_int32_t); static struct seckey *key_dup_keymsg(const struct sadb_key *, u_int, struct malloc_type *); static struct seclifetime *key_dup_lifemsg(const struct sadb_lifetime *src, struct malloc_type *type); #ifdef INET6 static int key_ismyaddr6(struct sockaddr_in6 *); #endif /* flags for key_cmpsaidx() */ #define CMP_HEAD 1 /* protocol, addresses. */ #define CMP_MODE_REQID 2 /* additionally HEAD, reqid, mode. */ #define CMP_REQID 3 /* additionally HEAD, reqid. */ #define CMP_EXACTLY 4 /* all elements. */ static int key_cmpsaidx(const struct secasindex *, const struct secasindex *, int); static int key_cmpspidx_exactly(struct secpolicyindex *, struct secpolicyindex *); static int key_cmpspidx_withmask(struct secpolicyindex *, struct secpolicyindex *); static int key_sockaddrcmp(const struct sockaddr *, const struct sockaddr *, int); static int key_bbcmp(const void *, const void *, u_int); static u_int16_t key_satype2proto(u_int8_t); static u_int8_t key_proto2satype(u_int16_t); static int key_getspi(struct socket *, struct mbuf *, const struct sadb_msghdr *); static u_int32_t key_do_getnewspi(struct sadb_spirange *, struct secasindex *); static int key_update(struct socket *, struct mbuf *, const struct sadb_msghdr *); #ifdef IPSEC_DOSEQCHECK static struct secasvar *key_getsavbyseq(struct secashead *, u_int32_t); #endif static int key_add(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_setident(struct secashead *, struct mbuf *, const struct sadb_msghdr *); static struct mbuf *key_getmsgbuf_x1(struct mbuf *, const struct sadb_msghdr *); static int key_delete(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_delete_all(struct socket *, struct mbuf *, const struct sadb_msghdr *, u_int16_t); static int key_get(struct socket *, struct mbuf *, const struct sadb_msghdr *); static void key_getcomb_setlifetime(struct sadb_comb *); static struct mbuf *key_getcomb_esp(void); static struct mbuf *key_getcomb_ah(void); static struct mbuf *key_getcomb_ipcomp(void); static struct mbuf *key_getprop(const struct secasindex *); static int key_acquire(const struct secasindex *, struct secpolicy *); static struct secacq *key_newacq(const struct secasindex *); static struct secacq *key_getacq(const struct secasindex *); static struct secacq *key_getacqbyseq(u_int32_t); static struct secspacq *key_newspacq(struct secpolicyindex *); static struct secspacq *key_getspacq(struct secpolicyindex *); static int key_acquire2(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_register(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_expire(struct secasvar *, int); static int key_flush(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_dump(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_promisc(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_senderror(struct socket *, struct mbuf *, int); static int key_validate_ext(const struct sadb_ext *, int); static int key_align(struct mbuf *, struct sadb_msghdr *); static struct mbuf *key_setlifetime(struct seclifetime *src, u_int16_t exttype); static struct mbuf *key_setkey(struct seckey *src, u_int16_t exttype); #if 0 static const char *key_getfqdn(void); static 
const char *key_getuserfqdn(void); #endif static void key_sa_chgstate(struct secasvar *, u_int8_t); static __inline void sa_initref(struct secasvar *sav) { refcount_init(&sav->refcnt, 1); } static __inline void sa_addref(struct secasvar *sav) { refcount_acquire(&sav->refcnt); IPSEC_ASSERT(sav->refcnt != 0, ("SA refcnt overflow")); } static __inline int sa_delref(struct secasvar *sav) { IPSEC_ASSERT(sav->refcnt > 0, ("SA refcnt underflow")); return (refcount_release(&sav->refcnt)); } #define SP_ADDREF(p) refcount_acquire(&(p)->refcnt) #define SP_DELREF(p) refcount_release(&(p)->refcnt) /* * Update the refcnt while holding the SPTREE lock. */ void key_addref(struct secpolicy *sp) { SP_ADDREF(sp); } /* * Return 0 when there are known to be no SP's for the specified * direction. Otherwise return 1. This is used by IPsec code * to optimize performance. */ int key_havesp(u_int dir) { return (dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND ? TAILQ_FIRST(&V_sptree[dir]) != NULL : 1); } /* %%% IPsec policy management */ /* * allocating a SP for OUTBOUND or INBOUND packet. * Must call key_freesp() later. * OUT: NULL: not found * others: found and return the pointer. */ struct secpolicy * key_allocsp(struct secpolicyindex *spidx, u_int dir, const char* where, int tag) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; IPSEC_ASSERT(spidx != NULL, ("null spidx")); IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, ("invalid direction %u", dir)); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s from %s:%u\n", __func__, where, tag)); /* get a SP entry */ KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("*** objects\n"); kdebug_secpolicyindex(spidx)); SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[dir], chain) { KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("*** in SPD\n"); kdebug_secpolicyindex(&sp->spidx)); if (key_cmpspidx_withmask(&sp->spidx, spidx)) goto found; } sp = NULL; found: if (sp) { /* sanity check */ KEY_CHKSPDIR(sp->spidx.dir, dir, __func__); /* found a SPD entry */ sp->lastused = time_second; SP_ADDREF(sp); } SPTREE_RUNLOCK(); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, sp, sp ? sp->id : 0, sp ? sp->refcnt : 0)); return sp; } /* * allocating a SP for OUTBOUND or INBOUND packet. * Must call key_freesp() later. * OUT: NULL: not found * others: found and return the pointer. 
*/ struct secpolicy * key_allocsp2(u_int32_t spi, union sockaddr_union *dst, u_int8_t proto, u_int dir, const char* where, int tag) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; IPSEC_ASSERT(dst != NULL, ("null dst")); IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, ("invalid direction %u", dir)); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s from %s:%u\n", __func__, where, tag)); /* get a SP entry */ KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("*** objects\n"); printf("spi %u proto %u dir %u\n", spi, proto, dir); kdebug_sockaddr(&dst->sa)); SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[dir], chain) { KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("*** in SPD\n"); kdebug_secpolicyindex(&sp->spidx)); /* compare simple values, then dst address */ if (sp->spidx.ul_proto != proto) continue; /* NB: spi's must exist and match */ if (!sp->req || !sp->req->sav || sp->req->sav->spi != spi) continue; if (key_sockaddrcmp(&sp->spidx.dst.sa, &dst->sa, 1) == 0) goto found; } sp = NULL; found: if (sp) { /* sanity check */ KEY_CHKSPDIR(sp->spidx.dir, dir, __func__); /* found a SPD entry */ sp->lastused = time_second; SP_ADDREF(sp); } SPTREE_RUNLOCK(); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, sp, sp ? sp->id : 0, sp ? sp->refcnt : 0)); return sp; } #if 0 /* * return a policy that matches this particular inbound packet. * XXX slow */ struct secpolicy * key_gettunnel(const struct sockaddr *osrc, const struct sockaddr *odst, const struct sockaddr *isrc, const struct sockaddr *idst, const char* where, int tag) { struct secpolicy *sp; const int dir = IPSEC_DIR_INBOUND; struct ipsecrequest *r1, *r2, *p; struct secpolicyindex spidx; KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s from %s:%u\n", __func__, where, tag)); if (isrc->sa_family != idst->sa_family) { ipseclog((LOG_ERR, "%s: protocol family mismatched %d != %d.\n", __func__, isrc->sa_family, idst->sa_family)); sp = NULL; goto done; } SPTREE_LOCK(); LIST_FOREACH(sp, &V_sptree[dir], chain) { if (sp->state == IPSEC_SPSTATE_DEAD) continue; r1 = r2 = NULL; for (p = sp->req; p; p = p->next) { if (p->saidx.mode != IPSEC_MODE_TUNNEL) continue; r1 = r2; r2 = p; if (!r1) { /* here we look at address matches only */ spidx = sp->spidx; if (isrc->sa_len > sizeof(spidx.src) || idst->sa_len > sizeof(spidx.dst)) continue; bcopy(isrc, &spidx.src, isrc->sa_len); bcopy(idst, &spidx.dst, idst->sa_len); if (!key_cmpspidx_withmask(&sp->spidx, &spidx)) continue; } else { if (key_sockaddrcmp(&r1->saidx.src.sa, isrc, 0) || key_sockaddrcmp(&r1->saidx.dst.sa, idst, 0)) continue; } if (key_sockaddrcmp(&r2->saidx.src.sa, osrc, 0) || key_sockaddrcmp(&r2->saidx.dst.sa, odst, 0)) continue; goto found; } } sp = NULL; found: if (sp) { sp->lastused = time_second; SP_ADDREF(sp); } SPTREE_UNLOCK(); done: KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, sp, sp ? sp->id : 0, sp ? sp->refcnt : 0)); return sp; } #endif /* * allocate an SA entry for an *OUTBOUND* packet. * check each request entry in the SP, and acquire an SA if needed. * OUT: 0: there are valid requests. * ENOENT: policy may be valid, but an SA with level REQUIRE is still being acquired. 
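* For example (illustrative): with level REQUIRE and no usable SA installed
* yet, an SADB_ACQUIRE is sent up to the registered key daemon via
* key_acquire() and ENOENT is returned; with level USE or DEFAULT the
* missing SA is tolerated and 0 is returned.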
*/ int key_checkrequest(struct ipsecrequest *isr, const struct secasindex *saidx) { u_int level; int error; struct secasvar *sav; IPSEC_ASSERT(isr != NULL, ("null isr")); IPSEC_ASSERT(saidx != NULL, ("null saidx")); IPSEC_ASSERT(saidx->mode == IPSEC_MODE_TRANSPORT || saidx->mode == IPSEC_MODE_TUNNEL, ("unexpected policy %u", saidx->mode)); /* * XXX guard against protocol callbacks from the crypto * thread as they reference ipsecrequest.sav which we * temporarily null out below. Need to rethink how we * handle bundled SA's in the callback thread. */ IPSECREQUEST_LOCK_ASSERT(isr); /* get current level */ level = ipsec_get_reqlevel(isr); /* * We check new SA in the IPsec request because a different * SA may be involved each time this request is checked, either * because new SAs are being configured, or this request is * associated with an unconnected datagram socket, or this request * is associated with a system default policy. * * key_allocsa_policy should allocate the oldest SA available. * See key_do_allocsa_policy(), and draft-jenkins-ipsec-rekeying-03.txt. */ sav = key_allocsa_policy(saidx); if (sav != isr->sav) { /* The SA needs to be updated. */ if (!IPSECREQUEST_UPGRADE(isr)) { /* Kick everyone off. */ IPSECREQUEST_UNLOCK(isr); IPSECREQUEST_WLOCK(isr); } if (isr->sav != NULL) KEY_FREESAV(&isr->sav); isr->sav = sav; IPSECREQUEST_DOWNGRADE(isr); } else if (sav != NULL) KEY_FREESAV(&sav); /* When there is an SA. */ if (isr->sav != NULL) { if (isr->sav->state != SADB_SASTATE_MATURE && isr->sav->state != SADB_SASTATE_DYING) return EINVAL; return 0; } /* there is no SA */ error = key_acquire(saidx, isr->sp); if (error != 0) { /* XXX What should I do ? */ ipseclog((LOG_DEBUG, "%s: error %d returned from key_acquire\n", __func__, error)); return error; } if (level != IPSEC_LEVEL_REQUIRE) { /* XXX sigh, the interface to this routine is botched */ IPSEC_ASSERT(isr->sav == NULL, ("unexpected SA")); return 0; } else { return ENOENT; } } /* * allocate an SA for a policy entry from the SAD. * NOTE: only SAs in a live (non-DEAD) state are searched. * OUT: NULL: not found. * others: found and return the pointer. */ static struct secasvar * key_allocsa_policy(const struct secasindex *saidx) { #define N(a) _ARRAYLEN(a) struct secashead *sah; struct secasvar *sav; u_int stateidx, arraysize; const u_int *state_valid; state_valid = NULL; /* silence gcc */ arraysize = 0; /* silence gcc */ SAHTREE_LOCK(); LIST_FOREACH(sah, &V_sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; if (key_cmpsaidx(&sah->saidx, saidx, CMP_MODE_REQID)) { if (V_key_preferred_oldsa) { state_valid = saorder_state_valid_prefer_old; arraysize = N(saorder_state_valid_prefer_old); } else { state_valid = saorder_state_valid_prefer_new; arraysize = N(saorder_state_valid_prefer_new); } break; } } SAHTREE_UNLOCK(); if (sah == NULL) return NULL; /* search valid state */ for (stateidx = 0; stateidx < arraysize; stateidx++) { sav = key_do_allocsa_policy(sah, state_valid[stateidx]); if (sav != NULL) return sav; } return NULL; #undef N } /* * search the SAD by direction, protocol, mode and state. * called by key_allocsa_policy(). * OUT: * NULL : not found * others : found, pointer to a SA. 
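* Whether the oldest or the newest live SA wins is controlled by the
* net.key.preferred_oldsa sysctl declared above; for example (illustrative):
*
*	sysctl net.key.preferred_oldsa=1
*
* makes the search return the oldest usable SA instead of the newest.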
*/ static struct secasvar * key_do_allocsa_policy(struct secashead *sah, u_int state) { struct secasvar *sav, *nextsav, *candidate, *d; /* initialize */ candidate = NULL; SAHTREE_LOCK(); for (sav = LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) { nextsav = LIST_NEXT(sav, chain); /* sanity check */ KEY_CHKSASTATE(sav->state, state, __func__); /* initialize */ if (candidate == NULL) { candidate = sav; continue; } /* Which SA is better? */ IPSEC_ASSERT(candidate->lft_c != NULL, ("null candidate lifetime")); IPSEC_ASSERT(sav->lft_c != NULL, ("null sav lifetime")); /* What is the best way to compare them? */ if (V_key_preferred_oldsa) { if (candidate->lft_c->addtime > sav->lft_c->addtime) { candidate = sav; } continue; /*NOTREACHED*/ } /* prefer new sa rather than old sa */ if (candidate->lft_c->addtime < sav->lft_c->addtime) { d = candidate; candidate = sav; } else d = sav; /* * be prepared to delete the SA when there is a more * suitable candidate and the lifetime of the SA is not * permanent. */ if (d->lft_h->addtime != 0) { struct mbuf *m, *result; u_int8_t satype; key_sa_chgstate(d, SADB_SASTATE_DEAD); IPSEC_ASSERT(d->refcnt > 0, ("bogus ref count")); satype = key_proto2satype(d->sah->saidx.proto); if (satype == 0) goto msgfail; m = key_setsadbmsg(SADB_DELETE, 0, satype, 0, 0, d->refcnt - 1); if (!m) goto msgfail; result = m; /* set sadb_address for saidx's. */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &d->sah->saidx.src.sa, d->sah->saidx.src.sa.sa_len << 3, IPSEC_ULPROTO_ANY); if (!m) goto msgfail; m_cat(result, m); /* set sadb_address for saidx's. */ m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &d->sah->saidx.dst.sa, d->sah->saidx.dst.sa.sa_len << 3, IPSEC_ULPROTO_ANY); if (!m) goto msgfail; m_cat(result, m); /* create SA extension */ m = key_setsadbsa(d); if (!m) goto msgfail; m_cat(result, m); if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) goto msgfail; } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); if (key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED)) goto msgfail; msgfail: KEY_FREESAV(&d); } } if (candidate) { sa_addref(candidate); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s cause refcnt++:%d SA:%p\n", __func__, candidate->refcnt, candidate)); } SAHTREE_UNLOCK(); return candidate; } /* * allocate a usable SA entry for an *INBOUND* packet. * Must call key_freesav() later. * OUT: positive: pointer to a usable sav (i.e. MATURE or DYING state). * NULL: not found, or error occurred. * * In the comparison, no source address is used--for RFC2401 conformance. * To quote, from section 4.1: * A security association is uniquely identified by a triple consisting * of a Security Parameter Index (SPI), an IP Destination Address, and a * security protocol (AH or ESP) identifier. * Note that, however, we do need to keep source address in IPsec SA. * IKE specification and PF_KEY specification do assume that we * keep source address in IPsec SA. We see a tricky situation here. 
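* An illustrative lookup for an inbound ESP packet (a sketch only; dst and
* spi come from the packet, and KEY_ALLOCSA is assumed to be the usual
* wrapper macro that supplies __FILE__/__LINE__ to this function):
*
*	sav = KEY_ALLOCSA(&dst, IPPROTO_ESP, spi);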
*/ struct secasvar * key_allocsa(union sockaddr_union *dst, u_int proto, u_int32_t spi, const char* where, int tag) { struct secashead *sah; struct secasvar *sav; u_int stateidx, arraysize, state; const u_int *saorder_state_valid; #ifdef IPSEC_NAT_T int natt_chkport; #endif IPSEC_ASSERT(dst != NULL, ("null dst address")); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s from %s:%u\n", __func__, where, tag)); #ifdef IPSEC_NAT_T natt_chkport = (dst->sa.sa_family == AF_INET && dst->sa.sa_len == sizeof(struct sockaddr_in) && dst->sin.sin_port != 0); #endif /* * searching SAD. * XXX: to be checked internal IP header somewhere. Also when * IPsec tunnel packet is received. But ESP tunnel mode is * encrypted so we can't check internal IP header. */ SAHTREE_LOCK(); if (V_key_preferred_oldsa) { saorder_state_valid = saorder_state_valid_prefer_old; arraysize = _ARRAYLEN(saorder_state_valid_prefer_old); } else { saorder_state_valid = saorder_state_valid_prefer_new; arraysize = _ARRAYLEN(saorder_state_valid_prefer_new); } LIST_FOREACH(sah, &V_sahtree, chain) { int checkport; /* search valid state */ for (stateidx = 0; stateidx < arraysize; stateidx++) { state = saorder_state_valid[stateidx]; LIST_FOREACH(sav, &sah->savtree[state], chain) { /* sanity check */ KEY_CHKSASTATE(sav->state, state, __func__); /* do not return entries w/ unusable state */ if (sav->state != SADB_SASTATE_MATURE && sav->state != SADB_SASTATE_DYING) continue; if (proto != sav->sah->saidx.proto) continue; if (spi != sav->spi) continue; checkport = 0; #ifdef IPSEC_NAT_T /* * Really only check ports when this is a NAT-T * SA. Otherwise other lookups providing ports * might suffer. */ if (sav->natt_type && natt_chkport) checkport = 1; #endif #if 0 /* don't check src */ /* check src address */ if (key_sockaddrcmp(&src->sa, &sav->sah->saidx.src.sa, checkport) != 0) continue; #endif /* check dst address */ if (key_sockaddrcmp(&dst->sa, &sav->sah->saidx.dst.sa, checkport) != 0) continue; sa_addref(sav); goto done; } } } sav = NULL; done: SAHTREE_UNLOCK(); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s return SA:%p; refcnt %u\n", __func__, sav, sav ? sav->refcnt : 0)); return sav; } /* * Must be called after calling key_allocsp(). * For both the packet without socket and key_freeso(). */ void _key_freesp(struct secpolicy **spp, const char* where, int tag) { struct ipsecrequest *isr, *nextisr; struct secpolicy *sp = *spp; IPSEC_ASSERT(sp != NULL, ("null sp")); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s SP:%p (ID=%u) from %s:%u; refcnt now %u\n", __func__, sp, sp->id, where, tag, sp->refcnt)); if (SP_DELREF(sp) == 0) return; *spp = NULL; for (isr = sp->req; isr != NULL; isr = nextisr) { if (isr->sav != NULL) { KEY_FREESAV(&isr->sav); isr->sav = NULL; } nextisr = isr->next; ipsec_delisr(isr); } free(sp, M_IPSEC_SP); } static void key_unlink(struct secpolicy *sp) { IPSEC_ASSERT(sp != NULL, ("null sp")); IPSEC_ASSERT(sp->spidx.dir == IPSEC_DIR_INBOUND || sp->spidx.dir == IPSEC_DIR_OUTBOUND, ("invalid direction %u", sp->spidx.dir)); SPTREE_UNLOCK_ASSERT(); SPTREE_WLOCK(); if (sp->state == IPSEC_SPSTATE_DEAD) { SPTREE_WUNLOCK(); return; } sp->state = IPSEC_SPSTATE_DEAD; TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain); SPTREE_WUNLOCK(); KEY_FREESP(&sp); } /* + * insert a secpolicy into the SP database. 
Lower priorities first + */ +static void +key_insertsp(struct secpolicy *newsp) +{ + struct secpolicy *sp; + + SPTREE_WLOCK(); + TAILQ_FOREACH(sp, &V_sptree[newsp->spidx.dir], chain) { + if (newsp->priority < sp->priority) { + TAILQ_INSERT_BEFORE(sp, newsp, chain); + goto done; + } + } + + TAILQ_INSERT_TAIL(&V_sptree[newsp->spidx.dir], newsp, chain); + +done: + newsp->state = IPSEC_SPSTATE_ALIVE; + SPTREE_WUNLOCK(); +} + +/* * Must be called after calling key_allocsp(). * For the packet with socket. */ void key_freeso(struct socket *so) { IPSEC_ASSERT(so != NULL, ("null so")); switch (so->so_proto->pr_domain->dom_family) { #if defined(INET) || defined(INET6) #ifdef INET case PF_INET: #endif #ifdef INET6 case PF_INET6: #endif { struct inpcb *pcb = sotoinpcb(so); /* Does it have a PCB ? */ if (pcb == NULL) return; key_freesp_so(&pcb->inp_sp->sp_in); key_freesp_so(&pcb->inp_sp->sp_out); } break; #endif /* INET || INET6 */ default: ipseclog((LOG_DEBUG, "%s: unknown address family=%d.\n", __func__, so->so_proto->pr_domain->dom_family)); return; } } static void key_freesp_so(struct secpolicy **sp) { IPSEC_ASSERT(sp != NULL && *sp != NULL, ("null sp")); if ((*sp)->policy == IPSEC_POLICY_ENTRUST || (*sp)->policy == IPSEC_POLICY_BYPASS) return; IPSEC_ASSERT((*sp)->policy == IPSEC_POLICY_IPSEC, ("invalid policy %u", (*sp)->policy)); KEY_FREESP(sp); } void key_addrefsa(struct secasvar *sav, const char* where, int tag) { IPSEC_ASSERT(sav != NULL, ("null sav")); IPSEC_ASSERT(sav->refcnt > 0, ("refcount must exist")); sa_addref(sav); } /* * Must be called after calling key_allocsa(). * This function is called by key_freesp() to free some SA allocated * for a policy. */ void key_freesav(struct secasvar **psav, const char* where, int tag) { struct secasvar *sav = *psav; IPSEC_ASSERT(sav != NULL, ("null sav")); if (sa_delref(sav)) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s SA:%p (SPI %u) from %s:%u; refcnt now %u\n", __func__, sav, ntohl(sav->spi), where, tag, sav->refcnt)); *psav = NULL; key_delsav(sav); } else { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s SA:%p (SPI %u) from %s:%u; refcnt now %u\n", __func__, sav, ntohl(sav->spi), where, tag, sav->refcnt)); } } /* %%% SPD management */ /* * search SPD * OUT: NULL : not found * others : found, pointer to a SP. */ static struct secpolicy * key_getsp(struct secpolicyindex *spidx) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; IPSEC_ASSERT(spidx != NULL, ("null spidx")); SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[spidx->dir], chain) { if (key_cmpspidx_exactly(spidx, &sp->spidx)) { SP_ADDREF(sp); break; } } SPTREE_RUNLOCK(); return sp; } /* * get SP by index. * OUT: NULL : not found * others : found, pointer to a SP. */ static struct secpolicy * key_getspbyid(u_int32_t id) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[IPSEC_DIR_INBOUND], chain) { if (sp->id == id) { SP_ADDREF(sp); goto done; } } TAILQ_FOREACH(sp, &V_sptree[IPSEC_DIR_OUTBOUND], chain) { if (sp->id == id) { SP_ADDREF(sp); goto done; } } done: SPTREE_RUNLOCK(); return sp; } struct secpolicy * key_newsp(const char* where, int tag) { struct secpolicy *newsp = NULL; newsp = (struct secpolicy *) malloc(sizeof(struct secpolicy), M_IPSEC_SP, M_NOWAIT|M_ZERO); if (newsp) refcount_init(&newsp->refcnt, 1); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s from %s:%u return SP:%p\n", __func__, where, tag, newsp)); return newsp; } /* * create secpolicy structure from sadb_x_policy structure. 
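* (The sadb_x_policy extension arrives from user space via PF_KEY; the new
* sadb_x_policy_priority field introduced by this change is copied into
* sp->priority below and later used by key_insertsp() to keep the SPD
* sorted.)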
* NOTE: `state', `secpolicyindex' in secpolicy structure are not set, * so must be set properly later. */ struct secpolicy * key_msg2sp(struct sadb_x_policy *xpl0, size_t len, int *error) { struct secpolicy *newsp; IPSEC_ASSERT(xpl0 != NULL, ("null xpl0")); IPSEC_ASSERT(len >= sizeof(*xpl0), ("policy too short: %zu", len)); if (len != PFKEY_EXTLEN(xpl0)) { ipseclog((LOG_DEBUG, "%s: Invalid msg length.\n", __func__)); *error = EINVAL; return NULL; } if ((newsp = KEY_NEWSP()) == NULL) { *error = ENOBUFS; return NULL; } newsp->spidx.dir = xpl0->sadb_x_policy_dir; newsp->policy = xpl0->sadb_x_policy_type; + newsp->priority = xpl0->sadb_x_policy_priority; /* check policy */ switch (xpl0->sadb_x_policy_type) { case IPSEC_POLICY_DISCARD: case IPSEC_POLICY_NONE: case IPSEC_POLICY_ENTRUST: case IPSEC_POLICY_BYPASS: newsp->req = NULL; break; case IPSEC_POLICY_IPSEC: { int tlen; struct sadb_x_ipsecrequest *xisr; struct ipsecrequest **p_isr = &newsp->req; /* validity check */ if (PFKEY_EXTLEN(xpl0) < sizeof(*xpl0)) { ipseclog((LOG_DEBUG, "%s: Invalid msg length.\n", __func__)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } tlen = PFKEY_EXTLEN(xpl0) - sizeof(*xpl0); xisr = (struct sadb_x_ipsecrequest *)(xpl0 + 1); while (tlen > 0) { /* length check */ if (xisr->sadb_x_ipsecrequest_len < sizeof(*xisr)) { ipseclog((LOG_DEBUG, "%s: invalid ipsecrequest " "length.\n", __func__)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } /* allocate request buffer */ /* NB: data structure is zero'd */ *p_isr = ipsec_newisr(); if ((*p_isr) == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); KEY_FREESP(&newsp); *error = ENOBUFS; return NULL; } /* set values */ switch (xisr->sadb_x_ipsecrequest_proto) { case IPPROTO_ESP: case IPPROTO_AH: case IPPROTO_IPCOMP: break; default: ipseclog((LOG_DEBUG, "%s: invalid proto type=%u\n", __func__, xisr->sadb_x_ipsecrequest_proto)); KEY_FREESP(&newsp); *error = EPROTONOSUPPORT; return NULL; } (*p_isr)->saidx.proto = xisr->sadb_x_ipsecrequest_proto; switch (xisr->sadb_x_ipsecrequest_mode) { case IPSEC_MODE_TRANSPORT: case IPSEC_MODE_TUNNEL: break; case IPSEC_MODE_ANY: default: ipseclog((LOG_DEBUG, "%s: invalid mode=%u\n", __func__, xisr->sadb_x_ipsecrequest_mode)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } (*p_isr)->saidx.mode = xisr->sadb_x_ipsecrequest_mode; switch (xisr->sadb_x_ipsecrequest_level) { case IPSEC_LEVEL_DEFAULT: case IPSEC_LEVEL_USE: case IPSEC_LEVEL_REQUIRE: break; case IPSEC_LEVEL_UNIQUE: /* validity check */ /* * If range violation of reqid, kernel will * update it, don't refuse it. */ if (xisr->sadb_x_ipsecrequest_reqid > IPSEC_MANUAL_REQID_MAX) { ipseclog((LOG_DEBUG, "%s: reqid=%d range " "violation, updated by kernel.\n", __func__, xisr->sadb_x_ipsecrequest_reqid)); xisr->sadb_x_ipsecrequest_reqid = 0; } /* allocate new reqid id if reqid is zero. */ if (xisr->sadb_x_ipsecrequest_reqid == 0) { u_int32_t reqid; if ((reqid = key_newreqid()) == 0) { KEY_FREESP(&newsp); *error = ENOBUFS; return NULL; } (*p_isr)->saidx.reqid = reqid; xisr->sadb_x_ipsecrequest_reqid = reqid; } else { /* set it for manual keying. 
*/ (*p_isr)->saidx.reqid = xisr->sadb_x_ipsecrequest_reqid; } break; default: ipseclog((LOG_DEBUG, "%s: invalid level=%u\n", __func__, xisr->sadb_x_ipsecrequest_level)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } (*p_isr)->level = xisr->sadb_x_ipsecrequest_level; /* set IP addresses if present */ if (xisr->sadb_x_ipsecrequest_len > sizeof(*xisr)) { struct sockaddr *paddr; paddr = (struct sockaddr *)(xisr + 1); /* validity check */ if (paddr->sa_len > sizeof((*p_isr)->saidx.src)) { ipseclog((LOG_DEBUG, "%s: invalid " "request address length.\n", __func__)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } bcopy(paddr, &(*p_isr)->saidx.src, paddr->sa_len); paddr = (struct sockaddr *)((caddr_t)paddr + paddr->sa_len); /* validity check */ if (paddr->sa_len > sizeof((*p_isr)->saidx.dst)) { ipseclog((LOG_DEBUG, "%s: invalid " "request address length.\n", __func__)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } bcopy(paddr, &(*p_isr)->saidx.dst, paddr->sa_len); } (*p_isr)->sp = newsp; /* initialization for the next. */ p_isr = &(*p_isr)->next; tlen -= xisr->sadb_x_ipsecrequest_len; /* validity check */ if (tlen < 0) { ipseclog((LOG_DEBUG, "%s: becoming tlen < 0.\n", __func__)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } xisr = (struct sadb_x_ipsecrequest *)((caddr_t)xisr + xisr->sadb_x_ipsecrequest_len); } } break; default: ipseclog((LOG_DEBUG, "%s: invalid policy type.\n", __func__)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } *error = 0; return newsp; } static u_int32_t key_newreqid() { static u_int32_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1; auto_reqid = (auto_reqid == ~0 ? IPSEC_MANUAL_REQID_MAX + 1 : auto_reqid + 1); /* XXX should check for uniqueness */ return auto_reqid; } /* * copy secpolicy struct to sadb_x_policy structure indicated. */ struct mbuf * key_sp2msg(struct secpolicy *sp) { struct sadb_x_policy *xpl; int tlen; caddr_t p; struct mbuf *m; IPSEC_ASSERT(sp != NULL, ("null policy")); tlen = key_getspreqmsglen(sp); m = m_get2(tlen, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, tlen); m->m_len = tlen; xpl = mtod(m, struct sadb_x_policy *); bzero(xpl, tlen); xpl->sadb_x_policy_len = PFKEY_UNIT64(tlen); xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY; xpl->sadb_x_policy_type = sp->policy; xpl->sadb_x_policy_dir = sp->spidx.dir; xpl->sadb_x_policy_id = sp->id; + xpl->sadb_x_policy_priority = sp->priority; p = (caddr_t)xpl + sizeof(*xpl); /* is this the policy for ipsec? */ if (sp->policy == IPSEC_POLICY_IPSEC) { struct sadb_x_ipsecrequest *xisr; struct ipsecrequest *isr; for (isr = sp->req; isr != NULL; isr = isr->next) { xisr = (struct sadb_x_ipsecrequest *)p; xisr->sadb_x_ipsecrequest_proto = isr->saidx.proto; xisr->sadb_x_ipsecrequest_mode = isr->saidx.mode; xisr->sadb_x_ipsecrequest_level = isr->level; xisr->sadb_x_ipsecrequest_reqid = isr->saidx.reqid; p += sizeof(*xisr); bcopy(&isr->saidx.src, p, isr->saidx.src.sa.sa_len); p += isr->saidx.src.sa.sa_len; bcopy(&isr->saidx.dst, p, isr->saidx.dst.sa.sa_len); p += isr->saidx.dst.sa.sa_len; xisr->sadb_x_ipsecrequest_len = PFKEY_ALIGN8(sizeof(*xisr) + isr->saidx.src.sa.sa_len + isr->saidx.dst.sa.sa_len); } } return m; }
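/*
 * An illustrative note on key_gather_mbuf() below (explanatory, not part of
 * this change): the first `ndeep' of the `nitem' extensions named in the
 * variadic list are deep-copied into fresh mbufs, while the remainder are
 * shared read-only via m_copym(). For example, the reply path in
 * key_spdadd() uses
 *
 *	n = key_gather_mbuf(m, mhp, 2, 4, SADB_EXT_RESERVED,
 *	    SADB_X_EXT_POLICY, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST);
 *
 * so the message header and policy extension are writable copies (the policy
 * id is patched in afterwards) while the address extensions still reference
 * the request mbuf.
 */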
/* m will not be freed nor modified */ static struct mbuf * key_gather_mbuf(struct mbuf *m, const struct sadb_msghdr *mhp, int ndeep, int nitem, ...) { va_list ap; int idx; int i; struct mbuf *result = NULL, *n; int len; IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); va_start(ap, nitem); for (i = 0; i < nitem; i++) { idx = va_arg(ap, int); if (idx < 0 || idx > SADB_EXT_MAX) goto fail; /* don't attempt to pull empty extension */ if (idx == SADB_EXT_RESERVED && mhp->msg == NULL) continue; if (idx != SADB_EXT_RESERVED && (mhp->ext[idx] == NULL || mhp->extlen[idx] == 0)) continue; if (idx == SADB_EXT_RESERVED) { len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); IPSEC_ASSERT(len <= MHLEN, ("header too big %u", len)); MGETHDR(n, M_NOWAIT, MT_DATA); if (!n) goto fail; n->m_len = len; n->m_next = NULL; m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t)); } else if (i < ndeep) { len = mhp->extlen[idx]; n = m_get2(len, M_NOWAIT, MT_DATA, 0); if (n == NULL) goto fail; m_align(n, len); n->m_len = len; m_copydata(m, mhp->extoff[idx], mhp->extlen[idx], mtod(n, caddr_t)); } else { n = m_copym(m, mhp->extoff[idx], mhp->extlen[idx], M_NOWAIT); } if (n == NULL) goto fail; if (result) m_cat(result, n); else result = n; } va_end(ap); if ((result->m_flags & M_PKTHDR) != 0) { result->m_pkthdr.len = 0; for (n = result; n; n = n->m_next) result->m_pkthdr.len += n->m_len; } return result; fail: m_freem(result); va_end(ap); return NULL; } /* * SADB_X_SPDADD, SADB_X_SPDSETIDX or SADB_X_SPDUPDATE processing * add an entry to SP database, when received * * from the user(?). * Adding to SP database, * and send * * to the socket which sent the message. * * SPDADD sets a unique policy entry. * SPDSETIDX is like SPDADD but without the policy request part. * SPDUPDATE replaces a unique policy entry. * * m will always be freed. */ static int key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_address *src0, *dst0; struct sadb_x_policy *xpl0, *xpl; struct sadb_lifetime *lft = NULL; struct secpolicyindex spidx; struct secpolicy *newsp; int error; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || mhp->ext[SADB_X_EXT_POLICY] == NULL) { ipseclog((LOG_DEBUG, "key_spdadd: invalid message is passed.\n")); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL) { if (mhp->extlen[SADB_EXT_LIFETIME_HARD] < sizeof(struct sadb_lifetime)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } lft = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_HARD]; } src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY]; /* * Note: do not parse SADB_X_EXT_NAT_T_* here: * we are processing traffic endpoints. */ /* make secindex */ /* XXX boundary check against sa_len */ KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, src0 + 1, dst0 + 1, src0->sadb_address_prefixlen, dst0->sadb_address_prefixlen, src0->sadb_address_proto, &spidx); /* checking the direction. 
*/ switch (xpl0->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: case IPSEC_DIR_OUTBOUND: break; default: ipseclog((LOG_DEBUG, "%s: Invalid SP direction.\n", __func__)); mhp->msg->sadb_msg_errno = EINVAL; return 0; } /* check policy */ /* key_spdadd() accepts DISCARD, NONE and IPSEC. */ if (xpl0->sadb_x_policy_type == IPSEC_POLICY_ENTRUST || xpl0->sadb_x_policy_type == IPSEC_POLICY_BYPASS) { ipseclog((LOG_DEBUG, "%s: Invalid policy type.\n", __func__)); return key_senderror(so, m, EINVAL); } /* policy requests are mandatory when action is ipsec. */ if (mhp->msg->sadb_msg_type != SADB_X_SPDSETIDX && xpl0->sadb_x_policy_type == IPSEC_POLICY_IPSEC && mhp->extlen[SADB_X_EXT_POLICY] <= sizeof(*xpl0)) { ipseclog((LOG_DEBUG, "%s: some policy requests part required\n", __func__)); return key_senderror(so, m, EINVAL); } /* * checking there is SP already or not. * SPDUPDATE doesn't depend on whether there is a SP or not. * If the type is either SPDADD or SPDSETIDX AND a SP is found, * then error. */ newsp = key_getsp(&spidx); if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { if (newsp) { key_unlink(newsp); KEY_FREESP(&newsp); } } else { if (newsp != NULL) { KEY_FREESP(&newsp); ipseclog((LOG_DEBUG, "%s: a SP entry exists already.\n", __func__)); return key_senderror(so, m, EEXIST); } } /* XXX: there is race between key_getsp and key_msg2sp. */ /* allocation new SP entry */ if ((newsp = key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error)) == NULL) { return key_senderror(so, m, error); } if ((newsp->id = key_getnewspid()) == 0) { KEY_FREESP(&newsp); return key_senderror(so, m, ENOBUFS); } /* XXX boundary check against sa_len */ KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, src0 + 1, dst0 + 1, src0->sadb_address_prefixlen, dst0->sadb_address_prefixlen, src0->sadb_address_proto, &newsp->spidx); /* sanity check on addr pair */ if (((struct sockaddr *)(src0 + 1))->sa_family != ((struct sockaddr *)(dst0+ 1))->sa_family) { KEY_FREESP(&newsp); return key_senderror(so, m, EINVAL); } if (((struct sockaddr *)(src0 + 1))->sa_len != ((struct sockaddr *)(dst0+ 1))->sa_len) { KEY_FREESP(&newsp); return key_senderror(so, m, EINVAL); } #if 1 if (newsp->req && newsp->req->saidx.src.sa.sa_family && newsp->req->saidx.dst.sa.sa_family) { if (newsp->req->saidx.src.sa.sa_family != newsp->req->saidx.dst.sa.sa_family) { KEY_FREESP(&newsp); return key_senderror(so, m, EINVAL); } } #endif newsp->created = time_second; newsp->lastused = newsp->created; newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0; newsp->validtime = lft ? lft->sadb_lifetime_usetime : 0; - SPTREE_WLOCK(); - TAILQ_INSERT_TAIL(&V_sptree[newsp->spidx.dir], newsp, chain); - newsp->state = IPSEC_SPSTATE_ALIVE; - SPTREE_WUNLOCK(); + key_insertsp(newsp); /* delete the entry in spacqtree */ if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { struct secspacq *spacq = key_getspacq(&spidx); if (spacq != NULL) { /* reset counter in order to deletion by timehandler. */ spacq->created = time_second; spacq->count = 0; SPACQ_UNLOCK(); } } { struct mbuf *n, *mpolicy; struct sadb_msg *newmsg; int off; /* * Note: do not send SADB_X_EXT_NAT_T_* here: * we are sending traffic endpoints. */ /* create new sadb_msg to reply. 
*/ if (lft) { n = key_gather_mbuf(m, mhp, 2, 5, SADB_EXT_RESERVED, SADB_X_EXT_POLICY, SADB_EXT_LIFETIME_HARD, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); } else { n = key_gather_mbuf(m, mhp, 2, 4, SADB_EXT_RESERVED, SADB_X_EXT_POLICY, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); } if (!n) return key_senderror(so, m, ENOBUFS); if (n->m_len < sizeof(*newmsg)) { n = m_pullup(n, sizeof(*newmsg)); if (!n) return key_senderror(so, m, ENOBUFS); } newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); off = 0; mpolicy = m_pulldown(n, PFKEY_ALIGN8(sizeof(struct sadb_msg)), sizeof(*xpl), &off); if (mpolicy == NULL) { /* n is already freed */ return key_senderror(so, m, ENOBUFS); } xpl = (struct sadb_x_policy *)(mtod(mpolicy, caddr_t) + off); if (xpl->sadb_x_policy_exttype != SADB_X_EXT_POLICY) { m_freem(n); return key_senderror(so, m, EINVAL); } xpl->sadb_x_policy_id = newsp->id; m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * get new policy id. * OUT: * 0: failure. * others: success. */ static u_int32_t key_getnewspid() { u_int32_t newid = 0; int count = V_key_spi_trycnt; /* XXX */ struct secpolicy *sp; /* loop until an unused policy id is found */ while (count--) { newid = (V_policy_id = (V_policy_id == ~0 ? 1 : V_policy_id + 1)); if ((sp = key_getspbyid(newid)) == NULL) break; KEY_FREESP(&sp); } if (count == 0 || newid == 0) { ipseclog((LOG_DEBUG, "%s: failed to allocate a new policy id.\n", __func__)); return 0; } return newid; } /* * SADB_SPDDELETE processing * receive * * from the user(?), and set SADB_SASTATE_DEAD, * and send, * * to the ikmpd. * policy(*) including direction of policy. * * m will always be freed. */ static int key_spddelete(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_address *src0, *dst0; struct sadb_x_policy *xpl0; struct secpolicyindex spidx; struct secpolicy *sp; IPSEC_ASSERT(so != NULL, ("null so")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || mhp->ext[SADB_X_EXT_POLICY] == NULL) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY]; /* * Note: do not parse SADB_X_EXT_NAT_T_* here: * we are processing traffic endpoints. */ /* make secindex */ /* XXX boundary check against sa_len */ KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, src0 + 1, dst0 + 1, src0->sadb_address_prefixlen, dst0->sadb_address_prefixlen, src0->sadb_address_proto, &spidx); /* checking the direction. */ switch (xpl0->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: case IPSEC_DIR_OUTBOUND: break; default: ipseclog((LOG_DEBUG, "%s: Invalid SP direction.\n", __func__)); return key_senderror(so, m, EINVAL); } /* Is there an SP in the SPD? 
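* (key_getsp() returns the entry with an extra reference; it is dropped
* via KEY_FREESP() right after key_unlink() below.)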
*/ if ((sp = key_getsp(&spidx)) == NULL) { ipseclog((LOG_DEBUG, "%s: no SP found.\n", __func__)); return key_senderror(so, m, EINVAL); } /* save policy id to buffer to be returned. */ xpl0->sadb_x_policy_id = sp->id; key_unlink(sp); KEY_FREESP(&sp); { struct mbuf *n; struct sadb_msg *newmsg; /* * Note: do not send SADB_X_EXT_NAT_T_* here: * we are sending traffic endpoints. */ /* create new sadb_msg to reply. */ n = key_gather_mbuf(m, mhp, 1, 4, SADB_EXT_RESERVED, SADB_X_EXT_POLICY, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n) return key_senderror(so, m, ENOBUFS); newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * SADB_SPDDELETE2 processing * receive * * from the user(?), and set SADB_SASTATE_DEAD, * and send, * * to the ikmpd. * policy(*) including direction of policy. * * m will always be freed. */ static int key_spddelete2(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { u_int32_t id; struct secpolicy *sp; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); if (mhp->ext[SADB_X_EXT_POLICY] == NULL || mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } id = ((struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; /* Is there SP in SPD ? */ if ((sp = key_getspbyid(id)) == NULL) { ipseclog((LOG_DEBUG, "%s: no SP found id:%u.\n", __func__, id)); return key_senderror(so, m, EINVAL); } key_unlink(sp); KEY_FREESP(&sp); { struct mbuf *n, *nn; struct sadb_msg *newmsg; int off, len; /* create new sadb_msg to reply. */ len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); MGETHDR(n, M_NOWAIT, MT_DATA); if (n && len > MHLEN) { if (!(MCLGET(n, M_NOWAIT))) { m_freem(n); n = NULL; } } if (!n) return key_senderror(so, m, ENOBUFS); n->m_len = len; n->m_next = NULL; off = 0; m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off); off += PFKEY_ALIGN8(sizeof(struct sadb_msg)); IPSEC_ASSERT(off == len, ("length inconsistency (off %u len %u)", off, len)); n->m_next = m_copym(m, mhp->extoff[SADB_X_EXT_POLICY], mhp->extlen[SADB_X_EXT_POLICY], M_NOWAIT); if (!n->m_next) { m_freem(n); return key_senderror(so, m, ENOBUFS); } n->m_pkthdr.len = 0; for (nn = n; nn; nn = nn->m_next) n->m_pkthdr.len += nn->m_len; newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * SADB_X_SPDGET processing * receive * * from the user(?), * and send, * * to the ikmpd. * policy(*) including direction of policy. * * m will always be freed. */ static int key_spdget(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { u_int32_t id; struct secpolicy *sp; struct mbuf *n; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); if (mhp->ext[SADB_X_EXT_POLICY] == NULL || mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } id = ((struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; /* Is there SP in SPD ? 
*/ if ((sp = key_getspbyid(id)) == NULL) { ipseclog((LOG_DEBUG, "%s: no SP found id:%u.\n", __func__, id)); return key_senderror(so, m, ENOENT); } n = key_setdumpsp(sp, SADB_X_SPDGET, mhp->msg->sadb_msg_seq, mhp->msg->sadb_msg_pid); KEY_FREESP(&sp); if (n != NULL) { m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } else return key_senderror(so, m, ENOBUFS); } /* * SADB_X_SPDACQUIRE processing. * Acquire policy and SA(s) for an *OUTBOUND* packet. * send * * to KMD, and expect to receive * with SADB_X_SPDACQUIRE if error occurred, * or * * with SADB_X_SPDUPDATE from KMD by PF_KEY. * policy(*) is without policy requests. * * 0 : succeed * others: error number */ int key_spdacquire(struct secpolicy *sp) { struct mbuf *result = NULL, *m; struct secspacq *newspacq; IPSEC_ASSERT(sp != NULL, ("null secpolicy")); IPSEC_ASSERT(sp->req == NULL, ("policy exists")); IPSEC_ASSERT(sp->policy == IPSEC_POLICY_IPSEC, ("policy not IPSEC %u", sp->policy)); /* Get an entry to check whether a message has already been sent. */ newspacq = key_getspacq(&sp->spidx); if (newspacq != NULL) { if (V_key_blockacq_count < newspacq->count) { /* reset the counter and send the message. */ newspacq->count = 0; } else { /* increment counter and do nothing. */ newspacq->count++; SPACQ_UNLOCK(); return (0); } SPACQ_UNLOCK(); } else { /* make new entry for blocking to send SADB_ACQUIRE. */ newspacq = key_newspacq(&sp->spidx); if (newspacq == NULL) return ENOBUFS; } /* create new sadb_msg to reply. */ m = key_setsadbmsg(SADB_X_SPDACQUIRE, 0, 0, 0, 0, 0); if (!m) return ENOBUFS; result = m; result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); } /* * SADB_SPDFLUSH processing * receive * * from the user, and free all entries in secpctree. * and send, * * to the user. * NOTE: all this does is mark the entries SADB_SASTATE_DEAD. * * m will always be freed. */ static int key_spdflush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { TAILQ_HEAD(, secpolicy) drainq; struct sadb_msg *newmsg; struct secpolicy *sp, *nextsp; u_int dir; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); if (m->m_len != PFKEY_ALIGN8(sizeof(struct sadb_msg))) return key_senderror(so, m, EINVAL); TAILQ_INIT(&drainq); SPTREE_WLOCK(); for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { TAILQ_CONCAT(&drainq, &V_sptree[dir], chain); } /* * We need to set state to DEAD for each policy to be sure * that another thread won't try to unlink it. */ TAILQ_FOREACH(sp, &drainq, chain) sp->state = IPSEC_SPSTATE_DEAD; SPTREE_WUNLOCK(); sp = TAILQ_FIRST(&drainq); while (sp != NULL) { nextsp = TAILQ_NEXT(sp, chain); KEY_FREESP(&sp); sp = nextsp; } if (sizeof(struct sadb_msg) > m->m_len + M_TRAILINGSPACE(m)) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return key_senderror(so, m, ENOBUFS); } if (m->m_next) m_freem(m->m_next); m->m_next = NULL; m->m_pkthdr.len = m->m_len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); newmsg = mtod(m, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(m->m_pkthdr.len); return key_sendup_mbuf(so, m, KEY_SENDUP_ALL); } /* * SADB_SPDDUMP processing * receive * * from the user, and dump all SP leaves * and send, * ..... * to the ikmpd. * * m will always be freed. 
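* (Each matching SP is encoded by key_setdumpsp() and delivered to the
* requesting socket as a separate message; the sequence number counts
* down and reaches zero on the final one.)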
*/ static int key_spddump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; int cnt; u_int dir; struct mbuf *n; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* search SPD entry and get buffer size. */ cnt = 0; SPTREE_RLOCK(); for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { TAILQ_FOREACH(sp, &V_sptree[dir], chain) { cnt++; } } if (cnt == 0) { SPTREE_RUNLOCK(); return key_senderror(so, m, ENOENT); } for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { TAILQ_FOREACH(sp, &V_sptree[dir], chain) { --cnt; n = key_setdumpsp(sp, SADB_X_SPDDUMP, cnt, mhp->msg->sadb_msg_pid); if (n) key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } } SPTREE_RUNLOCK(); m_freem(m); return 0; } static struct mbuf * key_setdumpsp(struct secpolicy *sp, u_int8_t type, u_int32_t seq, u_int32_t pid) { struct mbuf *result = NULL, *m; struct seclifetime lt; m = key_setsadbmsg(type, 0, SADB_SATYPE_UNSPEC, seq, pid, sp->refcnt); if (!m) goto fail; result = m; /* * Note: do not send SADB_X_EXT_NAT_T_* here: * we are sending traffic endpoints. */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sp->spidx.src.sa, sp->spidx.prefs, sp->spidx.ul_proto); if (!m) goto fail; m_cat(result, m); m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &sp->spidx.dst.sa, sp->spidx.prefd, sp->spidx.ul_proto); if (!m) goto fail; m_cat(result, m); m = key_sp2msg(sp); if (!m) goto fail; m_cat(result, m); if(sp->lifetime){ lt.addtime=sp->created; lt.usetime= sp->lastused; m = key_setlifetime(<, SADB_EXT_LIFETIME_CURRENT); if (!m) goto fail; m_cat(result, m); lt.addtime=sp->lifetime; lt.usetime= sp->validtime; m = key_setlifetime(<, SADB_EXT_LIFETIME_HARD); if (!m) goto fail; m_cat(result, m); } if ((result->m_flags & M_PKTHDR) == 0) goto fail; if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) goto fail; } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return result; fail: m_freem(result); return NULL; } /* * get PFKEY message length for security policy and request. */ static u_int key_getspreqmsglen(struct secpolicy *sp) { u_int tlen; tlen = sizeof(struct sadb_x_policy); /* if is the policy for ipsec ? */ if (sp->policy != IPSEC_POLICY_IPSEC) return tlen; /* get length of ipsec requests */ { struct ipsecrequest *isr; int len; for (isr = sp->req; isr != NULL; isr = isr->next) { len = sizeof(struct sadb_x_ipsecrequest) + isr->saidx.src.sa.sa_len + isr->saidx.dst.sa.sa_len; tlen += PFKEY_ALIGN8(len); } } return tlen; } /* * SADB_SPDEXPIRE processing * send * * to KMD by PF_KEY. * * OUT: 0 : succeed * others : error number */ static int key_spdexpire(struct secpolicy *sp) { struct mbuf *result = NULL, *m; int len; int error = -1; struct sadb_lifetime *lt; /* XXX: Why do we lock ? 
*/ IPSEC_ASSERT(sp != NULL, ("null secpolicy")); /* set msg header */ m = key_setsadbmsg(SADB_X_SPDEXPIRE, 0, 0, 0, 0, 0); if (!m) { error = ENOBUFS; goto fail; } result = m; /* create lifetime extension (current and hard) */ len = PFKEY_ALIGN8(sizeof(*lt)) * 2; m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) { error = ENOBUFS; goto fail; } m_align(m, len); m->m_len = len; bzero(mtod(m, caddr_t), len); lt = mtod(m, struct sadb_lifetime *); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; lt->sadb_lifetime_allocations = 0; lt->sadb_lifetime_bytes = 0; lt->sadb_lifetime_addtime = sp->created; lt->sadb_lifetime_usetime = sp->lastused; lt = (struct sadb_lifetime *)(mtod(m, caddr_t) + len / 2); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; lt->sadb_lifetime_allocations = 0; lt->sadb_lifetime_bytes = 0; lt->sadb_lifetime_addtime = sp->lifetime; lt->sadb_lifetime_usetime = sp->validtime; m_cat(result, m); /* * Note: do not send SADB_X_EXT_NAT_T_* here: * we are sending traffic endpoints. */ /* set sadb_address for source */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sp->spidx.src.sa, sp->spidx.prefs, sp->spidx.ul_proto); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* set sadb_address for destination */ m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &sp->spidx.dst.sa, sp->spidx.prefd, sp->spidx.ul_proto); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* set secpolicy */ m = key_sp2msg(sp); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); if ((result->m_flags & M_PKTHDR) == 0) { error = EINVAL; goto fail; } if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) { error = ENOBUFS; goto fail; } } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); fail: if (result) m_freem(result); return error; } /* %%% SAD management */ /* * allocate memory for a new SA head, and copy values from mhp. * OUT: NULL : failure due to the lack of memory. * others : pointer to new SA head. */ static struct secashead * key_newsah(struct secasindex *saidx) { struct secashead *newsah; IPSEC_ASSERT(saidx != NULL, ("null saidx")); newsah = malloc(sizeof(struct secashead), M_IPSEC_SAH, M_NOWAIT|M_ZERO); if (newsah != NULL) { int i; for (i = 0; i < sizeof(newsah->savtree)/sizeof(newsah->savtree[0]); i++) LIST_INIT(&newsah->savtree[i]); newsah->saidx = *saidx; /* add to saidxtree */ newsah->state = SADB_SASTATE_MATURE; SAHTREE_LOCK(); LIST_INSERT_HEAD(&V_sahtree, newsah, chain); SAHTREE_UNLOCK(); } return(newsah); } /* * delete the SA index and all SAs registered to it. */ static void key_delsah(struct secashead *sah) { struct secasvar *sav, *nextsav; u_int stateidx; int zombie = 0; IPSEC_ASSERT(sah != NULL, ("NULL sah")); SAHTREE_LOCK_ASSERT(); /* search all SAs registered in the secindex. 
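* (An SA that is still referenced cannot be freed here; it is counted as
* a zombie and the sah itself is preserved until those references drain.)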
*/ for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_any); stateidx++) { u_int state = saorder_state_any[stateidx]; LIST_FOREACH_SAFE(sav, &sah->savtree[state], chain, nextsav) { if (sav->refcnt == 0) { /* sanity check */ KEY_CHKSASTATE(state, sav->state, __func__); /* * do NOT call KEY_FREESAV here: * it will only delete the sav if refcnt == 1, * where we already know that refcnt == 0 */ key_delsav(sav); } else { /* give up on deleting this sa */ zombie++; } } } if (!zombie) { /* delete only if no savs remain */ /* remove from tree of SA index */ if (__LIST_CHAINED(sah)) LIST_REMOVE(sah, chain); free(sah, M_IPSEC_SAH); } } /* * allocate a new SA with LARVAL state. key_add() and key_getspi() call this, * and copy the values of mhp into the new buffer. * When the SADB message type is GETSPI: * to set sequence number from acq_seq++, * to set zero to SPI. * not to call key_setsaval(). * OUT: NULL : fail * others : pointer to new secasvar. * * does not modify mbuf. does not free mbuf on error. */ static struct secasvar * key_newsav(struct mbuf *m, const struct sadb_msghdr *mhp, struct secashead *sah, int *errp, const char *where, int tag) { struct secasvar *newsav; const struct sadb_sa *xsa; IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); IPSEC_ASSERT(sah != NULL, ("null secashead")); newsav = malloc(sizeof(struct secasvar), M_IPSEC_SA, M_NOWAIT|M_ZERO); if (newsav == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); *errp = ENOBUFS; goto done; } switch (mhp->msg->sadb_msg_type) { case SADB_GETSPI: newsav->spi = 0; #ifdef IPSEC_DOSEQCHECK /* sync sequence number */ if (mhp->msg->sadb_msg_seq == 0) newsav->seq = (V_acq_seq = (V_acq_seq == ~0 ? 1 : ++V_acq_seq)); else #endif newsav->seq = mhp->msg->sadb_msg_seq; break; case SADB_ADD: /* sanity check */ if (mhp->ext[SADB_EXT_SA] == NULL) { free(newsav, M_IPSEC_SA); newsav = NULL; ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); *errp = EINVAL; goto done; } xsa = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; newsav->spi = xsa->sadb_sa_spi; newsav->seq = mhp->msg->sadb_msg_seq; break; default: free(newsav, M_IPSEC_SA); newsav = NULL; *errp = EINVAL; goto done; } /* copy sav values */ if (mhp->msg->sadb_msg_type != SADB_GETSPI) { *errp = key_setsaval(newsav, m, mhp); if (*errp) { free(newsav, M_IPSEC_SA); newsav = NULL; goto done; } } SECASVAR_LOCK_INIT(newsav); /* reset created */ newsav->created = time_second; newsav->pid = mhp->msg->sadb_msg_pid; /* add to satree */ newsav->sah = sah; sa_initref(newsav); newsav->state = SADB_SASTATE_LARVAL; SAHTREE_LOCK(); LIST_INSERT_TAIL(&sah->savtree[SADB_SASTATE_LARVAL], newsav, secasvar, chain); SAHTREE_UNLOCK(); done: KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s from %s:%u return SA:%p\n", __func__, where, tag, newsav)); return newsav; } /* * clean up an SA variable entry. */ static void key_cleansav(struct secasvar *sav) { /* * Cleanup xform state. Note that zeroize'ing causes the * keys to be cleared; otherwise we must do it ourselves. 
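* (Key material is always zeroed before its backing memory is freed, so
* stale keys never linger in the malloc pool.)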
*/ if (sav->tdb_xform != NULL) { sav->tdb_xform->xf_zeroize(sav); sav->tdb_xform = NULL; } else { if (sav->key_auth != NULL) bzero(sav->key_auth->key_data, _KEYLEN(sav->key_auth)); if (sav->key_enc != NULL) bzero(sav->key_enc->key_data, _KEYLEN(sav->key_enc)); } if (sav->key_auth != NULL) { if (sav->key_auth->key_data != NULL) free(sav->key_auth->key_data, M_IPSEC_MISC); free(sav->key_auth, M_IPSEC_MISC); sav->key_auth = NULL; } if (sav->key_enc != NULL) { if (sav->key_enc->key_data != NULL) free(sav->key_enc->key_data, M_IPSEC_MISC); free(sav->key_enc, M_IPSEC_MISC); sav->key_enc = NULL; } if (sav->sched) { bzero(sav->sched, sav->schedlen); free(sav->sched, M_IPSEC_MISC); sav->sched = NULL; } if (sav->replay != NULL) { free(sav->replay, M_IPSEC_MISC); sav->replay = NULL; } if (sav->lft_c != NULL) { free(sav->lft_c, M_IPSEC_MISC); sav->lft_c = NULL; } if (sav->lft_h != NULL) { free(sav->lft_h, M_IPSEC_MISC); sav->lft_h = NULL; } if (sav->lft_s != NULL) { free(sav->lft_s, M_IPSEC_MISC); sav->lft_s = NULL; } } /* * free() an SA variable entry. */ static void key_delsav(struct secasvar *sav) { IPSEC_ASSERT(sav != NULL, ("null sav")); IPSEC_ASSERT(sav->refcnt == 0, ("reference count %u > 0", sav->refcnt)); /* remove from SA header */ if (__LIST_CHAINED(sav)) LIST_REMOVE(sav, chain); key_cleansav(sav); SECASVAR_LOCK_DESTROY(sav); free(sav, M_IPSEC_SA); } /* * search SAD. * OUT: * NULL : not found * others : found, pointer to a SA. */ static struct secashead * key_getsah(struct secasindex *saidx) { struct secashead *sah; SAHTREE_LOCK(); LIST_FOREACH(sah, &V_sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; if (key_cmpsaidx(&sah->saidx, saidx, CMP_REQID)) break; } SAHTREE_UNLOCK(); return sah; } /* * Check that the given SPI is not a duplicate. * NOTE: this function is slow because it searches the whole SAD. * OUT: * NULL : not found * others : found, pointer to a SA. */ static struct secasvar * key_checkspidup(struct secasindex *saidx, u_int32_t spi) { struct secashead *sah; struct secasvar *sav; /* check address family */ if (saidx->src.sa.sa_family != saidx->dst.sa.sa_family) { ipseclog((LOG_DEBUG, "%s: address family mismatched.\n", __func__)); return NULL; } sav = NULL; /* check all SAD */ SAHTREE_LOCK(); LIST_FOREACH(sah, &V_sahtree, chain) { if (!key_ismyaddr((struct sockaddr *)&sah->saidx.dst)) continue; sav = key_getsavbyspi(sah, spi); if (sav != NULL) break; } SAHTREE_UNLOCK(); return sav; } /* * Search the SAD, limited to alive SAs, for the given SPI. * OUT: * NULL : not found * others : found, pointer to a SA. */ static struct secasvar * key_getsavbyspi(struct secashead *sah, u_int32_t spi) { struct secasvar *sav; u_int stateidx, state; sav = NULL; SAHTREE_LOCK_ASSERT(); /* search all status */ for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_alive); stateidx++) { state = saorder_state_alive[stateidx]; LIST_FOREACH(sav, &sah->savtree[state], chain) { /* sanity check */ if (sav->state != state) { ipseclog((LOG_DEBUG, "%s: " "invalid sav->state (queue: %d SA: %d)\n", __func__, state, sav->state)); continue; } if (sav->spi == spi) return sav; } } return NULL; } /* * copy SA values from PF_KEY message except *SPI, SEQ, PID, STATE and TYPE*. * You must update these if needed. * OUT: 0: success. * !0: failure. * * does not modify mbuf. does not free mbuf on error.
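 *
 * Illustrative layout of the extensions a typical SADB_ADD for ESP
 * delivers into this function (a sketch, not a wire dump):
 *
 *	sadb_msg                 base header
 *	sadb_sa                  SPI, replay window, algorithms, flags
 *	sadb_lifetime            HARD and/or SOFT, both optional
 *	sadb_address x 2         SRC and DST traffic endpoints
 *	sadb_key                 AUTH and/or ENCRYPT, length in bits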
*/ static int key_setsaval(struct secasvar *sav, struct mbuf *m, const struct sadb_msghdr *mhp) { int error = 0; IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* initialization */ sav->replay = NULL; sav->key_auth = NULL; sav->key_enc = NULL; sav->sched = NULL; sav->schedlen = 0; sav->lft_c = NULL; sav->lft_h = NULL; sav->lft_s = NULL; sav->tdb_xform = NULL; /* transform */ sav->tdb_encalgxform = NULL; /* encoding algorithm */ sav->tdb_authalgxform = NULL; /* authentication algorithm */ sav->tdb_compalgxform = NULL; /* compression algorithm */ /* Initialize even if NAT-T not compiled in: */ sav->natt_type = 0; sav->natt_esp_frag_len = 0; /* SA */ if (mhp->ext[SADB_EXT_SA] != NULL) { const struct sadb_sa *sa0; sa0 = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; if (mhp->extlen[SADB_EXT_SA] < sizeof(*sa0)) { error = EINVAL; goto fail; } sav->alg_auth = sa0->sadb_sa_auth; sav->alg_enc = sa0->sadb_sa_encrypt; sav->flags = sa0->sadb_sa_flags; /* replay window */ if ((sa0->sadb_sa_flags & SADB_X_EXT_OLD) == 0) { sav->replay = (struct secreplay *) malloc(sizeof(struct secreplay)+sa0->sadb_sa_replay, M_IPSEC_MISC, M_NOWAIT|M_ZERO); if (sav->replay == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); error = ENOBUFS; goto fail; } if (sa0->sadb_sa_replay != 0) sav->replay->bitmap = (caddr_t)(sav->replay+1); sav->replay->wsize = sa0->sadb_sa_replay; } } /* Authentication keys */ if (mhp->ext[SADB_EXT_KEY_AUTH] != NULL) { const struct sadb_key *key0; int len; key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_AUTH]; len = mhp->extlen[SADB_EXT_KEY_AUTH]; error = 0; if (len < sizeof(*key0)) { error = EINVAL; goto fail; } switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_AH: case SADB_SATYPE_ESP: case SADB_X_SATYPE_TCPSIGNATURE: if (len == PFKEY_ALIGN8(sizeof(struct sadb_key)) && sav->alg_auth != SADB_X_AALG_NULL) error = EINVAL; break; case SADB_X_SATYPE_IPCOMP: default: error = EINVAL; break; } if (error) { ipseclog((LOG_DEBUG, "%s: invalid key_auth values.\n", __func__)); goto fail; } sav->key_auth = (struct seckey *)key_dup_keymsg(key0, len, M_IPSEC_MISC); if (sav->key_auth == NULL ) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); error = ENOBUFS; goto fail; } } /* Encryption key */ if (mhp->ext[SADB_EXT_KEY_ENCRYPT] != NULL) { const struct sadb_key *key0; int len; key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_ENCRYPT]; len = mhp->extlen[SADB_EXT_KEY_ENCRYPT]; error = 0; if (len < sizeof(*key0)) { error = EINVAL; goto fail; } switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_ESP: if (len == PFKEY_ALIGN8(sizeof(struct sadb_key)) && sav->alg_enc != SADB_EALG_NULL) { error = EINVAL; break; } sav->key_enc = (struct seckey *)key_dup_keymsg(key0, len, M_IPSEC_MISC); if (sav->key_enc == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); error = ENOBUFS; goto fail; } break; case SADB_X_SATYPE_IPCOMP: if (len != PFKEY_ALIGN8(sizeof(struct sadb_key))) error = EINVAL; sav->key_enc = NULL; /*just in case*/ break; case SADB_SATYPE_AH: case SADB_X_SATYPE_TCPSIGNATURE: default: error = EINVAL; break; } if (error) { ipseclog((LOG_DEBUG, "%s: invalid key_enc value.\n", __func__)); goto fail; } } /* set iv */ sav->ivlen = 0; switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_AH: error = xform_init(sav, XF_AH); break; case SADB_SATYPE_ESP: error = xform_init(sav, XF_ESP); break; case SADB_X_SATYPE_IPCOMP: error = xform_init(sav, XF_IPCOMP); break; case 
SADB_X_SATYPE_TCPSIGNATURE: error = xform_init(sav, XF_TCPSIGNATURE); break; } if (error) { ipseclog((LOG_DEBUG, "%s: unable to initialize SA type %u.\n", __func__, mhp->msg->sadb_msg_satype)); goto fail; } /* reset created */ sav->created = time_second; /* make lifetime for CURRENT */ sav->lft_c = malloc(sizeof(struct seclifetime), M_IPSEC_MISC, M_NOWAIT); if (sav->lft_c == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); error = ENOBUFS; goto fail; } sav->lft_c->allocations = 0; sav->lft_c->bytes = 0; sav->lft_c->addtime = time_second; sav->lft_c->usetime = 0; /* lifetimes for HARD and SOFT */ { const struct sadb_lifetime *lft0; lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_HARD]; if (lft0 != NULL) { if (mhp->extlen[SADB_EXT_LIFETIME_HARD] < sizeof(*lft0)) { error = EINVAL; goto fail; } sav->lft_h = key_dup_lifemsg(lft0, M_IPSEC_MISC); if (sav->lft_h == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); error = ENOBUFS; goto fail; } /* XXX more initialization needed? */ } lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_SOFT]; if (lft0 != NULL) { if (mhp->extlen[SADB_EXT_LIFETIME_SOFT] < sizeof(*lft0)) { error = EINVAL; goto fail; } sav->lft_s = key_dup_lifemsg(lft0, M_IPSEC_MISC); if (sav->lft_s == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); error = ENOBUFS; goto fail; } /* XXX more initialization needed? */ } } return 0; fail: /* undo partial initialization */ key_cleansav(sav); return error; } /* * Validate a secasvar entry and set its state to SADB_SASTATE_MATURE. * OUT: 0: valid * other: errno */ static int key_mature(struct secasvar *sav) { int error; /* check SPI value */ switch (sav->sah->saidx.proto) { case IPPROTO_ESP: case IPPROTO_AH: /* * RFC 4302, 2.4. Security Parameters Index (SPI), SPI values * 1-255 reserved by IANA for future use, * 0 for implementation specific, local use. */ if (ntohl(sav->spi) <= 255) { ipseclog((LOG_DEBUG, "%s: illegal range of SPI %u.\n", __func__, (u_int32_t)ntohl(sav->spi))); return EINVAL; } break; } /* check satype */ switch (sav->sah->saidx.proto) { case IPPROTO_ESP: /* check flags */ if ((sav->flags & (SADB_X_EXT_OLD|SADB_X_EXT_DERIV)) == (SADB_X_EXT_OLD|SADB_X_EXT_DERIV)) { ipseclog((LOG_DEBUG, "%s: invalid flag (derived) " "given to old-esp.\n", __func__)); return EINVAL; } error = xform_init(sav, XF_ESP); break; case IPPROTO_AH: /* check flags */ if (sav->flags & SADB_X_EXT_DERIV) { ipseclog((LOG_DEBUG, "%s: invalid flag (derived) " "given to AH SA.\n", __func__)); return EINVAL; } if (sav->alg_enc != SADB_EALG_NONE) { ipseclog((LOG_DEBUG, "%s: protocol and algorithm " "mismatched.\n", __func__)); return(EINVAL); } error = xform_init(sav, XF_AH); break; case IPPROTO_IPCOMP: if (sav->alg_auth != SADB_AALG_NONE) { ipseclog((LOG_DEBUG, "%s: protocol and algorithm " "mismatched.\n", __func__)); return(EINVAL); } if ((sav->flags & SADB_X_EXT_RAWCPI) == 0 && ntohl(sav->spi) >= 0x10000) { ipseclog((LOG_DEBUG, "%s: invalid cpi for IPComp.\n", __func__)); return(EINVAL); } error = xform_init(sav, XF_IPCOMP); break; case IPPROTO_TCP: if (sav->alg_enc != SADB_EALG_NONE) { ipseclog((LOG_DEBUG, "%s: protocol and algorithm " "mismatched.\n", __func__)); return(EINVAL); } error = xform_init(sav, XF_TCPSIGNATURE); break; default: ipseclog((LOG_DEBUG, "%s: Invalid satype.\n", __func__)); error = EPROTONOSUPPORT; break; } if (error == 0) { SAHTREE_LOCK(); key_sa_chgstate(sav, SADB_SASTATE_MATURE); SAHTREE_UNLOCK(); } return (error); } /* * subroutine for SADB_GET and SADB_DUMP.
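 *
 * A minimal sketch of how a userland consumer might walk the reply
 * assembled here; "msg" names a complete sadb_msg in a local buffer,
 * and msg and handle() are illustrative placeholders only:
 *
 *	struct sadb_ext *ext = (struct sadb_ext *)(msg + 1);
 *	caddr_t end = (caddr_t)msg + PFKEY_UNUNIT64(msg->sadb_msg_len);
 *	while ((caddr_t)ext < end) {
 *		handle(ext->sadb_ext_type, ext);
 *		ext = (struct sadb_ext *)((caddr_t)ext +
 *		    PFKEY_UNUNIT64(ext->sadb_ext_len));
 *	}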
*/ static struct mbuf * key_setdumpsa(struct secasvar *sav, u_int8_t type, u_int8_t satype, u_int32_t seq, u_int32_t pid) { struct mbuf *result = NULL, *tres = NULL, *m; int i; int dumporder[] = { SADB_EXT_SA, SADB_X_EXT_SA2, SADB_EXT_LIFETIME_HARD, SADB_EXT_LIFETIME_SOFT, SADB_EXT_LIFETIME_CURRENT, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST, SADB_EXT_ADDRESS_PROXY, SADB_EXT_KEY_AUTH, SADB_EXT_KEY_ENCRYPT, SADB_EXT_IDENTITY_SRC, SADB_EXT_IDENTITY_DST, SADB_EXT_SENSITIVITY, #ifdef IPSEC_NAT_T SADB_X_EXT_NAT_T_TYPE, SADB_X_EXT_NAT_T_SPORT, SADB_X_EXT_NAT_T_DPORT, SADB_X_EXT_NAT_T_OAI, SADB_X_EXT_NAT_T_OAR, SADB_X_EXT_NAT_T_FRAG, #endif }; m = key_setsadbmsg(type, 0, satype, seq, pid, sav->refcnt); if (m == NULL) goto fail; result = m; for (i = sizeof(dumporder)/sizeof(dumporder[0]) - 1; i >= 0; i--) { m = NULL; switch (dumporder[i]) { case SADB_EXT_SA: m = key_setsadbsa(sav); if (!m) goto fail; break; case SADB_X_EXT_SA2: m = key_setsadbxsa2(sav->sah->saidx.mode, sav->replay ? sav->replay->count : 0, sav->sah->saidx.reqid); if (!m) goto fail; break; case SADB_EXT_ADDRESS_SRC: m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sav->sah->saidx.src.sa, FULLMASK, IPSEC_ULPROTO_ANY); if (!m) goto fail; break; case SADB_EXT_ADDRESS_DST: m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &sav->sah->saidx.dst.sa, FULLMASK, IPSEC_ULPROTO_ANY); if (!m) goto fail; break; case SADB_EXT_KEY_AUTH: if (!sav->key_auth) continue; m = key_setkey(sav->key_auth, SADB_EXT_KEY_AUTH); if (!m) goto fail; break; case SADB_EXT_KEY_ENCRYPT: if (!sav->key_enc) continue; m = key_setkey(sav->key_enc, SADB_EXT_KEY_ENCRYPT); if (!m) goto fail; break; case SADB_EXT_LIFETIME_CURRENT: if (!sav->lft_c) continue; m = key_setlifetime(sav->lft_c, SADB_EXT_LIFETIME_CURRENT); if (!m) goto fail; break; case SADB_EXT_LIFETIME_HARD: if (!sav->lft_h) continue; m = key_setlifetime(sav->lft_h, SADB_EXT_LIFETIME_HARD); if (!m) goto fail; break; case SADB_EXT_LIFETIME_SOFT: if (!sav->lft_s) continue; m = key_setlifetime(sav->lft_s, SADB_EXT_LIFETIME_SOFT); if (!m) goto fail; break; #ifdef IPSEC_NAT_T case SADB_X_EXT_NAT_T_TYPE: m = key_setsadbxtype(sav->natt_type); if (!m) goto fail; break; case SADB_X_EXT_NAT_T_DPORT: m = key_setsadbxport( KEY_PORTFROMSADDR(&sav->sah->saidx.dst), SADB_X_EXT_NAT_T_DPORT); if (!m) goto fail; break; case SADB_X_EXT_NAT_T_SPORT: m = key_setsadbxport( KEY_PORTFROMSADDR(&sav->sah->saidx.src), SADB_X_EXT_NAT_T_SPORT); if (!m) goto fail; break; case SADB_X_EXT_NAT_T_OAI: case SADB_X_EXT_NAT_T_OAR: case SADB_X_EXT_NAT_T_FRAG: /* We do not (yet) support those. */ continue; #endif case SADB_EXT_ADDRESS_PROXY: case SADB_EXT_IDENTITY_SRC: case SADB_EXT_IDENTITY_DST: /* XXX: should we brought from SPD ? */ case SADB_EXT_SENSITIVITY: default: continue; } if (!m) goto fail; if (tres) m_cat(m, tres); tres = m; } m_cat(result, tres); if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) goto fail; } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return result; fail: m_freem(result); m_freem(tres); return NULL; } /* * set data into sadb_msg. 
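 *
 * All PF_KEY lengths are expressed in 8-byte units: for instance a
 * bare 16-byte sadb_msg has PFKEY_UNIT64(16) == 2 stored in
 * sadb_msg_len, and a receiver recovers the byte count with
 * PFKEY_UNUNIT64(2) == 16.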
*/ static struct mbuf * key_setsadbmsg(u_int8_t type, u_int16_t tlen, u_int8_t satype, u_int32_t seq, pid_t pid, u_int16_t reserved) { struct mbuf *m; struct sadb_msg *p; int len; len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); if (len > MCLBYTES) return NULL; MGETHDR(m, M_NOWAIT, MT_DATA); if (m && len > MHLEN) { if (!(MCLGET(m, M_NOWAIT))) { m_freem(m); m = NULL; } } if (!m) return NULL; m->m_pkthdr.len = m->m_len = len; m->m_next = NULL; p = mtod(m, struct sadb_msg *); bzero(p, len); p->sadb_msg_version = PF_KEY_V2; p->sadb_msg_type = type; p->sadb_msg_errno = 0; p->sadb_msg_satype = satype; p->sadb_msg_len = PFKEY_UNIT64(tlen); p->sadb_msg_reserved = reserved; p->sadb_msg_seq = seq; p->sadb_msg_pid = (u_int32_t)pid; return m; } /* * copy secasvar data into sadb_address. */ static struct mbuf * key_setsadbsa(struct secasvar *sav) { struct mbuf *m; struct sadb_sa *p; int len; len = PFKEY_ALIGN8(sizeof(struct sadb_sa)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_sa *); bzero(p, len); p->sadb_sa_len = PFKEY_UNIT64(len); p->sadb_sa_exttype = SADB_EXT_SA; p->sadb_sa_spi = sav->spi; p->sadb_sa_replay = (sav->replay != NULL ? sav->replay->wsize : 0); p->sadb_sa_state = sav->state; p->sadb_sa_auth = sav->alg_auth; p->sadb_sa_encrypt = sav->alg_enc; p->sadb_sa_flags = sav->flags; return m; } /* * set data into sadb_address. */ static struct mbuf * key_setsadbaddr(u_int16_t exttype, const struct sockaddr *saddr, u_int8_t prefixlen, u_int16_t ul_proto) { struct mbuf *m; struct sadb_address *p; size_t len; len = PFKEY_ALIGN8(sizeof(struct sadb_address)) + PFKEY_ALIGN8(saddr->sa_len); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_address *); bzero(p, len); p->sadb_address_len = PFKEY_UNIT64(len); p->sadb_address_exttype = exttype; p->sadb_address_proto = ul_proto; if (prefixlen == FULLMASK) { switch (saddr->sa_family) { case AF_INET: prefixlen = sizeof(struct in_addr) << 3; break; case AF_INET6: prefixlen = sizeof(struct in6_addr) << 3; break; default: ; /*XXX*/ } } p->sadb_address_prefixlen = prefixlen; p->sadb_address_reserved = 0; bcopy(saddr, mtod(m, caddr_t) + PFKEY_ALIGN8(sizeof(struct sadb_address)), saddr->sa_len); return m; } /* * set data into sadb_x_sa2. */ static struct mbuf * key_setsadbxsa2(u_int8_t mode, u_int32_t seq, u_int32_t reqid) { struct mbuf *m; struct sadb_x_sa2 *p; size_t len; len = PFKEY_ALIGN8(sizeof(struct sadb_x_sa2)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_x_sa2 *); bzero(p, len); p->sadb_x_sa2_len = PFKEY_UNIT64(len); p->sadb_x_sa2_exttype = SADB_X_EXT_SA2; p->sadb_x_sa2_mode = mode; p->sadb_x_sa2_reserved1 = 0; p->sadb_x_sa2_reserved2 = 0; p->sadb_x_sa2_sequence = seq; p->sadb_x_sa2_reqid = reqid; return m; } #ifdef IPSEC_NAT_T /* * Set a type in sadb_x_nat_t_type. */ static struct mbuf * key_setsadbxtype(u_int16_t type) { struct mbuf *m; size_t len; struct sadb_x_nat_t_type *p; len = PFKEY_ALIGN8(sizeof(struct sadb_x_nat_t_type)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_x_nat_t_type *); bzero(p, len); p->sadb_x_nat_t_type_len = PFKEY_UNIT64(len); p->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE; p->sadb_x_nat_t_type_type = type; return (m); } /* * Set a port in sadb_x_nat_t_port. 
* In contrast to default RFC 2367 behaviour, port is in network byte order. */ static struct mbuf * key_setsadbxport(u_int16_t port, u_int16_t type) { struct mbuf *m; size_t len; struct sadb_x_nat_t_port *p; len = PFKEY_ALIGN8(sizeof(struct sadb_x_nat_t_port)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_x_nat_t_port *); bzero(p, len); p->sadb_x_nat_t_port_len = PFKEY_UNIT64(len); p->sadb_x_nat_t_port_exttype = type; p->sadb_x_nat_t_port_port = port; return (m); } /* * Get port from sockaddr. Port is in network byte order. */ u_int16_t key_portfromsaddr(struct sockaddr *sa) { switch (sa->sa_family) { #ifdef INET case AF_INET: return ((struct sockaddr_in *)sa)->sin_port; #endif #ifdef INET6 case AF_INET6: return ((struct sockaddr_in6 *)sa)->sin6_port; #endif } KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s unexpected address family %d\n", __func__, sa->sa_family)); return (0); } #endif /* IPSEC_NAT_T */ /* * Set port in struct sockaddr. Port is in network byte order. */ static void key_porttosaddr(struct sockaddr *sa, u_int16_t port) { switch (sa->sa_family) { #ifdef INET case AF_INET: ((struct sockaddr_in *)sa)->sin_port = port; break; #endif #ifdef INET6 case AF_INET6: ((struct sockaddr_in6 *)sa)->sin6_port = port; break; #endif default: ipseclog((LOG_DEBUG, "%s: unexpected address family %d.\n", __func__, sa->sa_family)); break; } } /* * set data into sadb_x_policy */ static struct mbuf * -key_setsadbxpolicy(u_int16_t type, u_int8_t dir, u_int32_t id) +key_setsadbxpolicy(u_int16_t type, u_int8_t dir, u_int32_t id, u_int32_t priority) { struct mbuf *m; struct sadb_x_policy *p; size_t len; len = PFKEY_ALIGN8(sizeof(struct sadb_x_policy)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_x_policy *); bzero(p, len); p->sadb_x_policy_len = PFKEY_UNIT64(len); p->sadb_x_policy_exttype = SADB_X_EXT_POLICY; p->sadb_x_policy_type = type; p->sadb_x_policy_dir = dir; p->sadb_x_policy_id = id; + p->sadb_x_policy_priority = priority; return m; } /* %%% utilities */ /* Take a key message (sadb_key) from the socket and turn it into one * of the kernel's key structures (seckey). * * IN: pointer to the src * OUT: NULL no more memory */ struct seckey * key_dup_keymsg(const struct sadb_key *src, u_int len, struct malloc_type *type) { struct seckey *dst; dst = (struct seckey *)malloc(sizeof(struct seckey), type, M_NOWAIT); if (dst != NULL) { dst->bits = src->sadb_key_bits; dst->key_data = (char *)malloc(len, type, M_NOWAIT); if (dst->key_data != NULL) { bcopy((const char *)src + sizeof(struct sadb_key), dst->key_data, len); } else { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); free(dst, type); dst = NULL; } } else { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); } return dst; } /* Take a lifetime message (sadb_lifetime) passed in on a socket and * turn it into one of the kernel's lifetime structures (seclifetime). 
* * IN: pointer to the destination, source and malloc type * OUT: NULL, no more memory */ static struct seclifetime * key_dup_lifemsg(const struct sadb_lifetime *src, struct malloc_type *type) { struct seclifetime *dst = NULL; dst = (struct seclifetime *)malloc(sizeof(struct seclifetime), type, M_NOWAIT); if (dst == NULL) { /* XXX counter */ ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); } else { dst->allocations = src->sadb_lifetime_allocations; dst->bytes = src->sadb_lifetime_bytes; dst->addtime = src->sadb_lifetime_addtime; dst->usetime = src->sadb_lifetime_usetime; } return dst; } /* compare my own address * OUT: 1: true, i.e. my address. * 0: false */ int key_ismyaddr(struct sockaddr *sa) { IPSEC_ASSERT(sa != NULL, ("null sockaddr")); switch (sa->sa_family) { #ifdef INET case AF_INET: return (in_localip(satosin(sa)->sin_addr)); #endif #ifdef INET6 case AF_INET6: return key_ismyaddr6((struct sockaddr_in6 *)sa); #endif } return 0; } #ifdef INET6 /* * compare my own address for IPv6. * 1: ours * 0: other */ static int key_ismyaddr6(struct sockaddr_in6 *sin6) { struct in6_addr in6; if (!IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) return (in6_localip(&sin6->sin6_addr)); /* Convert address into kernel-internal form */ in6 = sin6->sin6_addr; in6.s6_addr16[1] = htons(sin6->sin6_scope_id & 0xffff); return (in6_localip(&in6)); } #endif /*INET6*/ /* * compare two secasindex structure. * flag can specify to compare 2 saidxes. * compare two secasindex structure without both mode and reqid. * don't compare port. * IN: * saidx0: source, it can be in SAD. * saidx1: object. * OUT: * 1 : equal * 0 : not equal */ static int key_cmpsaidx(const struct secasindex *saidx0, const struct secasindex *saidx1, int flag) { int chkport = 0; /* sanity */ if (saidx0 == NULL && saidx1 == NULL) return 1; if (saidx0 == NULL || saidx1 == NULL) return 0; if (saidx0->proto != saidx1->proto) return 0; if (flag == CMP_EXACTLY) { if (saidx0->mode != saidx1->mode) return 0; if (saidx0->reqid != saidx1->reqid) return 0; if (bcmp(&saidx0->src, &saidx1->src, saidx0->src.sa.sa_len) != 0 || bcmp(&saidx0->dst, &saidx1->dst, saidx0->dst.sa.sa_len) != 0) return 0; } else { /* CMP_MODE_REQID, CMP_REQID, CMP_HEAD */ if (flag == CMP_MODE_REQID ||flag == CMP_REQID) { /* * If reqid of SPD is non-zero, unique SA is required. * The result must be of same reqid in this case. */ if (saidx1->reqid != 0 && saidx0->reqid != saidx1->reqid) return 0; } if (flag == CMP_MODE_REQID) { if (saidx0->mode != IPSEC_MODE_ANY && saidx0->mode != saidx1->mode) return 0; } #ifdef IPSEC_NAT_T /* * If NAT-T is enabled, check ports for tunnel mode. * Do not check ports if they are set to zero in the SPD. * Also do not do it for native transport mode, as there * is no port information available in the SP. */ if ((saidx1->mode == IPSEC_MODE_TUNNEL || (saidx1->mode == IPSEC_MODE_TRANSPORT && saidx1->proto == IPPROTO_ESP)) && saidx1->src.sa.sa_family == AF_INET && saidx1->dst.sa.sa_family == AF_INET && ((const struct sockaddr_in *)(&saidx1->src))->sin_port && ((const struct sockaddr_in *)(&saidx1->dst))->sin_port) chkport = 1; #endif /* IPSEC_NAT_T */ if (key_sockaddrcmp(&saidx0->src.sa, &saidx1->src.sa, chkport) != 0) { return 0; } if (key_sockaddrcmp(&saidx0->dst.sa, &saidx1->dst.sa, chkport) != 0) { return 0; } } return 1; } /* * compare two secindex structure exactly. * IN: * spidx0: source, it is often in SPD. * spidx1: object, it is often from PFKEY message. 
* OUT: * 1 : equal * 0 : not equal */ static int key_cmpspidx_exactly(struct secpolicyindex *spidx0, struct secpolicyindex *spidx1) { /* sanity */ if (spidx0 == NULL && spidx1 == NULL) return 1; if (spidx0 == NULL || spidx1 == NULL) return 0; if (spidx0->prefs != spidx1->prefs || spidx0->prefd != spidx1->prefd || spidx0->ul_proto != spidx1->ul_proto) return 0; return key_sockaddrcmp(&spidx0->src.sa, &spidx1->src.sa, 1) == 0 && key_sockaddrcmp(&spidx0->dst.sa, &spidx1->dst.sa, 1) == 0; } /* * compare two secindex structure with mask. * IN: * spidx0: source, it is often in SPD. * spidx1: object, it is often from IP header. * OUT: * 1 : equal * 0 : not equal */ static int key_cmpspidx_withmask(struct secpolicyindex *spidx0, struct secpolicyindex *spidx1) { /* sanity */ if (spidx0 == NULL && spidx1 == NULL) return 1; if (spidx0 == NULL || spidx1 == NULL) return 0; if (spidx0->src.sa.sa_family != spidx1->src.sa.sa_family || spidx0->dst.sa.sa_family != spidx1->dst.sa.sa_family || spidx0->src.sa.sa_len != spidx1->src.sa.sa_len || spidx0->dst.sa.sa_len != spidx1->dst.sa.sa_len) return 0; /* if spidx.ul_proto == IPSEC_ULPROTO_ANY, ignore. */ if (spidx0->ul_proto != (u_int16_t)IPSEC_ULPROTO_ANY && spidx0->ul_proto != spidx1->ul_proto) return 0; switch (spidx0->src.sa.sa_family) { case AF_INET: if (spidx0->src.sin.sin_port != IPSEC_PORT_ANY && spidx0->src.sin.sin_port != spidx1->src.sin.sin_port) return 0; if (!key_bbcmp(&spidx0->src.sin.sin_addr, &spidx1->src.sin.sin_addr, spidx0->prefs)) return 0; break; case AF_INET6: if (spidx0->src.sin6.sin6_port != IPSEC_PORT_ANY && spidx0->src.sin6.sin6_port != spidx1->src.sin6.sin6_port) return 0; /* * scope_id check. if sin6_scope_id is 0, we regard it * as a wildcard scope, which matches any scope zone ID. */ if (spidx0->src.sin6.sin6_scope_id && spidx1->src.sin6.sin6_scope_id && spidx0->src.sin6.sin6_scope_id != spidx1->src.sin6.sin6_scope_id) return 0; if (!key_bbcmp(&spidx0->src.sin6.sin6_addr, &spidx1->src.sin6.sin6_addr, spidx0->prefs)) return 0; break; default: /* XXX */ if (bcmp(&spidx0->src, &spidx1->src, spidx0->src.sa.sa_len) != 0) return 0; break; } switch (spidx0->dst.sa.sa_family) { case AF_INET: if (spidx0->dst.sin.sin_port != IPSEC_PORT_ANY && spidx0->dst.sin.sin_port != spidx1->dst.sin.sin_port) return 0; if (!key_bbcmp(&spidx0->dst.sin.sin_addr, &spidx1->dst.sin.sin_addr, spidx0->prefd)) return 0; break; case AF_INET6: if (spidx0->dst.sin6.sin6_port != IPSEC_PORT_ANY && spidx0->dst.sin6.sin6_port != spidx1->dst.sin6.sin6_port) return 0; /* * scope_id check. if sin6_scope_id is 0, we regard it * as a wildcard scope, which matches any scope zone ID. */ if (spidx0->dst.sin6.sin6_scope_id && spidx1->dst.sin6.sin6_scope_id && spidx0->dst.sin6.sin6_scope_id != spidx1->dst.sin6.sin6_scope_id) return 0; if (!key_bbcmp(&spidx0->dst.sin6.sin6_addr, &spidx1->dst.sin6.sin6_addr, spidx0->prefd)) return 0; break; default: /* XXX */ if (bcmp(&spidx0->dst, &spidx1->dst, spidx0->dst.sa.sa_len) != 0) return 0; break; } /* XXX Do we check other field ? e.g. 
flowinfo */ return 1; } /* returns 0 on match */ static int key_sockaddrcmp(const struct sockaddr *sa1, const struct sockaddr *sa2, int port) { #ifdef satosin #undef satosin #endif #define satosin(s) ((const struct sockaddr_in *)s) #ifdef satosin6 #undef satosin6 #endif #define satosin6(s) ((const struct sockaddr_in6 *)s) if (sa1->sa_family != sa2->sa_family || sa1->sa_len != sa2->sa_len) return 1; switch (sa1->sa_family) { case AF_INET: if (sa1->sa_len != sizeof(struct sockaddr_in)) return 1; if (satosin(sa1)->sin_addr.s_addr != satosin(sa2)->sin_addr.s_addr) { return 1; } if (port && satosin(sa1)->sin_port != satosin(sa2)->sin_port) return 1; break; case AF_INET6: if (sa1->sa_len != sizeof(struct sockaddr_in6)) return 1; /*EINVAL*/ if (satosin6(sa1)->sin6_scope_id != satosin6(sa2)->sin6_scope_id) { return 1; } if (!IN6_ARE_ADDR_EQUAL(&satosin6(sa1)->sin6_addr, &satosin6(sa2)->sin6_addr)) { return 1; } if (port && satosin6(sa1)->sin6_port != satosin6(sa2)->sin6_port) { return 1; } break; default: if (bcmp(sa1, sa2, sa1->sa_len) != 0) return 1; break; } return 0; #undef satosin #undef satosin6 } /* * compare two buffers with mask. * IN: * addr1: source * addr2: object * bits: Number of bits to compare * OUT: * 1 : equal * 0 : not equal */ static int key_bbcmp(const void *a1, const void *a2, u_int bits) { const unsigned char *p1 = a1; const unsigned char *p2 = a2; /* XXX: This could be considerably faster if we compare a word * at a time, but it is complicated on LSB Endian machines */ /* Handle null pointers */ if (p1 == NULL || p2 == NULL) return (p1 == p2); while (bits >= 8) { if (*p1++ != *p2++) return 0; bits -= 8; } if (bits > 0) { u_int8_t mask = ~((1<<(8-bits))-1); if ((*p1 & mask) != (*p2 & mask)) return 0; } return 1; /* Match! */ } static void key_flush_spd(time_t now) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; u_int dir; /* SPD */ for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { restart: SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[dir], chain) { if (sp->lifetime == 0 && sp->validtime == 0) continue; if ((sp->lifetime && now - sp->created > sp->lifetime) || (sp->validtime && now - sp->lastused > sp->validtime)) { SP_ADDREF(sp); SPTREE_RUNLOCK(); key_spdexpire(sp); key_unlink(sp); KEY_FREESP(&sp); goto restart; } } SPTREE_RUNLOCK(); } } static void key_flush_sad(time_t now) { struct secashead *sah, *nextsah; struct secasvar *sav, *nextsav; /* SAD */ SAHTREE_LOCK(); LIST_FOREACH_SAFE(sah, &V_sahtree, chain, nextsah) { /* if sah has been dead, then delete it and process next sah. */ if (sah->state == SADB_SASTATE_DEAD) { key_delsah(sah); continue; } /* if LARVAL entry doesn't become MATURE, delete it. */ LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_LARVAL], chain, nextsav) { /* Need to also check refcnt for a larval SA ??? */ if (now - sav->created > V_key_larval_lifetime) KEY_FREESAV(&sav); } /* * check MATURE entry to start to send expire message * whether or not. */ LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_MATURE], chain, nextsav) { /* we don't need to check. */ if (sav->lft_s == NULL) continue; /* sanity check */ if (sav->lft_c == NULL) { ipseclog((LOG_DEBUG,"%s: there is no CURRENT " "time, why?\n", __func__)); continue; } /* * RFC 2367: * HARD lifetimes MUST take precedence over SOFT * lifetimes, meaning if the HARD and SOFT lifetimes * are the same, the HARD lifetime will appear on the * EXPIRE message. 
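 *
 * Worked example: with a SOFT addtime of 3600 and a HARD addtime of
 * 7200, an SA created at time t is moved to DYING (and a SOFT expire
 * message is sent) at t+3600, then to DEAD (HARD expire, SA freed)
 * at t+7200; the byte-count checks below compare lft_c->bytes the
 * same way.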
*/ /* check HARD lifetime */ if ((sav->lft_h->addtime != 0 && now - sav->created > sav->lft_h->addtime) || (sav->lft_h->bytes != 0 && sav->lft_h->bytes < sav->lft_c->bytes)) { key_sa_chgstate(sav, SADB_SASTATE_DEAD); key_expire(sav, 1); KEY_FREESAV(&sav); } /* check SOFT lifetime */ else if ((sav->lft_s->addtime != 0 && now - sav->created > sav->lft_s->addtime) || (sav->lft_s->bytes != 0 && sav->lft_s->bytes < sav->lft_c->bytes)) { key_sa_chgstate(sav, SADB_SASTATE_DYING); key_expire(sav, 0); } } /* check DYING entry to change status to DEAD. */ LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_DYING], chain, nextsav) { /* we don't need to check. */ if (sav->lft_h == NULL) continue; /* sanity check */ if (sav->lft_c == NULL) { ipseclog((LOG_DEBUG, "%s: there is no CURRENT " "time, why?\n", __func__)); continue; } if (sav->lft_h->addtime != 0 && now - sav->created > sav->lft_h->addtime) { key_sa_chgstate(sav, SADB_SASTATE_DEAD); key_expire(sav, 1); KEY_FREESAV(&sav); } #if 0 /* XXX Should we keep to send expire message until HARD lifetime ? */ else if (sav->lft_s != NULL && sav->lft_s->addtime != 0 && now - sav->created > sav->lft_s->addtime) { /* * XXX: should be checked to be * installed the valid SA. */ /* * If there is no SA then sending * expire message. */ key_expire(sav, 0); } #endif /* check HARD lifetime by bytes */ else if (sav->lft_h->bytes != 0 && sav->lft_h->bytes < sav->lft_c->bytes) { key_sa_chgstate(sav, SADB_SASTATE_DEAD); key_expire(sav, 1); KEY_FREESAV(&sav); } } /* delete entry in DEAD */ LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_DEAD], chain, nextsav) { /* sanity check */ if (sav->state != SADB_SASTATE_DEAD) { ipseclog((LOG_DEBUG, "%s: invalid sav->state " "(queue: %d SA: %d): kill it anyway\n", __func__, SADB_SASTATE_DEAD, sav->state)); } /* * do not call key_freesav() here. * sav should already be freed, and sav->refcnt * shows other references to sav * (such as from SPD). */ } } SAHTREE_UNLOCK(); } static void key_flush_acq(time_t now) { struct secacq *acq, *nextacq; /* ACQ tree */ ACQ_LOCK(); for (acq = LIST_FIRST(&V_acqtree); acq != NULL; acq = nextacq) { nextacq = LIST_NEXT(acq, chain); if (now - acq->created > V_key_blockacq_lifetime && __LIST_CHAINED(acq)) { LIST_REMOVE(acq, chain); free(acq, M_IPSEC_SAQ); } } ACQ_UNLOCK(); } static void key_flush_spacq(time_t now) { struct secspacq *acq, *nextacq; /* SP ACQ tree */ SPACQ_LOCK(); for (acq = LIST_FIRST(&V_spacqtree); acq != NULL; acq = nextacq) { nextacq = LIST_NEXT(acq, chain); if (now - acq->created > V_key_blockacq_lifetime && __LIST_CHAINED(acq)) { LIST_REMOVE(acq, chain); free(acq, M_IPSEC_SAQ); } } SPACQ_UNLOCK(); } /* * time handler. * scanning SPD and SAD to check status for each entries, * and do to remove or to expire. * XXX: year 2038 problem may remain. */ static void key_timehandler(void *arg) { VNET_ITERATOR_DECL(vnet_iter); time_t now = time_second; VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); key_flush_spd(now); key_flush_sad(now); key_flush_acq(now); key_flush_spacq(now); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); #ifndef IPSEC_DEBUG2 /* do exchange to tick time !! */ callout_schedule(&key_timer, hz); #endif /* IPSEC_DEBUG2 */ } u_long key_random() { u_long value; key_randomfill(&value, sizeof(value)); return value; } void key_randomfill(void *p, size_t l) { size_t n; u_long v; static int warn = 1; n = 0; n = (size_t)read_random(p, (u_int)l); /* last resort */ while (n < l) { v = random(); bcopy(&v, (u_int8_t *)p + n, l - n < sizeof(v) ? 
l - n : sizeof(v)); n += sizeof(v); if (warn) { printf("WARNING: pseudo-random number generator " "used for IPsec processing\n"); warn = 0; } } } /* * map SADB_SATYPE_* to IPPROTO_*. * if satype == SADB_SATYPE then satype is mapped to ~0. * OUT: * 0: invalid satype. */ static u_int16_t key_satype2proto(u_int8_t satype) { switch (satype) { case SADB_SATYPE_UNSPEC: return IPSEC_PROTO_ANY; case SADB_SATYPE_AH: return IPPROTO_AH; case SADB_SATYPE_ESP: return IPPROTO_ESP; case SADB_X_SATYPE_IPCOMP: return IPPROTO_IPCOMP; case SADB_X_SATYPE_TCPSIGNATURE: return IPPROTO_TCP; default: return 0; } /* NOTREACHED */ } /* * map IPPROTO_* to SADB_SATYPE_* * OUT: * 0: invalid protocol type. */ static u_int8_t key_proto2satype(u_int16_t proto) { switch (proto) { case IPPROTO_AH: return SADB_SATYPE_AH; case IPPROTO_ESP: return SADB_SATYPE_ESP; case IPPROTO_IPCOMP: return SADB_X_SATYPE_IPCOMP; case IPPROTO_TCP: return SADB_X_SATYPE_TCPSIGNATURE; default: return 0; } /* NOTREACHED */ } /* %%% PF_KEY */ /* * SADB_GETSPI processing is to receive * * from the IKMPd, to assign a unique spi value, to hang on the INBOUND * tree with the status of LARVAL, and send * * to the IKMPd. * * IN: mhp: pointer to the pointer to each header. * OUT: NULL if fail. * other if success, return pointer to the message to send. */ static int key_getspi(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_address *src0, *dst0; struct secasindex saidx; struct secashead *newsah; struct secasvar *newsav; u_int8_t proto; u_int32_t spi; u_int8_t mode; u_int32_t reqid; int error; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_X_EXT_SA2] != NULL) { mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } else { mode = IPSEC_MODE_ANY; reqid = 0; } src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } /* * Make sure the port numbers are zero. * In case of NAT-T we will update them later if needed. 
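 * Without this, a stray port number in the submitted address would
 * make the new SA's saidx compare unequal to later lookups that
 * carry zero ports; key_cmpsaidx() only honours ports when NAT-T
 * applies.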
*/ switch (((struct sockaddr *)(src0 + 1))->sa_family) { case AF_INET: if (((struct sockaddr *)(src0 + 1))->sa_len != sizeof(struct sockaddr_in)) return key_senderror(so, m, EINVAL); ((struct sockaddr_in *)(src0 + 1))->sin_port = 0; break; case AF_INET6: if (((struct sockaddr *)(src0 + 1))->sa_len != sizeof(struct sockaddr_in6)) return key_senderror(so, m, EINVAL); ((struct sockaddr_in6 *)(src0 + 1))->sin6_port = 0; break; default: ; /*???*/ } switch (((struct sockaddr *)(dst0 + 1))->sa_family) { case AF_INET: if (((struct sockaddr *)(dst0 + 1))->sa_len != sizeof(struct sockaddr_in)) return key_senderror(so, m, EINVAL); ((struct sockaddr_in *)(dst0 + 1))->sin_port = 0; break; case AF_INET6: if (((struct sockaddr *)(dst0 + 1))->sa_len != sizeof(struct sockaddr_in6)) return key_senderror(so, m, EINVAL); ((struct sockaddr_in6 *)(dst0 + 1))->sin6_port = 0; break; default: ; /*???*/ } /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); #ifdef IPSEC_NAT_T /* * Handle NAT-T info if present. * We made sure the port numbers are zero above, so we do * not have to worry in case we do not update them. */ if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL) ipseclog((LOG_DEBUG, "%s: NAT-T OAi present\n", __func__)); if (mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) ipseclog((LOG_DEBUG, "%s: NAT-T OAr present\n", __func__)); if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { struct sadb_x_nat_t_type *type; struct sadb_x_nat_t_port *sport, *dport; if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { ipseclog((LOG_DEBUG, "%s: invalid nat-t message " "passed.\n", __func__)); return key_senderror(so, m, EINVAL); } sport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_SPORT]; dport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_DPORT]; if (sport) KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port); if (dport) KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port); } #endif /* SPI allocation */ spi = key_do_getnewspi((struct sadb_spirange *)mhp->ext[SADB_EXT_SPIRANGE], &saidx); if (spi == 0) return key_senderror(so, m, EINVAL); /* get a SA index */ if ((newsah = key_getsah(&saidx)) == NULL) { /* create a new SA index */ if ((newsah = key_newsah(&saidx)) == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); return key_senderror(so, m, ENOBUFS); } } /* get a new SA */ /* XXX rewrite */ newsav = KEY_NEWSAV(m, mhp, newsah, &error); if (newsav == NULL) { /* XXX don't free new SA index allocated in above. */ return key_senderror(so, m, error); } /* set spi */ newsav->spi = htonl(spi); /* delete the entry in acqtree */ if (mhp->msg->sadb_msg_seq != 0) { struct secacq *acq; if ((acq = key_getacqbyseq(mhp->msg->sadb_msg_seq)) != NULL) { /* reset counter in order to deletion by timehandler. */ acq->created = time_second; acq->count = 0; } } { struct mbuf *n, *nn; struct sadb_sa *m_sa; struct sadb_msg *newmsg; int off, len; /* create new sadb_msg to reply. 
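 * Reply layout (assembled below): the requester's sadb_msg header is
 * copied first, an sadb_sa extension carrying the newly allocated
 * SPI in network byte order follows, and the SRC/DST address
 * extensions are then gathered verbatim from the request.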
*/ len = PFKEY_ALIGN8(sizeof(struct sadb_msg)) + PFKEY_ALIGN8(sizeof(struct sadb_sa)); MGETHDR(n, M_NOWAIT, MT_DATA); if (len > MHLEN) { if (!(MCLGET(n, M_NOWAIT))) { m_freem(n); n = NULL; } } if (!n) return key_senderror(so, m, ENOBUFS); n->m_len = len; n->m_next = NULL; off = 0; m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off); off += PFKEY_ALIGN8(sizeof(struct sadb_msg)); m_sa = (struct sadb_sa *)(mtod(n, caddr_t) + off); m_sa->sadb_sa_len = PFKEY_UNIT64(sizeof(struct sadb_sa)); m_sa->sadb_sa_exttype = SADB_EXT_SA; m_sa->sadb_sa_spi = htonl(spi); off += PFKEY_ALIGN8(sizeof(struct sadb_sa)); IPSEC_ASSERT(off == len, ("length inconsistency (off %u len %u)", off, len)); n->m_next = key_gather_mbuf(m, mhp, 0, 2, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n->m_next) { m_freem(n); return key_senderror(so, m, ENOBUFS); } if (n->m_len < sizeof(struct sadb_msg)) { n = m_pullup(n, sizeof(struct sadb_msg)); if (n == NULL) return key_sendup_mbuf(so, m, KEY_SENDUP_ONE); } n->m_pkthdr.len = 0; for (nn = n; nn; nn = nn->m_next) n->m_pkthdr.len += nn->m_len; newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_seq = newsav->seq; newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } } /* * Allocate a new SPI; called by key_getspi(). * OUT: * 0: failure. * others: success. */ static u_int32_t key_do_getnewspi(struct sadb_spirange *spirange, struct secasindex *saidx) { u_int32_t newspi; u_int32_t min, max; int count = V_key_spi_trycnt; /* set spi range to allocate */ if (spirange != NULL) { min = spirange->sadb_spirange_min; max = spirange->sadb_spirange_max; } else { min = V_key_spi_minval; max = V_key_spi_maxval; } /* IPCOMP needs 2-byte SPI */ if (saidx->proto == IPPROTO_IPCOMP) { u_int32_t t; if (min >= 0x10000) min = 0xffff; if (max >= 0x10000) max = 0xffff; if (min > max) { t = min; min = max; max = t; } } if (min == max) { if (key_checkspidup(saidx, min) != NULL) { ipseclog((LOG_DEBUG, "%s: SPI %u exists already.\n", __func__, min)); return 0; } count--; /* this consumes one try. */ newspi = min; } else { /* init SPI */ newspi = 0; /* an SPI range was requested */ while (count--) { /* generate a pseudo-random SPI value within the range. */ newspi = min + (key_random() % (max - min + 1)); if (key_checkspidup(saidx, newspi) == NULL) break; } if (count == 0 || newspi == 0) { ipseclog((LOG_DEBUG, "%s: failed to allocate an SPI.\n", __func__)); return 0; } } /* statistics */ keystat.getspi_count = (keystat.getspi_count + V_key_spi_trycnt - count) / 2; return newspi; } /* * SADB_UPDATE processing * receive * * from the ikmpd, and update a secasvar entry whose status is SADB_SASTATE_LARVAL. * and send * * to the ikmpd. * * m will always be freed.
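 *
 * The target SA must already exist, normally created in LARVAL state
 * by a prior SADB_GETSPI.  The lookup below is by SPI unless
 * IPSEC_DOSEQCHECK is defined, in which case it matches on the
 * message sequence number instead; the sender's PID must also match
 * the PID recorded when the SA was created.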
*/ static int key_update(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; #ifdef IPSEC_NAT_T struct sadb_x_nat_t_type *type; struct sadb_x_nat_t_port *sport, *dport; struct sadb_address *iaddr, *raddr; struct sadb_x_nat_t_frag *frag; #endif struct secasindex saidx; struct secashead *sah; struct secasvar *sav; u_int16_t proto; u_int8_t mode; u_int32_t reqid; int error; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_EXT_SA] == NULL || mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || (mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && mhp->ext[SADB_EXT_KEY_ENCRYPT] == NULL) || (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && mhp->ext[SADB_EXT_KEY_AUTH] == NULL) || (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL && mhp->ext[SADB_EXT_LIFETIME_SOFT] == NULL) || (mhp->ext[SADB_EXT_LIFETIME_HARD] == NULL && mhp->ext[SADB_EXT_LIFETIME_SOFT] != NULL)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_X_EXT_SA2] != NULL) { mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } else { mode = IPSEC_MODE_ANY; reqid = 0; } /* XXX boundary checking for other extensions */ sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); /* * Make sure the port numbers are zero. * In case of NAT-T we will update them later if needed. */ KEY_PORTTOSADDR(&saidx.src, 0); KEY_PORTTOSADDR(&saidx.dst, 0); #ifdef IPSEC_NAT_T /* * Handle NAT-T info if present. 
*/ if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { ipseclog((LOG_DEBUG, "%s: invalid message.\n", __func__)); return key_senderror(so, m, EINVAL); } type = (struct sadb_x_nat_t_type *) mhp->ext[SADB_X_EXT_NAT_T_TYPE]; sport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_SPORT]; dport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_DPORT]; } else { type = 0; sport = dport = 0; } if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL && mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) { if (mhp->extlen[SADB_X_EXT_NAT_T_OAI] < sizeof(*iaddr) || mhp->extlen[SADB_X_EXT_NAT_T_OAR] < sizeof(*raddr)) { ipseclog((LOG_DEBUG, "%s: invalid message\n", __func__)); return key_senderror(so, m, EINVAL); } iaddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAI]; raddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAR]; ipseclog((LOG_DEBUG, "%s: NAT-T OAi/r present\n", __func__)); } else { iaddr = raddr = NULL; } if (mhp->ext[SADB_X_EXT_NAT_T_FRAG] != NULL) { if (mhp->extlen[SADB_X_EXT_NAT_T_FRAG] < sizeof(*frag)) { ipseclog((LOG_DEBUG, "%s: invalid message\n", __func__)); return key_senderror(so, m, EINVAL); } frag = (struct sadb_x_nat_t_frag *) mhp->ext[SADB_X_EXT_NAT_T_FRAG]; } else { frag = 0; } #endif /* get a SA header */ if ((sah = key_getsah(&saidx)) == NULL) { ipseclog((LOG_DEBUG, "%s: no SA index found.\n", __func__)); return key_senderror(so, m, ENOENT); } /* set spidx if there */ /* XXX rewrite */ error = key_setident(sah, m, mhp); if (error) return key_senderror(so, m, error); /* find a SA with sequence number. */ #ifdef IPSEC_DOSEQCHECK if (mhp->msg->sadb_msg_seq != 0 && (sav = key_getsavbyseq(sah, mhp->msg->sadb_msg_seq)) == NULL) { ipseclog((LOG_DEBUG, "%s: no larval SA with sequence %u " "exists.\n", __func__, mhp->msg->sadb_msg_seq)); return key_senderror(so, m, ENOENT); } #else SAHTREE_LOCK(); sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); SAHTREE_UNLOCK(); if (sav == NULL) { ipseclog((LOG_DEBUG, "%s: no such a SA found (spi:%u)\n", __func__, (u_int32_t)ntohl(sa0->sadb_sa_spi))); return key_senderror(so, m, EINVAL); } #endif /* validity check */ if (sav->sah->saidx.proto != proto) { ipseclog((LOG_DEBUG, "%s: protocol mismatched " "(DB=%u param=%u)\n", __func__, sav->sah->saidx.proto, proto)); return key_senderror(so, m, EINVAL); } #ifdef IPSEC_DOSEQCHECK if (sav->spi != sa0->sadb_sa_spi) { ipseclog((LOG_DEBUG, "%s: SPI mismatched (DB:%u param:%u)\n", __func__, (u_int32_t)ntohl(sav->spi), (u_int32_t)ntohl(sa0->sadb_sa_spi))); return key_senderror(so, m, EINVAL); } #endif if (sav->pid != mhp->msg->sadb_msg_pid) { ipseclog((LOG_DEBUG, "%s: pid mismatched (DB:%u param:%u)\n", __func__, sav->pid, mhp->msg->sadb_msg_pid)); return key_senderror(so, m, EINVAL); } /* copy sav values */ error = key_setsaval(sav, m, mhp); if (error) { KEY_FREESAV(&sav); return key_senderror(so, m, error); } #ifdef IPSEC_NAT_T /* * Handle more NAT-T info if present, * now that we have a sav to fill. */ if (type) sav->natt_type = type->sadb_x_nat_t_type_type; if (sport) KEY_PORTTOSADDR(&sav->sah->saidx.src, sport->sadb_x_nat_t_port_port); if (dport) KEY_PORTTOSADDR(&sav->sah->saidx.dst, dport->sadb_x_nat_t_port_port); #if 0 /* * In case SADB_X_EXT_NAT_T_FRAG was not given, leave it at 0. 
* We should actually check for a minimum MTU here, if we * want to support it in ip_output. */ if (frag) sav->natt_esp_frag_len = frag->sadb_x_nat_t_frag_fraglen; #endif #endif /* check SA values to be mature. */ if ((mhp->msg->sadb_msg_errno = key_mature(sav)) != 0) { KEY_FREESAV(&sav); return key_senderror(so, m, 0); } { struct mbuf *n; /* set msg buf from mhp */ n = key_getmsgbuf_x1(m, mhp); if (n == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return key_senderror(so, m, ENOBUFS); } m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * search SAD with sequence for a SA which state is SADB_SASTATE_LARVAL. * only called by key_update(). * OUT: * NULL : not found * others : found, pointer to a SA. */ #ifdef IPSEC_DOSEQCHECK static struct secasvar * key_getsavbyseq(struct secashead *sah, u_int32_t seq) { struct secasvar *sav; u_int state; state = SADB_SASTATE_LARVAL; /* search SAD with sequence number ? */ LIST_FOREACH(sav, &sah->savtree[state], chain) { KEY_CHKSASTATE(state, sav->state, __func__); if (sav->seq == seq) { sa_addref(sav); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s cause refcnt++:%d SA:%p\n", __func__, sav->refcnt, sav)); return sav; } } return NULL; } #endif /* * SADB_ADD processing * add an entry to SA database, when received * * from the ikmpd, * and send * * to the ikmpd. * * IGNORE identity and sensitivity messages. * * m will always be freed. */ static int key_add(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; #ifdef IPSEC_NAT_T struct sadb_x_nat_t_type *type; struct sadb_address *iaddr, *raddr; struct sadb_x_nat_t_frag *frag; #endif struct secasindex saidx; struct secashead *newsah; struct secasvar *newsav; u_int16_t proto; u_int8_t mode; u_int32_t reqid; int error; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_EXT_SA] == NULL || mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || (mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && mhp->ext[SADB_EXT_KEY_ENCRYPT] == NULL) || (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && mhp->ext[SADB_EXT_KEY_AUTH] == NULL) || (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL && mhp->ext[SADB_EXT_LIFETIME_SOFT] == NULL) || (mhp->ext[SADB_EXT_LIFETIME_HARD] == NULL && mhp->ext[SADB_EXT_LIFETIME_SOFT] != NULL)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { /* XXX need more */ ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_X_EXT_SA2] != NULL) { mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } else { mode = IPSEC_MODE_ANY; reqid = 0; } sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; /* XXX boundary check against sa_len */ 
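/*
 * KEY_SETSECASIDX fills saidx from proto/mode/reqid plus the two
 * sockaddrs that immediately follow their sadb_address extension
 * headers; src0 + 1 and dst0 + 1 use struct pointer arithmetic to
 * step over the fixed-size headers and land on those sockaddrs.
 */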
KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); /* * Make sure the port numbers are zero. * In case of NAT-T we will update them later if needed. */ KEY_PORTTOSADDR(&saidx.src, 0); KEY_PORTTOSADDR(&saidx.dst, 0); #ifdef IPSEC_NAT_T /* * Handle NAT-T info if present. */ if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { struct sadb_x_nat_t_port *sport, *dport; if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { ipseclog((LOG_DEBUG, "%s: invalid message.\n", __func__)); return key_senderror(so, m, EINVAL); } type = (struct sadb_x_nat_t_type *) mhp->ext[SADB_X_EXT_NAT_T_TYPE]; sport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_SPORT]; dport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_DPORT]; if (sport) KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port); if (dport) KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port); } else { type = 0; } if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL && mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) { if (mhp->extlen[SADB_X_EXT_NAT_T_OAI] < sizeof(*iaddr) || mhp->extlen[SADB_X_EXT_NAT_T_OAR] < sizeof(*raddr)) { ipseclog((LOG_DEBUG, "%s: invalid message\n", __func__)); return key_senderror(so, m, EINVAL); } iaddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAI]; raddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAR]; ipseclog((LOG_DEBUG, "%s: NAT-T OAi/r present\n", __func__)); } else { iaddr = raddr = NULL; } if (mhp->ext[SADB_X_EXT_NAT_T_FRAG] != NULL) { if (mhp->extlen[SADB_X_EXT_NAT_T_FRAG] < sizeof(*frag)) { ipseclog((LOG_DEBUG, "%s: invalid message\n", __func__)); return key_senderror(so, m, EINVAL); } frag = (struct sadb_x_nat_t_frag *) mhp->ext[SADB_X_EXT_NAT_T_FRAG]; } else { frag = 0; } #endif /* get a SA header */ if ((newsah = key_getsah(&saidx)) == NULL) { /* create a new SA header */ if ((newsah = key_newsah(&saidx)) == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); return key_senderror(so, m, ENOBUFS); } } /* set spidx if there */ /* XXX rewrite */ error = key_setident(newsah, m, mhp); if (error) { return key_senderror(so, m, error); } /* create new SA entry. */ /* We can create a new SA only if the SPI is different. */ SAHTREE_LOCK(); newsav = key_getsavbyspi(newsah, sa0->sadb_sa_spi); SAHTREE_UNLOCK(); if (newsav != NULL) { ipseclog((LOG_DEBUG, "%s: SA already exists.\n", __func__)); return key_senderror(so, m, EEXIST); } newsav = KEY_NEWSAV(m, mhp, newsah, &error); if (newsav == NULL) { return key_senderror(so, m, error); } #ifdef IPSEC_NAT_T /* * Handle more NAT-T info if present, * now that we have a sav to fill. */ if (type) newsav->natt_type = type->sadb_x_nat_t_type_type; #if 0 /* * In case SADB_X_EXT_NAT_T_FRAG was not given, leave it at 0. * We should actually check for a minimum MTU here, if we * want to support it in ip_output. */ if (frag) newsav->natt_esp_frag_len = frag->sadb_x_nat_t_frag_fraglen; #endif #endif /* check that the SA values are mature. */ if ((error = key_mature(newsav)) != 0) { KEY_FREESAV(&newsav); return key_senderror(so, m, error); } /* * don't call key_freesav() here, as we would like to keep the SA * in the database on success.
*/ { struct mbuf *n; /* set msg buf from mhp */ n = key_getmsgbuf_x1(m, mhp); if (n == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return key_senderror(so, m, ENOBUFS); } m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* m is retained */ static int key_setident(struct secashead *sah, struct mbuf *m, const struct sadb_msghdr *mhp) { const struct sadb_ident *idsrc, *iddst; int idsrclen, iddstlen; IPSEC_ASSERT(sah != NULL, ("null secashead")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* don't make buffer if not there */ if (mhp->ext[SADB_EXT_IDENTITY_SRC] == NULL && mhp->ext[SADB_EXT_IDENTITY_DST] == NULL) { sah->idents = NULL; sah->identd = NULL; return 0; } if (mhp->ext[SADB_EXT_IDENTITY_SRC] == NULL || mhp->ext[SADB_EXT_IDENTITY_DST] == NULL) { ipseclog((LOG_DEBUG, "%s: invalid identity.\n", __func__)); return EINVAL; } idsrc = (const struct sadb_ident *)mhp->ext[SADB_EXT_IDENTITY_SRC]; iddst = (const struct sadb_ident *)mhp->ext[SADB_EXT_IDENTITY_DST]; idsrclen = mhp->extlen[SADB_EXT_IDENTITY_SRC]; iddstlen = mhp->extlen[SADB_EXT_IDENTITY_DST]; /* validity check */ if (idsrc->sadb_ident_type != iddst->sadb_ident_type) { ipseclog((LOG_DEBUG, "%s: ident type mismatch.\n", __func__)); return EINVAL; } switch (idsrc->sadb_ident_type) { case SADB_IDENTTYPE_PREFIX: case SADB_IDENTTYPE_FQDN: case SADB_IDENTTYPE_USERFQDN: default: /* XXX do nothing */ sah->idents = NULL; sah->identd = NULL; return 0; } /* make structure */ sah->idents = malloc(sizeof(struct secident), M_IPSEC_MISC, M_NOWAIT); if (sah->idents == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return ENOBUFS; } sah->identd = malloc(sizeof(struct secident), M_IPSEC_MISC, M_NOWAIT); if (sah->identd == NULL) { free(sah->idents, M_IPSEC_MISC); sah->idents = NULL; ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return ENOBUFS; } sah->idents->type = idsrc->sadb_ident_type; sah->idents->id = idsrc->sadb_ident_id; sah->identd->type = iddst->sadb_ident_type; sah->identd->id = iddst->sadb_ident_id; return 0; } /* * m will not be freed on return. * it is caller's responsibility to free the result. */ static struct mbuf * key_getmsgbuf_x1(struct mbuf *m, const struct sadb_msghdr *mhp) { struct mbuf *n; IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* create new sadb_msg to reply. */ n = key_gather_mbuf(m, mhp, 1, 9, SADB_EXT_RESERVED, SADB_EXT_SA, SADB_X_EXT_SA2, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST, SADB_EXT_LIFETIME_HARD, SADB_EXT_LIFETIME_SOFT, SADB_EXT_IDENTITY_SRC, SADB_EXT_IDENTITY_DST); if (!n) return NULL; if (n->m_len < sizeof(struct sadb_msg)) { n = m_pullup(n, sizeof(struct sadb_msg)); if (n == NULL) return NULL; } mtod(n, struct sadb_msg *)->sadb_msg_errno = 0; mtod(n, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); return n; } /* * SADB_DELETE processing * receive * * from the ikmpd, and set SADB_SASTATE_DEAD, * and send, * * to the ikmpd. * * m will always be freed. 
*/ static int key_delete(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; struct secasindex saidx; struct secashead *sah; struct secasvar *sav = NULL; u_int16_t proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_EXT_SA] == NULL) { /* * Caller wants us to delete all non-LARVAL SAs * that match the src/dst. This is used during * IKE INITIAL-CONTACT. */ ipseclog((LOG_DEBUG, "%s: doing delete all.\n", __func__)); return key_delete_all(so, m, mhp, proto); } else if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); /* * Make sure the port numbers are zero. * In case of NAT-T we will update them later if needed. */ KEY_PORTTOSADDR(&saidx.src, 0); KEY_PORTTOSADDR(&saidx.dst, 0); #ifdef IPSEC_NAT_T /* * Handle NAT-T info if present. */ if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { struct sadb_x_nat_t_port *sport, *dport; if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { ipseclog((LOG_DEBUG, "%s: invalid message.\n", __func__)); return key_senderror(so, m, EINVAL); } sport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_SPORT]; dport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_DPORT]; if (sport) KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port); if (dport) KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port); } #endif /* get a SA header */ SAHTREE_LOCK(); LIST_FOREACH(sah, &V_sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) continue; /* get a SA with SPI. */ sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); if (sav) break; } if (sah == NULL) { SAHTREE_UNLOCK(); ipseclog((LOG_DEBUG, "%s: no SA found.\n", __func__)); return key_senderror(so, m, ENOENT); } key_sa_chgstate(sav, SADB_SASTATE_DEAD); KEY_FREESAV(&sav); SAHTREE_UNLOCK(); { struct mbuf *n; struct sadb_msg *newmsg; /* create new sadb_msg to reply. */ /* XXX-BZ NAT-T extensions? 
*/ n = key_gather_mbuf(m, mhp, 1, 4, SADB_EXT_RESERVED, SADB_EXT_SA, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n) return key_senderror(so, m, ENOBUFS); if (n->m_len < sizeof(struct sadb_msg)) { n = m_pullup(n, sizeof(struct sadb_msg)); if (n == NULL) return key_senderror(so, m, ENOBUFS); } newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * delete all SAs for src/dst. Called from key_delete(). */ static int key_delete_all(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp, u_int16_t proto) { struct sadb_address *src0, *dst0; struct secasindex saidx; struct secashead *sah; struct secasvar *sav, *nextsav; u_int stateidx, state; src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); /* * Make sure the port numbers are zero. * In case of NAT-T we will update them later if needed. */ KEY_PORTTOSADDR(&saidx.src, 0); KEY_PORTTOSADDR(&saidx.dst, 0); #ifdef IPSEC_NAT_T /* * Handle NAT-T info if present. */ if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { struct sadb_x_nat_t_port *sport, *dport; if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { ipseclog((LOG_DEBUG, "%s: invalid message.\n", __func__)); return key_senderror(so, m, EINVAL); } sport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_SPORT]; dport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_DPORT]; if (sport) KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port); if (dport) KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port); } #endif SAHTREE_LOCK(); LIST_FOREACH(sah, &V_sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) continue; /* Delete all non-LARVAL SAs. */ for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_alive); stateidx++) { state = saorder_state_alive[stateidx]; if (state == SADB_SASTATE_LARVAL) continue; for (sav = LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) { nextsav = LIST_NEXT(sav, chain); /* sanity check */ if (sav->state != state) { ipseclog((LOG_DEBUG, "%s: invalid " "sav->state (queue %d SA %d)\n", __func__, state, sav->state)); continue; } key_sa_chgstate(sav, SADB_SASTATE_DEAD); KEY_FREESAV(&sav); } } } SAHTREE_UNLOCK(); { struct mbuf *n; struct sadb_msg *newmsg; /* create new sadb_msg to reply. */ /* XXX-BZ NAT-T extensions? */ n = key_gather_mbuf(m, mhp, 1, 3, SADB_EXT_RESERVED, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n) return key_senderror(so, m, ENOBUFS); if (n->m_len < sizeof(struct sadb_msg)) { n = m_pullup(n, sizeof(struct sadb_msg)); if (n == NULL) return key_senderror(so, m, ENOBUFS); } newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * SADB_GET processing * receive * * from the ikmpd, and get a SP and a SA to respond, * and send, * * to the ikmpd. * * m will always be freed. 
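 *
 * For illustration (sketch): the reply for the matching SA is assembled
 * by key_setdumpsa(), the same helper used for SADB_DUMP, but it is
 * delivered only to the requesting socket (KEY_SENDUP_ONE) rather than
 * broadcast to all PF_KEY listeners.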
*/ static int key_get(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; struct secasindex saidx; struct secashead *sah; struct secasvar *sav = NULL; u_int16_t proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_EXT_SA] == NULL || mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); /* * Make sure the port numbers are zero. * In case of NAT-T we will update them later if needed. */ KEY_PORTTOSADDR(&saidx.src, 0); KEY_PORTTOSADDR(&saidx.dst, 0); #ifdef IPSEC_NAT_T /* * Handle NAT-T info if present. */ if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { struct sadb_x_nat_t_port *sport, *dport; if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { ipseclog((LOG_DEBUG, "%s: invalid message.\n", __func__)); return key_senderror(so, m, EINVAL); } sport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_SPORT]; dport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_DPORT]; if (sport) KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port); if (dport) KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port); } #endif /* get a SA header */ SAHTREE_LOCK(); LIST_FOREACH(sah, &V_sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) continue; /* get a SA with SPI. */ sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); if (sav) break; } SAHTREE_UNLOCK(); if (sah == NULL) { ipseclog((LOG_DEBUG, "%s: no SA found.\n", __func__)); return key_senderror(so, m, ENOENT); } { struct mbuf *n; u_int8_t satype; /* map proto to satype */ if ((satype = key_proto2satype(sah->saidx.proto)) == 0) { ipseclog((LOG_DEBUG, "%s: there was invalid proto in SAD.\n", __func__)); return key_senderror(so, m, EINVAL); } /* create new sadb_msg to reply. */ n = key_setdumpsa(sav, SADB_GET, satype, mhp->msg->sadb_msg_seq, mhp->msg->sadb_msg_pid); if (!n) return key_senderror(so, m, ENOBUFS); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } } /* XXX make it sysctl-configurable? 
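 *
 * Worked example with the defaults set below (sketch): the hard add-time
 * is 86400 s (1 day), so the soft add-time becomes 86400 * 80 / 100 =
 * 69120 s; the hard use-time is 28800 s (8 hours), so the soft use-time
 * becomes 28800 * 80 / 100 = 23040 s.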
 */
static void
key_getcomb_setlifetime(struct sadb_comb *comb)
{

	comb->sadb_comb_soft_allocations = 1;
	comb->sadb_comb_hard_allocations = 1;
	comb->sadb_comb_soft_bytes = 0;
	comb->sadb_comb_hard_bytes = 0;
	comb->sadb_comb_hard_addtime = 86400;	/* 1 day */
	comb->sadb_comb_soft_addtime = comb->sadb_comb_hard_addtime * 80 / 100;
	comb->sadb_comb_hard_usetime = 28800;	/* 8 hours */
	comb->sadb_comb_soft_usetime = comb->sadb_comb_hard_usetime * 80 / 100;
}

/*
 * XXX reorder combinations by preference
 * XXX no idea if the user wants ESP authentication or not
 */
static struct mbuf *
key_getcomb_esp()
{
	struct sadb_comb *comb;
	struct enc_xform *algo;
	struct mbuf *result = NULL, *m, *n;
	int encmin;
	int i, off, o;
	int totlen;
	const int l = PFKEY_ALIGN8(sizeof(struct sadb_comb));

	m = NULL;
	for (i = 1; i <= SADB_EALG_MAX; i++) {
		algo = esp_algorithm_lookup(i);
		if (algo == NULL)
			continue;

		/* discard algorithms with key size smaller than system min */
		if (_BITS(algo->maxkey) < V_ipsec_esp_keymin)
			continue;
		if (_BITS(algo->minkey) < V_ipsec_esp_keymin)
			encmin = V_ipsec_esp_keymin;
		else
			encmin = _BITS(algo->minkey);

		if (V_ipsec_esp_auth)
			m = key_getcomb_ah();
		else {
			IPSEC_ASSERT(l <= MLEN,
			    ("l=%u > MLEN=%lu", l, (u_long) MLEN));
			MGET(m, M_NOWAIT, MT_DATA);
			if (m) {
				M_ALIGN(m, l);
				m->m_len = l;
				m->m_next = NULL;
				bzero(mtod(m, caddr_t), m->m_len);
			}
		}
		if (!m)
			goto fail;

		totlen = 0;
		for (n = m; n; n = n->m_next)
			totlen += n->m_len;
		IPSEC_ASSERT((totlen % l) == 0, ("totlen=%u, l=%u", totlen, l));

		for (off = 0; off < totlen; off += l) {
			n = m_pulldown(m, off, l, &o);
			if (!n) {
				/* m is already freed */
				goto fail;
			}
			comb = (struct sadb_comb *)(mtod(n, caddr_t) + o);
			bzero(comb, sizeof(*comb));
			key_getcomb_setlifetime(comb);
			comb->sadb_comb_encrypt = i;
			comb->sadb_comb_encrypt_minbits = encmin;
			comb->sadb_comb_encrypt_maxbits = _BITS(algo->maxkey);
		}

		if (!result)
			result = m;
		else
			m_cat(result, m);
	}

	return result;

 fail:
	if (result)
		m_freem(result);
	return NULL;
}

static void
key_getsizes_ah(const struct auth_hash *ah, int alg, u_int16_t* min,
    u_int16_t* max)
{

	*min = *max = ah->keysize;
	if (ah->keysize == 0) {
		/*
		 * Transform takes arbitrary key size but algorithm
		 * key size is restricted.  Enforce this here.
*/ switch (alg) { case SADB_X_AALG_MD5: *min = *max = 16; break; case SADB_X_AALG_SHA: *min = *max = 20; break; case SADB_X_AALG_NULL: *min = 1; *max = 256; break; case SADB_X_AALG_SHA2_256: *min = *max = 32; break; case SADB_X_AALG_SHA2_384: *min = *max = 48; break; case SADB_X_AALG_SHA2_512: *min = *max = 64; break; default: DPRINTF(("%s: unknown AH algorithm %u\n", __func__, alg)); break; } } } /* * XXX reorder combinations by preference */ static struct mbuf * key_getcomb_ah() { struct sadb_comb *comb; struct auth_hash *algo; struct mbuf *m; u_int16_t minkeysize, maxkeysize; int i; const int l = PFKEY_ALIGN8(sizeof(struct sadb_comb)); m = NULL; for (i = 1; i <= SADB_AALG_MAX; i++) { #if 1 /* we prefer HMAC algorithms, not old algorithms */ if (i != SADB_AALG_SHA1HMAC && i != SADB_AALG_MD5HMAC && i != SADB_X_AALG_SHA2_256 && i != SADB_X_AALG_SHA2_384 && i != SADB_X_AALG_SHA2_512) continue; #endif algo = ah_algorithm_lookup(i); if (!algo) continue; key_getsizes_ah(algo, i, &minkeysize, &maxkeysize); /* discard algorithms with key size smaller than system min */ if (_BITS(minkeysize) < V_ipsec_ah_keymin) continue; if (!m) { IPSEC_ASSERT(l <= MLEN, ("l=%u > MLEN=%lu", l, (u_long) MLEN)); MGET(m, M_NOWAIT, MT_DATA); if (m) { M_ALIGN(m, l); m->m_len = l; m->m_next = NULL; } } else M_PREPEND(m, l, M_NOWAIT); if (!m) return NULL; comb = mtod(m, struct sadb_comb *); bzero(comb, sizeof(*comb)); key_getcomb_setlifetime(comb); comb->sadb_comb_auth = i; comb->sadb_comb_auth_minbits = _BITS(minkeysize); comb->sadb_comb_auth_maxbits = _BITS(maxkeysize); } return m; } /* * not really an official behavior. discussed in pf_key@inner.net in Sep2000. * XXX reorder combinations by preference */ static struct mbuf * key_getcomb_ipcomp() { struct sadb_comb *comb; struct comp_algo *algo; struct mbuf *m; int i; const int l = PFKEY_ALIGN8(sizeof(struct sadb_comb)); m = NULL; for (i = 1; i <= SADB_X_CALG_MAX; i++) { algo = ipcomp_algorithm_lookup(i); if (!algo) continue; if (!m) { IPSEC_ASSERT(l <= MLEN, ("l=%u > MLEN=%lu", l, (u_long) MLEN)); MGET(m, M_NOWAIT, MT_DATA); if (m) { M_ALIGN(m, l); m->m_len = l; m->m_next = NULL; } } else M_PREPEND(m, l, M_NOWAIT); if (!m) return NULL; comb = mtod(m, struct sadb_comb *); bzero(comb, sizeof(*comb)); key_getcomb_setlifetime(comb); comb->sadb_comb_encrypt = i; /* what should we set into sadb_comb_*_{min,max}bits? */ } return m; } /* * XXX no way to pass mode (transport/tunnel) to userland * XXX replay checking? * XXX sysctl interface to ipsec_{ah,esp}_keymin */ static struct mbuf * key_getprop(const struct secasindex *saidx) { struct sadb_prop *prop; struct mbuf *m, *n; const int l = PFKEY_ALIGN8(sizeof(struct sadb_prop)); int totlen; switch (saidx->proto) { case IPPROTO_ESP: m = key_getcomb_esp(); break; case IPPROTO_AH: m = key_getcomb_ah(); break; case IPPROTO_IPCOMP: m = key_getcomb_ipcomp(); break; default: return NULL; } if (!m) return NULL; M_PREPEND(m, l, M_NOWAIT); if (!m) return NULL; totlen = 0; for (n = m; n; n = n->m_next) totlen += n->m_len; prop = mtod(m, struct sadb_prop *); bzero(prop, sizeof(*prop)); prop->sadb_prop_len = PFKEY_UNIT64(totlen); prop->sadb_prop_exttype = SADB_EXT_PROPOSAL; prop->sadb_prop_replay = 32; /* XXX */ return m; } /* * SADB_ACQUIRE processing called by key_checkrequest() and key_acquire2(). * send * * to KMD, and expect to receive * with SADB_ACQUIRE if error occured, * or * with SADB_GETSPI * from KMD by PF_KEY. * * XXX x_policy is outside of RFC2367 (KAME extension). * XXX sensitivity is not supported. 
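 * Example flow (sketch, pieced together from the code below): a packet
 * that matches a policy with no mature SA triggers key_acquire(), which
 * records a secacq entry with sequence number N and sends SADB_ACQUIRE
 * to every registered key socket; when the IKE daemon finishes
 * negotiating, it installs the SA with the same sequence number, and
 * repeated triggers in the meantime are suppressed by the blockacq
 * counter.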
 * XXX for ipcomp, RFC2367 does not define how to fill in proposal.
 * see comment for key_getcomb_ipcomp().
 *
 * OUT:
 *    0     : succeed
 *    others: error number
 */
static int
key_acquire(const struct secasindex *saidx, struct secpolicy *sp)
{
	union sockaddr_union addr;
	struct mbuf *result, *m;
	struct secacq *newacq;
	u_int32_t seq;
	int error;
	u_int16_t ul_proto;
	u_int8_t mask, satype;

	IPSEC_ASSERT(saidx != NULL, ("null saidx"));
	satype = key_proto2satype(saidx->proto);
	IPSEC_ASSERT(satype != 0, ("null satype, protocol %u", saidx->proto));

	error = -1;
	result = NULL;
	ul_proto = IPSEC_ULPROTO_ANY;

	/*
	 * We never do anything to acquire the SA ourselves.  An alternative
	 * design would be for the kernel to block sending the SADB_ACQUIRE
	 * message until it receives some reply from the IKE daemon; in that
	 * case the request would have to be tracked on an ACQUIRING list.
	 */

	/* Get an entry to check whether sending message or not. */
	if ((newacq = key_getacq(saidx)) != NULL) {
		if (V_key_blockacq_count < newacq->count) {
			/* reset counter and do send message. */
			newacq->count = 0;
		} else {
			/* increment counter and do nothing. */
			newacq->count++;
			return 0;
		}
	} else {
		/* make new entry for blocking to send SADB_ACQUIRE. */
		if ((newacq = key_newacq(saidx)) == NULL)
			return ENOBUFS;
	}

	seq = newacq->seq;
	m = key_setsadbmsg(SADB_ACQUIRE, 0, satype, seq, 0, 0);
	if (!m) {
		error = ENOBUFS;
		goto fail;
	}
	result = m;

	/*
	 * No SADB_X_EXT_NAT_T_* here: we do not know
	 * anything related to NAT-T at this time.
	 */

	/*
	 * set sadb_address for saidx's.
	 *
	 * Note that if sp is supplied, then we're being called from
	 * key_checkrequest and should supply port and protocol information.
	 */
	if (sp != NULL && (sp->spidx.ul_proto == IPPROTO_TCP ||
	    sp->spidx.ul_proto == IPPROTO_UDP))
		ul_proto = sp->spidx.ul_proto;

	addr = saidx->src;
	mask = FULLMASK;
	if (ul_proto != IPSEC_ULPROTO_ANY) {
		switch (sp->spidx.src.sa.sa_family) {
		case AF_INET:
			if (sp->spidx.src.sin.sin_port != IPSEC_PORT_ANY) {
				addr.sin.sin_port = sp->spidx.src.sin.sin_port;
				mask = sp->spidx.prefs;
			}
			break;
		case AF_INET6:
			if (sp->spidx.src.sin6.sin6_port != IPSEC_PORT_ANY) {
				addr.sin6.sin6_port =
				    sp->spidx.src.sin6.sin6_port;
				mask = sp->spidx.prefs;
			}
			break;
		default:
			break;
		}
	}
	m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &addr.sa, mask, ul_proto);
	if (!m) {
		error = ENOBUFS;
		goto fail;
	}
	m_cat(result, m);

	addr = saidx->dst;
	mask = FULLMASK;
	if (ul_proto != IPSEC_ULPROTO_ANY) {
		switch (sp->spidx.dst.sa.sa_family) {
		case AF_INET:
			if (sp->spidx.dst.sin.sin_port != IPSEC_PORT_ANY) {
				addr.sin.sin_port = sp->spidx.dst.sin.sin_port;
				mask = sp->spidx.prefd;
			}
			break;
		case AF_INET6:
			if (sp->spidx.dst.sin6.sin6_port != IPSEC_PORT_ANY) {
				addr.sin6.sin6_port =
				    sp->spidx.dst.sin6.sin6_port;
				mask = sp->spidx.prefd;
			}
			break;
		default:
			break;
		}
	}
	m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &addr.sa, mask, ul_proto);
	if (!m) {
		error = ENOBUFS;
		goto fail;
	}
	m_cat(result, m);

	/* XXX proxy address (optional) */

	/* set sadb_x_policy */
	if (sp) {
-		m = key_setsadbxpolicy(sp->policy, sp->spidx.dir, sp->id);
+		m = key_setsadbxpolicy(sp->policy, sp->spidx.dir, sp->id, sp->priority);
		if (!m) {
			error = ENOBUFS;
			goto fail;
		}
		m_cat(result, m);
	}

	/* XXX identity (optional) */
#if 0
	if (idexttype && fqdn) {
		/* create identity extension (FQDN) */
		struct sadb_ident *id;
		int fqdnlen;

		fqdnlen = strlen(fqdn) + 1;	/* +1 for terminating-NUL */
		id = (struct sadb_ident *)p;
		bzero(id, sizeof(*id) + PFKEY_ALIGN8(fqdnlen));
		id->sadb_ident_len = PFKEY_UNIT64(sizeof(*id) +
		    PFKEY_ALIGN8(fqdnlen));
		id->sadb_ident_exttype = idexttype;
		id->sadb_ident_type = SADB_IDENTTYPE_FQDN;
		bcopy(fqdn, id
+ 1, fqdnlen); p += sizeof(struct sadb_ident) + PFKEY_ALIGN8(fqdnlen); } if (idexttype) { /* create identity extension (USERFQDN) */ struct sadb_ident *id; int userfqdnlen; if (userfqdn) { /* +1 for terminating-NUL */ userfqdnlen = strlen(userfqdn) + 1; } else userfqdnlen = 0; id = (struct sadb_ident *)p; bzero(id, sizeof(*id) + PFKEY_ALIGN8(userfqdnlen)); id->sadb_ident_len = PFKEY_UNIT64(sizeof(*id) + PFKEY_ALIGN8(userfqdnlen)); id->sadb_ident_exttype = idexttype; id->sadb_ident_type = SADB_IDENTTYPE_USERFQDN; /* XXX is it correct? */ if (curproc && curproc->p_cred) id->sadb_ident_id = curproc->p_cred->p_ruid; if (userfqdn && userfqdnlen) bcopy(userfqdn, id + 1, userfqdnlen); p += sizeof(struct sadb_ident) + PFKEY_ALIGN8(userfqdnlen); } #endif /* XXX sensitivity (optional) */ /* create proposal/combination extension */ m = key_getprop(saidx); #if 0 /* * spec conformant: always attach proposal/combination extension, * the problem is that we have no way to attach it for ipcomp, * due to the way sadb_comb is declared in RFC2367. */ if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); #else /* * outside of spec; make proposal/combination extension optional. */ if (m) m_cat(result, m); #endif if ((result->m_flags & M_PKTHDR) == 0) { error = EINVAL; goto fail; } if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) { error = ENOBUFS; goto fail; } } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); fail: if (result) m_freem(result); return error; } static struct secacq * key_newacq(const struct secasindex *saidx) { struct secacq *newacq; /* get new entry */ newacq = malloc(sizeof(struct secacq), M_IPSEC_SAQ, M_NOWAIT|M_ZERO); if (newacq == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return NULL; } /* copy secindex */ bcopy(saidx, &newacq->saidx, sizeof(newacq->saidx)); newacq->seq = (V_acq_seq == ~0 ? 
	    1 : ++V_acq_seq);
	newacq->created = time_second;
	newacq->count = 0;

	/* add to acqtree */
	ACQ_LOCK();
	LIST_INSERT_HEAD(&V_acqtree, newacq, chain);
	ACQ_UNLOCK();

	return newacq;
}

static struct secacq *
key_getacq(const struct secasindex *saidx)
{
	struct secacq *acq;

	ACQ_LOCK();
	LIST_FOREACH(acq, &V_acqtree, chain) {
		if (key_cmpsaidx(saidx, &acq->saidx, CMP_EXACTLY))
			break;
	}
	ACQ_UNLOCK();

	return acq;
}

static struct secacq *
key_getacqbyseq(u_int32_t seq)
{
	struct secacq *acq;

	ACQ_LOCK();
	LIST_FOREACH(acq, &V_acqtree, chain) {
		if (acq->seq == seq)
			break;
	}
	ACQ_UNLOCK();

	return acq;
}

static struct secspacq *
key_newspacq(struct secpolicyindex *spidx)
{
	struct secspacq *acq;

	/* get new entry */
	acq = malloc(sizeof(struct secspacq), M_IPSEC_SAQ, M_NOWAIT|M_ZERO);
	if (acq == NULL) {
		ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__));
		return NULL;
	}

	/* copy secindex */
	bcopy(spidx, &acq->spidx, sizeof(acq->spidx));
	acq->created = time_second;
	acq->count = 0;

	/* add to spacqtree */
	SPACQ_LOCK();
	LIST_INSERT_HEAD(&V_spacqtree, acq, chain);
	SPACQ_UNLOCK();

	return acq;
}

static struct secspacq *
key_getspacq(struct secpolicyindex *spidx)
{
	struct secspacq *acq;

	SPACQ_LOCK();
	LIST_FOREACH(acq, &V_spacqtree, chain) {
		if (key_cmpspidx_exactly(spidx, &acq->spidx)) {
			/* NB: return holding spacq_lock */
			return acq;
		}
	}
	SPACQ_UNLOCK();

	return NULL;
}

/*
 * SADB_ACQUIRE processing.
 * In the first situation, we receive
 *   <base>
 * from the ikmpd and clear the sequence number of the matching secacq
 * entry.
 * In the second situation, we receive
 *   <base, address(SD), proposal>
 * from a userland process, trigger an ACQUIRE ourselves, and relay the
 * message to all registered sockets.
 *
 * m will always be freed.
 */
static int
key_acquire2(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp)
{
	const struct sadb_address *src0, *dst0;
	struct secasindex saidx;
	struct secashead *sah;
	u_int16_t proto;
	int error;

	IPSEC_ASSERT(so != NULL, ("null socket"));
	IPSEC_ASSERT(m != NULL, ("null mbuf"));
	IPSEC_ASSERT(mhp != NULL, ("null msghdr"));
	IPSEC_ASSERT(mhp->msg != NULL, ("null msg"));

	/*
	 * Error message from KMd.
	 * We assume that if an error occurred in the IKE daemon, the length
	 * of the PFKEY message is equal to the size of the sadb_msg
	 * structure.
	 * We do not raise an error even if an error occurs in this function.
	 */
	if (mhp->msg->sadb_msg_len == PFKEY_UNIT64(sizeof(struct sadb_msg))) {
		struct secacq *acq;

		/* check sequence number */
		if (mhp->msg->sadb_msg_seq == 0) {
			ipseclog((LOG_DEBUG, "%s: must specify sequence "
			    "number.\n", __func__));
			m_freem(m);
			return 0;
		}

		if ((acq = key_getacqbyseq(mhp->msg->sadb_msg_seq)) == NULL) {
			/*
			 * The specified larval SA is already gone, or we got
			 * a bogus sequence number.  We can silently ignore it.
			 */
			m_freem(m);
			return 0;
		}

		/*
		 * Reset the acq counter so that the entry will be deleted
		 * by the timehandler.
		 */
		acq->created = time_second;
		acq->count = 0;

		m_freem(m);
		return 0;
	}

	/*
	 * This message is from user land.
	 */
	/* map satype to proto */
	if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) {
		ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n",
		    __func__));
		return key_senderror(so, m, EINVAL);
	}

	if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL ||
	    mhp->ext[SADB_EXT_ADDRESS_DST] == NULL ||
	    mhp->ext[SADB_EXT_PROPOSAL] == NULL) {
		/* error */
		ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n",
		    __func__));
		return key_senderror(so, m, EINVAL);
	}
	if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) ||
	    mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) ||
	    mhp->extlen[SADB_EXT_PROPOSAL] < sizeof(struct sadb_prop)) {
		/* error */
		ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n",
		    __func__));
		return key_senderror(so, m, EINVAL);
	}

	src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC];
	dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST];

	/* XXX boundary check against sa_len */
	KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx);

	/*
	 * Make sure the port numbers are zero.
	 * In case of NAT-T we will update them later if needed.
	 */
	KEY_PORTTOSADDR(&saidx.src, 0);
	KEY_PORTTOSADDR(&saidx.dst, 0);

#ifdef IPSEC_NAT_T
	/*
	 * Handle NAT-T info if present.
	 */
	if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL &&
	    mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) {
		struct sadb_x_nat_t_port *sport, *dport;

		if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) ||
		    mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) {
			ipseclog((LOG_DEBUG, "%s: invalid message.\n",
			    __func__));
			return key_senderror(so, m, EINVAL);
		}

		sport = (struct sadb_x_nat_t_port *)
		    mhp->ext[SADB_X_EXT_NAT_T_SPORT];
		dport = (struct sadb_x_nat_t_port *)
		    mhp->ext[SADB_X_EXT_NAT_T_DPORT];

		if (sport)
			KEY_PORTTOSADDR(&saidx.src,
			    sport->sadb_x_nat_t_port_port);
		if (dport)
			KEY_PORTTOSADDR(&saidx.dst,
			    dport->sadb_x_nat_t_port_port);
	}
#endif

	/* get a SA index */
	SAHTREE_LOCK();
	LIST_FOREACH(sah, &V_sahtree, chain) {
		if (sah->state == SADB_SASTATE_DEAD)
			continue;
		if (key_cmpsaidx(&sah->saidx, &saidx, CMP_MODE_REQID))
			break;
	}
	SAHTREE_UNLOCK();
	if (sah != NULL) {
		ipseclog((LOG_DEBUG, "%s: a SA exists already.\n", __func__));
		return key_senderror(so, m, EEXIST);
	}

	error = key_acquire(&saidx, NULL);
	if (error != 0) {
		ipseclog((LOG_DEBUG,
		    "%s: error %d returned from key_acquire\n",
		    __func__, mhp->msg->sadb_msg_errno));
		return key_senderror(so, m, error);
	}

	return key_sendup_mbuf(so, m, KEY_SENDUP_REGISTERED);
}

/*
 * SADB_REGISTER processing.
 * If SATYPE_UNSPEC has been passed as satype, only return sadb_supported.
 * receive
 *   <base>
 * from the ikmpd, and register a socket to send PF_KEY messages,
 * and send
 *   <base, supported>
 * to KMD by PF_KEY.
 * If socket is detached, must free from regnode.
 *
 * m will always be freed.
 */
static int
key_register(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp)
{
	struct secreg *reg, *newreg = 0;

	IPSEC_ASSERT(so != NULL, ("null socket"));
	IPSEC_ASSERT(m != NULL, ("null mbuf"));
	IPSEC_ASSERT(mhp != NULL, ("null msghdr"));
	IPSEC_ASSERT(mhp->msg != NULL, ("null msg"));

	/* check for invalid register message */
	if (mhp->msg->sadb_msg_satype >= sizeof(V_regtree)/sizeof(V_regtree[0]))
		return key_senderror(so, m, EINVAL);
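#if 0
	/*
	 * Example of the userland side of this exchange (a sketch only;
	 * "fd" is a hypothetical PF_KEY socket obtained with
	 * socket(PF_KEY, SOCK_RAW, PF_KEY_V2), and error handling is
	 * omitted for brevity).  A bare base header is all a registration
	 * needs:
	 */
	struct sadb_msg msg;

	bzero(&msg, sizeof(msg));
	msg.sadb_msg_version = PF_KEY_V2;
	msg.sadb_msg_type = SADB_REGISTER;
	msg.sadb_msg_satype = SADB_SATYPE_ESP;
	msg.sadb_msg_len = PFKEY_UNIT64(sizeof(msg));	/* bare header: 2 */
	msg.sadb_msg_pid = getpid();
	write(fd, &msg, sizeof(msg));
	/*
	 * The kernel replies with <base, supported>, where each
	 * sadb_supported extension is followed by sadb_alg entries
	 * (id, ivlen, minbits, maxbits) built in the loops below.
	 */
#endif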
	/* When SATYPE_UNSPEC is specified, only return sadb_supported. */
	if (mhp->msg->sadb_msg_satype == SADB_SATYPE_UNSPEC)
		goto setmsg;

	/* check whether existing or not */
	REGTREE_LOCK();
	LIST_FOREACH(reg, &V_regtree[mhp->msg->sadb_msg_satype], chain) {
		if (reg->so == so) {
			REGTREE_UNLOCK();
			ipseclog((LOG_DEBUG, "%s: socket exists already.\n",
			    __func__));
			return key_senderror(so, m, EEXIST);
		}
	}

	/* create regnode */
	newreg = malloc(sizeof(struct secreg), M_IPSEC_SAR, M_NOWAIT|M_ZERO);
	if (newreg == NULL) {
		REGTREE_UNLOCK();
		ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__));
		return key_senderror(so, m, ENOBUFS);
	}

	newreg->so = so;
	((struct keycb *)sotorawcb(so))->kp_registered++;

	/* add regnode to regtree. */
	LIST_INSERT_HEAD(&V_regtree[mhp->msg->sadb_msg_satype], newreg, chain);
	REGTREE_UNLOCK();

  setmsg:
	{
	struct mbuf *n;
	struct sadb_msg *newmsg;
	struct sadb_supported *sup;
	u_int len, alen, elen;
	int off;
	int i;
	struct sadb_alg *alg;

	/* create new sadb_msg to reply. */
	alen = 0;
	for (i = 1; i <= SADB_AALG_MAX; i++) {
		if (ah_algorithm_lookup(i))
			alen += sizeof(struct sadb_alg);
	}
	if (alen)
		alen += sizeof(struct sadb_supported);
	elen = 0;
	for (i = 1; i <= SADB_EALG_MAX; i++) {
		if (esp_algorithm_lookup(i))
			elen += sizeof(struct sadb_alg);
	}
	if (elen)
		elen += sizeof(struct sadb_supported);

	len = sizeof(struct sadb_msg) + alen + elen;

	if (len > MCLBYTES)
		return key_senderror(so, m, ENOBUFS);

	MGETHDR(n, M_NOWAIT, MT_DATA);
	if (len > MHLEN) {
		if (!(MCLGET(n, M_NOWAIT))) {
			m_freem(n);
			n = NULL;
		}
	}
	if (!n)
		return key_senderror(so, m, ENOBUFS);

	n->m_pkthdr.len = n->m_len = len;
	n->m_next = NULL;
	off = 0;

	m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off);
	newmsg = mtod(n, struct sadb_msg *);
	newmsg->sadb_msg_errno = 0;
	newmsg->sadb_msg_len = PFKEY_UNIT64(len);
	off += PFKEY_ALIGN8(sizeof(struct sadb_msg));

	/* for authentication algorithm */
	if (alen) {
		sup = (struct sadb_supported *)(mtod(n, caddr_t) + off);
		sup->sadb_supported_len = PFKEY_UNIT64(alen);
		sup->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
		off += PFKEY_ALIGN8(sizeof(*sup));

		for (i = 1; i <= SADB_AALG_MAX; i++) {
			struct auth_hash *aalgo;
			u_int16_t minkeysize, maxkeysize;

			aalgo = ah_algorithm_lookup(i);
			if (!aalgo)
				continue;
			alg = (struct sadb_alg *)(mtod(n, caddr_t) + off);
			alg->sadb_alg_id = i;
			alg->sadb_alg_ivlen = 0;
			key_getsizes_ah(aalgo, i, &minkeysize, &maxkeysize);
			alg->sadb_alg_minbits = _BITS(minkeysize);
			alg->sadb_alg_maxbits = _BITS(maxkeysize);
			off += PFKEY_ALIGN8(sizeof(*alg));
		}
	}

	/* for encryption algorithm */
	if (elen) {
		sup = (struct sadb_supported *)(mtod(n, caddr_t) + off);
		sup->sadb_supported_len = PFKEY_UNIT64(elen);
		sup->sadb_supported_exttype = SADB_EXT_SUPPORTED_ENCRYPT;
		off += PFKEY_ALIGN8(sizeof(*sup));

		for (i = 1; i <= SADB_EALG_MAX; i++) {
			struct enc_xform *ealgo;

			ealgo = esp_algorithm_lookup(i);
			if (!ealgo)
				continue;
			alg = (struct sadb_alg *)(mtod(n, caddr_t) + off);
			alg->sadb_alg_id = i;
-			alg->sadb_alg_ivlen = ealgo->blocksize;
+			alg->sadb_alg_ivlen = ealgo->ivsize;
			alg->sadb_alg_minbits = _BITS(ealgo->minkey);
			alg->sadb_alg_maxbits = _BITS(ealgo->maxkey);
			off += PFKEY_ALIGN8(sizeof(struct sadb_alg));
		}
	}

	IPSEC_ASSERT(off == len,
	    ("length assumption failed (off %u len %u)", off, len));

	m_freem(m);
	return key_sendup_mbuf(so, n, KEY_SENDUP_REGISTERED);
	}
}

/*
 * free secreg entry registered.
 * XXX: I want to also free the socket that was registered with
 * SADB_REGISTER when it is closed.
 */
void
key_freereg(struct socket *so)
{
	struct secreg *reg;
	int i;

	IPSEC_ASSERT(so != NULL, ("NULL so"));

	/*
	 * check whether existing or not.
* check all type of SA, because there is a potential that * one socket is registered to multiple type of SA. */ REGTREE_LOCK(); for (i = 0; i <= SADB_SATYPE_MAX; i++) { LIST_FOREACH(reg, &V_regtree[i], chain) { if (reg->so == so && __LIST_CHAINED(reg)) { LIST_REMOVE(reg, chain); free(reg, M_IPSEC_SAR); break; } } } REGTREE_UNLOCK(); } /* * SADB_EXPIRE processing * send * * to KMD by PF_KEY. * NOTE: We send only soft lifetime extension. * * OUT: 0 : succeed * others : error number */ static int key_expire(struct secasvar *sav, int hard) { int satype; struct mbuf *result = NULL, *m; int len; int error = -1; struct sadb_lifetime *lt; IPSEC_ASSERT (sav != NULL, ("null sav")); IPSEC_ASSERT (sav->sah != NULL, ("null sa header")); /* set msg header */ satype = key_proto2satype(sav->sah->saidx.proto); IPSEC_ASSERT(satype != 0, ("invalid proto, satype %u", satype)); m = key_setsadbmsg(SADB_EXPIRE, 0, satype, sav->seq, 0, sav->refcnt); if (!m) { error = ENOBUFS; goto fail; } result = m; /* create SA extension */ m = key_setsadbsa(sav); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* create SA extension */ m = key_setsadbxsa2(sav->sah->saidx.mode, sav->replay ? sav->replay->count : 0, sav->sah->saidx.reqid); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* create lifetime extension (current and soft) */ len = PFKEY_ALIGN8(sizeof(*lt)) * 2; m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) { error = ENOBUFS; goto fail; } m_align(m, len); m->m_len = len; bzero(mtod(m, caddr_t), len); lt = mtod(m, struct sadb_lifetime *); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; lt->sadb_lifetime_allocations = sav->lft_c->allocations; lt->sadb_lifetime_bytes = sav->lft_c->bytes; lt->sadb_lifetime_addtime = sav->lft_c->addtime; lt->sadb_lifetime_usetime = sav->lft_c->usetime; lt = (struct sadb_lifetime *)(mtod(m, caddr_t) + len / 2); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); if (hard) { lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; lt->sadb_lifetime_allocations = sav->lft_h->allocations; lt->sadb_lifetime_bytes = sav->lft_h->bytes; lt->sadb_lifetime_addtime = sav->lft_h->addtime; lt->sadb_lifetime_usetime = sav->lft_h->usetime; } else { lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT; lt->sadb_lifetime_allocations = sav->lft_s->allocations; lt->sadb_lifetime_bytes = sav->lft_s->bytes; lt->sadb_lifetime_addtime = sav->lft_s->addtime; lt->sadb_lifetime_usetime = sav->lft_s->usetime; } m_cat(result, m); /* set sadb_address for source */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sav->sah->saidx.src.sa, FULLMASK, IPSEC_ULPROTO_ANY); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* set sadb_address for destination */ m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &sav->sah->saidx.dst.sa, FULLMASK, IPSEC_ULPROTO_ANY); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* * XXX-BZ Handle NAT-T extensions here. 
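 *
 * Worked example (sketch): for an SA whose hard add-time is 86400 s and
 * soft add-time 69120 s, the soft expire fires first and this function
 * sends the CURRENT plus SOFT lifetimes so the IKE daemon can rekey in
 * time; if the SA survives to 86400 s, the hard expire sends CURRENT
 * plus HARD instead.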
*/ if ((result->m_flags & M_PKTHDR) == 0) { error = EINVAL; goto fail; } if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) { error = ENOBUFS; goto fail; } } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); fail: if (result) m_freem(result); return error; } /* * SADB_FLUSH processing * receive * * from the ikmpd, and free all entries in secastree. * and send, * * to the ikmpd. * NOTE: to do is only marking SADB_SASTATE_DEAD. * * m will always be freed. */ static int key_flush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_msg *newmsg; struct secashead *sah, *nextsah; struct secasvar *sav, *nextsav; u_int16_t proto; u_int8_t state; u_int stateidx; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } /* no SATYPE specified, i.e. flushing all SA. */ SAHTREE_LOCK(); for (sah = LIST_FIRST(&V_sahtree); sah != NULL; sah = nextsah) { nextsah = LIST_NEXT(sah, chain); if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC && proto != sah->saidx.proto) continue; for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_alive); stateidx++) { state = saorder_state_any[stateidx]; for (sav = LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) { nextsav = LIST_NEXT(sav, chain); key_sa_chgstate(sav, SADB_SASTATE_DEAD); KEY_FREESAV(&sav); } } sah->state = SADB_SASTATE_DEAD; } SAHTREE_UNLOCK(); if (m->m_len < sizeof(struct sadb_msg) || sizeof(struct sadb_msg) > m->m_len + M_TRAILINGSPACE(m)) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return key_senderror(so, m, ENOBUFS); } if (m->m_next) m_freem(m->m_next); m->m_next = NULL; m->m_pkthdr.len = m->m_len = sizeof(struct sadb_msg); newmsg = mtod(m, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(m->m_pkthdr.len); return key_sendup_mbuf(so, m, KEY_SENDUP_ALL); } /* * SADB_DUMP processing * dump all entries including status of DEAD in SAD. * receive * * from the ikmpd, and dump all secasvar leaves * and send, * ..... * to the ikmpd. * * m will always be freed. */ static int key_dump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct secashead *sah; struct secasvar *sav; u_int16_t proto; u_int stateidx; u_int8_t satype; u_int8_t state; int cnt; struct sadb_msg *newmsg; struct mbuf *n; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } /* count sav entries to be sent to the userland. 
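 *
 * For example (sketch): with three matching SAs, the replies go up with
 * sadb_msg_seq 2, 1 and finally 0; userland keeps reading until it sees
 * seq == 0, which marks the end of the dump.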
 */
	cnt = 0;

	SAHTREE_LOCK();
	LIST_FOREACH(sah, &V_sahtree, chain) {
		if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
		    proto != sah->saidx.proto)
			continue;

		for (stateidx = 0;
		     stateidx < _ARRAYLEN(saorder_state_any);
		     stateidx++) {
			state = saorder_state_any[stateidx];
			LIST_FOREACH(sav, &sah->savtree[state], chain) {
				cnt++;
			}
		}
	}

	if (cnt == 0) {
		SAHTREE_UNLOCK();
		return key_senderror(so, m, ENOENT);
	}

	/* send this to the userland, one at a time. */
	newmsg = NULL;
	LIST_FOREACH(sah, &V_sahtree, chain) {
		if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
		    proto != sah->saidx.proto)
			continue;

		/* map proto to satype */
		if ((satype = key_proto2satype(sah->saidx.proto)) == 0) {
			SAHTREE_UNLOCK();
			ipseclog((LOG_DEBUG, "%s: there was invalid proto in "
			    "SAD.\n", __func__));
			return key_senderror(so, m, EINVAL);
		}

		for (stateidx = 0;
		     stateidx < _ARRAYLEN(saorder_state_any);
		     stateidx++) {
			state = saorder_state_any[stateidx];
			LIST_FOREACH(sav, &sah->savtree[state], chain) {
				n = key_setdumpsa(sav, SADB_DUMP, satype,
				    --cnt, mhp->msg->sadb_msg_pid);
				if (!n) {
					SAHTREE_UNLOCK();
					return key_senderror(so, m, ENOBUFS);
				}
				key_sendup_mbuf(so, n, KEY_SENDUP_ONE);
			}
		}
	}
	SAHTREE_UNLOCK();

	m_freem(m);
	return 0;
}

/*
 * SADB_X_PROMISC processing
 *
 * m will always be freed.
 */
static int
key_promisc(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp)
{
	int olen;

	IPSEC_ASSERT(so != NULL, ("null socket"));
	IPSEC_ASSERT(m != NULL, ("null mbuf"));
	IPSEC_ASSERT(mhp != NULL, ("null msghdr"));
	IPSEC_ASSERT(mhp->msg != NULL, ("null msg"));

	olen = PFKEY_UNUNIT64(mhp->msg->sadb_msg_len);

	if (olen < sizeof(struct sadb_msg)) {
#if 1
		return key_senderror(so, m, EINVAL);
#else
		m_freem(m);
		return 0;
#endif
	} else if (olen == sizeof(struct sadb_msg)) {
		/* enable/disable promisc mode */
		struct keycb *kp;

		if ((kp = (struct keycb *)sotorawcb(so)) == NULL)
			return key_senderror(so, m, EINVAL);
		mhp->msg->sadb_msg_errno = 0;
		switch (mhp->msg->sadb_msg_satype) {
		case 0:
		case 1:
			kp->kp_promisc = mhp->msg->sadb_msg_satype;
			break;
		default:
			return key_senderror(so, m, EINVAL);
		}

		/* send the original message back to everyone */
		mhp->msg->sadb_msg_errno = 0;
		return key_sendup_mbuf(so, m, KEY_SENDUP_ALL);
	} else {
		/* send packet as is */
		m_adj(m, PFKEY_ALIGN8(sizeof(struct sadb_msg)));

		/* TODO: if sadb_msg_seq is specified, send to specific pid */
		return key_sendup_mbuf(so, m, KEY_SENDUP_ALL);
	}
}

static int (*key_typesw[])(struct socket *, struct mbuf *,
		const struct sadb_msghdr *) = {
	NULL,			/* SADB_RESERVED */
	key_getspi,		/* SADB_GETSPI */
	key_update,		/* SADB_UPDATE */
	key_add,		/* SADB_ADD */
	key_delete,		/* SADB_DELETE */
	key_get,		/* SADB_GET */
	key_acquire2,		/* SADB_ACQUIRE */
	key_register,		/* SADB_REGISTER */
	NULL,			/* SADB_EXPIRE */
	key_flush,		/* SADB_FLUSH */
	key_dump,		/* SADB_DUMP */
	key_promisc,		/* SADB_X_PROMISC */
	NULL,			/* SADB_X_PCHANGE */
	key_spdadd,		/* SADB_X_SPDUPDATE */
	key_spdadd,		/* SADB_X_SPDADD */
	key_spddelete,		/* SADB_X_SPDDELETE */
	key_spdget,		/* SADB_X_SPDGET */
	NULL,			/* SADB_X_SPDACQUIRE */
	key_spddump,		/* SADB_X_SPDDUMP */
	key_spdflush,		/* SADB_X_SPDFLUSH */
	key_spdadd,		/* SADB_X_SPDSETIDX */
	NULL,			/* SADB_X_SPDEXPIRE */
	key_spddelete2,		/* SADB_X_SPDDELETE2 */
};
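/*
 * Worked example (sketch, reading the table above): an SADB_ADD message
 * (sadb_msg_type == 3) is dispatched as key_typesw[3] == key_add, while
 * types with NULL entries, such as SADB_EXPIRE, are rejected by
 * key_parse() with EINVAL.  PF_KEY lengths travel in 8-byte units, so
 * for the bare 16-byte base header PFKEY_UNIT64(16) == 2 and
 * PFKEY_UNUNIT64(2) == 16; key_parse() recomputes the byte length this
 * way and requires it to match the mbuf's m_pkthdr.len.
 */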
/*
 * Parse the sadb_msg buffer to process PFKEYv2 requests, and create the
 * response data if needed.  It would be cleaner to deal with the mbuf
 * directly.
 * IN:
 *    m : a received mbuf, pulled up so the base header is contiguous.
 *        It is rewritten into the response.
 *    so: pointer to the socket.
 * OUT:
 *    length of the buffer to send to the user process.
 */
int
key_parse(struct mbuf *m, struct socket *so)
{
	struct sadb_msg *msg;
	struct sadb_msghdr mh;
	u_int orglen;
	int error;
	int target;

	IPSEC_ASSERT(so != NULL, ("null socket"));
	IPSEC_ASSERT(m != NULL, ("null mbuf"));

#if 0	/*kdebug_sadb assumes msg in linear buffer*/
	KEYDEBUG(KEYDEBUG_KEY_DUMP,
		ipseclog((LOG_DEBUG, "%s: passed sadb_msg\n", __func__));
		kdebug_sadb(msg));
#endif

	if (m->m_len < sizeof(struct sadb_msg)) {
		m = m_pullup(m, sizeof(struct sadb_msg));
		if (!m)
			return ENOBUFS;
	}
	msg = mtod(m, struct sadb_msg *);
	orglen = PFKEY_UNUNIT64(msg->sadb_msg_len);
	target = KEY_SENDUP_ONE;

	if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len != orglen) {
		ipseclog((LOG_DEBUG, "%s: invalid message length.\n",
		    __func__));
		PFKEYSTAT_INC(out_invlen);
		error = EINVAL;
		goto senderror;
	}

	if (msg->sadb_msg_version != PF_KEY_V2) {
		ipseclog((LOG_DEBUG, "%s: PF_KEY version %u is mismatched.\n",
		    __func__, msg->sadb_msg_version));
		PFKEYSTAT_INC(out_invver);
		error = EINVAL;
		goto senderror;
	}

	if (msg->sadb_msg_type > SADB_MAX) {
		ipseclog((LOG_DEBUG, "%s: invalid type %u is passed.\n",
		    __func__, msg->sadb_msg_type));
		PFKEYSTAT_INC(out_invmsgtype);
		error = EINVAL;
		goto senderror;
	}

	/* for old-fashioned code - should be nuked */
	if (m->m_pkthdr.len > MCLBYTES) {
		m_freem(m);
		return ENOBUFS;
	}
	if (m->m_next) {
		struct mbuf *n;

		MGETHDR(n, M_NOWAIT, MT_DATA);
		if (n && m->m_pkthdr.len > MHLEN) {
			if (!(MCLGET(n, M_NOWAIT))) {
				m_free(n);
				n = NULL;
			}
		}
		if (!n) {
			m_freem(m);
			return ENOBUFS;
		}
		m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t));
		n->m_pkthdr.len = n->m_len = m->m_pkthdr.len;
		n->m_next = NULL;
		m_freem(m);
		m = n;
	}

	/* align the mbuf chain so that extensions are in contiguous region. */
	error = key_align(m, &mh);
	if (error)
		return error;

	msg = mh.msg;

	/* check SA type */
	switch (msg->sadb_msg_satype) {
	case SADB_SATYPE_UNSPEC:
		switch (msg->sadb_msg_type) {
		case SADB_GETSPI:
		case SADB_UPDATE:
		case SADB_ADD:
		case SADB_DELETE:
		case SADB_GET:
		case SADB_ACQUIRE:
		case SADB_EXPIRE:
			ipseclog((LOG_DEBUG, "%s: must specify satype "
			    "when msg type=%u.\n", __func__,
			    msg->sadb_msg_type));
			PFKEYSTAT_INC(out_invsatype);
			error = EINVAL;
			goto senderror;
		}
		break;
	case SADB_SATYPE_AH:
	case SADB_SATYPE_ESP:
	case SADB_X_SATYPE_IPCOMP:
	case SADB_X_SATYPE_TCPSIGNATURE:
		switch (msg->sadb_msg_type) {
		case SADB_X_SPDADD:
		case SADB_X_SPDDELETE:
		case SADB_X_SPDGET:
		case SADB_X_SPDDUMP:
		case SADB_X_SPDFLUSH:
		case SADB_X_SPDSETIDX:
		case SADB_X_SPDUPDATE:
		case SADB_X_SPDDELETE2:
			ipseclog((LOG_DEBUG, "%s: illegal satype=%u\n",
			    __func__, msg->sadb_msg_type));
			PFKEYSTAT_INC(out_invsatype);
			error = EINVAL;
			goto senderror;
		}
		break;
	case SADB_SATYPE_RSVP:
	case SADB_SATYPE_OSPFV2:
	case SADB_SATYPE_RIPV2:
	case SADB_SATYPE_MIP:
		ipseclog((LOG_DEBUG, "%s: type %u isn't supported.\n",
		    __func__, msg->sadb_msg_satype));
		PFKEYSTAT_INC(out_invsatype);
		error = EOPNOTSUPP;
		goto senderror;
	case 1:	/* XXX: What does it do?
*/ if (msg->sadb_msg_type == SADB_X_PROMISC) break; /*FALLTHROUGH*/ default: ipseclog((LOG_DEBUG, "%s: invalid type %u is passed.\n", __func__, msg->sadb_msg_satype)); PFKEYSTAT_INC(out_invsatype); error = EINVAL; goto senderror; } /* check field of upper layer protocol and address family */ if (mh.ext[SADB_EXT_ADDRESS_SRC] != NULL && mh.ext[SADB_EXT_ADDRESS_DST] != NULL) { struct sadb_address *src0, *dst0; u_int plen; src0 = (struct sadb_address *)(mh.ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mh.ext[SADB_EXT_ADDRESS_DST]); /* check upper layer protocol */ if (src0->sadb_address_proto != dst0->sadb_address_proto) { ipseclog((LOG_DEBUG, "%s: upper layer protocol " "mismatched.\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } /* check family */ if (PFKEY_ADDR_SADDR(src0)->sa_family != PFKEY_ADDR_SADDR(dst0)->sa_family) { ipseclog((LOG_DEBUG, "%s: address family mismatched.\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } if (PFKEY_ADDR_SADDR(src0)->sa_len != PFKEY_ADDR_SADDR(dst0)->sa_len) { ipseclog((LOG_DEBUG, "%s: address struct size " "mismatched.\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } switch (PFKEY_ADDR_SADDR(src0)->sa_family) { case AF_INET: if (PFKEY_ADDR_SADDR(src0)->sa_len != sizeof(struct sockaddr_in)) { PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } break; case AF_INET6: if (PFKEY_ADDR_SADDR(src0)->sa_len != sizeof(struct sockaddr_in6)) { PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } break; default: ipseclog((LOG_DEBUG, "%s: unsupported address family\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EAFNOSUPPORT; goto senderror; } switch (PFKEY_ADDR_SADDR(src0)->sa_family) { case AF_INET: plen = sizeof(struct in_addr) << 3; break; case AF_INET6: plen = sizeof(struct in6_addr) << 3; break; default: plen = 0; /*fool gcc*/ break; } /* check max prefix length */ if (src0->sadb_address_prefixlen > plen || dst0->sadb_address_prefixlen > plen) { ipseclog((LOG_DEBUG, "%s: illegal prefixlen.\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } /* * prefixlen == 0 is valid because there can be a case when * all addresses are matched. */ } if (msg->sadb_msg_type >= sizeof(key_typesw)/sizeof(key_typesw[0]) || key_typesw[msg->sadb_msg_type] == NULL) { PFKEYSTAT_INC(out_invmsgtype); error = EINVAL; goto senderror; } return (*key_typesw[msg->sadb_msg_type])(so, m, &mh); senderror: msg->sadb_msg_errno = error; return key_sendup_mbuf(so, m, target); } static int key_senderror(struct socket *so, struct mbuf *m, int code) { struct sadb_msg *msg; IPSEC_ASSERT(m->m_len >= sizeof(struct sadb_msg), ("mbuf too small, len %u", m->m_len)); msg = mtod(m, struct sadb_msg *); msg->sadb_msg_errno = code; return key_sendup_mbuf(so, m, KEY_SENDUP_ONE); } /* * set the pointer to each header into message buffer. * m will be freed on error. * XXX larger-than-MCLBYTES extension? 
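 *
 * For example (sketch): for a message whose base header announces a
 * PFKEY_UNUNIT64 length of 48 bytes, the walk starts at off = 16 (just
 * past the base header), advances by extlen =
 * PFKEY_UNUNIT64(ext->sadb_ext_len) per extension, and must finish
 * exactly at off == end (48) or the message is rejected with EINVAL.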
*/ static int key_align(struct mbuf *m, struct sadb_msghdr *mhp) { struct mbuf *n; struct sadb_ext *ext; size_t off, end; int extlen; int toff; IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(m->m_len >= sizeof(struct sadb_msg), ("mbuf too small, len %u", m->m_len)); /* initialize */ bzero(mhp, sizeof(*mhp)); mhp->msg = mtod(m, struct sadb_msg *); mhp->ext[0] = (struct sadb_ext *)mhp->msg; /*XXX backward compat */ end = PFKEY_UNUNIT64(mhp->msg->sadb_msg_len); extlen = end; /*just in case extlen is not updated*/ for (off = sizeof(struct sadb_msg); off < end; off += extlen) { n = m_pulldown(m, off, sizeof(struct sadb_ext), &toff); if (!n) { /* m is already freed */ return ENOBUFS; } ext = (struct sadb_ext *)(mtod(n, caddr_t) + toff); /* set pointer */ switch (ext->sadb_ext_type) { case SADB_EXT_SA: case SADB_EXT_ADDRESS_SRC: case SADB_EXT_ADDRESS_DST: case SADB_EXT_ADDRESS_PROXY: case SADB_EXT_LIFETIME_CURRENT: case SADB_EXT_LIFETIME_HARD: case SADB_EXT_LIFETIME_SOFT: case SADB_EXT_KEY_AUTH: case SADB_EXT_KEY_ENCRYPT: case SADB_EXT_IDENTITY_SRC: case SADB_EXT_IDENTITY_DST: case SADB_EXT_SENSITIVITY: case SADB_EXT_PROPOSAL: case SADB_EXT_SUPPORTED_AUTH: case SADB_EXT_SUPPORTED_ENCRYPT: case SADB_EXT_SPIRANGE: case SADB_X_EXT_POLICY: case SADB_X_EXT_SA2: #ifdef IPSEC_NAT_T case SADB_X_EXT_NAT_T_TYPE: case SADB_X_EXT_NAT_T_SPORT: case SADB_X_EXT_NAT_T_DPORT: case SADB_X_EXT_NAT_T_OAI: case SADB_X_EXT_NAT_T_OAR: case SADB_X_EXT_NAT_T_FRAG: #endif /* duplicate check */ /* * XXX Are there duplication payloads of either * KEY_AUTH or KEY_ENCRYPT ? */ if (mhp->ext[ext->sadb_ext_type] != NULL) { ipseclog((LOG_DEBUG, "%s: duplicate ext_type " "%u\n", __func__, ext->sadb_ext_type)); m_freem(m); PFKEYSTAT_INC(out_dupext); return EINVAL; } break; default: ipseclog((LOG_DEBUG, "%s: invalid ext_type %u\n", __func__, ext->sadb_ext_type)); m_freem(m); PFKEYSTAT_INC(out_invexttype); return EINVAL; } extlen = PFKEY_UNUNIT64(ext->sadb_ext_len); if (key_validate_ext(ext, extlen)) { m_freem(m); PFKEYSTAT_INC(out_invlen); return EINVAL; } n = m_pulldown(m, off, extlen, &toff); if (!n) { /* m is already freed */ return ENOBUFS; } ext = (struct sadb_ext *)(mtod(n, caddr_t) + toff); mhp->ext[ext->sadb_ext_type] = ext; mhp->extoff[ext->sadb_ext_type] = off; mhp->extlen[ext->sadb_ext_type] = extlen; } if (off != end) { m_freem(m); PFKEYSTAT_INC(out_invlen); return EINVAL; } return 0; } static int key_validate_ext(const struct sadb_ext *ext, int len) { const struct sockaddr *sa; enum { NONE, ADDR } checktype = NONE; int baselen = 0; const int sal = offsetof(struct sockaddr, sa_len) + sizeof(sa->sa_len); if (len != PFKEY_UNUNIT64(ext->sadb_ext_len)) return EINVAL; /* if it does not match minimum/maximum length, bail */ if (ext->sadb_ext_type >= sizeof(minsize) / sizeof(minsize[0]) || ext->sadb_ext_type >= sizeof(maxsize) / sizeof(maxsize[0])) return EINVAL; if (!minsize[ext->sadb_ext_type] || len < minsize[ext->sadb_ext_type]) return EINVAL; if (maxsize[ext->sadb_ext_type] && len > maxsize[ext->sadb_ext_type]) return EINVAL; /* more checks based on sadb_ext_type XXX need more */ switch (ext->sadb_ext_type) { case SADB_EXT_ADDRESS_SRC: case SADB_EXT_ADDRESS_DST: case SADB_EXT_ADDRESS_PROXY: baselen = PFKEY_ALIGN8(sizeof(struct sadb_address)); checktype = ADDR; break; case SADB_EXT_IDENTITY_SRC: case SADB_EXT_IDENTITY_DST: if (((const struct sadb_ident *)ext)->sadb_ident_type == SADB_X_IDENTTYPE_ADDR) { baselen = PFKEY_ALIGN8(sizeof(struct sadb_ident)); checktype = ADDR; } 
else checktype = NONE; break; default: checktype = NONE; break; } switch (checktype) { case NONE: break; case ADDR: sa = (const struct sockaddr *)(((const u_int8_t*)ext)+baselen); if (len < baselen + sal) return EINVAL; if (baselen + PFKEY_ALIGN8(sa->sa_len) != len) return EINVAL; break; } return 0; } void key_init(void) { int i; for (i = 0; i < IPSEC_DIR_MAX; i++) TAILQ_INIT(&V_sptree[i]); LIST_INIT(&V_sahtree); for (i = 0; i <= SADB_SATYPE_MAX; i++) LIST_INIT(&V_regtree[i]); LIST_INIT(&V_acqtree); LIST_INIT(&V_spacqtree); if (!IS_DEFAULT_VNET(curvnet)) return; SPTREE_LOCK_INIT(); REGTREE_LOCK_INIT(); SAHTREE_LOCK_INIT(); ACQ_LOCK_INIT(); SPACQ_LOCK_INIT(); #ifndef IPSEC_DEBUG2 callout_init(&key_timer, 1); callout_reset(&key_timer, hz, key_timehandler, NULL); #endif /*IPSEC_DEBUG2*/ /* initialize key statistics */ keystat.getspi_count = 1; printf("IPsec: Initialized Security Association Processing.\n"); } #ifdef VIMAGE void key_destroy(void) { TAILQ_HEAD(, secpolicy) drainq; struct secpolicy *sp, *nextsp; struct secacq *acq, *nextacq; struct secspacq *spacq, *nextspacq; struct secashead *sah, *nextsah; struct secreg *reg; int i; TAILQ_INIT(&drainq); SPTREE_WLOCK(); for (i = 0; i < IPSEC_DIR_MAX; i++) { TAILQ_CONCAT(&drainq, &V_sptree[i], chain); } SPTREE_WUNLOCK(); sp = TAILQ_FIRST(&drainq); while (sp != NULL) { nextsp = TAILQ_NEXT(sp, chain); KEY_FREESP(&sp); sp = nextsp; } SAHTREE_LOCK(); for (sah = LIST_FIRST(&V_sahtree); sah != NULL; sah = nextsah) { nextsah = LIST_NEXT(sah, chain); if (__LIST_CHAINED(sah)) { LIST_REMOVE(sah, chain); free(sah, M_IPSEC_SAH); } } SAHTREE_UNLOCK(); REGTREE_LOCK(); for (i = 0; i <= SADB_SATYPE_MAX; i++) { LIST_FOREACH(reg, &V_regtree[i], chain) { if (__LIST_CHAINED(reg)) { LIST_REMOVE(reg, chain); free(reg, M_IPSEC_SAR); break; } } } REGTREE_UNLOCK(); ACQ_LOCK(); for (acq = LIST_FIRST(&V_acqtree); acq != NULL; acq = nextacq) { nextacq = LIST_NEXT(acq, chain); if (__LIST_CHAINED(acq)) { LIST_REMOVE(acq, chain); free(acq, M_IPSEC_SAQ); } } ACQ_UNLOCK(); SPACQ_LOCK(); for (spacq = LIST_FIRST(&V_spacqtree); spacq != NULL; spacq = nextspacq) { nextspacq = LIST_NEXT(spacq, chain); if (__LIST_CHAINED(spacq)) { LIST_REMOVE(spacq, chain); free(spacq, M_IPSEC_SAQ); } } SPACQ_UNLOCK(); } #endif /* * XXX: maybe This function is called after INBOUND IPsec processing. * * Special check for tunnel-mode packets. * We must make some checks for consistency between inner and outer IP header. * * xxx more checks to be provided */ int key_checktunnelsanity(struct secasvar *sav, u_int family, caddr_t src, caddr_t dst) { IPSEC_ASSERT(sav->sah != NULL, ("null SA header")); /* XXX: check inner IP header */ return 1; } /* record data transfer on SA, and update timestamps */ void key_sa_recordxfer(struct secasvar *sav, struct mbuf *m) { IPSEC_ASSERT(sav != NULL, ("Null secasvar")); IPSEC_ASSERT(m != NULL, ("Null mbuf")); if (!sav->lft_c) return; /* * XXX Currently, there is a difference of bytes size * between inbound and outbound processing. */ sav->lft_c->bytes += m->m_pkthdr.len; /* to check bytes lifetime is done in key_timehandler(). */ /* * We use the number of packets as the unit of * allocations. We increment the variable * whenever {esp,ah}_{in,out}put is called. */ sav->lft_c->allocations++; /* XXX check for expires? */ /* * NOTE: We record CURRENT usetime by using wall clock, * in seconds. HARD and SOFT lifetime are measured by the time * difference (again in seconds) from usetime. 
* * usetime * v expire expire * -----+-----+--------+---> t * <--------------> HARD * <-----> SOFT */ sav->lft_c->usetime = time_second; /* XXX check for expires? */ return; } static void key_sa_chgstate(struct secasvar *sav, u_int8_t state) { IPSEC_ASSERT(sav != NULL, ("NULL sav")); SAHTREE_LOCK_ASSERT(); if (sav->state != state) { if (__LIST_CHAINED(sav)) LIST_REMOVE(sav, chain); sav->state = state; LIST_INSERT_HEAD(&sav->sah->savtree[state], sav, chain); } } /* * Take one of the kernel's security keys and convert it into a PF_KEY * structure within an mbuf, suitable for sending up to a waiting * application in user land. * * IN: * src: A pointer to a kernel security key. * exttype: Which type of key this is. Refer to the PF_KEY data structures. * OUT: * a valid mbuf or NULL indicating an error * */ static struct mbuf * key_setkey(struct seckey *src, u_int16_t exttype) { struct mbuf *m; struct sadb_key *p; int len; if (src == NULL) return NULL; len = PFKEY_ALIGN8(sizeof(struct sadb_key) + _KEYLEN(src)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return NULL; m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_key *); bzero(p, len); p->sadb_key_len = PFKEY_UNIT64(len); p->sadb_key_exttype = exttype; p->sadb_key_bits = src->bits; bcopy(src->key_data, _KEYBUF(p), _KEYLEN(src)); return m; } /* * Take one of the kernel's lifetime data structures and convert it * into a PF_KEY structure within an mbuf, suitable for sending up to * a waiting application in user land. * * IN: * src: A pointer to a kernel lifetime structure. * exttype: Which type of lifetime this is. Refer to the PF_KEY * data structures for more information. * OUT: * a valid mbuf or NULL indicating an error * */ static struct mbuf * key_setlifetime(struct seclifetime *src, u_int16_t exttype) { struct mbuf *m = NULL; struct sadb_lifetime *p; int len = PFKEY_ALIGN8(sizeof(struct sadb_lifetime)); if (src == NULL) return NULL; m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return m; m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_lifetime *); bzero(p, len); p->sadb_lifetime_len = PFKEY_UNIT64(len); p->sadb_lifetime_exttype = exttype; p->sadb_lifetime_allocations = src->allocations; p->sadb_lifetime_bytes = src->bytes; p->sadb_lifetime_addtime = src->addtime; p->sadb_lifetime_usetime = src->usetime; return m; } Index: projects/powernv/netipsec/xform_esp.c =================================================================== --- projects/powernv/netipsec/xform_esp.c (revision 290990) +++ projects/powernv/netipsec/xform_esp.c (revision 290991) @@ -1,1026 +1,1020 @@ /* $FreeBSD$ */ /* $OpenBSD: ip_esp.c,v 1.69 2001/06/26 06:18:59 angelos Exp $ */ /*- * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. Keromytis (kermit@csd.uch.gr) and * Niels Provos (provos@physnet.uni-hamburg.de). * * The original version of this code was written by John Ioannidis * for BSD/OS in Athens, Greece, in November 1995. * * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, * by Angelos D. Keromytis. * * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis * and Niels Provos. * * Additional features in 1999 by Angelos D. Keromytis. * * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, * Angelos D. Keromytis and Niels Provos. * Copyright (c) 2001 Angelos D. Keromytis. 
* * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all copies of any software which is or includes a copy or * modification of this software. * You may use this code under the GNU public license if you so wish. Please * contribute changes back to the authors under this freer than GPL license * so that we may further the use of strong encryption without limitations to * all. * * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR * PURPOSE. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif #include #include #include #include VNET_DEFINE(int, esp_enable) = 1; VNET_PCPUSTAT_DEFINE(struct espstat, espstat); VNET_PCPUSTAT_SYSINIT(espstat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(espstat); #endif /* VIMAGE */ SYSCTL_DECL(_net_inet_esp); SYSCTL_INT(_net_inet_esp, OID_AUTO, esp_enable, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(esp_enable), 0, ""); SYSCTL_VNET_PCPUSTAT(_net_inet_esp, IPSECCTL_STATS, stats, struct espstat, espstat, "ESP statistics (struct espstat, netipsec/esp_var.h"); static int esp_input_cb(struct cryptop *op); static int esp_output_cb(struct cryptop *crp); /* * NB: this is public for use by the PF_KEY support. * NB: if you add support here; be sure to add code to esp_attach below! */ struct enc_xform * esp_algorithm_lookup(int alg) { if (alg >= ESP_ALG_MAX) return NULL; switch (alg) { case SADB_EALG_DESCBC: return &enc_xform_des; case SADB_EALG_3DESCBC: return &enc_xform_3des; case SADB_X_EALG_AES: return &enc_xform_rijndael128; case SADB_X_EALG_BLOWFISHCBC: return &enc_xform_blf; case SADB_X_EALG_CAST128CBC: return &enc_xform_cast5; case SADB_EALG_NULL: return &enc_xform_null; case SADB_X_EALG_CAMELLIACBC: return &enc_xform_camellia; case SADB_X_EALG_AESCTR: return &enc_xform_aes_icm; case SADB_X_EALG_AESGCM16: return &enc_xform_aes_nist_gcm; case SADB_X_EALG_AESGMAC: return &enc_xform_aes_nist_gmac; } return NULL; } size_t esp_hdrsiz(struct secasvar *sav) { size_t size; if (sav != NULL) { /*XXX not right for null algorithm--does it matter??*/ IPSEC_ASSERT(sav->tdb_encalgxform != NULL, ("SA with null xform")); if (sav->flags & SADB_X_EXT_OLD) size = sizeof (struct esp); else size = sizeof (struct newesp); size += sav->tdb_encalgxform->blocksize + 9; /*XXX need alg check???*/ if (sav->tdb_authalgxform != NULL && sav->replay) size += ah_hdrsiz(sav); } else { /* * base header size * + max iv length for CBC mode * + max pad length * + sizeof (pad length field) * + sizeof (next header field) * + max icv supported. */ size = sizeof (struct newesp) + EALG_MAX_BLOCK_LEN + 9 + 16; } return size; } /* * esp_init() is called when an SPI is being set up. 
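 *
 * Worked example (sketch, from the counter/GCM handling below): an
 * AES-GCM-16 SA carries a 160-bit key_enc whose trailing 32 bits are the
 * RFC 4106 salt, so keylen = 20 - 4 = 16 bytes, the crypto session is
 * keyed with 160 - 32 = 128 bits, and the wire IV is fixed at 8 bytes.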
*/ static int esp_init(struct secasvar *sav, struct xformsw *xsp) { struct enc_xform *txform; struct cryptoini cria, crie; int keylen; int error; txform = esp_algorithm_lookup(sav->alg_enc); if (txform == NULL) { DPRINTF(("%s: unsupported encryption algorithm %d\n", __func__, sav->alg_enc)); return EINVAL; } if (sav->key_enc == NULL) { DPRINTF(("%s: no encoding key for %s algorithm\n", __func__, txform->name)); return EINVAL; } if ((sav->flags & (SADB_X_EXT_OLD | SADB_X_EXT_IV4B)) == SADB_X_EXT_IV4B) { DPRINTF(("%s: 4-byte IV not supported with protocol\n", __func__)); return EINVAL; } /* subtract off the salt, RFC4106, 8.1 and RFC3686, 5.1 */ keylen = _KEYLEN(sav->key_enc) - SAV_ISCTRORGCM(sav) * 4; if (txform->minkey > keylen || keylen > txform->maxkey) { DPRINTF(("%s: invalid key length %u, must be in the range " "[%u..%u] for algorithm %s\n", __func__, keylen, txform->minkey, txform->maxkey, txform->name)); return EINVAL; } - /* - * NB: The null xform needs a non-zero blocksize to keep the - * crypto code happy but if we use it to set ivlen then - * the ESP header will be processed incorrectly. The - * compromise is to force it to zero here. - */ if (SAV_ISCTRORGCM(sav)) sav->ivlen = 8; /* RFC4106 3.1 and RFC3686 3.1 */ else - sav->ivlen = (txform == &enc_xform_null ? 0 : txform->ivsize); + sav->ivlen = txform->ivsize; /* * Setup AH-related state. */ if (sav->alg_auth != 0) { error = ah_init0(sav, xsp, &cria); if (error) return error; } /* NB: override anything set in ah_init0 */ sav->tdb_xform = xsp; sav->tdb_encalgxform = txform; /* * Whenever AES-GCM is used for encryption, one * of the AES authentication algorithms is chosen * as well, based on the key size. */ if (sav->alg_enc == SADB_X_EALG_AESGCM16) { switch (keylen) { case AES_128_GMAC_KEY_LEN: sav->alg_auth = SADB_X_AALG_AES128GMAC; sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_128; break; case AES_192_GMAC_KEY_LEN: sav->alg_auth = SADB_X_AALG_AES192GMAC; sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_192; break; case AES_256_GMAC_KEY_LEN: sav->alg_auth = SADB_X_AALG_AES256GMAC; sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_256; break; default: DPRINTF(("%s: invalid key length %u" "for algorithm %s\n", __func__, keylen, txform->name)); return EINVAL; } bzero(&cria, sizeof(cria)); cria.cri_alg = sav->tdb_authalgxform->type; cria.cri_key = sav->key_enc->key_data; cria.cri_klen = _KEYBITS(sav->key_enc) - SAV_ISGCM(sav) * 32; } /* Initialize crypto session. */ bzero(&crie, sizeof(crie)); crie.cri_alg = sav->tdb_encalgxform->type; crie.cri_key = sav->key_enc->key_data; crie.cri_klen = _KEYBITS(sav->key_enc) - SAV_ISCTRORGCM(sav) * 32; if (sav->tdb_authalgxform && sav->tdb_encalgxform) { /* init both auth & enc */ crie.cri_next = &cria; error = crypto_newsession(&sav->tdb_cryptoid, &crie, V_crypto_support); } else if (sav->tdb_encalgxform) { error = crypto_newsession(&sav->tdb_cryptoid, &crie, V_crypto_support); } else if (sav->tdb_authalgxform) { error = crypto_newsession(&sav->tdb_cryptoid, &cria, V_crypto_support); } else { /* XXX cannot happen? */ DPRINTF(("%s: no encoding OR authentication xform!\n", __func__)); error = EINVAL; } return error; } /* * Paranoia. */ static int esp_zeroize(struct secasvar *sav) { /* NB: ah_zerorize free's the crypto session state */ int error = ah_zeroize(sav); if (sav->key_enc) bzero(sav->key_enc->key_data, _KEYLEN(sav->key_enc)); sav->tdb_encalgxform = NULL; sav->tdb_xform = NULL; return error; } /* * ESP input processing, called (eventually) through the protocol switch. 
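The key-length arithmetic in esp_init() above strips the 4 salt (AES-GCM, RFC 4106 8.1) or nonce (AES-CTR, RFC 3686 5.1) bytes that ride at the tail of the key blob, both when validating the length and when deriving cri_klen. A standalone sketch of that bookkeeping; effective_key_bits() is a hypothetical helper, not a kernel function:

#include <stdio.h>

/* Mirrors the keylen and cri_klen math in esp_init() above. */
static int
effective_key_bits(size_t blob_len, int is_ctr_or_gcm)
{
	/* For CTR/GCM the last 4 bytes are salt/nonce, not key. */
	size_t klen = blob_len - (is_ctr_or_gcm ? 4 : 0);

	return ((int)(klen * 8));
}

int
main(void)
{
	/* AES-128-GCM key blob: 16 key bytes + 4 salt bytes. */
	printf("%d\n", effective_key_bits(20, 1));	/* 128 */
	/* 3DES-CBC carries no salt: 24 bytes give 192 bits. */
	printf("%d\n", effective_key_bits(24, 0));	/* 192 */
	return (0);
}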
*/ static int esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { char buf[128]; struct auth_hash *esph; struct enc_xform *espx; struct tdb_crypto *tc; uint8_t *ivp; int plen, alen, hlen; struct newesp *esp; struct cryptodesc *crde; struct cryptop *crp; IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->tdb_encalgxform != NULL, ("null encoding xform")); /* Valid IP Packet length ? */ if ( (skip&3) || (m->m_pkthdr.len&3) ){ DPRINTF(("%s: misaligned packet, skip %u pkt len %u", __func__, skip, m->m_pkthdr.len)); ESPSTAT_INC(esps_badilen); m_freem(m); return EINVAL; } /* XXX don't pullup, just copy header */ IP6_EXTHDR_GET(esp, struct newesp *, m, skip, sizeof (struct newesp)); esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; /* Determine the ESP header and auth length */ if (sav->flags & SADB_X_EXT_OLD) hlen = sizeof (struct esp) + sav->ivlen; else hlen = sizeof (struct newesp) + sav->ivlen; alen = xform_ah_authsize(esph); /* * Verify payload length is multiple of encryption algorithm * block size. * * NB: This works for the null algorithm because the blocksize * is 4 and all packets must be 4-byte aligned regardless * of the algorithm. */ plen = m->m_pkthdr.len - (skip + hlen + alen); if ((plen & (espx->blocksize - 1)) || (plen <= 0)) { DPRINTF(("%s: payload of %d octets not a multiple of %d octets," " SA %s/%08lx\n", __func__, plen, espx->blocksize, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long)ntohl(sav->spi))); ESPSTAT_INC(esps_badilen); m_freem(m); return EINVAL; } /* * Check sequence number. */ if (esph != NULL && sav->replay != NULL && !ipsec_chkreplay(ntohl(esp->esp_seq), sav)) { DPRINTF(("%s: packet replay check for %s\n", __func__, ipsec_logsastr(sav, buf, sizeof(buf)))); /*XXX*/ ESPSTAT_INC(esps_replay); m_freem(m); return ENOBUFS; /*XXX*/ } /* Update the counters */ ESPSTAT_ADD(esps_ibytes, m->m_pkthdr.len - (skip + hlen + alen)); /* Get crypto descriptors */ crp = crypto_getreq(esph && espx ? 
2 : 1); if (crp == NULL) { DPRINTF(("%s: failed to acquire crypto descriptors\n", __func__)); ESPSTAT_INC(esps_crypto); m_freem(m); return ENOBUFS; } /* Get IPsec-specific opaque pointer */ tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto) + alen, M_XDATA, M_NOWAIT | M_ZERO); if (tc == NULL) { crypto_freereq(crp); DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); ESPSTAT_INC(esps_crypto); m_freem(m); return ENOBUFS; } if (esph != NULL) { struct cryptodesc *crda = crp->crp_desc; IPSEC_ASSERT(crda != NULL, ("null ah crypto descriptor")); /* Authentication descriptor */ crda->crd_skip = skip; if (SAV_ISGCM(sav)) crda->crd_len = 8; /* RFC4106 5, SPI + SN */ else crda->crd_len = m->m_pkthdr.len - (skip + alen); crda->crd_inject = m->m_pkthdr.len - alen; crda->crd_alg = esph->type; /* Copy the authenticator */ m_copydata(m, m->m_pkthdr.len - alen, alen, (caddr_t) (tc + 1)); /* Chain authentication request */ crde = crda->crd_next; } else { crde = crp->crp_desc; } /* Crypto operation descriptor */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = esp_input_cb; crp->crp_sid = sav->tdb_cryptoid; crp->crp_opaque = (caddr_t) tc; /* These are passed as-is to the callback */ tc->tc_spi = sav->spi; tc->tc_dst = sav->sah->saidx.dst; tc->tc_proto = sav->sah->saidx.proto; tc->tc_protoff = protoff; tc->tc_skip = skip; KEY_ADDREFSA(sav); tc->tc_sav = sav; /* Decryption descriptor */ IPSEC_ASSERT(crde != NULL, ("null esp crypto descriptor")); crde->crd_skip = skip + hlen; crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen); crde->crd_inject = skip + hlen - sav->ivlen; if (SAV_ISCTRORGCM(sav)) { ivp = &crde->crd_iv[0]; /* GCM IV Format: RFC4106 4 */ /* CTR IV Format: RFC3686 4 */ /* Salt is last four bytes of key, RFC4106 8.1 */ /* Nonce is last four bytes of key, RFC3686 5.1 */ memcpy(ivp, sav->key_enc->key_data + _KEYLEN(sav->key_enc) - 4, 4); if (SAV_ISCTR(sav)) { /* Initial block counter is 1, RFC3686 4 */ be32enc(&ivp[sav->ivlen + 4], 1); } m_copydata(m, skip + hlen - sav->ivlen, sav->ivlen, &ivp[4]); crde->crd_flags |= CRD_F_IV_EXPLICIT; } crde->crd_alg = espx->type; return (crypto_dispatch(crp)); } /* * ESP input callback from the crypto driver. */ static int esp_input_cb(struct cryptop *crp) { char buf[128]; u_int8_t lastthree[3], aalg[AH_HMAC_MAXHASHLEN]; int hlen, skip, protoff, error, alen; struct mbuf *m; struct cryptodesc *crd; struct auth_hash *esph; struct enc_xform *espx; struct tdb_crypto *tc; struct secasvar *sav; struct secasindex *saidx; caddr_t ptr; crd = crp->crp_desc; IPSEC_ASSERT(crd != NULL, ("null crypto descriptor!")); tc = (struct tdb_crypto *) crp->crp_opaque; IPSEC_ASSERT(tc != NULL, ("null opaque crypto data area!")); skip = tc->tc_skip; protoff = tc->tc_protoff; m = (struct mbuf *) crp->crp_buf; sav = tc->tc_sav; IPSEC_ASSERT(sav != NULL, ("null SA!")); saidx = &sav->sah->saidx; IPSEC_ASSERT(saidx->dst.sa.sa_family == AF_INET || saidx->dst.sa.sa_family == AF_INET6, ("unexpected protocol family %u", saidx->dst.sa.sa_family)); esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; /* Check for crypto errors */ if (crp->crp_etype) { /* Reset the session ID */ if (sav->tdb_cryptoid != 0) sav->tdb_cryptoid = crp->crp_sid; if (crp->crp_etype == EAGAIN) return (crypto_dispatch(crp)); ESPSTAT_INC(esps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; goto bad; } /* Shouldn't happen... 
*/ if (m == NULL) { ESPSTAT_INC(esps_crypto); DPRINTF(("%s: bogus returned buffer from crypto\n", __func__)); error = EINVAL; goto bad; } ESPSTAT_INC(esps_hist[sav->alg_enc]); /* If authentication was performed, check now. */ if (esph != NULL) { alen = xform_ah_authsize(esph); AHSTAT_INC(ahs_hist[sav->alg_auth]); /* Copy the authenticator from the packet */ m_copydata(m, m->m_pkthdr.len - alen, alen, aalg); ptr = (caddr_t) (tc + 1); /* Verify authenticator */ if (timingsafe_bcmp(ptr, aalg, alen) != 0) { DPRINTF(("%s: authentication hash mismatch for " "packet in SA %s/%08lx\n", __func__, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_badauth); error = EACCES; goto bad; } /* Remove trailing authenticator */ m_adj(m, -alen); } /* Release the crypto descriptors */ free(tc, M_XDATA), tc = NULL; crypto_freereq(crp), crp = NULL; /* * Packet is now decrypted. */ m->m_flags |= M_DECRYPTED; /* * Update replay sequence number, if appropriate. */ if (sav->replay) { u_int32_t seq; m_copydata(m, skip + offsetof(struct newesp, esp_seq), sizeof (seq), (caddr_t) &seq); if (ipsec_updatereplay(ntohl(seq), sav)) { DPRINTF(("%s: packet replay check for %s\n", __func__, ipsec_logsastr(sav, buf, sizeof(buf)))); ESPSTAT_INC(esps_replay); error = ENOBUFS; goto bad; } } /* Determine the ESP header length */ if (sav->flags & SADB_X_EXT_OLD) hlen = sizeof (struct esp) + sav->ivlen; else hlen = sizeof (struct newesp) + sav->ivlen; /* Remove the ESP header and IV from the mbuf. */ error = m_striphdr(m, skip, hlen); if (error) { ESPSTAT_INC(esps_hdrops); DPRINTF(("%s: bad mbuf chain, SA %s/%08lx\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); goto bad; } /* Save the last three bytes of decrypted data */ m_copydata(m, m->m_pkthdr.len - 3, 3, lastthree); /* Verify pad length */ if (lastthree[1] + 2 > m->m_pkthdr.len - skip) { ESPSTAT_INC(esps_badilen); DPRINTF(("%s: invalid padding length %d for %u byte packet " "in SA %s/%08lx\n", __func__, lastthree[1], m->m_pkthdr.len - skip, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); error = EINVAL; goto bad; } /* Verify correct decryption by checking the last padding bytes */ if ((sav->flags & SADB_X_EXT_PMASK) != SADB_X_EXT_PRAND) { if (lastthree[1] != lastthree[0] && lastthree[1] != 0) { ESPSTAT_INC(esps_badenc); DPRINTF(("%s: decryption failed for packet in " "SA %s/%08lx\n", __func__, ipsec_address( &sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); error = EINVAL; goto bad; } } /* Trim the mbuf chain to remove trailing authenticator and padding */ m_adj(m, -(lastthree[1] + 2)); /* Restore the Next Protocol field */ m_copyback(m, protoff, sizeof (u_int8_t), lastthree + 2); switch (saidx->dst.sa.sa_family) { #ifdef INET6 case AF_INET6: error = ipsec6_common_input_cb(m, sav, skip, protoff); break; #endif #ifdef INET case AF_INET: error = ipsec4_common_input_cb(m, sav, skip, protoff); break; #endif default: panic("%s: Unexpected address family: %d saidx=%p", __func__, saidx->dst.sa.sa_family, saidx); } KEY_FREESAV(&sav); return error; bad: if (sav) KEY_FREESAV(&sav); if (m != NULL) m_freem(m); if (tc != NULL) free(tc, M_XDATA); if (crp != NULL) crypto_freereq(crp); return error; } /* * ESP output routine, called by ipsec[46]_process_packet(). 
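The trailer handling in esp_input_cb() above reads the last three decrypted bytes as { last pad byte, pad length, next header } and sanity-checks them before trimming. A user-space sketch of those two checks, assuming self-describing (or empty) padding; in the kernel the second test is skipped when the SA uses random padding (SADB_X_EXT_PRAND):

#include <stdint.h>
#include <stdio.h>

/*
 * lastthree[] holds the final decrypted bytes:
 * { last pad byte, pad length, next header }.
 */
static int
esp_trailer_ok(const uint8_t lastthree[3], int payload_len)
{
	/* The pad plus the two trailer bytes must fit in the payload. */
	if (lastthree[1] + 2 > payload_len)
		return (0);
	/*
	 * Self-describing padding is 1, 2, ..., padlen, so the last pad
	 * byte must equal the pad length (or the pad must be empty).
	 */
	if (lastthree[1] != lastthree[0] && lastthree[1] != 0)
		return (0);
	return (1);
}

int
main(void)
{
	uint8_t good[3] = { 2, 2, 4 };	/* pad "1 2", padlen 2 */
	uint8_t bad[3] = { 7, 2, 4 };	/* last pad byte disagrees */

	printf("%d %d\n", esp_trailer_ok(good, 64), esp_trailer_ok(bad, 64));
	return (0);
}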
*/ static int esp_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp, int skip, int protoff) { char buf[INET6_ADDRSTRLEN]; struct enc_xform *espx; struct auth_hash *esph; uint8_t *ivp; uint64_t cntr; int hlen, rlen, padding, blks, alen, i, roff; struct mbuf *mo = (struct mbuf *) NULL; struct tdb_crypto *tc; struct secasvar *sav; struct secasindex *saidx; unsigned char *pad; u_int8_t prot; int error, maxpacketsize; struct cryptodesc *crde = NULL, *crda = NULL; struct cryptop *crp; sav = isr->sav; IPSEC_ASSERT(sav != NULL, ("null SA")); esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; IPSEC_ASSERT(espx != NULL, ("null encoding xform")); if (sav->flags & SADB_X_EXT_OLD) hlen = sizeof (struct esp) + sav->ivlen; else hlen = sizeof (struct newesp) + sav->ivlen; rlen = m->m_pkthdr.len - skip; /* Raw payload length. */ /* * RFC4303 2.4 Requires 4 byte alignment. */ blks = MAX(4, espx->blocksize); /* Cipher blocksize */ /* XXX clamp padding length a la KAME??? */ padding = ((blks - ((rlen + 2) % blks)) % blks) + 2; alen = xform_ah_authsize(esph); ESPSTAT_INC(esps_output); saidx = &sav->sah->saidx; /* Check for maximum packet size violations. */ switch (saidx->dst.sa.sa_family) { #ifdef INET case AF_INET: maxpacketsize = IP_MAXPACKET; break; #endif /* INET */ #ifdef INET6 case AF_INET6: maxpacketsize = IPV6_MAXPACKET; break; #endif /* INET6 */ default: DPRINTF(("%s: unknown/unsupported protocol " "family %d, SA %s/%08lx\n", __func__, saidx->dst.sa.sa_family, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_nopf); error = EPFNOSUPPORT; goto bad; } DPRINTF(("%s: skip %d hlen %d rlen %d padding %d alen %d blksd %d\n", __func__, skip, hlen, rlen, padding, alen, blks)); if (skip + hlen + rlen + padding + alen > maxpacketsize) { DPRINTF(("%s: packet in SA %s/%08lx got too big " "(len %u, max len %u)\n", __func__, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi), skip + hlen + rlen + padding + alen, maxpacketsize)); ESPSTAT_INC(esps_toobig); error = EMSGSIZE; goto bad; } /* Update the counters. */ ESPSTAT_ADD(esps_obytes, m->m_pkthdr.len - skip); m = m_unshare(m, M_NOWAIT); if (m == NULL) { DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_hdrops); error = ENOBUFS; goto bad; } /* Inject ESP header. */ mo = m_makespace(m, skip, hlen, &roff); if (mo == NULL) { DPRINTF(("%s: %u byte ESP hdr inject failed for SA %s/%08lx\n", __func__, hlen, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_hdrops); /* XXX diffs from openbsd */ error = ENOBUFS; goto bad; } /* Initialize ESP header. */ bcopy((caddr_t) &sav->spi, mtod(mo, caddr_t) + roff, sizeof(u_int32_t)); if (sav->replay) { u_int32_t replay; #ifdef REGRESSION /* Emulate replay attack when ipsec_replay is TRUE. */ if (!V_ipsec_replay) #endif sav->replay->count++; replay = htonl(sav->replay->count); bcopy((caddr_t) &replay, mtod(mo, caddr_t) + roff + sizeof(u_int32_t), sizeof(u_int32_t)); } /* * Add padding -- better to do it ourselves than use the crypto engine, * although if/when we support compression, we'd have to do that. 
*/ pad = (u_char *) m_pad(m, padding + alen); if (pad == NULL) { DPRINTF(("%s: m_pad failed for SA %s/%08lx\n", __func__, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); m = NULL; /* NB: free'd by m_pad */ error = ENOBUFS; goto bad; } /* * Add padding: random, zero, or self-describing. * XXX catch unexpected setting */ switch (sav->flags & SADB_X_EXT_PMASK) { case SADB_X_EXT_PRAND: (void) read_random(pad, padding - 2); break; case SADB_X_EXT_PZERO: bzero(pad, padding - 2); break; case SADB_X_EXT_PSEQ: for (i = 0; i < padding - 2; i++) pad[i] = i+1; break; } /* Fix padding length and Next Protocol in padding itself. */ pad[padding - 2] = padding - 2; m_copydata(m, protoff, sizeof(u_int8_t), pad + padding - 1); /* Fix Next Protocol in IPv4/IPv6 header. */ prot = IPPROTO_ESP; m_copyback(m, protoff, sizeof(u_int8_t), (u_char *) &prot); /* Get crypto descriptors. */ crp = crypto_getreq(esph != NULL ? 2 : 1); if (crp == NULL) { DPRINTF(("%s: failed to acquire crypto descriptors\n", __func__)); ESPSTAT_INC(esps_crypto); error = ENOBUFS; goto bad; } /* IPsec-specific opaque crypto info. */ tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto), M_XDATA, M_NOWAIT|M_ZERO); if (tc == NULL) { crypto_freereq(crp); DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); ESPSTAT_INC(esps_crypto); error = ENOBUFS; goto bad; } crde = crp->crp_desc; crda = crde->crd_next; /* Encryption descriptor. */ crde->crd_skip = skip + hlen; crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen); crde->crd_flags = CRD_F_ENCRYPT; crde->crd_inject = skip + hlen - sav->ivlen; /* Encryption operation. */ crde->crd_alg = espx->type; if (SAV_ISCTRORGCM(sav)) { ivp = &crde->crd_iv[0]; /* GCM IV Format: RFC4106 4 */ /* CTR IV Format: RFC3686 4 */ /* Salt is last four bytes of key, RFC4106 8.1 */ /* Nonce is last four bytes of key, RFC3686 5.1 */ memcpy(ivp, sav->key_enc->key_data + _KEYLEN(sav->key_enc) - 4, 4); SECASVAR_LOCK(sav); cntr = sav->cntr++; SECASVAR_UNLOCK(sav); be64enc(&ivp[4], cntr); if (SAV_ISCTR(sav)) { /* Initial block counter is 1, RFC3686 4 */ be32enc(&ivp[sav->ivlen + 4], 1); } m_copyback(m, skip + hlen - sav->ivlen, sav->ivlen, &ivp[4]); crde->crd_flags |= CRD_F_IV_EXPLICIT|CRD_F_IV_PRESENT; } /* Callback parameters */ key_addref(isr->sp); tc->tc_isr = isr; KEY_ADDREFSA(sav); tc->tc_sav = sav; tc->tc_spi = sav->spi; tc->tc_dst = saidx->dst; tc->tc_proto = saidx->proto; /* Crypto operation descriptor. */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = esp_output_cb; crp->crp_opaque = (caddr_t) tc; crp->crp_sid = sav->tdb_cryptoid; if (esph) { /* Authentication descriptor. */ crda->crd_alg = esph->type; crda->crd_skip = skip; if (SAV_ISGCM(sav)) crda->crd_len = 8; /* RFC4106 5, SPI + SN */ else crda->crd_len = m->m_pkthdr.len - (skip + alen); crda->crd_inject = m->m_pkthdr.len - alen; } return crypto_dispatch(crp); bad: if (m) m_freem(m); return (error); } /* * ESP output callback from the crypto driver. 
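For AES-CTR and AES-GCM the output path above assembles the counter block from three pieces: the 4-byte salt or nonce taken from the tail of the key, the 8-byte per-SA packet counter, and (for CTR) an initial block counter of 1; only the middle 8 bytes travel in the packet. A minimal sketch of that layout for the CTR case; ctr_counter_block() and the be*enc_buf() helpers are illustrative stand-ins for the kernel's be32enc()/be64enc():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void
be32enc_buf(uint8_t *p, uint32_t v)
{
	p[0] = v >> 24; p[1] = v >> 16; p[2] = v >> 8; p[3] = v;
}

static void
be64enc_buf(uint8_t *p, uint64_t v)
{
	be32enc_buf(p, (uint32_t)(v >> 32));
	be32enc_buf(p + 4, (uint32_t)v);
}

static void
ctr_counter_block(uint8_t block[16], const uint8_t *key, size_t keylen,
    uint64_t pkt_cntr)
{
	memcpy(block, key + keylen - 4, 4);	/* nonce: last 4 key bytes */
	be64enc_buf(block + 4, pkt_cntr);	/* per-SA packet counter */
	be32enc_buf(block + 12, 1);		/* initial block counter */
}

int
main(void)
{
	uint8_t key[20] = { 0 };	/* 16 key bytes + 4-byte nonce */
	uint8_t block[16];
	int i;

	key[16] = 0xde; key[17] = 0xad; key[18] = 0xbe; key[19] = 0xef;
	ctr_counter_block(block, key, sizeof(key), 1);
	for (i = 0; i < 16; i++)
		printf("%02x", block[i]);
	printf("\n");	/* deadbeef, 0000000000000001, 00000001 run together */
	return (0);
}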
*/ static int esp_output_cb(struct cryptop *crp) { char buf[INET6_ADDRSTRLEN]; struct tdb_crypto *tc; struct ipsecrequest *isr; struct secasvar *sav; struct mbuf *m; int error; tc = (struct tdb_crypto *) crp->crp_opaque; IPSEC_ASSERT(tc != NULL, ("null opaque data area!")); m = (struct mbuf *) crp->crp_buf; isr = tc->tc_isr; IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp")); IPSECREQUEST_LOCK(isr); sav = tc->tc_sav; /* With the isr lock released, SA pointer may have changed. */ if (sav != isr->sav) { ESPSTAT_INC(esps_notdb); DPRINTF(("%s: SA gone during crypto (SA %s/%08lx proto %u)\n", __func__, ipsec_address(&tc->tc_dst, buf, sizeof(buf)), (u_long) ntohl(tc->tc_spi), tc->tc_proto)); error = ENOBUFS; /*XXX*/ goto bad; } /* Check for crypto errors. */ if (crp->crp_etype) { /* Reset session ID. */ if (sav->tdb_cryptoid != 0) sav->tdb_cryptoid = crp->crp_sid; if (crp->crp_etype == EAGAIN) { IPSECREQUEST_UNLOCK(isr); return (crypto_dispatch(crp)); } ESPSTAT_INC(esps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; goto bad; } /* Shouldn't happen... */ if (m == NULL) { ESPSTAT_INC(esps_crypto); DPRINTF(("%s: bogus returned buffer from crypto\n", __func__)); error = EINVAL; goto bad; } ESPSTAT_INC(esps_hist[sav->alg_enc]); if (sav->tdb_authalgxform != NULL) AHSTAT_INC(ahs_hist[sav->alg_auth]); /* Release crypto descriptors. */ free(tc, M_XDATA); crypto_freereq(crp); #ifdef REGRESSION /* Emulate man-in-the-middle attack when ipsec_integrity is TRUE. */ if (V_ipsec_integrity) { static unsigned char ipseczeroes[AH_HMAC_MAXHASHLEN]; struct auth_hash *esph; /* * Corrupt HMAC if we want to test integrity verification of * the other side. */ esph = sav->tdb_authalgxform; if (esph != NULL) { int alen; alen = xform_ah_authsize(esph); m_copyback(m, m->m_pkthdr.len - alen, alen, ipseczeroes); } } #endif /* NB: m is reclaimed by ipsec_process_done. */ error = ipsec_process_done(m, isr); KEY_FREESAV(&sav); IPSECREQUEST_UNLOCK(isr); KEY_FREESP(&isr->sp); return (error); bad: if (sav) KEY_FREESAV(&sav); IPSECREQUEST_UNLOCK(isr); KEY_FREESP(&isr->sp); if (m) m_freem(m); free(tc, M_XDATA); crypto_freereq(crp); return (error); } static struct xformsw esp_xformsw = { XF_ESP, XFT_CONF|XFT_AUTH, "IPsec ESP", esp_init, esp_zeroize, esp_input, esp_output }; static void esp_attach(void) { xform_register(&esp_xformsw); } SYSINIT(esp_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, esp_attach, NULL); Index: projects/powernv/netpfil/pf/pf.c =================================================================== --- projects/powernv/netpfil/pf/pf.c (revision 290990) +++ projects/powernv/netpfil/pf/pf.c (revision 290991) @@ -1,6526 +1,6526 @@ /*- * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2002 - 2008 Henning Brauer * Copyright (c) 2012 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Effort sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F30602-01-2-0537. * * $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_bpf.h" #include "opt_pf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* XXX: only for DIR_IN/DIR_OUT */ #ifdef INET6 #include #include #include #include #include #endif /* INET6 */ #include #include #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x /* * Global variables */ /* state tables */ VNET_DEFINE(struct pf_altqqueue, pf_altqs[2]); VNET_DEFINE(struct pf_palist, pf_pabuf); VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active); VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive); VNET_DEFINE(struct pf_kstatus, pf_status); VNET_DEFINE(u_int32_t, ticket_altqs_active); VNET_DEFINE(u_int32_t, ticket_altqs_inactive); VNET_DEFINE(int, altqs_inactive_open); VNET_DEFINE(u_int32_t, ticket_pabuf); VNET_DEFINE(MD5_CTX, pf_tcp_secret_ctx); #define V_pf_tcp_secret_ctx VNET(pf_tcp_secret_ctx) VNET_DEFINE(u_char, pf_tcp_secret[16]); #define V_pf_tcp_secret VNET(pf_tcp_secret) VNET_DEFINE(int, pf_tcp_secret_init); #define V_pf_tcp_secret_init VNET(pf_tcp_secret_init) VNET_DEFINE(int, pf_tcp_iss_off); #define V_pf_tcp_iss_off VNET(pf_tcp_iss_off) /* * Queue for pf_intr() sends. */ static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations"); struct pf_send_entry { STAILQ_ENTRY(pf_send_entry) pfse_next; struct mbuf *pfse_m; enum { PFSE_IP, PFSE_IP6, PFSE_ICMP, PFSE_ICMP6, } pfse_type; struct { int type; int code; int mtu; } icmpopts; }; STAILQ_HEAD(pf_send_head, pf_send_entry); static VNET_DEFINE(struct pf_send_head, pf_sendqueue); #define V_pf_sendqueue VNET(pf_sendqueue) static struct mtx pf_sendqueue_mtx; MTX_SYSINIT(pf_sendqueue_mtx, &pf_sendqueue_mtx, "pf send queue", MTX_DEF); #define PF_SENDQ_LOCK() mtx_lock(&pf_sendqueue_mtx) #define PF_SENDQ_UNLOCK() mtx_unlock(&pf_sendqueue_mtx) /* * Queue for pf_overload_task() tasks. 
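The send and overload queues declared below are drained with a swap-under-lock pattern (see pf_intr() and pf_overload_task() later in this file): the whole list head is stolen while the mutex is held, then entries are processed and freed with the lock dropped, so producers block only for the pointer swap. A self-contained pthreads sketch of the same idea, using a LIFO list for brevity where the kernel uses a FIFO STAILQ:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	int		work;
	struct entry	*next;
};

static struct entry *head;	/* the shared queue */
static pthread_mutex_t qmtx = PTHREAD_MUTEX_INITIALIZER;

static void
enqueue(int work)
{
	struct entry *e = malloc(sizeof(*e));

	if (e == NULL)
		return;
	e->work = work;
	pthread_mutex_lock(&qmtx);
	e->next = head;		/* LIFO for brevity */
	head = e;
	pthread_mutex_unlock(&qmtx);
}

static void
drain(void)
{
	struct entry *q, *next;

	pthread_mutex_lock(&qmtx);
	q = head;		/* steal the whole queue ... */
	head = NULL;		/* ... leaving it empty for producers */
	pthread_mutex_unlock(&qmtx);

	for (; q != NULL; q = next) {	/* process with the lock dropped */
		next = q->next;
		printf("processing %d\n", q->work);
		free(q);
	}
}

int
main(void)
{
	enqueue(1);
	enqueue(2);
	drain();	/* prints 2 then 1 */
	return (0);
}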
*/ struct pf_overload_entry { SLIST_ENTRY(pf_overload_entry) next; struct pf_addr addr; sa_family_t af; uint8_t dir; struct pf_rule *rule; }; SLIST_HEAD(pf_overload_head, pf_overload_entry); static VNET_DEFINE(struct pf_overload_head, pf_overloadqueue); #define V_pf_overloadqueue VNET(pf_overloadqueue) static VNET_DEFINE(struct task, pf_overloadtask); #define V_pf_overloadtask VNET(pf_overloadtask) static struct mtx pf_overloadqueue_mtx; MTX_SYSINIT(pf_overloadqueue_mtx, &pf_overloadqueue_mtx, "pf overload/flush queue", MTX_DEF); #define PF_OVERLOADQ_LOCK() mtx_lock(&pf_overloadqueue_mtx) #define PF_OVERLOADQ_UNLOCK() mtx_unlock(&pf_overloadqueue_mtx) VNET_DEFINE(struct pf_rulequeue, pf_unlinked_rules); struct mtx pf_unlnkdrules_mtx; MTX_SYSINIT(pf_unlnkdrules_mtx, &pf_unlnkdrules_mtx, "pf unlinked rules", MTX_DEF); static VNET_DEFINE(uma_zone_t, pf_sources_z); #define V_pf_sources_z VNET(pf_sources_z) uma_zone_t pf_mtag_z; VNET_DEFINE(uma_zone_t, pf_state_z); VNET_DEFINE(uma_zone_t, pf_state_key_z); VNET_DEFINE(uint64_t, pf_stateid[MAXCPU]); #define PFID_CPUBITS 8 #define PFID_CPUSHIFT (sizeof(uint64_t) * NBBY - PFID_CPUBITS) #define PFID_CPUMASK ((uint64_t)((1 << PFID_CPUBITS) - 1) << PFID_CPUSHIFT) #define PFID_MAXID (~PFID_CPUMASK) CTASSERT((1 << PFID_CPUBITS) >= MAXCPU); static void pf_src_tree_remove_state(struct pf_state *); static void pf_init_threshold(struct pf_threshold *, u_int32_t, u_int32_t); static void pf_add_threshold(struct pf_threshold *); static int pf_check_threshold(struct pf_threshold *); static void pf_change_ap(struct mbuf *, struct pf_addr *, u_int16_t *, u_int16_t *, u_int16_t *, struct pf_addr *, u_int16_t, u_int8_t, sa_family_t); static int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, struct tcphdr *, struct pf_state_peer *); static void pf_change_icmp(struct pf_addr *, u_int16_t *, struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t *, u_int16_t *, u_int16_t *, u_int16_t *, u_int8_t, sa_family_t); static void pf_send_tcp(struct mbuf *, const struct pf_rule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, u_int16_t, struct ifnet *); static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, sa_family_t, struct pf_rule *); static void pf_detach_state(struct pf_state *); static int pf_state_key_attach(struct pf_state_key *, struct pf_state_key *, struct pf_state *); static void pf_state_key_detach(struct pf_state *, int); static int pf_state_key_ctor(void *, int, void *, int); static u_int32_t pf_tcp_iss(struct pf_pdesc *); static int pf_test_rule(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, struct pf_pdesc *, struct pf_rule **, struct pf_ruleset **, struct inpcb *); static int pf_create_state(struct pf_rule *, struct pf_rule *, struct pf_rule *, struct pf_pdesc *, struct pf_src_node *, struct pf_state_key *, struct pf_state_key *, struct mbuf *, int, u_int16_t, u_int16_t, int *, struct pfi_kif *, struct pf_state **, int, u_int16_t, u_int16_t, int); static int pf_test_fragment(struct pf_rule **, int, struct pfi_kif *, struct mbuf *, void *, struct pf_pdesc *, struct pf_rule **, struct pf_ruleset **); static int pf_tcp_track_full(struct pf_state_peer *, struct pf_state_peer *, struct pf_state **, struct pfi_kif *, struct mbuf *, int, struct pf_pdesc *, u_short *, int *); static int pf_tcp_track_sloppy(struct pf_state_peer *, struct pf_state_peer *, struct pf_state **, struct pf_pdesc *, u_short *); static 
int pf_test_state_tcp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); static int pf_test_state_udp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *); static int pf_test_state_icmp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); static int pf_test_state_other(struct pf_state **, int, struct pfi_kif *, struct mbuf *, struct pf_pdesc *); static u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, sa_family_t); static u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, sa_family_t); static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, int, u_int16_t); static int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); static void pf_print_state_parts(struct pf_state *, struct pf_state_key *, struct pf_state_key *); static int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); static struct pf_state *pf_find_state(struct pfi_kif *, struct pf_state_key_cmp *, u_int); static int pf_src_connlimit(struct pf_state **); static void pf_overload_task(void *v, int pending); static int pf_insert_src_node(struct pf_src_node **, struct pf_rule *, struct pf_addr *, sa_family_t); static u_int pf_purge_expired_states(u_int, int); static void pf_purge_unlinked_rules(void); static int pf_mtag_uminit(void *, int, int); static void pf_mtag_free(struct m_tag *); #ifdef INET static void pf_route(struct mbuf **, struct pf_rule *, int, struct ifnet *, struct pf_state *, struct pf_pdesc *); #endif /* INET */ #ifdef INET6 static void pf_change_a6(struct pf_addr *, u_int16_t *, struct pf_addr *, u_int8_t); static void pf_route6(struct mbuf **, struct pf_rule *, int, struct ifnet *, struct pf_state *, struct pf_pdesc *); #endif /* INET6 */ int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); VNET_DECLARE(int, pf_end_threads); VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); #define PACKET_LOOPED(pd) ((pd)->pf_mtag && \ (pd)->pf_mtag->flags & PF_PACKET_LOOPED) #define STATE_LOOKUP(i, k, d, s, pd) \ do { \ (s) = pf_find_state((i), (k), (d)); \ if ((s) == NULL) \ return (PF_DROP); \ if (PACKET_LOOPED(pd)) \ return (PF_PASS); \ if ((d) == PF_OUT && \ (((s)->rule.ptr->rt == PF_ROUTETO && \ (s)->rule.ptr->direction == PF_OUT) || \ ((s)->rule.ptr->rt == PF_REPLYTO && \ (s)->rule.ptr->direction == PF_IN)) && \ (s)->rt_kif != NULL && \ (s)->rt_kif != (i)) \ return (PF_PASS); \ } while (0) #define BOUND_IFACE(r, k) \ ((r)->rule_flag & PFRULE_IFBOUND) ? 
(k) : V_pfi_all #define STATE_INC_COUNTERS(s) \ do { \ counter_u64_add(s->rule.ptr->states_cur, 1); \ counter_u64_add(s->rule.ptr->states_tot, 1); \ if (s->anchor.ptr != NULL) { \ counter_u64_add(s->anchor.ptr->states_cur, 1); \ counter_u64_add(s->anchor.ptr->states_tot, 1); \ } \ if (s->nat_rule.ptr != NULL) { \ counter_u64_add(s->nat_rule.ptr->states_cur, 1);\ counter_u64_add(s->nat_rule.ptr->states_tot, 1);\ } \ } while (0) #define STATE_DEC_COUNTERS(s) \ do { \ if (s->nat_rule.ptr != NULL) \ counter_u64_add(s->nat_rule.ptr->states_cur, -1);\ if (s->anchor.ptr != NULL) \ counter_u64_add(s->anchor.ptr->states_cur, -1); \ counter_u64_add(s->rule.ptr->states_cur, -1); \ } while (0) static MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures"); VNET_DEFINE(struct pf_keyhash *, pf_keyhash); VNET_DEFINE(struct pf_idhash *, pf_idhash); VNET_DEFINE(struct pf_srchash *, pf_srchash); SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)"); u_long pf_hashmask; u_long pf_srchashmask; static u_long pf_hashsize; static u_long pf_srchashsize; SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN, &pf_hashsize, 0, "Size of pf(4) states hashtable"); SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN, &pf_srchashsize, 0, "Size of pf(4) source nodes hashtable"); VNET_DEFINE(void *, pf_swi_cookie); VNET_DEFINE(uint32_t, pf_hashseed); #define V_pf_hashseed VNET(pf_hashseed) int pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: if (a->addr32[0] > b->addr32[0]) return (1); if (a->addr32[0] < b->addr32[0]) return (-1); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (a->addr32[3] > b->addr32[3]) return (1); if (a->addr32[3] < b->addr32[3]) return (-1); if (a->addr32[2] > b->addr32[2]) return (1); if (a->addr32[2] < b->addr32[2]) return (-1); if (a->addr32[1] > b->addr32[1]) return (1); if (a->addr32[1] < b->addr32[1]) return (-1); if (a->addr32[0] > b->addr32[0]) return (1); if (a->addr32[0] < b->addr32[0]) return (-1); break; #endif /* INET6 */ default: panic("%s: unknown address family %u", __func__, af); } return (0); } static __inline uint32_t pf_hashkey(struct pf_state_key *sk) { uint32_t h; h = murmur3_32_hash32((uint32_t *)sk, sizeof(struct pf_state_key_cmp)/sizeof(uint32_t), V_pf_hashseed); return (h & pf_hashmask); } static __inline uint32_t pf_hashsrc(struct pf_addr *addr, sa_family_t af) { uint32_t h; switch (af) { case AF_INET: h = murmur3_32_hash32((uint32_t *)&addr->v4, sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed); break; case AF_INET6: h = murmur3_32_hash32((uint32_t *)&addr->v6, sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed); break; default: panic("%s: unknown address family %u", __func__, af); } return (h & pf_srchashmask); } #ifdef ALTQ static int pf_state_hash(struct pf_state *s) { u_int32_t hv = (intptr_t)s / sizeof(*s); hv ^= crc32(&s->src, sizeof(s->src)); hv ^= crc32(&s->dst, sizeof(s->dst)); if (hv == 0) hv = 1; return (hv); } #endif #ifdef INET6 void pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: dst->addr32[0] = src->addr32[0]; break; #endif /* INET */ case AF_INET6: dst->addr32[0] = src->addr32[0]; dst->addr32[1] = src->addr32[1]; dst->addr32[2] = src->addr32[2]; dst->addr32[3] = src->addr32[3]; break; } } #endif /* INET6 */ static void pf_init_threshold(struct pf_threshold *threshold, u_int32_t limit, u_int32_t seconds) { threshold->limit = limit * PF_THRESHOLD_MULT; threshold->seconds = seconds; 
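pf_hashkey() and pf_hashsrc() above reduce a seeded 32-bit hash to a bucket index with a bitwise AND, which works only because the table sizes are powers of two (pf_hashmask = pf_hashsize - 1). A toy sketch of the pattern; mix32() is a stand-in mixer, not the kernel's murmur3_32_hash32():

#include <stdint.h>
#include <stdio.h>

static uint32_t
mix32(const uint32_t *key, size_t nwords, uint32_t seed)
{
	uint32_t h = seed;

	while (nwords-- > 0) {
		h ^= *key++;
		h *= 0x9e3779b1u;	/* odd multiplier: invertible mix */
		h ^= h >> 16;
	}
	return (h);
}

int
main(void)
{
	uint32_t key[4] = { 0x0a000001, 0, 0, 0 };	/* 10.0.0.1, padded */
	uint32_t mask = 32768 - 1;	/* table size must be a power of 2 */

	printf("bucket %u\n", mix32(key, 4, 0xdeadbeef) & mask);
	return (0);
}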
threshold->count = 0; threshold->last = time_uptime; } static void pf_add_threshold(struct pf_threshold *threshold) { u_int32_t t = time_uptime, diff = t - threshold->last; if (diff >= threshold->seconds) threshold->count = 0; else threshold->count -= threshold->count * diff / threshold->seconds; threshold->count += PF_THRESHOLD_MULT; threshold->last = t; } static int pf_check_threshold(struct pf_threshold *threshold) { return (threshold->count > threshold->limit); } static int pf_src_connlimit(struct pf_state **state) { struct pf_overload_entry *pfoe; int bad = 0; PF_STATE_LOCK_ASSERT(*state); (*state)->src_node->conn++; (*state)->src.tcp_est = 1; pf_add_threshold(&(*state)->src_node->conn_rate); if ((*state)->rule.ptr->max_src_conn && (*state)->rule.ptr->max_src_conn < (*state)->src_node->conn) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1); bad++; } if ((*state)->rule.ptr->max_src_conn_rate.limit && pf_check_threshold(&(*state)->src_node->conn_rate)) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1); bad++; } if (!bad) return (0); /* Kill this state. */ (*state)->timeout = PFTM_PURGE; (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; if ((*state)->rule.ptr->overload_tbl == NULL) return (1); /* Schedule overloading and flushing task. */ pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT); if (pfoe == NULL) return (1); /* too bad :( */ bcopy(&(*state)->src_node->addr, &pfoe->addr, sizeof(pfoe->addr)); pfoe->af = (*state)->key[PF_SK_WIRE]->af; pfoe->rule = (*state)->rule.ptr; pfoe->dir = (*state)->direction; PF_OVERLOADQ_LOCK(); SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next); PF_OVERLOADQ_UNLOCK(); taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask); return (1); } static void pf_overload_task(void *v, int pending) { struct pf_overload_head queue; struct pfr_addr p; struct pf_overload_entry *pfoe, *pfoe1; uint32_t killed = 0; CURVNET_SET((struct vnet *)v); PF_OVERLOADQ_LOCK(); queue = V_pf_overloadqueue; SLIST_INIT(&V_pf_overloadqueue); PF_OVERLOADQ_UNLOCK(); bzero(&p, sizeof(p)); SLIST_FOREACH(pfoe, &queue, next) { counter_u64_add(V_pf_status.lcounters[LCNT_OVERLOAD_TABLE], 1); if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("%s: blocking address ", __func__); pf_print_host(&pfoe->addr, 0, pfoe->af); printf("\n"); } p.pfra_af = pfoe->af; switch (pfoe->af) { #ifdef INET case AF_INET: p.pfra_net = 32; p.pfra_ip4addr = pfoe->addr.v4; break; #endif #ifdef INET6 case AF_INET6: p.pfra_net = 128; p.pfra_ip6addr = pfoe->addr.v6; break; #endif } PF_RULES_WLOCK(); pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second); PF_RULES_WUNLOCK(); } /* * Remove those entries, that don't need flushing. */ SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1) if (pfoe->rule->flush == 0) { SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next); free(pfoe, M_PFTEMP); } else counter_u64_add( V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH], 1); /* If nothing to flush, return. 
*/ if (SLIST_EMPTY(&queue)) { CURVNET_RESTORE(); return; } for (int i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; struct pf_state_key *sk; struct pf_state *s; PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { sk = s->key[PF_SK_WIRE]; SLIST_FOREACH(pfoe, &queue, next) if (sk->af == pfoe->af && ((pfoe->rule->flush & PF_FLUSH_GLOBAL) || pfoe->rule == s->rule.ptr) && ((pfoe->dir == PF_OUT && PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) || (pfoe->dir == PF_IN && PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) { s->timeout = PFTM_PURGE; s->src.state = s->dst.state = TCPS_CLOSED; killed++; } } PF_HASHROW_UNLOCK(ih); } SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1) free(pfoe, M_PFTEMP); if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: %u states killed", __func__, killed); CURVNET_RESTORE(); } /* * Can return locked on failure, so that we can consistently * allocate and insert a new one. */ struct pf_src_node * pf_find_src_node(struct pf_addr *src, struct pf_rule *rule, sa_family_t af, int returnlocked) { struct pf_srchash *sh; struct pf_src_node *n; counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1); sh = &V_pf_srchash[pf_hashsrc(src, af)]; PF_HASHROW_LOCK(sh); LIST_FOREACH(n, &sh->nodes, entry) if (n->rule.ptr == rule && n->af == af && ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) || (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0))) break; if (n != NULL) { n->states++; PF_HASHROW_UNLOCK(sh); } else if (returnlocked == 0) PF_HASHROW_UNLOCK(sh); return (n); } static int pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, struct pf_addr *src, sa_family_t af) { KASSERT((rule->rule_flag & PFRULE_RULESRCTRACK || rule->rpool.opts & PF_POOL_STICKYADDR), ("%s for non-tracking rule %p", __func__, rule)); if (*sn == NULL) *sn = pf_find_src_node(src, rule, af, 1); if (*sn == NULL) { struct pf_srchash *sh = &V_pf_srchash[pf_hashsrc(src, af)]; PF_HASHROW_ASSERT(sh); if (!rule->max_src_nodes || counter_u64_fetch(rule->src_nodes) < rule->max_src_nodes) (*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO); else counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], 1); if ((*sn) == NULL) { PF_HASHROW_UNLOCK(sh); return (-1); } pf_init_threshold(&(*sn)->conn_rate, rule->max_src_conn_rate.limit, rule->max_src_conn_rate.seconds); (*sn)->af = af; (*sn)->rule.ptr = rule; PF_ACPY(&(*sn)->addr, src, af); LIST_INSERT_HEAD(&sh->nodes, *sn, entry); (*sn)->creation = time_uptime; (*sn)->ruletype = rule->action; (*sn)->states = 1; if ((*sn)->rule.ptr != NULL) counter_u64_add((*sn)->rule.ptr->src_nodes, 1); PF_HASHROW_UNLOCK(sh); counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1); } else { if (rule->max_src_states && (*sn)->states >= rule->max_src_states) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES], 1); return (-1); } } return (0); } void pf_unlink_src_node(struct pf_src_node *src) { PF_HASHROW_ASSERT(&V_pf_srchash[pf_hashsrc(&src->addr, src->af)]); LIST_REMOVE(src, entry); if (src->rule.ptr) counter_u64_add(src->rule.ptr->src_nodes, -1); } u_int pf_free_src_nodes(struct pf_src_node_list *head) { struct pf_src_node *sn, *tmp; u_int count = 0; LIST_FOREACH_SAFE(sn, head, entry, tmp) { uma_zfree(V_pf_sources_z, sn); count++; } counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], count); return (count); } void pf_mtag_initialize() { pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) + sizeof(struct pf_mtag), NULL, NULL, pf_mtag_uminit, NULL, UMA_ALIGN_PTR, 0); } /* Per-vnet data storage structures 
initialization. */ void pf_initialize() { struct pf_keyhash *kh; struct pf_idhash *ih; struct pf_srchash *sh; u_int i; if (pf_hashsize == 0 || !powerof2(pf_hashsize)) pf_hashsize = PF_HASHSIZ; if (pf_srchashsize == 0 || !powerof2(pf_srchashsize)) pf_srchashsize = PF_HASHSIZ / 4; V_pf_hashseed = arc4random(); /* States and state keys storage. */ V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_state), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z; uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT); uma_zone_set_warning(V_pf_state_z, "PF states limit reached"); V_pf_state_key_z = uma_zcreate("pf state keys", sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); V_pf_keyhash = malloc(pf_hashsize * sizeof(struct pf_keyhash), M_PFHASH, M_WAITOK | M_ZERO); V_pf_idhash = malloc(pf_hashsize * sizeof(struct pf_idhash), M_PFHASH, M_WAITOK | M_ZERO); pf_hashmask = pf_hashsize - 1; for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask; i++, kh++, ih++) { mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF); } /* Source nodes. */ V_pf_sources_z = uma_zcreate("pf source nodes", sizeof(struct pf_src_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z; uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT); uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached"); V_pf_srchash = malloc(pf_srchashsize * sizeof(struct pf_srchash), M_PFHASH, M_WAITOK|M_ZERO); pf_srchashmask = pf_srchashsize - 1; for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF); /* ALTQ */ TAILQ_INIT(&V_pf_altqs[0]); TAILQ_INIT(&V_pf_altqs[1]); TAILQ_INIT(&V_pf_pabuf); V_pf_altqs_active = &V_pf_altqs[0]; V_pf_altqs_inactive = &V_pf_altqs[1]; /* Send & overload+flush queues. */ STAILQ_INIT(&V_pf_sendqueue); SLIST_INIT(&V_pf_overloadqueue); TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, curvnet); /* Unlinked, but may be referenced rules. 
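pf_initialize() above accepts the states_hashsize and source_nodes_hashsize tunables only when they are powers of two, since size - 1 must serve as the bucket mask; anything else falls back to the built-in default. A sketch of that sizing rule, assuming a PF_HASHSIZ-like default of 32768:

#include <stdio.h>

#define DEFAULT_HASHSIZE	32768UL	/* assumed PF_HASHSIZ-like default */

static int
powerof2_ul(unsigned long x)
{
	return (x != 0 && (x & (x - 1)) == 0);
}

static unsigned long
pick_hashsize(unsigned long tunable)
{
	/* Only a power of two lets size - 1 act as the bucket mask. */
	if (tunable == 0 || !powerof2_ul(tunable))
		return (DEFAULT_HASHSIZE);
	return (tunable);
}

int
main(void)
{
	printf("%lu\n", pick_hashsize(0));	/* 32768 */
	printf("%lu\n", pick_hashsize(1000));	/* 32768: not a power of 2 */
	printf("%lu\n", pick_hashsize(4096));	/* 4096 */
	return (0);
}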
*/ TAILQ_INIT(&V_pf_unlinked_rules); } void pf_mtag_cleanup() { uma_zdestroy(pf_mtag_z); } void pf_cleanup() { struct pf_keyhash *kh; struct pf_idhash *ih; struct pf_srchash *sh; struct pf_send_entry *pfse, *next; u_int i; for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask; i++, kh++, ih++) { KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty", __func__)); KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty", __func__)); mtx_destroy(&kh->lock); mtx_destroy(&ih->lock); } free(V_pf_keyhash, M_PFHASH); free(V_pf_idhash, M_PFHASH); for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { KASSERT(LIST_EMPTY(&sh->nodes), ("%s: source node hash not empty", __func__)); mtx_destroy(&sh->lock); } free(V_pf_srchash, M_PFHASH); STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) { m_freem(pfse->pfse_m); free(pfse, M_PFTEMP); } uma_zdestroy(V_pf_sources_z); uma_zdestroy(V_pf_state_z); uma_zdestroy(V_pf_state_key_z); } static int pf_mtag_uminit(void *mem, int size, int how) { struct m_tag *t; t = (struct m_tag *)mem; t->m_tag_cookie = MTAG_ABI_COMPAT; t->m_tag_id = PACKET_TAG_PF; t->m_tag_len = sizeof(struct pf_mtag); t->m_tag_free = pf_mtag_free; return (0); } static void pf_mtag_free(struct m_tag *t) { uma_zfree(pf_mtag_z, t); } struct pf_mtag * pf_get_mtag(struct mbuf *m) { struct m_tag *mtag; if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL) return ((struct pf_mtag *)(mtag + 1)); mtag = uma_zalloc(pf_mtag_z, M_NOWAIT); if (mtag == NULL) return (NULL); bzero(mtag + 1, sizeof(struct pf_mtag)); m_tag_prepend(m, mtag); return ((struct pf_mtag *)(mtag + 1)); } static int pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks, struct pf_state *s) { struct pf_keyhash *khs, *khw, *kh; struct pf_state_key *sk, *cur; struct pf_state *si, *olds = NULL; int idx; KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__)); KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__)); /* * We need to lock hash slots of both keys. To avoid deadlock * we always lock the slot with lower address first. Unlock order * isn't important. * * We also need to lock ID hash slot before dropping key * locks. On success we return with ID hash slot locked. */ if (skw == sks) { khs = khw = &V_pf_keyhash[pf_hashkey(skw)]; PF_HASHROW_LOCK(khs); } else { khs = &V_pf_keyhash[pf_hashkey(sks)]; khw = &V_pf_keyhash[pf_hashkey(skw)]; if (khs == khw) { PF_HASHROW_LOCK(khs); } else if (khs < khw) { PF_HASHROW_LOCK(khs); PF_HASHROW_LOCK(khw); } else { PF_HASHROW_LOCK(khw); PF_HASHROW_LOCK(khs); } } #define KEYS_UNLOCK() do { \ if (khs != khw) { \ PF_HASHROW_UNLOCK(khs); \ PF_HASHROW_UNLOCK(khw); \ } else \ PF_HASHROW_UNLOCK(khs); \ } while (0) /* * First run: start with wire key. */ sk = skw; kh = khw; idx = PF_SK_WIRE; keyattach: LIST_FOREACH(cur, &kh->keys, entry) if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0) break; if (cur != NULL) { /* Key exists. Check for same kif, if none, add to key. */ TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) { struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)]; PF_HASHROW_LOCK(ih); if (si->kif == s->kif && si->direction == s->direction) { if (sk->proto == IPPROTO_TCP && si->src.state >= TCPS_FIN_WAIT_2 && si->dst.state >= TCPS_FIN_WAIT_2) { /* * New state matches an old >FIN_WAIT_2 * state. We can't drop key hash locks, * thus we can't unlink it properly. 
* * As a workaround we drop it into * TCPS_CLOSED state, schedule purge * ASAP and push it into the very end * of the slot TAILQ, so that it won't * conflict with our new state. */ si->src.state = si->dst.state = TCPS_CLOSED; si->timeout = PFTM_PURGE; olds = si; } else { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: %s key attach " "failed on %s: ", (idx == PF_SK_WIRE) ? "wire" : "stack", s->kif->pfik_name); pf_print_state_parts(s, (idx == PF_SK_WIRE) ? sk : NULL, (idx == PF_SK_STACK) ? sk : NULL); printf(", existing: "); pf_print_state_parts(si, (idx == PF_SK_WIRE) ? sk : NULL, (idx == PF_SK_STACK) ? sk : NULL); printf("\n"); } PF_HASHROW_UNLOCK(ih); KEYS_UNLOCK(); uma_zfree(V_pf_state_key_z, sk); if (idx == PF_SK_STACK) pf_detach_state(s); return (EEXIST); /* collision! */ } } PF_HASHROW_UNLOCK(ih); } uma_zfree(V_pf_state_key_z, sk); s->key[idx] = cur; } else { LIST_INSERT_HEAD(&kh->keys, sk, entry); s->key[idx] = sk; } stateattach: /* List is sorted, if-bound states before floating. */ if (s->kif == V_pfi_all) TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]); else TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]); if (olds) { TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]); TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds, key_list[idx]); olds = NULL; } /* * Attach done. See how should we (or should not?) * attach a second key. */ if (sks == skw) { s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; idx = PF_SK_STACK; sks = NULL; goto stateattach; } else if (sks != NULL) { /* * Continue attaching with stack key. */ sk = sks; kh = khs; idx = PF_SK_STACK; sks = NULL; goto keyattach; } PF_STATE_LOCK(s); KEYS_UNLOCK(); KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL, ("%s failure", __func__)); return (0); #undef KEYS_UNLOCK } static void pf_detach_state(struct pf_state *s) { struct pf_state_key *sks = s->key[PF_SK_STACK]; struct pf_keyhash *kh; if (sks != NULL) { kh = &V_pf_keyhash[pf_hashkey(sks)]; PF_HASHROW_LOCK(kh); if (s->key[PF_SK_STACK] != NULL) pf_state_key_detach(s, PF_SK_STACK); /* * If both point to same key, then we are done. 
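The two-key attach above avoids deadlock by always taking the lower-addressed hash-row lock first, so any two threads that need the same pair contend in the same order; unlock order does not matter. A compact pthreads sketch of that discipline (comparing pointers to unrelated objects is formally unspecified in ISO C, but it is the same trick the kernel relies on):

#include <pthread.h>

static void
lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {
		pthread_mutex_lock(a);		/* same row: lock once */
	} else if (a < b) {
		pthread_mutex_lock(a);		/* lower address first ... */
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);		/* ... on both code paths */
		pthread_mutex_lock(a);
	}
}

static void
unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);		/* unlock order is irrelevant */
	if (a != b)
		pthread_mutex_unlock(b);
}

int
main(void)
{
	pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

	lock_pair(&m1, &m2);
	unlock_pair(&m1, &m2);
	return (0);
}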
*/ if (sks == s->key[PF_SK_WIRE]) { pf_state_key_detach(s, PF_SK_WIRE); PF_HASHROW_UNLOCK(kh); return; } PF_HASHROW_UNLOCK(kh); } if (s->key[PF_SK_WIRE] != NULL) { kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])]; PF_HASHROW_LOCK(kh); if (s->key[PF_SK_WIRE] != NULL) pf_state_key_detach(s, PF_SK_WIRE); PF_HASHROW_UNLOCK(kh); } } static void pf_state_key_detach(struct pf_state *s, int idx) { struct pf_state_key *sk = s->key[idx]; #ifdef INVARIANTS struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)]; PF_HASHROW_ASSERT(kh); #endif TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]); s->key[idx] = NULL; if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) { LIST_REMOVE(sk, entry); uma_zfree(V_pf_state_key_z, sk); } } static int pf_state_key_ctor(void *mem, int size, void *arg, int flags) { struct pf_state_key *sk = mem; bzero(sk, sizeof(struct pf_state_key_cmp)); TAILQ_INIT(&sk->states[PF_SK_WIRE]); TAILQ_INIT(&sk->states[PF_SK_STACK]); return (0); } struct pf_state_key * pf_state_key_setup(struct pf_pdesc *pd, struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t sport, u_int16_t dport) { struct pf_state_key *sk; sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); if (sk == NULL) return (NULL); PF_ACPY(&sk->addr[pd->sidx], saddr, pd->af); PF_ACPY(&sk->addr[pd->didx], daddr, pd->af); sk->port[pd->sidx] = sport; sk->port[pd->didx] = dport; sk->proto = pd->proto; sk->af = pd->af; return (sk); } struct pf_state_key * pf_state_key_clone(struct pf_state_key *orig) { struct pf_state_key *sk; sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); if (sk == NULL) return (NULL); bcopy(orig, sk, sizeof(struct pf_state_key_cmp)); return (sk); } int pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw, struct pf_state_key *sks, struct pf_state *s) { struct pf_idhash *ih; struct pf_state *cur; int error; KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]), ("%s: sks not pristine", __func__)); KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]), ("%s: skw not pristine", __func__)); KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); s->kif = kif; if (s->id == 0 && s->creatorid == 0) { /* XXX: should be atomic, but probability of collision low */ if ((s->id = V_pf_stateid[curcpu]++) == PFID_MAXID) V_pf_stateid[curcpu] = 1; s->id |= (uint64_t )curcpu << PFID_CPUSHIFT; s->id = htobe64(s->id); s->creatorid = V_pf_status.hostid; } /* Returns with ID locked on success. */ if ((error = pf_state_key_attach(skw, sks, s)) != 0) return (error); ih = &V_pf_idhash[PF_IDHASH(s)]; PF_HASHROW_ASSERT(ih); LIST_FOREACH(cur, &ih->states, entry) if (cur->id == s->id && cur->creatorid == s->creatorid) break; if (cur != NULL) { PF_HASHROW_UNLOCK(ih); if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: state ID collision: " "id: %016llx creatorid: %08x\n", (unsigned long long)be64toh(s->id), ntohl(s->creatorid)); } pf_detach_state(s); return (EEXIST); } LIST_INSERT_HEAD(&ih->states, s, entry); /* One for keys, one for ID hash. */ refcount_init(&s->refs, 2); counter_u64_add(V_pf_status.fcounters[FCNT_STATE_INSERT], 1); if (pfsync_insert_state_ptr != NULL) pfsync_insert_state_ptr(s); /* Returns locked. */ return (0); } /* * Find state by ID: returns with locked row on success. 
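pf_state_insert() above mints a 64-bit state ID by packing the originating CPU into the top PFID_CPUBITS bits and a per-CPU counter into the remainder, so CPUs never contend on a shared counter; the kernel additionally byte-swaps the result with htobe64(), which this user-space sketch (with renamed macros) omits:

#include <stdint.h>
#include <stdio.h>

#define ID_CPUBITS	8
#define ID_CPUSHIFT	(64 - ID_CPUBITS)
#define ID_CPUMASK	((((uint64_t)1 << ID_CPUBITS) - 1) << ID_CPUSHIFT)
#define ID_MAXID	(~ID_CPUMASK)

static uint64_t percpu_ctr[4];	/* one counter per CPU, no shared lock */

static uint64_t
alloc_id(unsigned int cpu)
{
	uint64_t id = percpu_ctr[cpu]++;

	if (id == ID_MAXID)	/* wrap the counter, skipping the max */
		percpu_ctr[cpu] = 1;
	return (id | ((uint64_t)cpu << ID_CPUSHIFT));
}

int
main(void)
{
	printf("%016jx\n", (uintmax_t)alloc_id(2));	/* 0200000000000000 */
	printf("%016jx\n", (uintmax_t)alloc_id(2));	/* 0200000000000001 */
	return (0);
}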
*/ struct pf_state * pf_find_state_byid(uint64_t id, uint32_t creatorid) { struct pf_idhash *ih; struct pf_state *s; counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); ih = &V_pf_idhash[(be64toh(id) % (pf_hashmask + 1))]; PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) if (s->id == id && s->creatorid == creatorid) break; if (s == NULL) PF_HASHROW_UNLOCK(ih); return (s); } /* * Find state by key. * Returns with ID hash slot locked on success. */ static struct pf_state * pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir) { struct pf_keyhash *kh; struct pf_state_key *sk; struct pf_state *s; int idx; counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)]; PF_HASHROW_LOCK(kh); LIST_FOREACH(sk, &kh->keys, entry) if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) break; if (sk == NULL) { PF_HASHROW_UNLOCK(kh); return (NULL); } idx = (dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK); /* List is sorted, if-bound states before floating ones. */ TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) if (s->kif == V_pfi_all || s->kif == kif) { PF_STATE_LOCK(s); PF_HASHROW_UNLOCK(kh); if (s->timeout >= PFTM_MAX) { /* * State is either being processed by * pf_unlink_state() in an other thread, or * is scheduled for immediate expiry. */ PF_STATE_UNLOCK(s); return (NULL); } return (s); } PF_HASHROW_UNLOCK(kh); return (NULL); } struct pf_state * pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) { struct pf_keyhash *kh; struct pf_state_key *sk; struct pf_state *s, *ret = NULL; int idx, inout = 0; counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)]; PF_HASHROW_LOCK(kh); LIST_FOREACH(sk, &kh->keys, entry) if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) break; if (sk == NULL) { PF_HASHROW_UNLOCK(kh); return (NULL); } switch (dir) { case PF_IN: idx = PF_SK_WIRE; break; case PF_OUT: idx = PF_SK_STACK; break; case PF_INOUT: idx = PF_SK_WIRE; inout = 1; break; default: panic("%s: dir %u", __func__, dir); } second_run: TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) { if (more == NULL) { PF_HASHROW_UNLOCK(kh); return (s); } if (ret) (*more)++; else ret = s; } if (inout == 1) { inout = 0; idx = PF_SK_STACK; goto second_run; } PF_HASHROW_UNLOCK(kh); return (ret); } /* END state table stuff */ static void pf_send(struct pf_send_entry *pfse) { PF_SENDQ_LOCK(); STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next); PF_SENDQ_UNLOCK(); swi_sched(V_pf_swi_cookie, 0); } void pf_intr(void *v) { struct pf_send_head queue; struct pf_send_entry *pfse, *next; CURVNET_SET((struct vnet *)v); PF_SENDQ_LOCK(); queue = V_pf_sendqueue; STAILQ_INIT(&V_pf_sendqueue); PF_SENDQ_UNLOCK(); STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) { switch (pfse->pfse_type) { #ifdef INET case PFSE_IP: ip_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL); break; case PFSE_ICMP: icmp_error(pfse->pfse_m, pfse->icmpopts.type, pfse->icmpopts.code, 0, pfse->icmpopts.mtu); break; #endif /* INET */ #ifdef INET6 case PFSE_IP6: ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL, NULL); break; case PFSE_ICMP6: icmp6_error(pfse->pfse_m, pfse->icmpopts.type, pfse->icmpopts.code, pfse->icmpopts.mtu); break; #endif /* INET6 */ default: panic("%s: unknown type", __func__); } free(pfse, M_PFTEMP); } CURVNET_RESTORE(); } void pf_purge_thread(void *v) { u_int idx = 0; CURVNET_SET((struct vnet *)v); for (;;) { PF_RULES_RLOCK(); rw_sleep(pf_purge_thread, &pf_rules_lock, 
0, "pftm", hz / 10); if (V_pf_end_threads) { /* * To cleanse up all kifs and rules we need * two runs: first one clears reference flags, * then pf_purge_expired_states() doesn't * raise them, and then second run frees. */ PF_RULES_RUNLOCK(); pf_purge_unlinked_rules(); pfi_kif_purge(); /* * Now purge everything. */ pf_purge_expired_states(0, pf_hashmask); pf_purge_expired_fragments(); pf_purge_expired_src_nodes(); /* * Now all kifs & rules should be unreferenced, * thus should be successfully freed. */ pf_purge_unlinked_rules(); pfi_kif_purge(); /* * Announce success and exit. */ PF_RULES_RLOCK(); V_pf_end_threads++; PF_RULES_RUNLOCK(); wakeup(pf_purge_thread); kproc_exit(0); } PF_RULES_RUNLOCK(); /* Process 1/interval fraction of the state table every run. */ idx = pf_purge_expired_states(idx, pf_hashmask / (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10)); /* Purge other expired types every PFTM_INTERVAL seconds. */ if (idx == 0) { /* * Order is important: * - states and src nodes reference rules * - states and rules reference kifs */ pf_purge_expired_fragments(); pf_purge_expired_src_nodes(); pf_purge_unlinked_rules(); pfi_kif_purge(); } } /* not reached */ CURVNET_RESTORE(); } u_int32_t pf_state_expires(const struct pf_state *state) { u_int32_t timeout; u_int32_t start; u_int32_t end; u_int32_t states; /* handle all PFTM_* > PFTM_MAX here */ if (state->timeout == PFTM_PURGE) return (time_uptime); KASSERT(state->timeout != PFTM_UNLINKED, ("pf_state_expires: timeout == PFTM_UNLINKED")); KASSERT((state->timeout < PFTM_MAX), ("pf_state_expires: timeout > PFTM_MAX")); timeout = state->rule.ptr->timeout[state->timeout]; if (!timeout) timeout = V_pf_default_rule.timeout[state->timeout]; start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; if (start) { end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; states = counter_u64_fetch(state->rule.ptr->states_cur); } else { start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START]; end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END]; states = V_pf_status.states; } if (end && states > start && start < end) { if (states < end) return (state->expire + timeout * (end - states) / (end - start)); else return (time_uptime); } return (state->expire + timeout); } void pf_purge_expired_src_nodes() { struct pf_src_node_list freelist; struct pf_srchash *sh; struct pf_src_node *cur, *next; int i; LIST_INIT(&freelist); for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next) if (cur->states == 0 && cur->expire <= time_uptime) { pf_unlink_src_node(cur); LIST_INSERT_HEAD(&freelist, cur, entry); } else if (cur->rule.ptr != NULL) cur->rule.ptr->rule_flag |= PFRULE_REFS; PF_HASHROW_UNLOCK(sh); } pf_free_src_nodes(&freelist); V_pf_status.src_nodes = uma_zone_get_cur(V_pf_sources_z); } static void pf_src_tree_remove_state(struct pf_state *s) { struct pf_src_node *sn; struct pf_srchash *sh; uint32_t timeout; timeout = s->rule.ptr->timeout[PFTM_SRC_NODE] ? 
	    s->rule.ptr->timeout[PFTM_SRC_NODE] :
	    V_pf_default_rule.timeout[PFTM_SRC_NODE];

	if (s->src_node != NULL) {
		sn = s->src_node;
		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
		PF_HASHROW_LOCK(sh);
		if (s->src.tcp_est)
			--sn->conn;
		if (--sn->states == 0)
			sn->expire = time_uptime + timeout;
		PF_HASHROW_UNLOCK(sh);
	}
	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
		sn = s->nat_src_node;
		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
		PF_HASHROW_LOCK(sh);
		if (--sn->states == 0)
			sn->expire = time_uptime + timeout;
		PF_HASHROW_UNLOCK(sh);
	}
	s->src_node = s->nat_src_node = NULL;
}

/*
 * Unlink and potentially free a state. Function may be
 * called with ID hash row locked, but always returns
 * unlocked, since it needs to go through key hash locking.
 */
int
pf_unlink_state(struct pf_state *s, u_int flags)
{
	struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)];

	if ((flags & PF_ENTER_LOCKED) == 0)
		PF_HASHROW_LOCK(ih);
	else
		PF_HASHROW_ASSERT(ih);

	if (s->timeout == PFTM_UNLINKED) {
		/*
		 * State is being processed
		 * by pf_unlink_state() in
		 * another thread.
		 */
		PF_HASHROW_UNLOCK(ih);
		return (0);	/* XXXGL: undefined actually */
	}

	if (s->src.state == PF_TCPS_PROXY_DST) {
		/* XXX wire key the right one? */
		pf_send_tcp(NULL, s->rule.ptr, s->key[PF_SK_WIRE]->af,
		    &s->key[PF_SK_WIRE]->addr[1],
		    &s->key[PF_SK_WIRE]->addr[0],
		    s->key[PF_SK_WIRE]->port[1],
		    s->key[PF_SK_WIRE]->port[0],
		    s->src.seqhi, s->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, s->tag, NULL);
	}

	LIST_REMOVE(s, entry);
	pf_src_tree_remove_state(s);

	if (pfsync_delete_state_ptr != NULL)
		pfsync_delete_state_ptr(s);

	STATE_DEC_COUNTERS(s);

	s->timeout = PFTM_UNLINKED;
	PF_HASHROW_UNLOCK(ih);

	pf_detach_state(s);
	refcount_release(&s->refs);

	return (pf_release_state(s));
}

void
pf_free_state(struct pf_state *cur)
{

	KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur));
	KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__,
	    cur->timeout));

	pf_normalize_tcp_cleanup(cur);
	uma_zfree(V_pf_state_z, cur);
	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_REMOVALS], 1);
}

/*
 * Called only from pf_purge_thread(), thus serialized.
 */
static u_int
pf_purge_expired_states(u_int i, int maxcheck)
{
	struct pf_idhash *ih;
	struct pf_state *s;

	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);

	/*
	 * Go through hash and unlink states that expire now.
	 */
	while (maxcheck > 0) {

		ih = &V_pf_idhash[i];
relock:
		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			if (pf_state_expires(s) <= time_uptime) {
				V_pf_status.states -=
				    pf_unlink_state(s, PF_ENTER_LOCKED);
				goto relock;
			}
			s->rule.ptr->rule_flag |= PFRULE_REFS;
			if (s->nat_rule.ptr != NULL)
				s->nat_rule.ptr->rule_flag |= PFRULE_REFS;
			if (s->anchor.ptr != NULL)
				s->anchor.ptr->rule_flag |= PFRULE_REFS;
			s->kif->pfik_flags |= PFI_IFLAG_REFS;
			if (s->rt_kif)
				s->rt_kif->pfik_flags |= PFI_IFLAG_REFS;
		}
		PF_HASHROW_UNLOCK(ih);

		/* Return when we hit end of hash. */
		if (++i > pf_hashmask) {
			V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
			return (0);
		}

		maxcheck--;
	}

	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);

	return (i);
}

static void
pf_purge_unlinked_rules()
{
	struct pf_rulequeue tmpq;
	struct pf_rule *r, *r1;

	/*
	 * If we have overloading task pending, then we'd
	 * better skip purging this time. There is a tiny
	 * probability that overloading task references
	 * an already unlinked rule.
	 */
	PF_OVERLOADQ_LOCK();
	if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
		PF_OVERLOADQ_UNLOCK();
		return;
	}
	PF_OVERLOADQ_UNLOCK();

	/*
	 * Do naive mark-and-sweep garbage collecting of old rules.
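	 * (An illustrative sketch of the idea, detached from pf's data
	 * structures; the queue macros and PFRULE_REFS are real, the helper
	 * free_rule() is hypothetical:
	 *
	 *	mark:	every scan that still sees the rule in use does
	 *		r->rule_flag |= PFRULE_REFS;
	 *	sweep:	TAILQ_FOREACH_SAFE(r, &unlinked, entries, tmp)
	 *			if (!(r->rule_flag & PFRULE_REFS))
	 *				free_rule(r);
	 *			else
	 *				r->rule_flag &= ~PFRULE_REFS;
	 *
	 * A rule thus has to stay unreferenced for one full purge cycle
	 * before it is freed.)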
* Reference flag is raised by pf_purge_expired_states() * and pf_purge_expired_src_nodes(). * * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK, * use a temporary queue. */ TAILQ_INIT(&tmpq); PF_UNLNKDRULES_LOCK(); TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) { if (!(r->rule_flag & PFRULE_REFS)) { TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries); TAILQ_INSERT_TAIL(&tmpq, r, entries); } else r->rule_flag &= ~PFRULE_REFS; } PF_UNLNKDRULES_UNLOCK(); if (!TAILQ_EMPTY(&tmpq)) { PF_RULES_WLOCK(); TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) { TAILQ_REMOVE(&tmpq, r, entries); pf_free_rule(r); } PF_RULES_WUNLOCK(); } } void pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: { u_int32_t a = ntohl(addr->addr32[0]); printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, (a>>8)&255, a&255); if (p) { p = ntohs(p); printf(":%u", p); } break; } #endif /* INET */ #ifdef INET6 case AF_INET6: { u_int16_t b; u_int8_t i, curstart, curend, maxstart, maxend; curstart = curend = maxstart = maxend = 255; for (i = 0; i < 8; i++) { if (!addr->addr16[i]) { if (curstart == 255) curstart = i; curend = i; } else { if ((curend - curstart) > (maxend - maxstart)) { maxstart = curstart; maxend = curend; } curstart = curend = 255; } } if ((curend - curstart) > (maxend - maxstart)) { maxstart = curstart; maxend = curend; } for (i = 0; i < 8; i++) { if (i >= maxstart && i <= maxend) { if (i == 0) printf(":"); if (i == maxend) printf(":"); } else { b = ntohs(addr->addr16[i]); printf("%x", b); if (i < 7) printf(":"); } } if (p) { p = ntohs(p); printf("[%u]", p); } break; } #endif /* INET6 */ } } void pf_print_state(struct pf_state *s) { pf_print_state_parts(s, NULL, NULL); } static void pf_print_state_parts(struct pf_state *s, struct pf_state_key *skwp, struct pf_state_key *sksp) { struct pf_state_key *skw, *sks; u_int8_t proto, dir; /* Do our best to fill these, but they're skipped if NULL */ skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); proto = skw ? skw->proto : (sks ? sks->proto : 0); dir = s ? 
s->direction : 0; switch (proto) { case IPPROTO_IPV4: printf("IPv4"); break; case IPPROTO_IPV6: printf("IPv6"); break; case IPPROTO_TCP: printf("TCP"); break; case IPPROTO_UDP: printf("UDP"); break; case IPPROTO_ICMP: printf("ICMP"); break; case IPPROTO_ICMPV6: printf("ICMPv6"); break; default: printf("%u", skw->proto); break; } switch (dir) { case PF_IN: printf(" in"); break; case PF_OUT: printf(" out"); break; } if (skw) { printf(" wire: "); pf_print_host(&skw->addr[0], skw->port[0], skw->af); printf(" "); pf_print_host(&skw->addr[1], skw->port[1], skw->af); } if (sks) { printf(" stack: "); if (sks != skw) { pf_print_host(&sks->addr[0], sks->port[0], sks->af); printf(" "); pf_print_host(&sks->addr[1], sks->port[1], sks->af); } else printf("-"); } if (s) { if (proto == IPPROTO_TCP) { printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo, s->src.seqhi, s->src.max_win, s->src.seqdiff); if (s->src.wscale && s->dst.wscale) printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK); printf("]"); printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo, s->dst.seqhi, s->dst.max_win, s->dst.seqdiff); if (s->src.wscale && s->dst.wscale) printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK); printf("]"); } printf(" %u:%u", s->src.state, s->dst.state); } } void pf_print_flags(u_int8_t f) { if (f) printf(" "); if (f & TH_FIN) printf("F"); if (f & TH_SYN) printf("S"); if (f & TH_RST) printf("R"); if (f & TH_PUSH) printf("P"); if (f & TH_ACK) printf("A"); if (f & TH_URG) printf("U"); if (f & TH_ECE) printf("E"); if (f & TH_CWR) printf("W"); } #define PF_SET_SKIP_STEPS(i) \ do { \ while (head[i] != cur) { \ head[i]->skip[i].ptr = cur; \ head[i] = TAILQ_NEXT(head[i], entries); \ } \ } while (0) void pf_calc_skip_steps(struct pf_rulequeue *rules) { struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; int i; cur = TAILQ_FIRST(rules); prev = cur; for (i = 0; i < PF_SKIP_COUNT; ++i) head[i] = cur; while (cur != NULL) { if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) PF_SET_SKIP_STEPS(PF_SKIP_IFP); if (cur->direction != prev->direction) PF_SET_SKIP_STEPS(PF_SKIP_DIR); if (cur->af != prev->af) PF_SET_SKIP_STEPS(PF_SKIP_AF); if (cur->proto != prev->proto) PF_SET_SKIP_STEPS(PF_SKIP_PROTO); if (cur->src.neg != prev->src.neg || pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); if (cur->src.port[0] != prev->src.port[0] || cur->src.port[1] != prev->src.port[1] || cur->src.port_op != prev->src.port_op) PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); if (cur->dst.neg != prev->dst.neg || pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); if (cur->dst.port[0] != prev->dst.port[0] || cur->dst.port[1] != prev->dst.port[1] || cur->dst.port_op != prev->dst.port_op) PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); prev = cur; cur = TAILQ_NEXT(cur, entries); } for (i = 0; i < PF_SKIP_COUNT; ++i) PF_SET_SKIP_STEPS(i); } static int pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) { if (aw1->type != aw2->type) return (1); switch (aw1->type) { case PF_ADDR_ADDRMASK: case PF_ADDR_RANGE: if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) return (1); if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) return (1); return (0); case PF_ADDR_DYNIFTL: return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); case PF_ADDR_NOROUTE: case PF_ADDR_URPFFAILED: return (0); case PF_ADDR_TABLE: return (aw1->p.tbl != aw2->p.tbl); default: printf("invalid address type: %d\n", aw1->type); return (1); } } /** * Checksum updates are a little complicated because the 
checksum in the TCP/UDP * header isn't always a full checksum. In some cases (i.e. output) it's a * pseudo-header checksum, which is a partial checksum over src/dst IP * addresses, protocol number and length. * * That means we have the following cases: * * Input or forwarding: we don't have TSO, the checksum fields are full * checksums, we need to update the checksum whenever we change anything. * * Output (i.e. the checksum is a pseudo-header checksum): * x The field being updated is src/dst address or affects the length of * the packet. We need to update the pseudo-header checksum (note that this * checksum is not ones' complement). * x Some other field is being modified (e.g. src/dst port numbers): We * don't have to update anything. **/ u_int16_t pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) { u_int32_t l; if (udp && !cksum) return (0x0000); l = cksum + old - new; l = (l >> 16) + (l & 65535); l = l & 65535; if (udp && !l) return (0xFFFF); return (l); } u_int16_t pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) { if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) return (cksum); return (pf_cksum_fixup(cksum, old, new, udp)); } static void pf_change_ap(struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af) { struct pf_addr ao; u_int16_t po = *p; PF_ACPY(&ao, a, af); PF_ACPY(a, an, af); if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) *pc = ~*pc; *p = pn; switch (af) { #ifdef INET case AF_INET: *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, ao.addr16[0], an->addr16[0], 0), ao.addr16[1], an->addr16[1], 0); *p = pn; *pc = pf_cksum_fixup(pf_cksum_fixup(*pc, ao.addr16[0], an->addr16[0], u), ao.addr16[1], an->addr16[1], u); *pc = pf_proto_cksum_fixup(m, *pc, po, pn, u); break; #endif /* INET */ #ifdef INET6 case AF_INET6: *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*pc, ao.addr16[0], an->addr16[0], u), ao.addr16[1], an->addr16[1], u), ao.addr16[2], an->addr16[2], u), ao.addr16[3], an->addr16[3], u), ao.addr16[4], an->addr16[4], u), ao.addr16[5], an->addr16[5], u), ao.addr16[6], an->addr16[6], u), ao.addr16[7], an->addr16[7], u); *pc = pf_proto_cksum_fixup(m, *pc, po, pn, u); break; #endif /* INET6 */ } if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { *pc = ~*pc; if (! *pc) *pc = 0xffff; } } /* Changes a u_int32_t. 
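 * The 32-bit value is folded into the 16-bit ones' complement sum as
 * two 16-bit halves: ao / 65536 is the high word and ao % 65536 the
 * low word, so one call is equivalent to two pf_cksum_fixup() calls.
 * Worked example (illustrative values): rewriting 0x0a000001 (10.0.0.1)
 * to 0x0a000002 (10.0.0.2) fixes the sum with the old/new pairs
 * (0x0a00, 0x0a00) and (0x0001, 0x0002); the first pair is a no-op,
 * the second adjusts the sum by one.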
Uses a void * so there are no align restrictions */ void pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) { u_int32_t ao; memcpy(&ao, a, sizeof(ao)); memcpy(a, &an, sizeof(u_int32_t)); *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u), ao % 65536, an % 65536, u); } void pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp) { u_int32_t ao; memcpy(&ao, a, sizeof(ao)); memcpy(a, &an, sizeof(u_int32_t)); *c = pf_proto_cksum_fixup(m, pf_proto_cksum_fixup(m, *c, ao / 65536, an / 65536, udp), ao % 65536, an % 65536, udp); } #ifdef INET6 static void pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) { struct pf_addr ao; PF_ACPY(&ao, a, AF_INET6); PF_ACPY(a, an, AF_INET6); *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*c, ao.addr16[0], an->addr16[0], u), ao.addr16[1], an->addr16[1], u), ao.addr16[2], an->addr16[2], u), ao.addr16[3], an->addr16[3], u), ao.addr16[4], an->addr16[4], u), ao.addr16[5], an->addr16[5], u), ao.addr16[6], an->addr16[6], u), ao.addr16[7], an->addr16[7], u); } #endif /* INET6 */ static void pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af) { struct pf_addr oia, ooa; PF_ACPY(&oia, ia, af); if (oa) PF_ACPY(&ooa, oa, af); /* Change inner protocol port, fix inner protocol checksum. */ if (ip != NULL) { u_int16_t oip = *ip; u_int32_t opc; if (pc != NULL) opc = *pc; *ip = np; if (pc != NULL) *pc = pf_cksum_fixup(*pc, oip, *ip, u); *ic = pf_cksum_fixup(*ic, oip, *ip, 0); if (pc != NULL) *ic = pf_cksum_fixup(*ic, opc, *pc, 0); } /* Change inner ip address, fix inner ip and icmp checksums. */ PF_ACPY(ia, na, af); switch (af) { #ifdef INET case AF_INET: { u_int32_t oh2c = *h2c; *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, oia.addr16[0], ia->addr16[0], 0), oia.addr16[1], ia->addr16[1], 0); *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, oia.addr16[0], ia->addr16[0], 0), oia.addr16[1], ia->addr16[1], 0); *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0); break; } #endif /* INET */ #ifdef INET6 case AF_INET6: *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*ic, oia.addr16[0], ia->addr16[0], u), oia.addr16[1], ia->addr16[1], u), oia.addr16[2], ia->addr16[2], u), oia.addr16[3], ia->addr16[3], u), oia.addr16[4], ia->addr16[4], u), oia.addr16[5], ia->addr16[5], u), oia.addr16[6], ia->addr16[6], u), oia.addr16[7], ia->addr16[7], u); break; #endif /* INET6 */ } /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. 
*/ if (oa) { PF_ACPY(oa, na, af); switch (af) { #ifdef INET case AF_INET: *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, ooa.addr16[0], oa->addr16[0], 0), ooa.addr16[1], oa->addr16[1], 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*ic, ooa.addr16[0], oa->addr16[0], u), ooa.addr16[1], oa->addr16[1], u), ooa.addr16[2], oa->addr16[2], u), ooa.addr16[3], oa->addr16[3], u), ooa.addr16[4], oa->addr16[4], u), ooa.addr16[5], oa->addr16[5], u), ooa.addr16[6], oa->addr16[6], u), ooa.addr16[7], oa->addr16[7], u); break; #endif /* INET6 */ } } } /* * Need to modulate the sequence numbers in the TCP SACK option * (credits to Krzysztof Pfaff for report and patch) */ static int pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, struct tcphdr *th, struct pf_state_peer *dst) { int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen; u_int8_t opts[TCP_MAXOLEN], *opt = opts; int copyback = 0, i, olen; struct sackblk sack; #define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) if (hlen < TCPOLEN_SACKLEN || !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) return 0; while (hlen >= TCPOLEN_SACKLEN) { olen = opt[1]; switch (*opt) { case TCPOPT_EOL: /* FALLTHROUGH */ case TCPOPT_NOP: opt++; hlen--; break; case TCPOPT_SACK: if (olen > hlen) olen = hlen; if (olen >= TCPOLEN_SACKLEN) { for (i = 2; i + TCPOLEN_SACK <= olen; i += TCPOLEN_SACK) { memcpy(&sack, &opt[i], sizeof(sack)); pf_change_proto_a(m, &sack.start, &th->th_sum, htonl(ntohl(sack.start) - dst->seqdiff), 0); pf_change_proto_a(m, &sack.end, &th->th_sum, htonl(ntohl(sack.end) - dst->seqdiff), 0); memcpy(&opt[i], &sack, sizeof(sack)); } copyback = 1; } /* FALLTHROUGH */ default: if (olen < 2) olen = 2; hlen -= olen; opt += olen; } } if (copyback) m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts); return (copyback); } static void pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af, const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, u_int16_t rtag, struct ifnet *ifp) { struct pf_send_entry *pfse; struct mbuf *m; int len, tlen; #ifdef INET struct ip *h = NULL; #endif /* INET */ #ifdef INET6 struct ip6_hdr *h6 = NULL; #endif /* INET6 */ struct tcphdr *th; char *opt; struct pf_mtag *pf_mtag; len = 0; th = NULL; /* maximum segment size tcp option */ tlen = sizeof(struct tcphdr); if (mss) tlen += 4; switch (af) { #ifdef INET case AF_INET: len = sizeof(struct ip) + tlen; break; #endif /* INET */ #ifdef INET6 case AF_INET6: len = sizeof(struct ip6_hdr) + tlen; break; #endif /* INET6 */ default: panic("%s: unsupported af %d", __func__, af); } /* Allocate outgoing queue entry, mbuf and mbuf tag. 
*/ pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); if (pfse == NULL) return; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { free(pfse, M_PFTEMP); return; } #ifdef MAC mac_netinet_firewall_send(m); #endif if ((pf_mtag = pf_get_mtag(m)) == NULL) { free(pfse, M_PFTEMP); m_freem(m); return; } if (tag) m->m_flags |= M_SKIP_FIREWALL; pf_mtag->tag = rtag; if (r != NULL && r->rtableid >= 0) M_SETFIB(m, r->rtableid); #ifdef ALTQ if (r != NULL && r->qid) { pf_mtag->qid = r->qid; /* add hints for ecn */ pf_mtag->hdr = mtod(m, struct ip *); } #endif /* ALTQ */ m->m_data += max_linkhdr; m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.rcvif = NULL; bzero(m->m_data, len); switch (af) { #ifdef INET case AF_INET: h = mtod(m, struct ip *); /* IP header fields included in the TCP checksum */ h->ip_p = IPPROTO_TCP; h->ip_len = htons(tlen); h->ip_src.s_addr = saddr->v4.s_addr; h->ip_dst.s_addr = daddr->v4.s_addr; th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); break; #endif /* INET */ #ifdef INET6 case AF_INET6: h6 = mtod(m, struct ip6_hdr *); /* IP header fields included in the TCP checksum */ h6->ip6_nxt = IPPROTO_TCP; h6->ip6_plen = htons(tlen); memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); break; #endif /* INET6 */ } /* TCP header */ th->th_sport = sport; th->th_dport = dport; th->th_seq = htonl(seq); th->th_ack = htonl(ack); th->th_off = tlen >> 2; th->th_flags = flags; th->th_win = htons(win); if (mss) { opt = (char *)(th + 1); opt[0] = TCPOPT_MAXSEG; opt[1] = 4; HTONS(mss); bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); } switch (af) { #ifdef INET case AF_INET: /* TCP checksum */ th->th_sum = in_cksum(m, len); /* Finish the IP header */ h->ip_v = 4; h->ip_hl = sizeof(*h) >> 2; h->ip_tos = IPTOS_LOWDELAY; h->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0); h->ip_len = htons(len); h->ip_ttl = ttl ? ttl : V_ip_defttl; h->ip_sum = 0; pfse->pfse_type = PFSE_IP; break; #endif /* INET */ #ifdef INET6 case AF_INET6: /* TCP checksum */ th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), tlen); h6->ip6_vfc |= IPV6_VERSION; h6->ip6_hlim = IPV6_DEFHLIM; pfse->pfse_type = PFSE_IP6; break; #endif /* INET6 */ } pfse->pfse_m = m; pf_send(pfse); } static void pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, struct pf_rule *r) { struct pf_send_entry *pfse; struct mbuf *m0; struct pf_mtag *pf_mtag; /* Allocate outgoing queue entry, mbuf and mbuf tag. */ pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); if (pfse == NULL) return; if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) { free(pfse, M_PFTEMP); return; } if ((pf_mtag = pf_get_mtag(m0)) == NULL) { free(pfse, M_PFTEMP); return; } /* XXX: revisit */ m0->m_flags |= M_SKIP_FIREWALL; if (r->rtableid >= 0) M_SETFIB(m0, r->rtableid); #ifdef ALTQ if (r->qid) { pf_mtag->qid = r->qid; /* add hints for ecn */ pf_mtag->hdr = mtod(m0, struct ip *); } #endif /* ALTQ */ switch (af) { #ifdef INET case AF_INET: pfse->pfse_type = PFSE_ICMP; break; #endif /* INET */ #ifdef INET6 case AF_INET6: pfse->pfse_type = PFSE_ICMP6; break; #endif /* INET6 */ } pfse->pfse_m = m0; pfse->icmpopts.type = type; pfse->icmpopts.code = code; pf_send(pfse); } /* * Return 1 if the addresses a and b match (with mask m), otherwise return 0. * If n is 0, they match if they are equal. If n is != 0, they match if they * are different. 
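 * Example (illustrative addresses): with a = 10.0.0.5, b = 10.0.0.0
 * and m = 255.255.255.0 the masked comparison succeeds, so n = 0
 * returns 1 (the address is inside 10.0.0.0/24) while n = 1 returns 0;
 * n simply inverts the result for negated ("!") rule addresses.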
*/ int pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, struct pf_addr *b, sa_family_t af) { int match = 0; switch (af) { #ifdef INET case AF_INET: if ((a->addr32[0] & m->addr32[0]) == (b->addr32[0] & m->addr32[0])) match++; break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (((a->addr32[0] & m->addr32[0]) == (b->addr32[0] & m->addr32[0])) && ((a->addr32[1] & m->addr32[1]) == (b->addr32[1] & m->addr32[1])) && ((a->addr32[2] & m->addr32[2]) == (b->addr32[2] & m->addr32[2])) && ((a->addr32[3] & m->addr32[3]) == (b->addr32[3] & m->addr32[3]))) match++; break; #endif /* INET6 */ } if (match) { if (n) return (0); else return (1); } else { if (n) return (1); else return (0); } } /* * Return 1 if b <= a <= e, otherwise return 0. */ int pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, struct pf_addr *a, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: if ((a->addr32[0] < b->addr32[0]) || (a->addr32[0] > e->addr32[0])) return (0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: { int i; /* check a >= b */ for (i = 0; i < 4; ++i) if (a->addr32[i] > b->addr32[i]) break; else if (a->addr32[i] < b->addr32[i]) return (0); /* check a <= e */ for (i = 0; i < 4; ++i) if (a->addr32[i] < e->addr32[i]) break; else if (a->addr32[i] > e->addr32[i]) return (0); break; } #endif /* INET6 */ } return (1); } static int pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) { switch (op) { case PF_OP_IRG: return ((p > a1) && (p < a2)); case PF_OP_XRG: return ((p < a1) || (p > a2)); case PF_OP_RRG: return ((p >= a1) && (p <= a2)); case PF_OP_EQ: return (p == a1); case PF_OP_NE: return (p != a1); case PF_OP_LT: return (p < a1); case PF_OP_LE: return (p <= a1); case PF_OP_GT: return (p > a1); case PF_OP_GE: return (p >= a1); } return (0); /* never reached */ } int pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) { NTOHS(a1); NTOHS(a2); NTOHS(p); return (pf_match(op, a1, a2, p)); } static int pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) { if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) return (0); return (pf_match(op, a1, a2, u)); } static int pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) { if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) return (0); return (pf_match(op, a1, a2, g)); } int pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, int mtag) { if (*tag == -1) *tag = mtag; return ((!r->match_tag_not && r->match_tag == *tag) || (r->match_tag_not && r->match_tag != *tag)); } int pf_tag_packet(struct mbuf *m, struct pf_pdesc *pd, int tag) { KASSERT(tag > 0, ("%s: tag %d", __func__, tag)); if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(m)) == NULL)) return (ENOMEM); pd->pf_mtag->tag = tag; return (0); } #define PF_ANCHOR_STACKSIZE 32 struct pf_anchor_stackframe { struct pf_ruleset *rs; struct pf_rule *r; /* XXX: + match bit */ struct pf_anchor *child; }; /* * XXX: We rely on malloc(9) returning pointer aligned addresses. 
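 * That alignment guarantee is what makes the bit-stealing below safe:
 * a struct pf_rule pointer is at least 2-byte aligned, so bit 0 of
 * f->r is always zero and can carry the "match" flag.  The pattern in
 * miniature (a sketch of what the macros below implement):
 *
 *	uintptr_t v = (uintptr_t)rule_ptr;
 *	v |= 1;						set match bit
 *	int matched = v & 1;				test it
 *	struct pf_rule *p = (struct pf_rule *)(v & ~(uintptr_t)1);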
*/ #define PF_ANCHORSTACK_MATCH 0x00000001 #define PF_ANCHORSTACK_MASK (PF_ANCHORSTACK_MATCH) #define PF_ANCHOR_MATCH(f) ((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH) #define PF_ANCHOR_RULE(f) (struct pf_rule *) \ ((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK) #define PF_ANCHOR_SET_MATCH(f) do { (f)->r = (void *) \ ((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \ } while (0) void pf_step_into_anchor(struct pf_anchor_stackframe *stack, int *depth, struct pf_ruleset **rs, int n, struct pf_rule **r, struct pf_rule **a, int *match) { struct pf_anchor_stackframe *f; PF_RULES_RASSERT(); if (match) *match = 0; if (*depth >= PF_ANCHOR_STACKSIZE) { printf("%s: anchor stack overflow on %s\n", __func__, (*r)->anchor->name); *r = TAILQ_NEXT(*r, entries); return; } else if (*depth == 0 && a != NULL) *a = *r; f = stack + (*depth)++; f->rs = *rs; f->r = *r; if ((*r)->anchor_wildcard) { struct pf_anchor_node *parent = &(*r)->anchor->children; if ((f->child = RB_MIN(pf_anchor_node, parent)) == NULL) { *r = NULL; return; } *rs = &f->child->ruleset; } else { f->child = NULL; *rs = &(*r)->anchor->ruleset; } *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); } int pf_step_out_of_anchor(struct pf_anchor_stackframe *stack, int *depth, struct pf_ruleset **rs, int n, struct pf_rule **r, struct pf_rule **a, int *match) { struct pf_anchor_stackframe *f; struct pf_rule *fr; int quick = 0; PF_RULES_RASSERT(); do { if (*depth <= 0) break; f = stack + *depth - 1; fr = PF_ANCHOR_RULE(f); if (f->child != NULL) { struct pf_anchor_node *parent; /* * This block traverses through * a wildcard anchor. */ parent = &fr->anchor->children; if (match != NULL && *match) { /* * If any of "*" matched, then * "foo/ *" matched, mark frame * appropriately. */ PF_ANCHOR_SET_MATCH(f); *match = 0; } f->child = RB_NEXT(pf_anchor_node, parent, f->child); if (f->child != NULL) { *rs = &f->child->ruleset; *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); if (*r == NULL) continue; else break; } } (*depth)--; if (*depth == 0 && a != NULL) *a = NULL; *rs = f->rs; if (PF_ANCHOR_MATCH(f) || (match != NULL && *match)) quick = fr->quick; *r = TAILQ_NEXT(fr, entries); } while (*r == NULL); return (quick); } #ifdef INET6 void pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); break; #endif /* INET */ case AF_INET6: naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); break; } } void pf_addr_inc(struct pf_addr *addr, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); break; #endif /* INET */ case AF_INET6: if (addr->addr32[3] == 0xffffffff) { addr->addr32[3] = 0; if (addr->addr32[2] == 0xffffffff) { addr->addr32[2] = 0; if (addr->addr32[1] == 0xffffffff) { addr->addr32[1] = 0; addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); } else addr->addr32[1] = htonl(ntohl(addr->addr32[1]) + 1); } else addr->addr32[2] = htonl(ntohl(addr->addr32[2]) + 1); } else addr->addr32[3] = 
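		/*
		 * The nested ifs above implement a ripple carry across the
		 * four 32-bit words, treating the address as one 128-bit
		 * big-endian integer: incrementing an address ending in
		 * ...:0000:0000:ffff:ffff yields ...:0000:0001:0000:0000,
		 * since the overflow of addr32[3] zeroes it and carries
		 * into addr32[2].  The common case, below, only touches
		 * the last word.
		 */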
htonl(ntohl(addr->addr32[3]) + 1); break; } } #endif /* INET6 */ int pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m) { struct pf_addr *saddr, *daddr; u_int16_t sport, dport; struct inpcbinfo *pi; struct inpcb *inp; pd->lookup.uid = UID_MAX; pd->lookup.gid = GID_MAX; switch (pd->proto) { case IPPROTO_TCP: if (pd->hdr.tcp == NULL) return (-1); sport = pd->hdr.tcp->th_sport; dport = pd->hdr.tcp->th_dport; pi = &V_tcbinfo; break; case IPPROTO_UDP: if (pd->hdr.udp == NULL) return (-1); sport = pd->hdr.udp->uh_sport; dport = pd->hdr.udp->uh_dport; pi = &V_udbinfo; break; default: return (-1); } if (direction == PF_IN) { saddr = pd->src; daddr = pd->dst; } else { u_int16_t p; p = sport; sport = dport; dport = p; saddr = pd->dst; daddr = pd->src; } switch (pd->af) { #ifdef INET case AF_INET: inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4, dport, INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) { inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4, dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) return (-1); } break; #endif /* INET */ #ifdef INET6 case AF_INET6: inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6, dport, INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) { inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6, dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) return (-1); } break; #endif /* INET6 */ default: return (-1); } INP_RLOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; pd->lookup.gid = inp->inp_cred->cr_groups[0]; INP_RUNLOCK(inp); return (1); } static u_int8_t pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) { int hlen; u_int8_t hdr[60]; u_int8_t *opt, optlen; u_int8_t wscale = 0; hlen = th_off << 2; /* hlen <= sizeof(hdr) */ if (hlen <= sizeof(struct tcphdr)) return (0); if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) return (0); opt = hdr + sizeof(struct tcphdr); hlen -= sizeof(struct tcphdr); while (hlen >= 3) { switch (*opt) { case TCPOPT_EOL: case TCPOPT_NOP: ++opt; --hlen; break; case TCPOPT_WINDOW: wscale = opt[2]; if (wscale > TCP_MAX_WINSHIFT) wscale = TCP_MAX_WINSHIFT; wscale |= PF_WSCALE_FLAG; /* FALLTHROUGH */ default: optlen = opt[1]; if (optlen < 2) optlen = 2; hlen -= optlen; opt += optlen; break; } } return (wscale); } static u_int16_t pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) { int hlen; u_int8_t hdr[60]; u_int8_t *opt, optlen; u_int16_t mss = V_tcp_mssdflt; hlen = th_off << 2; /* hlen <= sizeof(hdr) */ if (hlen <= sizeof(struct tcphdr)) return (0); if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) return (0); opt = hdr + sizeof(struct tcphdr); hlen -= sizeof(struct tcphdr); while (hlen >= TCPOLEN_MAXSEG) { switch (*opt) { case TCPOPT_EOL: case TCPOPT_NOP: ++opt; --hlen; break; case TCPOPT_MAXSEG: bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); NTOHS(mss); /* FALLTHROUGH */ default: optlen = opt[1]; if (optlen < 2) optlen = 2; hlen -= optlen; opt += optlen; break; } } return (mss); } static u_int16_t pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) { #ifdef INET struct sockaddr_in *dst; struct route ro; #endif /* INET */ #ifdef INET6 struct sockaddr_in6 *dst6; struct route_in6 ro6; #endif /* INET6 */ struct rtentry *rt = NULL; int hlen = 0; u_int16_t mss = V_tcp_mssdflt; switch (af) { #ifdef INET case AF_INET: hlen = sizeof(struct ip); bzero(&ro, sizeof(ro)); dst = (struct sockaddr_in *)&ro.ro_dst; dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = 
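		/*
		 * The MSS is derived below from the MTU of the route's
		 * interface: mss = MTU - hlen - sizeof(struct tcphdr),
		 * raised to at least V_tcp_mssdflt, capped by the peer's
		 * offer and floored at 64.  For a plain Ethernet route
		 * (MTU 1500, illustrative) that gives 1500 - 20 - 20 = 1460
		 * for IPv4 and 1500 - 40 - 20 = 1440 for IPv6.
		 */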
addr->v4; in_rtalloc_ign(&ro, 0, rtableid); rt = ro.ro_rt; break; #endif /* INET */ #ifdef INET6 case AF_INET6: hlen = sizeof(struct ip6_hdr); bzero(&ro6, sizeof(ro6)); dst6 = (struct sockaddr_in6 *)&ro6.ro_dst; dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; in6_rtalloc_ign(&ro6, 0, rtableid); rt = ro6.ro_rt; break; #endif /* INET6 */ } if (rt && rt->rt_ifp) { mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); mss = max(V_tcp_mssdflt, mss); RTFREE(rt); } mss = min(mss, offer); mss = max(mss, 64); /* sanity - at least max opt space */ return (mss); } static u_int32_t pf_tcp_iss(struct pf_pdesc *pd) { MD5_CTX ctx; u_int32_t digest[4]; if (V_pf_tcp_secret_init == 0) { read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); MD5Init(&V_pf_tcp_secret_ctx); MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); V_pf_tcp_secret_init = 1; } ctx = V_pf_tcp_secret_ctx; MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short)); MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short)); if (pd->af == AF_INET6) { MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr)); MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr)); } else { MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr)); MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr)); } MD5Final((u_char *)digest, &ctx); V_pf_tcp_iss_off += 4096; #define ISN_RANDOM_INCREMENT (4096 - 1) return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) + V_pf_tcp_iss_off); #undef ISN_RANDOM_INCREMENT } static int pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, struct inpcb *inp) { struct pf_rule *nr = NULL; struct pf_addr * const saddr = pd->src; struct pf_addr * const daddr = pd->dst; sa_family_t af = pd->af; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; struct tcphdr *th = pd->hdr.tcp; struct pf_state_key *sk = NULL, *nk = NULL; u_short reason; int rewrite = 0, hdrlen = 0; int tag = -1, rtableid = -1; int asd = 0; int match = 0; int state_icmp = 0; u_int16_t sport = 0, dport = 0; u_int16_t bproto_sum = 0, bip_sum = 0; u_int8_t icmptype = 0, icmpcode = 0; struct pf_anchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE]; PF_RULES_RASSERT(); if (inp != NULL) { INP_LOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; pd->lookup.gid = inp->inp_cred->cr_groups[0]; pd->lookup.done = 1; } switch (pd->proto) { case IPPROTO_TCP: sport = th->th_sport; dport = th->th_dport; hdrlen = sizeof(*th); break; case IPPROTO_UDP: sport = pd->hdr.udp->uh_sport; dport = pd->hdr.udp->uh_dport; hdrlen = sizeof(*pd->hdr.udp); break; #ifdef INET case IPPROTO_ICMP: if (pd->af != AF_INET) break; sport = dport = pd->hdr.icmp->icmp_id; hdrlen = sizeof(*pd->hdr.icmp); icmptype = pd->hdr.icmp->icmp_type; icmpcode = pd->hdr.icmp->icmp_code; if (icmptype == ICMP_UNREACH || icmptype == ICMP_SOURCEQUENCH || icmptype == ICMP_REDIRECT || icmptype == ICMP_TIMXCEED || icmptype == ICMP_PARAMPROB) state_icmp++; break; #endif /* INET */ #ifdef INET6 case IPPROTO_ICMPV6: if (af != AF_INET6) break; sport = dport = pd->hdr.icmp6->icmp6_id; hdrlen = sizeof(*pd->hdr.icmp6); icmptype = pd->hdr.icmp6->icmp6_type; icmpcode = pd->hdr.icmp6->icmp6_code; if (icmptype == ICMP6_DST_UNREACH || icmptype == ICMP6_PACKET_TOO_BIG || icmptype == ICMP6_TIME_EXCEEDED || icmptype == ICMP6_PARAM_PROB) state_icmp++; break; 
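		/*
		 * state_icmp marks ICMP/ICMPv6 error messages (unreachable,
		 * time exceeded, ...).  Errors refer to an existing flow and
		 * therefore never create a state of their own further down;
		 * they are instead matched against the state of the
		 * connection they complain about.  Queries such as echo
		 * request/reply leave state_icmp at 0 and are tracked like
		 * any other protocol.
		 */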
#endif /* INET6 */
	default:
		sport = dport = hdrlen = 0;
		break;
	}

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);

	/* check packet for BINAT/NAT/RDR */
	if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, &sk,
	    &nk, saddr, daddr, sport, dport, anchor_stack)) != NULL) {
		KASSERT(sk != NULL, ("%s: null sk", __func__));
		KASSERT(nk != NULL, ("%s: null nk", __func__));

		if (pd->ip_sum)
			bip_sum = *pd->ip_sum;

		switch (pd->proto) {
		case IPPROTO_TCP:
			bproto_sum = th->th_sum;
			pd->proto_sum = &th->th_sum;

			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
			    nk->port[pd->sidx] != sport) {
				pf_change_ap(m, saddr, &th->th_sport,
				    pd->ip_sum, &th->th_sum,
				    &nk->addr[pd->sidx],
				    nk->port[pd->sidx], 0, af);
				pd->sport = &th->th_sport;
				sport = th->th_sport;
			}

			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
			    nk->port[pd->didx] != dport) {
				pf_change_ap(m, daddr, &th->th_dport,
				    pd->ip_sum, &th->th_sum,
				    &nk->addr[pd->didx],
				    nk->port[pd->didx], 0, af);
				dport = th->th_dport;
				pd->dport = &th->th_dport;
			}
			rewrite++;
			break;
		case IPPROTO_UDP:
			bproto_sum = pd->hdr.udp->uh_sum;
			pd->proto_sum = &pd->hdr.udp->uh_sum;

			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
			    nk->port[pd->sidx] != sport) {
				pf_change_ap(m, saddr, &pd->hdr.udp->uh_sport,
				    pd->ip_sum, &pd->hdr.udp->uh_sum,
				    &nk->addr[pd->sidx],
				    nk->port[pd->sidx], 1, af);
				sport = pd->hdr.udp->uh_sport;
				pd->sport = &pd->hdr.udp->uh_sport;
			}

			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
			    nk->port[pd->didx] != dport) {
				pf_change_ap(m, daddr, &pd->hdr.udp->uh_dport,
				    pd->ip_sum, &pd->hdr.udp->uh_sum,
				    &nk->addr[pd->didx],
				    nk->port[pd->didx], 1, af);
				dport = pd->hdr.udp->uh_dport;
				pd->dport = &pd->hdr.udp->uh_dport;
			}
			rewrite++;
			break;
#ifdef INET
		case IPPROTO_ICMP:
			nk->port[0] = nk->port[1];
			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
				    nk->addr[pd->sidx].v4.s_addr, 0);

			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
				pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
				    nk->addr[pd->didx].v4.s_addr, 0);

			if (nk->port[1] != pd->hdr.icmp->icmp_id) {
				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
				    pd->hdr.icmp->icmp_cksum, sport,
				    nk->port[1], 0);
				pd->hdr.icmp->icmp_id = nk->port[1];
				pd->sport = &pd->hdr.icmp->icmp_id;
			}

			m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
			break;
#endif /* INET */
#ifdef INET6
		case IPPROTO_ICMPV6:
			nk->port[0] = nk->port[1];
			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
				    &nk->addr[pd->sidx], 0);

			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
				    &nk->addr[pd->didx], 0);
			rewrite++;
			break;
#endif /* INET6 */
		default:
			switch (af) {
#ifdef INET
			case AF_INET:
				if (PF_ANEQ(saddr,
				    &nk->addr[pd->sidx], AF_INET))
					pf_change_a(&saddr->v4.s_addr,
					    pd->ip_sum,
					    nk->addr[pd->sidx].v4.s_addr, 0);

				if (PF_ANEQ(daddr,
				    &nk->addr[pd->didx], AF_INET))
					pf_change_a(&daddr->v4.s_addr,
					    pd->ip_sum,
					    nk->addr[pd->didx].v4.s_addr, 0);
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				if (PF_ANEQ(saddr,
				    &nk->addr[pd->sidx], AF_INET6))
					PF_ACPY(saddr, &nk->addr[pd->sidx], af);

				if (PF_ANEQ(daddr,
				    &nk->addr[pd->didx], AF_INET6))
					PF_ACPY(daddr, &nk->addr[pd->didx], af);
				break;
#endif /* INET6 */
			}
			break;
		}

		if (nr->natpass)
			r = NULL;
		pd->nat_rule = nr;
	}

	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto !=
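		/*
		 * A failed comparison does not simply step to the next rule:
		 * r->skip[] (precomputed by pf_calc_skip_steps() above) jumps
		 * directly to the next rule whose value for that field
		 * differs.  If, say, 500 consecutive rules are all bound to
		 * the same interface (illustrative), a packet arriving on a
		 * different interface passes over all of them with a single
		 * pointer chase instead of 500 evaluations.
		 */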
pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; /* tcp/udp only. port_op always 0 in other cases */ else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; /* tcp/udp only. port_op always 0 in other cases */ else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; /* icmp only. type always 0 in other cases */ else if (r->type && r->type != icmptype + 1) r = TAILQ_NEXT(r, entries); /* icmp only. type always 0 in other cases */ else if (r->code && r->code != icmpcode + 1) r = TAILQ_NEXT(r, entries); else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if (pd->proto == IPPROTO_TCP && (r->flagset & th->th_flags) != r->flags) r = TAILQ_NEXT(r, entries); /* tcp/udp only. uid.op always 0 in other cases */ else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = pf_socket_lookup(direction, pd, m), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], pd->lookup.uid)) r = TAILQ_NEXT(r, entries); /* tcp/udp only. gid.op always 0 in other cases */ else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = pf_socket_lookup(direction, pd, m), 1)) && !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], pd->lookup.gid)) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag ? pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != IPPROTO_TCP || !pf_osfp_match( pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; if (r->rtableid >= 0) rtableid = r->rtableid; if (r->anchor == NULL) { match = 1; *rm = r; *am = a; *rsm = ruleset; if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(anchor_stack, &asd, &ruleset, PF_RULESET_FILTER, &r, &a, &match); } if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd, &ruleset, PF_RULESET_FILTER, &r, &a, &match)) break; } r = *rm; a = *am; ruleset = *rsm; REASON_SET(&reason, PFRES_MATCH); if (r->log || (nr != NULL && nr->log)) { if (rewrite) m_copyback(m, off, hdrlen, pd->hdr.any); PFLOG_PACKET(kif, m, af, direction, reason, r->log ? 
		    r : nr, a, ruleset, pd, 1);
	}

	if ((r->action == PF_DROP) &&
	    ((r->rule_flag & PFRULE_RETURNRST) ||
	    (r->rule_flag & PFRULE_RETURNICMP) ||
	    (r->rule_flag & PFRULE_RETURN))) {
		/* undo NAT changes, if they have taken place */
		if (nr != NULL) {
			PF_ACPY(saddr, &sk->addr[pd->sidx], af);
			PF_ACPY(daddr, &sk->addr[pd->didx], af);
			if (pd->sport)
				*pd->sport = sk->port[pd->sidx];
			if (pd->dport)
				*pd->dport = sk->port[pd->didx];
			if (pd->proto_sum)
				*pd->proto_sum = bproto_sum;
			if (pd->ip_sum)
				*pd->ip_sum = bip_sum;
			m_copyback(m, off, hdrlen, pd->hdr.any);
		}
		if (pd->proto == IPPROTO_TCP &&
		    ((r->rule_flag & PFRULE_RETURNRST) ||
		    (r->rule_flag & PFRULE_RETURN)) &&
		    !(th->th_flags & TH_RST)) {
			u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
			int		 len = 0;
#ifdef INET
			struct ip	*h4;
#endif
#ifdef INET6
			struct ip6_hdr	*h6;
#endif

			switch (af) {
#ifdef INET
			case AF_INET:
				h4 = mtod(m, struct ip *);
				len = ntohs(h4->ip_len) - off;
				break;
#endif
#ifdef INET6
			case AF_INET6:
				h6 = mtod(m, struct ip6_hdr *);
				len = ntohs(h6->ip6_plen) -
				    (off - sizeof(*h6));
				break;
#endif
			}

			if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
				REASON_SET(&reason, PFRES_PROTCKSUM);
			else {
				if (th->th_flags & TH_SYN)
					ack++;
				if (th->th_flags & TH_FIN)
					ack++;
				pf_send_tcp(m, r, af, pd->dst,
				    pd->src, th->th_dport, th->th_sport,
				    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
				    r->return_ttl, 1, 0, kif->pfik_ifp);
			}
		} else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
		    r->return_icmp)
			pf_send_icmp(m, r->return_icmp >> 8,
			    r->return_icmp & 255, af, r);
		else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
		    r->return_icmp6)
			pf_send_icmp(m, r->return_icmp6 >> 8,
			    r->return_icmp6 & 255, af, r);
	}

	if (r->action == PF_DROP)
		goto cleanup;

	if (tag > 0 && pf_tag_packet(m, pd, tag)) {
		REASON_SET(&reason, PFRES_MEMORY);
		goto cleanup;
	}
	if (rtableid >= 0)
		M_SETFIB(m, rtableid);

	if (!state_icmp && (r->keep_state || nr != NULL ||
	    (pd->flags & PFDESC_TCP_NORM))) {
		int action;
		action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off,
		    sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum,
		    hdrlen);
		if (action != PF_PASS)
			return (action);
	} else {
		if (sk != NULL)
			uma_zfree(V_pf_state_key_z, sk);
		if (nk != NULL)
			uma_zfree(V_pf_state_key_z, nk);
	}

	/* copy back packet headers if we performed NAT operations */
	if (rewrite)
		m_copyback(m, off, hdrlen, pd->hdr.any);

	if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) &&
	    direction == PF_OUT &&
	    pfsync_defer_ptr != NULL && pfsync_defer_ptr(*sm, m))
		/*
		 * We want the state created, but we don't
		 * want to send this in case a partner
		 * firewall has to know about it to allow
		 * replies through it.
*/ return (PF_DEFER); return (PF_PASS); cleanup: if (sk != NULL) uma_zfree(V_pf_state_key_z, sk); if (nk != NULL) uma_zfree(V_pf_state_key_z, nk); return (PF_DROP); } static int pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *nk, struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite, struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen) { struct pf_state *s = NULL; struct pf_src_node *sn = NULL; struct tcphdr *th = pd->hdr.tcp; u_int16_t mss = V_tcp_mssdflt; u_short reason; /* check maximums */ if (r->max_states && (counter_u64_fetch(r->states_cur) >= r->max_states)) { counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1); REASON_SET(&reason, PFRES_MAXSTATES); return (PF_DROP); } /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) { REASON_SET(&reason, PFRES_SRCLIMIT); goto csfailed; } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) { REASON_SET(&reason, PFRES_SRCLIMIT); goto csfailed; } s = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO); if (s == NULL) { REASON_SET(&reason, PFRES_MEMORY); goto csfailed; } s->rule.ptr = r; s->nat_rule.ptr = nr; s->anchor.ptr = a; STATE_INC_COUNTERS(s); if (r->allow_opts) s->state_flags |= PFSTATE_ALLOWOPTS; if (r->rule_flag & PFRULE_STATESLOPPY) s->state_flags |= PFSTATE_SLOPPY; s->log = r->log & PF_LOG_ALL; s->sync_state = PFSYNC_S_NONE; if (nr != NULL) s->log |= nr->log & PF_LOG_ALL; switch (pd->proto) { case IPPROTO_TCP: s->src.seqlo = ntohl(th->th_seq); s->src.seqhi = s->src.seqlo + pd->p_len + 1; if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_MODULATE) { /* Generate sequence number modulator */ if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == 0) s->src.seqdiff = 1; pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(s->src.seqlo + s->src.seqdiff), 0); *rewrite = 1; } else s->src.seqdiff = 0; if (th->th_flags & TH_SYN) { s->src.seqhi++; s->src.wscale = pf_get_wscale(m, off, th->th_off, pd->af); } s->src.max_win = MAX(ntohs(th->th_win), 1); if (s->src.wscale & PF_WSCALE_MASK) { /* Remove scale factor from initial window */ int win = s->src.max_win; win += 1 << (s->src.wscale & PF_WSCALE_MASK); s->src.max_win = (win - 1) >> (s->src.wscale & PF_WSCALE_MASK); } if (th->th_flags & TH_FIN) s->src.seqhi++; s->dst.seqhi = 1; s->dst.max_win = 1; s->src.state = TCPS_SYN_SENT; s->dst.state = TCPS_CLOSED; s->timeout = PFTM_TCP_FIRST_PACKET; break; case IPPROTO_UDP: s->src.state = PFUDPS_SINGLE; s->dst.state = PFUDPS_NO_TRAFFIC; s->timeout = PFTM_UDP_FIRST_PACKET; break; case IPPROTO_ICMP: #ifdef INET6 case IPPROTO_ICMPV6: #endif s->timeout = PFTM_ICMP_FIRST_PACKET; break; default: s->src.state = PFOTHERS_SINGLE; s->dst.state = PFOTHERS_NO_TRAFFIC; s->timeout = PFTM_OTHER_FIRST_PACKET; } if (r->rt && r->rt != PF_FASTROUTE) { if (pf_map_addr(pd->af, r, pd->src, &s->rt_addr, NULL, &sn)) { REASON_SET(&reason, PFRES_MAPFAILED); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); uma_zfree(V_pf_state_z, s); goto csfailed; } s->rt_kif = r->rpool.cur->kif; } s->creation = time_uptime; s->expire = time_uptime; if (sn != NULL) s->src_node = sn; if (nsn != NULL) { /* XXX We only modify one side for now. 
		 */
		PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
		s->nat_src_node = nsn;
	}
	if (pd->proto == IPPROTO_TCP) {
		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
		    off, pd, th, &s->src, &s->dst)) {
			REASON_SET(&reason, PFRES_MEMORY);
			pf_src_tree_remove_state(s);
			STATE_DEC_COUNTERS(s);
			uma_zfree(V_pf_state_z, s);
			return (PF_DROP);
		}
		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
		    &s->src, &s->dst, rewrite)) {
			/* This really shouldn't happen!!! */
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_normalize_tcp_stateful failed on first pkt"));
			pf_normalize_tcp_cleanup(s);
			pf_src_tree_remove_state(s);
			STATE_DEC_COUNTERS(s);
			uma_zfree(V_pf_state_z, s);
			return (PF_DROP);
		}
	}
	s->direction = pd->dir;

	/*
	 * sk/nk could already have been set up by pf_get_translation().
	 */
	if (nr == NULL) {
		KASSERT((sk == NULL && nk == NULL), ("%s: nr %p sk %p, nk %p",
		    __func__, nr, sk, nk));
		sk = pf_state_key_setup(pd, pd->src, pd->dst, sport, dport);
		if (sk == NULL)
			goto csfailed;
		nk = sk;
	} else
		KASSERT((sk != NULL && nk != NULL), ("%s: nr %p sk %p, nk %p",
		    __func__, nr, sk, nk));

	/* Swap sk/nk for PF_OUT. */
	if (pf_state_insert(BOUND_IFACE(r, kif),
	    (pd->dir == PF_IN) ? sk : nk,
	    (pd->dir == PF_IN) ? nk : sk, s)) {
		if (pd->proto == IPPROTO_TCP)
			pf_normalize_tcp_cleanup(s);
		REASON_SET(&reason, PFRES_STATEINS);
		pf_src_tree_remove_state(s);
		STATE_DEC_COUNTERS(s);
		uma_zfree(V_pf_state_z, s);
		return (PF_DROP);
	} else
		*sm = s;

	if (tag > 0)
		s->tag = tag;
	if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
	    TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
		s->src.state = PF_TCPS_PROXY_SRC;
		/* undo NAT changes, if they have taken place */
		if (nr != NULL) {
			struct pf_state_key *skt = s->key[PF_SK_WIRE];
			if (pd->dir == PF_OUT)
				skt = s->key[PF_SK_STACK];
			PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
			PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
			if (pd->sport)
				*pd->sport = skt->port[pd->sidx];
			if (pd->dport)
				*pd->dport = skt->port[pd->didx];
			if (pd->proto_sum)
				*pd->proto_sum = bproto_sum;
			if (pd->ip_sum)
				*pd->ip_sum = bip_sum;
			m_copyback(m, off, hdrlen, pd->hdr.any);
		}
		s->src.seqhi = htonl(arc4random());
		/* Find mss option */
		int rtid = M_GETFIB(m);
		mss = pf_get_mss(m, off, th->th_off, pd->af);
		mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
		mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
		s->src.mss = mss;
		pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport,
		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
		    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL);
		REASON_SET(&reason, PFRES_SYNPROXY);
		return (PF_SYNPROXY_DROP);
	}

	return (PF_PASS);

csfailed:
	if (sk != NULL)
		uma_zfree(V_pf_state_key_z, sk);
	if (nk != NULL)
		uma_zfree(V_pf_state_key_z, nk);

	if (sn != NULL) {
		struct pf_srchash *sh;

		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
		PF_HASHROW_LOCK(sh);
		if (--sn->states == 0 && sn->expire == 0) {
			pf_unlink_src_node(sn);
			uma_zfree(V_pf_sources_z, sn);
			counter_u64_add(
			    V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
		}
		PF_HASHROW_UNLOCK(sh);
	}

	if (nsn != sn && nsn != NULL) {
		struct pf_srchash *sh;

		sh = &V_pf_srchash[pf_hashsrc(&nsn->addr, nsn->af)];
		PF_HASHROW_LOCK(sh);
		if (--nsn->states == 0 && nsn->expire == 0) {
			pf_unlink_src_node(nsn);
			uma_zfree(V_pf_sources_z, nsn);
			counter_u64_add(
			    V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
		}
		PF_HASHROW_UNLOCK(sh);
	}

	return (PF_DROP);
}

static int
pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
    struct pf_ruleset **rsm)
{
	struct
pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; sa_family_t af = pd->af; u_short reason; int tag = -1; int asd = 0; int match = 0; struct pf_anchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE]; PF_RULES_RASSERT(); r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); while (r != NULL) { r->evaluations++; if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != af) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else if (pd->proto == IPPROTO_UDP && (r->src.port_op || r->dst.port_op)) r = TAILQ_NEXT(r, entries); else if (pd->proto == IPPROTO_TCP && (r->src.port_op || r->dst.port_op || r->flagset)) r = TAILQ_NEXT(r, entries); else if ((pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) && (r->type || r->code)) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= (arc4random() % (UINT_MAX - 1) + 1)) r = TAILQ_NEXT(r, entries); else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag ? pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { match = 1; *rm = r; *am = a; *rsm = ruleset; if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(anchor_stack, &asd, &ruleset, PF_RULESET_FILTER, &r, &a, &match); } if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd, &ruleset, PF_RULESET_FILTER, &r, &a, &match)) break; } r = *rm; a = *am; ruleset = *rsm; REASON_SET(&reason, PFRES_MATCH); if (r->log) PFLOG_PACKET(kif, m, af, direction, reason, r, a, ruleset, pd, 1); if (r->action != PF_PASS) return (PF_DROP); if (tag > 0 && pf_tag_packet(m, pd, tag)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } return (PF_PASS); } static int pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd, u_short *reason, int *copyback) { struct tcphdr *th = pd->hdr.tcp; u_int16_t win = ntohs(th->th_win); u_int32_t ack, end, seq, orig_seq; u_int8_t sws, dws; int ackskew; if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { sws = src->wscale & PF_WSCALE_MASK; dws = dst->wscale & PF_WSCALE_MASK; } else sws = dws = 0; /* * Sequence tracking algorithm from Guido van Rooij's paper: * http://www.madison-gurkha.com/publications/tcp_filtering/ * tcp_filtering.ps */ orig_seq = seq = ntohl(th->th_seq); if (src->seqlo == 0) { /* First packet from this end. 
	   Set its state */
		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
		    src->scrub == NULL) {
			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
				REASON_SET(reason, PFRES_MEMORY);
				return (PF_DROP);
			}
		}

		/* Deferred generation of sequence number modulator */
		if (dst->seqdiff && !src->seqdiff) {
			/* use random iss for the TCP server */
			while ((src->seqdiff = arc4random() - seq) == 0)
				;
			ack = ntohl(th->th_ack) - dst->seqdiff;
			pf_change_proto_a(m, &th->th_seq, &th->th_sum,
			    htonl(seq + src->seqdiff), 0);
			pf_change_proto_a(m, &th->th_ack, &th->th_sum,
			    htonl(ack), 0);
			*copyback = 1;
		} else {
			ack = ntohl(th->th_ack);
		}

		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
			if (dst->wscale & PF_WSCALE_FLAG) {
				src->wscale = pf_get_wscale(m, off, th->th_off,
				    pd->af);
				if (src->wscale & PF_WSCALE_FLAG) {
					/*
					 * Remove scale factor from initial
					 * window
					 */
					sws = src->wscale & PF_WSCALE_MASK;
					win = ((u_int32_t)win + (1 << sws) - 1)
					    >> sws;
					dws = dst->wscale & PF_WSCALE_MASK;
				} else {
					/* fixup other window */
					dst->max_win <<= dst->wscale &
					    PF_WSCALE_MASK;
					/* in case of a retrans SYN|ACK */
					dst->wscale = 0;
				}
			}
		}
		if (th->th_flags & TH_FIN)
			end++;

		src->seqlo = seq;
		if (src->state < TCPS_SYN_SENT)
			src->state = TCPS_SYN_SENT;

		/*
		 * May need to slide the window (seqhi may have been set by
		 * the crappy stack check or if we picked up the connection
		 * after establishment)
		 */
		if (src->seqhi == 1 ||
		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
			src->seqhi = end + MAX(1, dst->max_win << dws);
		if (win > src->max_win)
			src->max_win = win;

	} else {
		ack = ntohl(th->th_ack) - dst->seqdiff;
		if (src->seqdiff) {
			/* Modulate sequence numbers */
			pf_change_proto_a(m, &th->th_seq, &th->th_sum,
			    htonl(seq + src->seqdiff), 0);
			pf_change_proto_a(m, &th->th_ack, &th->th_sum,
			    htonl(ack), 0);
			*copyback = 1;
		}
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN)
			end++;
		if (th->th_flags & TH_FIN)
			end++;
	}

	if ((th->th_flags & TH_ACK) == 0) {
		/* Let it pass through the ack skew check */
		ack = dst->seqlo;
	} else if ((ack == 0 &&
	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
	    /* broken tcp stacks do not set ack */
	    (dst->state < TCPS_SYN_SENT)) {
		/*
		 * Many stacks (ours included) will set the ACK number in a
		 * FIN|ACK if the SYN times out -- no sequence to ACK.
		 */
		ack = dst->seqlo;
	}

	if (seq == end) {
		/* Ease sequencing restrictions on no data packets */
		seq = src->seqlo;
		end = seq;
	}

	ackskew = dst->seqlo - ack;

	/*
	 * Need to demodulate the sequence numbers in any TCP SACK options
	 * (Selective ACK). We could optionally validate the SACK values
	 * against the current ACK window, either forwards or backwards, but
	 * I'm not confident that SACK has been implemented properly
	 * everywhere. It wouldn't surprise me if several stacks accidentally
	 * SACK too far backwards of previously ACKed data. There really aren't
	 * any security implications of bad SACKing unless the target stack
	 * doesn't validate the option length correctly. Someone trying to
	 * spoof into a TCP connection won't bother blindly sending SACK
	 * options anyway.
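 * Worked example (illustrative numbers): if this connection's server
 * side was given dst->seqdiff = 1000, the incoming th_ack was
 * demodulated above by subtracting 1000, and pf_modulate_sack()
 * applies the same shift to every SACK block, so a wire block
 * [6000, 7000] becomes [5000, 6000] and stays consistent with the
 * rewritten acknowledgment.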
*/ if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { if (pf_modulate_sack(m, off, pd, th, dst)) *copyback = 1; } #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ if (SEQ_GEQ(src->seqhi, end) && /* Last octet inside other's window space */ SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && /* Retrans: not more than one window back */ (ackskew >= -MAXACKWINDOW) && /* Acking not more than one reassembled fragment backwards */ (ackskew <= (MAXACKWINDOW << sws)) && /* Acking not more than one window forward */ ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) || (pd->flags & PFDESC_IP_REAS) == 0)) { /* Require an exact/+1 sequence match on resets when possible */ if (dst->scrub || src->scrub) { if (pf_normalize_tcp_stateful(m, off, pd, reason, th, *state, src, dst, copyback)) return (PF_DROP); } /* update max window */ if (src->max_win < win) src->max_win = win; /* synchronize sequencing */ if (SEQ_GT(end, src->seqlo)) src->seqlo = end; /* slide the window of what the other end can send */ if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) dst->seqhi = ack + MAX((win << sws), 1); /* update states */ if (th->th_flags & TH_SYN) if (src->state < TCPS_SYN_SENT) src->state = TCPS_SYN_SENT; if (th->th_flags & TH_FIN) if (src->state < TCPS_CLOSING) src->state = TCPS_CLOSING; if (th->th_flags & TH_ACK) { if (dst->state == TCPS_SYN_SENT) { dst->state = TCPS_ESTABLISHED; if (src->state == TCPS_ESTABLISHED && (*state)->src_node != NULL && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } } else if (dst->state == TCPS_CLOSING) dst->state = TCPS_FIN_WAIT_2; } if (th->th_flags & TH_RST) src->state = dst->state = TCPS_TIME_WAIT; /* update expire time */ (*state)->expire = time_uptime; if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; else if (src->state >= TCPS_CLOSING && dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_FIN_WAIT; else if (src->state < TCPS_ESTABLISHED || dst->state < TCPS_ESTABLISHED) (*state)->timeout = PFTM_TCP_OPENING; else if (src->state >= TCPS_CLOSING || dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_CLOSING; else (*state)->timeout = PFTM_TCP_ESTABLISHED; /* Fall through to PASS packet */ } else if ((dst->state < TCPS_SYN_SENT || dst->state >= TCPS_FIN_WAIT_2 || src->state >= TCPS_FIN_WAIT_2) && SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) && /* Within a window forward of the originating packet */ SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { /* Within a window backward of the originating packet */ /* * This currently handles three situations: * 1) Stupid stacks will shotgun SYNs before their peer * replies. * 2) When PF catches an already established stream (the * firewall rebooted, the state table was flushed, routes * changed...) * 3) Packets get funky immediately after the connection * closes (this should catch Solaris spurious ACK|FINs * that web servers like to spew after a close) * * This must be a little more careful than the above code * since packet floods will also be caught here. We don't * update the TTL here to mitigate the damage of a packet * flood and so the same code can handle awkward establishment * and a loosened connection close. * In the establishment case, a correct peer response will * validate the connection, go through the normal state code * and keep updating the state TTL. 
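 * In numbers (illustrative): MAXACKWINDOW above is 0xffff + 1500 =
 * 67035, so this loose path accepts sequence numbers within roughly
 * 64 KB on either side of the originating end's window, without the
 * ACK-skew restrictions of the strict path; and since the timeout is
 * deliberately not refreshed here, a flood abusing this path can at
 * worst ride an existing state until its normal expiry.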
*/ if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: loose state match: "); pf_print_state(*state); pf_print_flags(th->th_flags); printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len, ackskew, (unsigned long long)(*state)->packets[0], (unsigned long long)(*state)->packets[1], pd->dir == PF_IN ? "in" : "out", pd->dir == (*state)->direction ? "fwd" : "rev"); } if (dst->scrub || src->scrub) { if (pf_normalize_tcp_stateful(m, off, pd, reason, th, *state, src, dst, copyback)) return (PF_DROP); } /* update max window */ if (src->max_win < win) src->max_win = win; /* synchronize sequencing */ if (SEQ_GT(end, src->seqlo)) src->seqlo = end; /* slide the window of what the other end can send */ if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) dst->seqhi = ack + MAX((win << sws), 1); /* * Cannot set dst->seqhi here since this could be a shotgunned * SYN and not an already established connection. */ if (th->th_flags & TH_FIN) if (src->state < TCPS_CLOSING) src->state = TCPS_CLOSING; if (th->th_flags & TH_RST) src->state = dst->state = TCPS_TIME_WAIT; /* Fall through to PASS packet */ } else { if ((*state)->dst.state == TCPS_SYN_SENT && (*state)->src.state == TCPS_SYN_SENT) { /* Send RST for state mismatches during handshake */ if (!(th->th_flags & TH_RST)) pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), 0, TH_RST, 0, 0, (*state)->rule.ptr->return_ttl, 1, 0, kif->pfik_ifp); src->seqlo = 0; src->seqhi = 1; src->max_win = 1; } else if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: BAD state: "); pf_print_state(*state); pf_print_flags(th->th_flags); printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len, ackskew, (unsigned long long)(*state)->packets[0], (unsigned long long)(*state)->packets[1], pd->dir == PF_IN ? "in" : "out", pd->dir == (*state)->direction ? "fwd" : "rev"); printf("pf: State failure on: %c %c %c %c | %c %c\n", SEQ_GEQ(src->seqhi, end) ? ' ' : '1', SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? ' ': '2', (ackskew >= -MAXACKWINDOW) ? ' ' : '3', (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6'); } REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } return (PF_PASS); } static int pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, struct pf_state **state, struct pf_pdesc *pd, u_short *reason) { struct tcphdr *th = pd->hdr.tcp; if (th->th_flags & TH_SYN) if (src->state < TCPS_SYN_SENT) src->state = TCPS_SYN_SENT; if (th->th_flags & TH_FIN) if (src->state < TCPS_CLOSING) src->state = TCPS_CLOSING; if (th->th_flags & TH_ACK) { if (dst->state == TCPS_SYN_SENT) { dst->state = TCPS_ESTABLISHED; if (src->state == TCPS_ESTABLISHED && (*state)->src_node != NULL && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } } else if (dst->state == TCPS_CLOSING) { dst->state = TCPS_FIN_WAIT_2; } else if (src->state == TCPS_SYN_SENT && dst->state < TCPS_SYN_SENT) { /* * Handle a special sloppy case where we only see one * half of the connection. If there is a ACK after * the initial SYN without ever seeing a packet from * the destination, set the connection to established. 
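A sketch of the sloppy tracker's flag-driven transitions described here, for the side that sent the packet (src) against its peer (dst). Only the one-sided-handshake part is shown (the CLOSING/FIN_WAIT_2 promotions and connection limits are omitted), and the enum uses a simplified ordering that preserves the comparisons pf relies on.

#include <stdint.h>

enum tcp_state { TCPS_CLOSED, TCPS_SYN_SENT, TCPS_ESTABLISHED,
	TCPS_CLOSING, TCPS_FIN_WAIT_2, TCPS_TIME_WAIT };

#define TH_FIN	0x01
#define TH_SYN	0x02
#define TH_RST	0x04
#define TH_ACK	0x10

static void
sloppy_update(enum tcp_state *src, enum tcp_state *dst, uint8_t flags)
{
	if ((flags & TH_SYN) && *src < TCPS_SYN_SENT)
		*src = TCPS_SYN_SENT;
	if ((flags & TH_FIN) && *src < TCPS_CLOSING)
		*src = TCPS_CLOSING;
	if (flags & TH_ACK) {
		if (*dst == TCPS_SYN_SENT)
			*dst = TCPS_ESTABLISHED;
		else if (*src == TCPS_SYN_SENT && *dst < TCPS_SYN_SENT)
			/* one-sided view: ACK after our SYN, peer never seen */
			*src = *dst = TCPS_ESTABLISHED;
	}
	if (flags & TH_RST)
		*src = *dst = TCPS_TIME_WAIT;
}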
*/ dst->state = src->state = TCPS_ESTABLISHED; if ((*state)->src_node != NULL && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } } else if (src->state == TCPS_CLOSING && dst->state == TCPS_ESTABLISHED && dst->seqlo == 0) { /* * Handle the closing of half connections where we * don't see the full bidirectional FIN/ACK+ACK * handshake. */ dst->state = TCPS_CLOSING; } } if (th->th_flags & TH_RST) src->state = dst->state = TCPS_TIME_WAIT; /* update expire time */ (*state)->expire = time_uptime; if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; else if (src->state >= TCPS_CLOSING && dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_FIN_WAIT; else if (src->state < TCPS_ESTABLISHED || dst->state < TCPS_ESTABLISHED) (*state)->timeout = PFTM_TCP_OPENING; else if (src->state >= TCPS_CLOSING || dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_CLOSING; else (*state)->timeout = PFTM_TCP_ESTABLISHED; return (PF_PASS); } static int pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { struct pf_state_key_cmp key; struct tcphdr *th = pd->hdr.tcp; int copyback = 0; struct pf_state_peer *src, *dst; struct pf_state_key *sk; bzero(&key, sizeof(key)); key.af = pd->af; key.proto = IPPROTO_TCP; if (direction == PF_IN) { /* wire side, straight */ PF_ACPY(&key.addr[0], pd->src, key.af); PF_ACPY(&key.addr[1], pd->dst, key.af); key.port[0] = th->th_sport; key.port[1] = th->th_dport; } else { /* stack side, reverse */ PF_ACPY(&key.addr[1], pd->src, key.af); PF_ACPY(&key.addr[0], pd->dst, key.af); key.port[1] = th->th_sport; key.port[0] = th->th_dport; } STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; } else { src = &(*state)->dst; dst = &(*state)->src; } sk = (*state)->key[pd->didx]; if ((*state)->src.state == PF_TCPS_PROXY_SRC) { if (direction != (*state)->direction) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } if (th->th_flags & TH_SYN) { if (ntohl(th->th_seq) != (*state)->src.seqlo) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, (*state)->src.seqhi, ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 0, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (!(th->th_flags & TH_ACK) || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } else if ((*state)->src_node != NULL && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } else (*state)->src.state = PF_TCPS_PROXY_DST; } if ((*state)->src.state == PF_TCPS_PROXY_DST) { if (direction == (*state)->direction) { if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } (*state)->src.max_win = MAX(ntohs(th->th_win), 1); if ((*state)->dst.seqhi == 1) (*state)->dst.seqhi = htonl(arc4random()); pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], (*state)->dst.seqhi, 0, TH_SYN, 0, (*state)->src.mss, 0, 0, (*state)->tag, NULL); REASON_SET(reason, PFRES_SYNPROXY); return 
(PF_SYNPROXY_DROP); } else if (((th->th_flags & (TH_SYN|TH_ACK)) != (TH_SYN|TH_ACK)) || (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } else { (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); (*state)->dst.seqlo = ntohl(th->th_seq); pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, TH_ACK, (*state)->src.max_win, 0, 0, 0, (*state)->tag, NULL); pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0, NULL); (*state)->src.seqdiff = (*state)->dst.seqhi - (*state)->src.seqlo; (*state)->dst.seqdiff = (*state)->src.seqhi - (*state)->dst.seqlo; (*state)->src.seqhi = (*state)->src.seqlo + (*state)->dst.max_win; (*state)->dst.seqhi = (*state)->dst.seqlo + (*state)->src.max_win; (*state)->src.wscale = (*state)->dst.wscale = 0; (*state)->src.state = (*state)->dst.state = TCPS_ESTABLISHED; REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } } if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && dst->state >= TCPS_FIN_WAIT_2 && src->state >= TCPS_FIN_WAIT_2) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: state reuse "); pf_print_state(*state); pf_print_flags(th->th_flags); printf("\n"); } /* XXX make sure it's the same direction ?? */ (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; pf_unlink_state(*state, PF_ENTER_LOCKED); *state = NULL; return (PF_DROP); } if ((*state)->state_flags & PFSTATE_SLOPPY) { if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP) return (PF_DROP); } else { if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason, &copyback) == PF_DROP) return (PF_DROP); } /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || nk->port[pd->sidx] != th->th_sport) pf_change_ap(m, pd->src, &th->th_sport, pd->ip_sum, &th->th_sum, &nk->addr[pd->sidx], nk->port[pd->sidx], 0, pd->af); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || nk->port[pd->didx] != th->th_dport) pf_change_ap(m, pd->dst, &th->th_dport, pd->ip_sum, &th->th_sum, &nk->addr[pd->didx], nk->port[pd->didx], 0, pd->af); copyback = 1; } /* Copyback sequence modulation or stateful scrub changes if needed */ if (copyback) m_copyback(m, off, sizeof(*th), (caddr_t)th); return (PF_PASS); } static int pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; struct pf_state_key_cmp key; struct udphdr *uh = pd->hdr.udp; bzero(&key, sizeof(key)); key.af = pd->af; key.proto = IPPROTO_UDP; if (direction == PF_IN) { /* wire side, straight */ PF_ACPY(&key.addr[0], pd->src, key.af); PF_ACPY(&key.addr[1], pd->dst, key.af); key.port[0] = uh->uh_sport; key.port[1] = uh->uh_dport; } else { /* stack side, reverse */ PF_ACPY(&key.addr[1], pd->src, key.af); PF_ACPY(&key.addr[0], pd->dst, key.af); key.port[1] = uh->uh_sport; key.port[0] = uh->uh_dport; } STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; } else { src = &(*state)->dst; dst = &(*state)->src; } /* update states */ if (src->state < PFUDPS_SINGLE) src->state = PFUDPS_SINGLE; if (dst->state ==

PFUDPS_SINGLE) dst->state = PFUDPS_MULTIPLE; /* update expire time */ (*state)->expire = time_uptime; if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) (*state)->timeout = PFTM_UDP_MULTIPLE; else (*state)->timeout = PFTM_UDP_SINGLE; /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || nk->port[pd->sidx] != uh->uh_sport) pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum, &uh->uh_sum, &nk->addr[pd->sidx], nk->port[pd->sidx], 1, pd->af); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || nk->port[pd->didx] != uh->uh_dport) pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum, &uh->uh_sum, &nk->addr[pd->didx], nk->port[pd->didx], 1, pd->af); m_copyback(m, off, sizeof(*uh), (caddr_t)uh); } return (PF_PASS); } static int pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { struct pf_addr *saddr = pd->src, *daddr = pd->dst; u_int16_t icmpid = 0, *icmpsum; u_int8_t icmptype; int state_icmp = 0; struct pf_state_key_cmp key; bzero(&key, sizeof(key)); switch (pd->proto) { #ifdef INET case IPPROTO_ICMP: icmptype = pd->hdr.icmp->icmp_type; icmpid = pd->hdr.icmp->icmp_id; icmpsum = &pd->hdr.icmp->icmp_cksum; if (icmptype == ICMP_UNREACH || icmptype == ICMP_SOURCEQUENCH || icmptype == ICMP_REDIRECT || icmptype == ICMP_TIMXCEED || icmptype == ICMP_PARAMPROB) state_icmp++; break; #endif /* INET */ #ifdef INET6 case IPPROTO_ICMPV6: icmptype = pd->hdr.icmp6->icmp6_type; icmpid = pd->hdr.icmp6->icmp6_id; icmpsum = &pd->hdr.icmp6->icmp6_cksum; if (icmptype == ICMP6_DST_UNREACH || icmptype == ICMP6_PACKET_TOO_BIG || icmptype == ICMP6_TIME_EXCEEDED || icmptype == ICMP6_PARAM_PROB) state_icmp++; break; #endif /* INET6 */ } if (!state_icmp) { /* * ICMP query/reply message not related to a TCP/UDP packet. * Search for an ICMP state. 
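ICMP query states are keyed on the id field (both "ports" set to the id), and when NAT rewrites the id the checksum is patched incrementally rather than recomputed; this is the pf_cksum_fixup() idea from RFC 1624. A simplified sketch (the real routine also special-cases UDP's all-zero checksum, and operates on network-order values):

#include <stdint.h>
#include <stdio.h>

static uint16_t
cksum_fixup(uint16_t cksum, uint16_t old, uint16_t new)
{
	uint32_t l;

	l = cksum + old - new;		/* one's-complement adjust */
	l = (l >> 16) + (l & 0xffff);	/* fold the carry back in */
	return (l & 0xffff);
}

int
main(void)
{
	uint16_t cksum = 0x1234;		/* current icmp_cksum */
	uint16_t id = 0x0042, nat_id = 0x1042;	/* illustrative values */

	printf("old cksum %04x -> new cksum %04x\n",
	    cksum, cksum_fixup(cksum, id, nat_id));
	return (0);
}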
*/ key.af = pd->af; key.proto = pd->proto; key.port[0] = key.port[1] = icmpid; if (direction == PF_IN) { /* wire side, straight */ PF_ACPY(&key.addr[0], pd->src, key.af); PF_ACPY(&key.addr[1], pd->dst, key.af); } else { /* stack side, reverse */ PF_ACPY(&key.addr[1], pd->src, key.af); PF_ACPY(&key.addr[0], pd->dst, key.af); } STATE_LOOKUP(kif, &key, direction, *state, pd); (*state)->expire = time_uptime; (*state)->timeout = PFTM_ICMP_ERROR_REPLY; /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; switch (pd->af) { #ifdef INET case AF_INET: if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) pf_change_a(&saddr->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) pf_change_a(&daddr->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); if (nk->port[0] != pd->hdr.icmp->icmp_id) { pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( pd->hdr.icmp->icmp_cksum, icmpid, nk->port[pd->sidx], 0); pd->hdr.icmp->icmp_id = nk->port[pd->sidx]; } m_copyback(m, off, ICMP_MINLEN, (caddr_t )pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET6)) pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, &nk->addr[pd->sidx], 0); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET6)) pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, &nk->addr[pd->didx], 0); m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t )pd->hdr.icmp6); break; #endif /* INET6 */ } } return (PF_PASS); } else { /* * ICMP error message in response to a TCP/UDP packet. * Extract the inner TCP/UDP header and search for that state. */ struct pf_pdesc pd2; bzero(&pd2, sizeof pd2); #ifdef INET struct ip h2; #endif /* INET */ #ifdef INET6 struct ip6_hdr h2_6; int terminal = 0; #endif /* INET6 */ int ipoff2 = 0; int off2 = 0; pd2.af = pd->af; /* Payload packet is from the opposite direction. */ pd2.sidx = (direction == PF_IN) ? 1 : 0; pd2.didx = (direction == PF_IN) ? 
0 : 1; switch (pd->af) { #ifdef INET case AF_INET: /* offset of h2 in mbuf chain */ ipoff2 = off + ICMP_MINLEN; if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(ip)\n")); return (PF_DROP); } /* * ICMP error messages don't refer to non-first * fragments */ if (h2.ip_off & htons(IP_OFFMASK)) { REASON_SET(reason, PFRES_FRAG); return (PF_DROP); } /* offset of protocol header that follows h2 */ off2 = ipoff2 + (h2.ip_hl << 2); pd2.proto = h2.ip_p; pd2.src = (struct pf_addr *)&h2.ip_src; pd2.dst = (struct pf_addr *)&h2.ip_dst; pd2.ip_sum = &h2.ip_sum; break; #endif /* INET */ #ifdef INET6 case AF_INET6: ipoff2 = off + sizeof(struct icmp6_hdr); if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(ip6)\n")); return (PF_DROP); } pd2.proto = h2_6.ip6_nxt; pd2.src = (struct pf_addr *)&h2_6.ip6_src; pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; pd2.ip_sum = NULL; off2 = ipoff2 + sizeof(h2_6); do { switch (pd2.proto) { case IPPROTO_FRAGMENT: /* * ICMPv6 error messages for * non-first fragments */ REASON_SET(reason, PFRES_FRAG); return (PF_DROP); case IPPROTO_AH: case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct ip6_ext opt6; if (!pf_pull_hdr(m, off2, &opt6, sizeof(opt6), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMPv6 short opt\n")); return (PF_DROP); } if (pd2.proto == IPPROTO_AH) off2 += (opt6.ip6e_len + 2) * 4; else off2 += (opt6.ip6e_len + 1) * 8; pd2.proto = opt6.ip6e_nxt; /* goto the next header */ break; } default: terminal++; break; } } while (!terminal); break; #endif /* INET6 */ } switch (pd2.proto) { case IPPROTO_TCP: { struct tcphdr th; u_int32_t seq; struct pf_state_peer *src, *dst; u_int8_t dws; int copyback = 0; /* * Only the first 8 bytes of the TCP header can be * expected. Don't access any TCP header fields after * th_seq, an ackskew test is not possible. 
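The extension-header walk above in miniature: ip6e_len counts 8-byte units excluding the first 8 bytes, except for AH, which counts 4-byte units excluding the first 8. A sketch with a hand-built header chain; the IPPROTO numbers are the standard ones.

#include <stdint.h>
#include <stdio.h>

#define IPPROTO_HOPOPTS	0
#define IPPROTO_TCP	6
#define IPPROTO_AH	51

struct ip6_ext_lite {
	uint8_t ip6e_nxt;
	uint8_t ip6e_len;
};

static int
ext_hdr_size(uint8_t proto, const struct ip6_ext_lite *opt)
{
	return (proto == IPPROTO_AH ? (opt->ip6e_len + 2) * 4
	    : (opt->ip6e_len + 1) * 8);
}

int
main(void)
{
	struct ip6_ext_lite hbh = { IPPROTO_AH, 0 };	/* 8 bytes */
	struct ip6_ext_lite ah = { IPPROTO_TCP, 4 };	/* (4+2)*4 = 24 bytes */
	int off = 40;			/* past the fixed ip6 header */

	off += ext_hdr_size(IPPROTO_HOPOPTS, &hbh);
	off += ext_hdr_size(IPPROTO_AH, &ah);
	printf("TCP header at offset %d\n", off);	/* 40+8+24 = 72 */
	return (0);
}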
*/ if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(tcp)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_TCP; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = th.th_sport; key.port[pd2.didx] = th.th_dport; STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->dst; dst = &(*state)->src; } else { src = &(*state)->src; dst = &(*state)->dst; } if (src->wscale && dst->wscale) dws = dst->wscale & PF_WSCALE_MASK; else dws = 0; /* Demodulate sequence number */ seq = ntohl(th.th_seq) - src->seqdiff; if (src->seqdiff) { pf_change_a(&th.th_seq, icmpsum, htonl(seq), 0); copyback = 1; } if (!((*state)->state_flags & PFSTATE_SLOPPY) && (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: BAD ICMP %d:%d ", icmptype, pd->hdr.icmp->icmp_code); pf_print_host(pd->src, 0, pd->af); printf(" -> "); pf_print_host(pd->dst, 0, pd->af); printf(" state: "); pf_print_state(*state); printf(" seq=%u\n", seq); } REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } else { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: OK ICMP %d:%d ", icmptype, pd->hdr.icmp->icmp_code); pf_print_host(pd->src, 0, pd->af); printf(" -> "); pf_print_host(pd->dst, 0, pd->af); printf(" state: "); pf_print_state(*state); printf(" seq=%u\n", seq); } } /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af) || nk->port[pd2.sidx] != th.th_sport) pf_change_icmp(pd2.src, &th.th_sport, daddr, &nk->addr[pd2.sidx], nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af) || nk->port[pd2.didx] != th.th_dport) pf_change_icmp(pd2.dst, &th.th_dport, NULL, /* XXX Inbound NAT? 
*/ &nk->addr[pd2.didx], nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); copyback = 1; } if (copyback) { switch (pd2.af) { #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, (caddr_t )pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t )&h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t )pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t )&h2_6); break; #endif /* INET6 */ } m_copyback(m, off2, 8, (caddr_t)&th); } return (PF_PASS); break; } case IPPROTO_UDP: { struct udphdr uh; if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(udp)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_UDP; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = uh.uh_sport; key.port[pd2.didx] = uh.uh_dport; STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af) || nk->port[pd2.sidx] != uh.uh_sport) pf_change_icmp(pd2.src, &uh.uh_sport, daddr, &nk->addr[pd2.sidx], nk->port[pd2.sidx], &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af) || nk->port[pd2.didx] != uh.uh_dport) pf_change_icmp(pd2.dst, &uh.uh_dport, NULL, /* XXX Inbound NAT? */ &nk->addr[pd2.didx], nk->port[pd2.didx], &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); switch (pd2.af) { #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, (caddr_t )pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t )pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t )&h2_6); break; #endif /* INET6 */ } m_copyback(m, off2, sizeof(uh), (caddr_t)&uh); } return (PF_PASS); break; } #ifdef INET case IPPROTO_ICMP: { struct icmp iih; if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short i" "(icmp)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_ICMP; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = iih.icmp_id; STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af) || nk->port[pd2.sidx] != iih.icmp_id) pf_change_icmp(pd2.src, &iih.icmp_id, daddr, &nk->addr[pd2.sidx], nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af) || nk->port[pd2.didx] != iih.icmp_id) pf_change_icmp(pd2.dst, &iih.icmp_id, NULL, /* XXX Inbound NAT? 
*/ &nk->addr[pd2.didx], nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih); } return (PF_PASS); break; } #endif /* INET */ #ifdef INET6 case IPPROTO_ICMPV6: { struct icmp6_hdr iih; if (!pf_pull_hdr(m, off2, &iih, sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(icmp6)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_ICMPV6; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = iih.icmp6_id; STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af) || nk->port[pd2.sidx] != iih.icmp6_id) pf_change_icmp(pd2.src, &iih.icmp6_id, daddr, &nk->addr[pd2.sidx], nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af) || nk->port[pd2.didx] != iih.icmp6_id) pf_change_icmp(pd2.dst, &iih.icmp6_id, NULL, /* XXX Inbound NAT? */ &nk->addr[pd2.didx], nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t)pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); m_copyback(m, off2, sizeof(struct icmp6_hdr), (caddr_t)&iih); } return (PF_PASS); break; } #endif /* INET6 */ default: { key.af = pd2.af; key.proto = pd2.proto; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = 0; STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af)) pf_change_icmp(pd2.src, NULL, daddr, &nk->addr[pd2.sidx], 0, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af)) pf_change_icmp(pd2.src, NULL, NULL, /* XXX Inbound NAT? 
*/ &nk->addr[pd2.didx], 0, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); switch (pd2.af) { #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t )pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t )&h2_6); break; #endif /* INET6 */ } } return (PF_PASS); break; } } } } static int pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; struct pf_state_key_cmp key; bzero(&key, sizeof(key)); key.af = pd->af; key.proto = pd->proto; if (direction == PF_IN) { PF_ACPY(&key.addr[0], pd->src, key.af); PF_ACPY(&key.addr[1], pd->dst, key.af); key.port[0] = key.port[1] = 0; } else { PF_ACPY(&key.addr[1], pd->src, key.af); PF_ACPY(&key.addr[0], pd->dst, key.af); key.port[1] = key.port[0] = 0; } STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; } else { src = &(*state)->dst; dst = &(*state)->src; } /* update states */ if (src->state < PFOTHERS_SINGLE) src->state = PFOTHERS_SINGLE; if (dst->state == PFOTHERS_SINGLE) dst->state = PFOTHERS_MULTIPLE; /* update expire time */ (*state)->expire = time_uptime; if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) (*state)->timeout = PFTM_OTHER_MULTIPLE; else (*state)->timeout = PFTM_OTHER_SINGLE; /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; KASSERT(nk, ("%s: nk is null", __func__)); KASSERT(pd, ("%s: pd is null", __func__)); KASSERT(pd->src, ("%s: pd->src is null", __func__)); KASSERT(pd->dst, ("%s: pd->dst is null", __func__)); switch (pd->af) { #ifdef INET case AF_INET: if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) pf_change_a(&pd->src->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); #endif /* INET6 */ } } return (PF_PASS); } /* * ipoff and off are measured from the start of the mbuf chain. * h must be at "ipoff" on the mbuf chain. 
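The fragment test at the top of pf_pull_hdr() that follows, sketched: the IP fragment offset is in 8-byte units, so it is shifted left by 3 to get bytes. A non-first fragment that starts at or beyond the header being pulled cannot contain it and is passed through (the first fragment carried the policy decision); one that overlaps the header is dropped. A simplified user-space version in host byte order.

#include <stdint.h>
#include <stdio.h>

#define IP_OFFMASK 0x1fff

enum verdict { PULL, PASS, DROP };

static enum verdict
frag_verdict(uint16_t ip_off_host, int want_len)
{
	uint16_t fragoff = (ip_off_host & IP_OFFMASK) << 3;

	if (fragoff == 0)
		return (PULL);		/* first fragment: try to pull */
	return (fragoff >= want_len ? PASS : DROP);
}

int
main(void)
{
	printf("%d %d %d\n",
	    frag_verdict(0, 20),	/* PULL */
	    frag_verdict(3, 20),	/* offset 24 >= 20: PASS */
	    frag_verdict(1, 20));	/* offset 8 < 20: DROP */
	return (0);
}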
*/ void * pf_pull_hdr(struct mbuf *m, int off, void *p, int len, u_short *actionp, u_short *reasonp, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: { struct ip *h = mtod(m, struct ip *); u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; if (fragoff) { if (fragoff >= len) ACTION_SET(actionp, PF_PASS); else { ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_FRAG); } return (NULL); } if (m->m_pkthdr.len < off + len || ntohs(h->ip_len) < off + len) { ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_SHORT); return (NULL); } break; } #endif /* INET */ #ifdef INET6 case AF_INET6: { struct ip6_hdr *h = mtod(m, struct ip6_hdr *); if (m->m_pkthdr.len < off + len || (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < (unsigned)(off + len)) { ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_SHORT); return (NULL); } break; } #endif /* INET6 */ } m_copydata(m, off, len, p); return (p); } int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, int rtableid) { #ifdef RADIX_MPATH struct radix_node_head *rnh; #endif struct sockaddr_in *dst; int ret = 1; int check_mpath; #ifdef INET6 struct sockaddr_in6 *dst6; struct route_in6 ro; #else struct route ro; #endif struct radix_node *rn; struct rtentry *rt; struct ifnet *ifp; check_mpath = 0; #ifdef RADIX_MPATH /* XXX: stick to table 0 for now */ rnh = rt_tables_get_rnh(0, af); if (rnh != NULL && rn_mpath_capable(rnh)) check_mpath = 1; #endif bzero(&ro, sizeof(ro)); switch (af) { case AF_INET: dst = satosin(&ro.ro_dst); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; break; #ifdef INET6 case AF_INET6: /* * Skip check for addresses with embedded interface scope, * as they would always match anyway. */ if (IN6_IS_SCOPE_EMBED(&addr->v6)) goto out; dst6 = (struct sockaddr_in6 *)&ro.ro_dst; dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; break; #endif /* INET6 */ default: return (0); } /* Skip checks for ipsec interfaces */ if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) goto out; switch (af) { #ifdef INET6 case AF_INET6: in6_rtalloc_ign(&ro, 0, rtableid); break; #endif #ifdef INET case AF_INET: in_rtalloc_ign((struct route *)&ro, 0, rtableid); break; #endif } if (ro.ro_rt != NULL) { /* No interface given, this is a no-route check */ if (kif == NULL) goto out; if (kif->pfik_ifp == NULL) { ret = 0; goto out; } /* Perform uRPF check if passed input interface */ ret = 0; rn = (struct radix_node *)ro.ro_rt; do { rt = (struct rtentry *)rn; ifp = rt->rt_ifp; if (kif->pfik_ifp == ifp) ret = 1; #ifdef RADIX_MPATH rn = rn_mpath_next(rn); #endif } while (check_mpath == 1 && rn != NULL && ret == 0); } else ret = 0; out: if (ro.ro_rt != NULL) RTFREE(ro.ro_rt); return (ret); } #ifdef INET static void pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0, *m1; struct sockaddr_in dst; struct ip *ip; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; int error = 0; uint16_t ip_len, ip_off; KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", __func__)); if ((pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || pd->pf_mtag->routed++ > 3) { m0 = *m; *m = NULL; goto bad_locked; } if (r->rt == PF_DUPTO) { if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { if (s) PF_STATE_UNLOCK(s); return; } } else { if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { if (s) 
PF_STATE_UNLOCK(s); return; } m0 = *m; } ip = mtod(m0, struct ip *); bzero(&dst, sizeof(dst)); dst.sin_family = AF_INET; dst.sin_len = sizeof(dst); dst.sin_addr = ip->ip_dst; if (r->rt == PF_FASTROUTE) { struct rtentry *rt; if (s) PF_STATE_UNLOCK(s); rt = rtalloc1_fib(sintosa(&dst), 0, 0, M_GETFIB(m0)); if (rt == NULL) { KMOD_IPSTAT_INC(ips_noroute); error = EHOSTUNREACH; goto bad; } ifp = rt->rt_ifp; counter_u64_add(rt->rt_pksent, 1); if (rt->rt_flags & RTF_GATEWAY) bcopy(satosin(rt->rt_gateway), &dst, sizeof(dst)); RTFREE_LOCKED(rt); } else { if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); goto bad_locked; } if (s == NULL) { pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, &naddr, NULL, &sn); if (!PF_AZERO(&naddr, AF_INET)) dst.sin_addr.s_addr = naddr.v4.s_addr; ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET)) dst.sin_addr.s_addr = s->rt_addr.v4.s_addr; ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; PF_STATE_UNLOCK(s); } } if (ifp == NULL) goto bad; if (oifp != ifp) { if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; if (m0->m_len < sizeof(struct ip)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: m0->m_len < sizeof(struct ip)\n", __func__)); goto bad; } ip = mtod(m0, struct ip *); } if (ifp->if_flags & IFF_LOOPBACK) m0->m_flags |= M_SKIP_FIREWALL; ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); /* Copied from FreeBSD 10.0-CURRENT ip_output. */ m0->m_pkthdr.csum_flags |= CSUM_IP; if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) { in_delayed_cksum(m0); m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } #ifdef SCTP if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) { sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; } #endif /* * If small enough for interface, or the interface will take * care of the fragmentation for us, we can just send directly. */ if (ip_len <= ifp->if_mtu || (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) { ip->ip_sum = 0; if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) { ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); m0->m_pkthdr.csum_flags &= ~CSUM_IP; } m_clrprotoflags(m0); /* Avoid confusing lower layers. */ error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL); goto done; } /* Balk when DF bit is set or the interface didn't support TSO. 
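The forwarding decision at the end of pf_route(), as a table: small enough for the interface (or TSO will handle it) means send directly; too big with DF set means answer with ICMP "fragmentation needed"; otherwise fragment and send the chain. A sketch that collapses the hardware-assist flags into a single boolean, not the kernel path.

#include <stdbool.h>
#include <stdio.h>

enum route_action { SEND_DIRECT, ICMP_NEEDFRAG, FRAGMENT };

static enum route_action
route_decision(unsigned ip_len, unsigned mtu, bool df, bool tso)
{
	if (ip_len <= mtu || tso)
		return (SEND_DIRECT);
	if (df)
		return (ICMP_NEEDFRAG);
	return (FRAGMENT);
}

int
main(void)
{
	printf("%d %d %d\n",
	    route_decision(1400, 1500, false, false),	/* SEND_DIRECT */
	    route_decision(1600, 1500, true, false),	/* ICMP_NEEDFRAG */
	    route_decision(1600, 1500, false, false));	/* FRAGMENT */
	return (0);
}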
*/ if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) { error = EMSGSIZE; KMOD_IPSTAT_INC(ips_cantfrag); if (r->rt != PF_DUPTO) { icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, ifp->if_mtu); goto done; } else goto bad; } error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist); if (error) goto bad; for (; m0; m0 = m1) { m1 = m0->m_nextpkt; m0->m_nextpkt = NULL; if (error == 0) { m_clrprotoflags(m0); error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL); } else m_freem(m0); } if (error == 0) KMOD_IPSTAT_INC(ips_fragmented); done: if (r->rt != PF_DUPTO) *m = NULL; return; bad_locked: if (s) PF_STATE_UNLOCK(s); bad: m_freem(m0); goto done; } #endif /* INET */ #ifdef INET6 static void pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0; struct sockaddr_in6 dst; struct ip6_hdr *ip6; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", __func__)); if ((pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || pd->pf_mtag->routed++ > 3) { m0 = *m; *m = NULL; goto bad_locked; } if (r->rt == PF_DUPTO) { if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { if (s) PF_STATE_UNLOCK(s); return; } } else { if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { if (s) PF_STATE_UNLOCK(s); return; } m0 = *m; } ip6 = mtod(m0, struct ip6_hdr *); bzero(&dst, sizeof(dst)); dst.sin6_family = AF_INET6; dst.sin6_len = sizeof(dst); dst.sin6_addr = ip6->ip6_dst; /* Cheat. XXX why only in the v6 case??? */ if (r->rt == PF_FASTROUTE) { if (s) PF_STATE_UNLOCK(s); m0->m_flags |= M_SKIP_FIREWALL; ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); *m = NULL; return; } if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); goto bad_locked; } if (s == NULL) { pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, &naddr, NULL, &sn); if (!PF_AZERO(&naddr, AF_INET6)) PF_ACPY((struct pf_addr *)&dst.sin6_addr, &naddr, AF_INET6); ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET6)) PF_ACPY((struct pf_addr *)&dst.sin6_addr, &s->rt_addr, AF_INET6); ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; } if (s) PF_STATE_UNLOCK(s); if (ifp == NULL) goto bad; if (oifp != ifp) { if (pf_test6(PF_FWD, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; if (m0->m_len < sizeof(struct ip6_hdr)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: m0->m_len < sizeof(struct ip6_hdr)\n", __func__)); goto bad; } ip6 = mtod(m0, struct ip6_hdr *); } if (ifp->if_flags & IFF_LOOPBACK) m0->m_flags |= M_SKIP_FIREWALL; if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 & ~ifp->if_hwassist) { uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6); in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr)); m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } /* * If the packet is too large for the outgoing interface, * send back an icmp6 error. 
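A sketch of the two v6-specific steps here: re-embedding the interface index into the second 16-bit word of a scoped destination before output, and the MTU check that either hands the packet to the neighbour-discovery output path or answers with ICMP6 "packet too big". The scope test is simplified to link-local fe80::/10 (IN6_IS_SCOPE_EMBED also covers link- and interface-local multicast), and the struct is a stand-in for in6_addr.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

struct in6_lite { uint16_t s6_addr16[8]; };

static bool
is_scope_embed(const struct in6_lite *a)
{
	/* link-local fe80::/10, simplified */
	return ((ntohs(a->s6_addr16[0]) & 0xffc0) == 0xfe80);
}

int
main(void)
{
	struct in6_lite dst;
	unsigned ifindex = 3, mtu = 1500, pktlen = 1400;

	memset(&dst, 0, sizeof(dst));
	dst.s6_addr16[0] = htons(0xfe80);

	if (is_scope_embed(&dst))
		dst.s6_addr16[1] = htons(ifindex);	/* embed scope zone */

	if (pktlen <= mtu)
		printf("nd6 output, zone %u\n", ntohs(dst.s6_addr16[1]));
	else
		printf("icmp6 packet too big, mtu %u\n", mtu);
	return (0);
}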
*/ if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr)) dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index); if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) - nd6_output_ifp(ifp, ifp, m0, &dst); + nd6_output_ifp(ifp, ifp, m0, &dst, NULL); else { in6_ifstat_inc(ifp, ifs6_in_toobig); if (r->rt != PF_DUPTO) icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); else goto bad; } done: if (r->rt != PF_DUPTO) *m = NULL; return; bad_locked: if (s) PF_STATE_UNLOCK(s); bad: m_freem(m0); goto done; } #endif /* INET6 */ /* * FreeBSD supports cksum offloads for the following drivers. * em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4), * ti(4), txp(4), xl(4) * * CSUM_DATA_VALID | CSUM_PSEUDO_HDR : * network driver performed cksum including pseudo header, need to verify * csum_data * CSUM_DATA_VALID : * network driver performed cksum, needs to additional pseudo header * cksum computation with partial csum_data(i.e. lack of H/W support for * pseudo header, for instance hme(4), sk(4) and possibly gem(4)) * * After validating the cksum of packet, set both flag CSUM_DATA_VALID and * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper * TCP/UDP layer. * Also, set csum_data to 0xffff to force cksum validation. */ static int pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af) { u_int16_t sum = 0; int hw_assist = 0; struct ip *ip; if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) return (1); if (m->m_pkthdr.len < off + len) return (1); switch (p) { case IPPROTO_TCP: if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { sum = m->m_pkthdr.csum_data; } else { ip = mtod(m, struct ip *); sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + IPPROTO_TCP)); } sum ^= 0xffff; ++hw_assist; } break; case IPPROTO_UDP: if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { sum = m->m_pkthdr.csum_data; } else { ip = mtod(m, struct ip *); sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + IPPROTO_UDP)); } sum ^= 0xffff; ++hw_assist; } break; case IPPROTO_ICMP: #ifdef INET6 case IPPROTO_ICMPV6: #endif /* INET6 */ break; default: return (1); } if (!hw_assist) { switch (af) { case AF_INET: if (p == IPPROTO_ICMP) { if (m->m_len < off) return (1); m->m_data += off; m->m_len -= off; sum = in_cksum(m, len); m->m_data -= off; m->m_len += off; } else { if (m->m_len < sizeof(struct ip)) return (1); sum = in4_cksum(m, p, off, len); } break; #ifdef INET6 case AF_INET6: if (m->m_len < sizeof(struct ip6_hdr)) return (1); sum = in6_cksum(m, p, off, len); break; #endif /* INET6 */ default: return (1); } } if (sum) { switch (p) { case IPPROTO_TCP: { KMOD_TCPSTAT_INC(tcps_rcvbadsum); break; } case IPPROTO_UDP: { KMOD_UDPSTAT_INC(udps_badsum); break; } #ifdef INET case IPPROTO_ICMP: { KMOD_ICMPSTAT_INC(icps_checksum); break; } #endif #ifdef INET6 case IPPROTO_ICMPV6: { KMOD_ICMP6STAT_INC(icp6s_checksum); break; } #endif /* INET6 */ } return (1); } else { if (p == IPPROTO_TCP || p == IPPROTO_UDP) { m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } } return (0); } #ifdef INET int pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) { struct pfi_kif *kif; u_short action, reason = 0, log = 0; struct mbuf *m = *m0; struct ip *h = NULL; struct m_tag *ipfwtag; struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; struct pf_state *s = NULL; struct pf_ruleset 
*ruleset = NULL; struct pf_pdesc pd; int off, dirndx, pqid = 0; M_ASSERTPKTHDR(m); if (!V_pf_status.running) return (PF_PASS); memset(&pd, 0, sizeof(pd)); kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) return (PF_PASS); if (m->m_flags & M_SKIP_FIREWALL) return (PF_PASS); pd.pf_mtag = pf_find_mtag(m); PF_RULES_RLOCK(); if (ip_divert_ptr != NULL && ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1); if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; goto done; } pd.pf_mtag->flags |= PF_PACKET_LOOPED; m_tag_delete(m, ipfwtag); } if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { m->m_flags |= M_FASTFWD_OURS; pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; } } else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { /* We do IP header normalization and packet reassembly here */ action = PF_DROP; goto done; } m = *m0; /* pf_normalize messes with m0 */ h = mtod(m, struct ip *); off = h->ip_hl << 2; if (off < (int)sizeof(struct ip)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); log = 1; goto done; } pd.src = (struct pf_addr *)&h->ip_src; pd.dst = (struct pf_addr *)&h->ip_dst; pd.sport = pd.dport = NULL; pd.ip_sum = &h->ip_sum; pd.proto_sum = NULL; pd.proto = h->ip_p; pd.dir = dir; pd.sidx = (dir == PF_IN) ? 0 : 1; pd.didx = (dir == PF_IN) ? 1 : 0; pd.af = AF_INET; pd.tos = h->ip_tos; pd.tot_len = ntohs(h->ip_len); /* handle fragments that didn't get reassembled by normalization */ if (h->ip_off & htons(IP_MF | IP_OFFMASK)) { action = pf_test_fragment(&r, dir, kif, m, h, &pd, &a, &ruleset); goto done; } switch (h->ip_p) { case IPPROTO_TCP: { struct tcphdr th; pd.hdr.tcp = &th; if (!pf_pull_hdr(m, off, &th, sizeof(th), &action, &reason, AF_INET)) { log = action != PF_PASS; goto done; } pd.p_len = pd.tot_len - off - (th.th_off << 2); if ((th.th_flags & TH_ACK) && pd.p_len == 0) pqid = 1; action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); if (action == PF_DROP) goto done; action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } case IPPROTO_UDP: { struct udphdr uh; pd.hdr.udp = &uh; if (!pf_pull_hdr(m, off, &uh, sizeof(uh), &action, &reason, AF_INET)) { log = action != PF_PASS; goto done; } if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); goto done; } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } case IPPROTO_ICMP: { struct icmp ih; pd.hdr.icmp = &ih; if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN, &action, &reason, AF_INET)) { log = action != PF_PASS; goto done; } action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = 
s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } #ifdef INET6 case IPPROTO_ICMPV6: { action = PF_DROP; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping IPv4 packet with ICMPv6 payload\n")); goto done; } #endif default: action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } done: PF_RULES_RUNLOCK(); if (action == PF_PASS && h->ip_hl > 5 && !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); log = r->log; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping packet with ip options\n")); } if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); } if (r->rtableid >= 0) M_SETFIB(m, r->rtableid); #ifdef ALTQ if (action == PF_PASS && r->qid) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); } else { if (s != NULL) pd.pf_mtag->qid_hash = pf_state_hash(s); if (pqid || (pd.tos & IPTOS_LOWDELAY)) pd.pf_mtag->qid = r->pqid; else pd.pf_mtag->qid = r->qid; /* Add hints for ecn. */ pd.pf_mtag->hdr = h; } } #endif /* ALTQ */ /* * connections redirected to loopback should not match sockets * bound specifically to loopback due to security implications, * see tcp_input() and in_pcblookup_listen(). */ if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) m->m_flags |= M_SKIP_FIREWALL; if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL && !PACKET_LOOPED(&pd)) { ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0, sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); if (ipfwtag != NULL) { ((struct ipfw_rule_ref *)(ipfwtag+1))->info = ntohs(r->divert.port); ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir; if (s) PF_STATE_UNLOCK(s); m_tag_prepend(m, ipfwtag); if (m->m_flags & M_FASTFWD_OURS) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); log = 1; DPFPRINTF(PF_DEBUG_MISC, ("pf: failed to allocate tag\n")); } else { pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT; m->m_flags &= ~M_FASTFWD_OURS; } } ip_divert_ptr(*m0, dir == PF_IN ? DIR_IN : DIR_OUT); *m0 = NULL; return (action); } else { /* XXX: ipfw has the same behaviour! 
*/ action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); log = 1; DPFPRINTF(PF_DEBUG_MISC, ("pf: failed to allocate divert tag\n")); } } if (log) { struct pf_rule *lr; if (s != NULL && s->nat_rule.ptr != NULL && s->nat_rule.ptr->log & PF_LOG_ALL) lr = s->nat_rule.ptr; else lr = r; PFLOG_PACKET(kif, m, AF_INET, dir, reason, lr, a, ruleset, &pd, (s == NULL)); } kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++; if (action == PF_PASS || r->action == PF_DROP) { dirndx = (dir == PF_OUT); r->packets[dirndx]++; r->bytes[dirndx] += pd.tot_len; if (a != NULL) { a->packets[dirndx]++; a->bytes[dirndx] += pd.tot_len; } if (s != NULL) { if (s->nat_rule.ptr != NULL) { s->nat_rule.ptr->packets[dirndx]++; s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; } if (s->src_node != NULL) { s->src_node->packets[dirndx]++; s->src_node->bytes[dirndx] += pd.tot_len; } if (s->nat_src_node != NULL) { s->nat_src_node->packets[dirndx]++; s->nat_src_node->bytes[dirndx] += pd.tot_len; } dirndx = (dir == s->direction) ? 0 : 1; s->packets[dirndx]++; s->bytes[dirndx] += pd.tot_len; } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; if (nr != NULL && r == &V_pf_default_rule) tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->src.addr.p.tbl, (s == NULL) ? pd.src : &s->key[(s->direction == PF_IN)]-> addr[(s->direction == PF_OUT)], pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL) ? pd.dst : &s->key[(s->direction == PF_IN)]-> addr[(s->direction == PF_IN)], pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->dst.neg); } switch (action) { case PF_SYNPROXY_DROP: m_freem(*m0); case PF_DEFER: *m0 = NULL; action = PF_PASS; break; case PF_DROP: m_freem(*m0); *m0 = NULL; break; default: /* pf_route() returns unlocked. */ if (r->rt) { pf_route(m0, r, dir, kif->pfik_ifp, s, &pd); return (action); } break; } if (s) PF_STATE_UNLOCK(s); return (action); } #endif /* INET */ #ifdef INET6 int pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) { struct pfi_kif *kif; u_short action, reason = 0, log = 0; struct mbuf *m = *m0, *n = NULL; struct m_tag *mtag; struct ip6_hdr *h = NULL; struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; int off, terminal = 0, dirndx, rh_cnt = 0; int fwdir = dir; M_ASSERTPKTHDR(m); /* Detect packet forwarding. * If the input interface is different from the output interface we're * forwarding. * We do need to be careful about bridges. If the * net.link.bridge.pfil_bridge sysctl is set we can be filtering on a * bridge, so if the input interface is a bridge member and the output * interface is its bridge we're not actually forwarding but bridging. 
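The forwarding test described in that comment, as a predicate: traffic is forwarded when it goes out a different interface than it came in on, unless the input interface is a member of the output bridge, in which case it is being bridged, not forwarded. The struct is a stand-in for the real ifnet, so the field names are illustrative.

#include <stdbool.h>
#include <stddef.h>

struct ifnet_lite {
	void *if_bridge;	/* bridge we are a member of, if any */
	void *if_softc;		/* driver softc; the bridge's own on bridge ifs */
};

static bool
is_forwarding(int dir_out, const struct ifnet_lite *rcvif,
    const struct ifnet_lite *ifp)
{
	if (!dir_out || rcvif == NULL || rcvif == ifp)
		return (false);
	/* input if is a member of the output bridge: bridging, not forwarding */
	if (rcvif->if_bridge != NULL && rcvif->if_bridge == ifp->if_softc)
		return (false);
	return (true);
}

int
main(void)
{
	struct ifnet_lite a = { NULL, NULL }, b = { NULL, NULL };

	return (is_forwarding(1, &a, &b) ? 0 : 1);	/* distinct ifs: forwarding */
}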
*/ if (dir == PF_OUT && m->m_pkthdr.rcvif && ifp != m->m_pkthdr.rcvif && (m->m_pkthdr.rcvif->if_bridge == NULL || m->m_pkthdr.rcvif->if_bridge != ifp->if_softc)) fwdir = PF_FWD; if (!V_pf_status.running) return (PF_PASS); memset(&pd, 0, sizeof(pd)); pd.pf_mtag = pf_find_mtag(m); if (pd.pf_mtag && pd.pf_mtag->flags & PF_TAG_GENERATED) return (PF_PASS); kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) return (PF_PASS); if (m->m_flags & M_SKIP_FIREWALL) return (PF_PASS); PF_RULES_RLOCK(); /* We do IP header normalization and packet reassembly here */ if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } m = *m0; /* pf_normalize messes with m0 */ h = mtod(m, struct ip6_hdr *); #if 1 /* * we do not support jumbogram yet. if we keep going, zero ip6_plen * will do something bad, so drop the packet for now. */ if (htons(h->ip6_plen) == 0) { action = PF_DROP; REASON_SET(&reason, PFRES_NORM); /*XXX*/ goto done; } #endif pd.src = (struct pf_addr *)&h->ip6_src; pd.dst = (struct pf_addr *)&h->ip6_dst; pd.sport = pd.dport = NULL; pd.ip_sum = NULL; pd.proto_sum = NULL; pd.dir = dir; pd.sidx = (dir == PF_IN) ? 0 : 1; pd.didx = (dir == PF_IN) ? 1 : 0; pd.af = AF_INET6; pd.tos = 0; pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); pd.proto = h->ip6_nxt; do { switch (pd.proto) { case IPPROTO_FRAGMENT: action = pf_test_fragment(&r, dir, kif, m, h, &pd, &a, &ruleset); if (action == PF_DROP) REASON_SET(&reason, PFRES_FRAG); goto done; case IPPROTO_ROUTING: { struct ip6_rthdr rthdr; if (rh_cnt++) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 more than one rthdr\n")); action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); log = 1; goto done; } if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL, &reason, pd.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 short rthdr\n")); action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); log = 1; goto done; } if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 rthdr0\n")); action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); log = 1; goto done; } /* FALLTHROUGH */ } case IPPROTO_AH: case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct ip6_ext opt6; if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6), NULL, &reason, pd.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 short opt\n")); action = PF_DROP; log = 1; goto done; } if (pd.proto == IPPROTO_AH) off += (opt6.ip6e_len + 2) * 4; else off += (opt6.ip6e_len + 1) * 8; pd.proto = opt6.ip6e_nxt; /* goto the next header */ break; } default: terminal++; break; } } while (!terminal); /* if there's no routing header, use unmodified mbuf for checksumming */ if (!n) n = m; switch (pd.proto) { case IPPROTO_TCP: { struct tcphdr th; pd.hdr.tcp = &th; if (!pf_pull_hdr(m, off, &th, sizeof(th), &action, &reason, AF_INET6)) { log = action != PF_PASS; goto done; } pd.p_len = pd.tot_len - off - (th.th_off << 2); action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); if (action == PF_DROP) goto done; action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } case IPPROTO_UDP: { struct udphdr uh; 
pd.hdr.udp = &uh; if (!pf_pull_hdr(m, off, &uh, sizeof(uh), &action, &reason, AF_INET6)) { log = action != PF_PASS; goto done; } if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); goto done; } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } case IPPROTO_ICMP: { action = PF_DROP; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping IPv6 packet with ICMPv4 payload\n")); goto done; } case IPPROTO_ICMPV6: { struct icmp6_hdr ih; pd.hdr.icmp6 = &ih; if (!pf_pull_hdr(m, off, &ih, sizeof(ih), &action, &reason, AF_INET6)) { log = action != PF_PASS; goto done; } action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } default: action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } done: PF_RULES_RUNLOCK(); if (n != m) { m_freem(n); n = NULL; } /* handle dangerous IPv6 extension headers. */ if (action == PF_PASS && rh_cnt && !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); log = r->log; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping packet with dangerous v6 headers\n")); } if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); } if (r->rtableid >= 0) M_SETFIB(m, r->rtableid); #ifdef ALTQ if (action == PF_PASS && r->qid) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); } else { if (s != NULL) pd.pf_mtag->qid_hash = pf_state_hash(s); if (pd.tos & IPTOS_LOWDELAY) pd.pf_mtag->qid = r->pqid; else pd.pf_mtag->qid = r->qid; /* Add hints for ecn. */ pd.pf_mtag->hdr = h; } } #endif /* ALTQ */ if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) m->m_flags |= M_SKIP_FIREWALL; /* XXX: Anybody working on it?! 
*/ if (r->divert.port) printf("pf: divert(9) is not supported for IPv6\n"); if (log) { struct pf_rule *lr; if (s != NULL && s->nat_rule.ptr != NULL && s->nat_rule.ptr->log & PF_LOG_ALL) lr = s->nat_rule.ptr; else lr = r; PFLOG_PACKET(kif, m, AF_INET6, dir, reason, lr, a, ruleset, &pd, (s == NULL)); } kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++; if (action == PF_PASS || r->action == PF_DROP) { dirndx = (dir == PF_OUT); r->packets[dirndx]++; r->bytes[dirndx] += pd.tot_len; if (a != NULL) { a->packets[dirndx]++; a->bytes[dirndx] += pd.tot_len; } if (s != NULL) { if (s->nat_rule.ptr != NULL) { s->nat_rule.ptr->packets[dirndx]++; s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; } if (s->src_node != NULL) { s->src_node->packets[dirndx]++; s->src_node->bytes[dirndx] += pd.tot_len; } if (s->nat_src_node != NULL) { s->nat_src_node->packets[dirndx]++; s->nat_src_node->bytes[dirndx] += pd.tot_len; } dirndx = (dir == s->direction) ? 0 : 1; s->packets[dirndx]++; s->bytes[dirndx] += pd.tot_len; } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; if (nr != NULL && r == &V_pf_default_rule) tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->src.addr.p.tbl, (s == NULL) ? pd.src : &s->key[(s->direction == PF_IN)]->addr[0], pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL) ? pd.dst : &s->key[(s->direction == PF_IN)]->addr[1], pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->dst.neg); } switch (action) { case PF_SYNPROXY_DROP: m_freem(*m0); case PF_DEFER: *m0 = NULL; action = PF_PASS; break; case PF_DROP: m_freem(*m0); *m0 = NULL; break; default: /* pf_route6() returns unlocked. */ if (r->rt) { pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); return (action); } break; } if (s) PF_STATE_UNLOCK(s); /* If reassembled packet passed, create new fragments. */ if (action == PF_PASS && *m0 && fwdir == PF_FWD && (mtag = m_tag_find(m, PF_REASSEMBLED, NULL)) != NULL) action = pf_refragment6(ifp, m0, mtag); return (action); } #endif /* INET6 */ Index: projects/powernv/netsmb/smb_iod.c =================================================================== --- projects/powernv/netsmb/smb_iod.c (revision 290990) +++ projects/powernv/netsmb/smb_iod.c (revision 290991) @@ -1,715 +1,717 @@ /*- * Copyright (c) 2000-2001 Boris Popov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SMBIOD_SLEEP_TIMO 2 #define SMBIOD_PING_TIMO 60 /* seconds */ #define SMB_IOD_EVLOCKPTR(iod) (&((iod)->iod_evlock)) #define SMB_IOD_EVLOCK(iod) smb_sl_lock(&((iod)->iod_evlock)) #define SMB_IOD_EVUNLOCK(iod) smb_sl_unlock(&((iod)->iod_evlock)) #define SMB_IOD_RQLOCKPTR(iod) (&((iod)->iod_rqlock)) #define SMB_IOD_RQLOCK(iod) smb_sl_lock(&((iod)->iod_rqlock)) #define SMB_IOD_RQUNLOCK(iod) smb_sl_unlock(&((iod)->iod_rqlock)) #define smb_iod_wakeup(iod) wakeup(&(iod)->iod_flags) static MALLOC_DEFINE(M_SMBIOD, "SMBIOD", "SMB network io daemon"); static int smb_iod_next; static int smb_iod_sendall(struct smbiod *iod); static int smb_iod_disconnect(struct smbiod *iod); static void smb_iod_thread(void *); static __inline void smb_iod_rqprocessed(struct smb_rq *rqp, int error) { SMBRQ_SLOCK(rqp); rqp->sr_lerror = error; rqp->sr_rpgen++; rqp->sr_state = SMBRQ_NOTIFIED; wakeup(&rqp->sr_state); SMBRQ_SUNLOCK(rqp); } static void smb_iod_invrq(struct smbiod *iod) { struct smb_rq *rqp; /* * Invalidate all outstanding requests for this connection */ SMB_IOD_RQLOCK(iod); TAILQ_FOREACH(rqp, &iod->iod_rqlist, sr_link) { rqp->sr_flags |= SMBR_RESTART; smb_iod_rqprocessed(rqp, ENOTCONN); } SMB_IOD_RQUNLOCK(iod); } static void smb_iod_closetran(struct smbiod *iod) { struct smb_vc *vcp = iod->iod_vc; struct thread *td = iod->iod_td; if (vcp->vc_tdata == NULL) return; SMB_TRAN_DISCONNECT(vcp, td); SMB_TRAN_DONE(vcp, td); vcp->vc_tdata = NULL; } static void smb_iod_dead(struct smbiod *iod) { iod->iod_state = SMBIOD_ST_DEAD; smb_iod_closetran(iod); smb_iod_invrq(iod); } static int smb_iod_connect(struct smbiod *iod) { struct smb_vc *vcp = iod->iod_vc; struct thread *td = iod->iod_td; int error; SMBIODEBUG("%d\n", iod->iod_state); switch(iod->iod_state) { case SMBIOD_ST_VCACTIVE: SMBERROR("called for already opened connection\n"); return EISCONN; case SMBIOD_ST_DEAD: return ENOTCONN; /* XXX: last error code ? 
*/ default: break; } vcp->vc_genid++; error = 0; error = (int)SMB_TRAN_CREATE(vcp, td); if (error) goto fail; SMBIODEBUG("tcreate\n"); if (vcp->vc_laddr) { error = (int)SMB_TRAN_BIND(vcp, vcp->vc_laddr, td); if (error) goto fail; } SMBIODEBUG("tbind\n"); error = (int)SMB_TRAN_CONNECT(vcp, vcp->vc_paddr, td); if (error) goto fail; SMB_TRAN_SETPARAM(vcp, SMBTP_SELECTID, &iod->iod_flags); iod->iod_state = SMBIOD_ST_TRANACTIVE; SMBIODEBUG("tconnect\n"); /* vcp->vc_mid = 0;*/ error = (int)smb_smb_negotiate(vcp, &iod->iod_scred); if (error) goto fail; SMBIODEBUG("snegotiate\n"); error = (int)smb_smb_ssnsetup(vcp, &iod->iod_scred); if (error) goto fail; iod->iod_state = SMBIOD_ST_VCACTIVE; SMBIODEBUG("completed\n"); smb_iod_invrq(iod); return (0); fail: smb_iod_dead(iod); return (error); } static int smb_iod_disconnect(struct smbiod *iod) { struct smb_vc *vcp = iod->iod_vc; SMBIODEBUG("\n"); if (iod->iod_state == SMBIOD_ST_VCACTIVE) { smb_smb_ssnclose(vcp, &iod->iod_scred); iod->iod_state = SMBIOD_ST_TRANACTIVE; } vcp->vc_smbuid = SMB_UID_UNKNOWN; smb_iod_closetran(iod); iod->iod_state = SMBIOD_ST_NOTCONN; return 0; } static int smb_iod_treeconnect(struct smbiod *iod, struct smb_share *ssp) { int error; if (iod->iod_state != SMBIOD_ST_VCACTIVE) { if (iod->iod_state != SMBIOD_ST_DEAD) return ENOTCONN; iod->iod_state = SMBIOD_ST_RECONNECT; error = smb_iod_connect(iod); if (error) return error; } SMBIODEBUG("tree reconnect\n"); SMBS_ST_LOCK(ssp); ssp->ss_flags |= SMBS_RECONNECTING; SMBS_ST_UNLOCK(ssp); error = smb_smb_treeconnect(ssp, &iod->iod_scred); SMBS_ST_LOCK(ssp); ssp->ss_flags &= ~SMBS_RECONNECTING; SMBS_ST_UNLOCK(ssp); wakeup(&ssp->ss_vcgenid); return error; } static int smb_iod_sendrq(struct smbiod *iod, struct smb_rq *rqp) { struct thread *td = iod->iod_td; struct smb_vc *vcp = iod->iod_vc; struct smb_share *ssp = rqp->sr_share; struct mbuf *m; int error; SMBIODEBUG("iod_state = %d\n", iod->iod_state); switch (iod->iod_state) { case SMBIOD_ST_NOTCONN: smb_iod_rqprocessed(rqp, ENOTCONN); return 0; case SMBIOD_ST_DEAD: iod->iod_state = SMBIOD_ST_RECONNECT; return 0; case SMBIOD_ST_RECONNECT: return 0; default: break; } if (rqp->sr_sendcnt == 0) { #ifdef movedtoanotherplace if (vcp->vc_maxmux != 0 && iod->iod_muxcnt >= vcp->vc_maxmux) return 0; #endif le16enc(rqp->sr_rqtid, ssp ? ssp->ss_tid : SMB_TID_UNKNOWN); le16enc(rqp->sr_rquid, vcp ? vcp->vc_smbuid : 0); mb_fixhdr(&rqp->sr_rq); if (vcp->vc_hflags2 & SMB_FLAGS2_SECURITY_SIGNATURE) smb_rq_sign(rqp); } if (rqp->sr_sendcnt++ > 5) { rqp->sr_flags |= SMBR_RESTART; smb_iod_rqprocessed(rqp, rqp->sr_lerror); /* * If all attempts to send a request failed, then * something is seriously hosed. 
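The bounded-retry logic above gives up after five transmissions and flags the request for restart. A minimal standalone sketch of the same pattern, with hypothetical xmit() and is_fatal() stand-ins for the transport calls:

    #include <errno.h>

    #define MAX_SEND_ATTEMPTS 5
    #define RQ_RESTART        0x01

    struct rq {
            int attempts;
            int flags;
    };

    /* Hypothetical transport send: 0 on success, an errno value otherwise. */
    extern int xmit(struct rq *rq);
    /* Hypothetical test for errors that no retry can fix. */
    extern int is_fatal(int error);

    static int
    send_with_retry(struct rq *rq)
    {
            int error;

            while (rq->attempts++ < MAX_SEND_ATTEMPTS) {
                    error = xmit(rq);
                    if (error == 0)
                            return (0);     /* sent; reply handled elsewhere */
                    if (is_fatal(error))
                            break;          /* no point in retrying */
            }
            rq->flags |= RQ_RESTART;        /* mirrors SMBR_RESTART above */
            return (ENOTCONN);
    }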
*/ return ENOTCONN; } SMBSDEBUG("M:%04x, P:%04x, U:%04x, T:%04x\n", rqp->sr_mid, 0, 0, 0); m_dumpm(rqp->sr_rq.mb_top); m = m_copym(rqp->sr_rq.mb_top, 0, M_COPYALL, M_WAITOK); error = rqp->sr_lerror = SMB_TRAN_SEND(vcp, m, td); if (error == 0) { getnanotime(&rqp->sr_timesent); iod->iod_lastrqsent = rqp->sr_timesent; rqp->sr_flags |= SMBR_SENT; rqp->sr_state = SMBRQ_SENT; return 0; } /* * Check for fatal errors */ if (SMB_TRAN_FATAL(vcp, error)) { /* * No further attempts should be made */ return ENOTCONN; } if (smb_rq_intr(rqp)) smb_iod_rqprocessed(rqp, EINTR); return 0; } /* * Process incoming packets */ static int smb_iod_recvall(struct smbiod *iod) { struct smb_vc *vcp = iod->iod_vc; struct thread *td = iod->iod_td; struct smb_rq *rqp; struct mbuf *m; u_char *hp; u_short mid; int error; switch (iod->iod_state) { case SMBIOD_ST_NOTCONN: case SMBIOD_ST_DEAD: case SMBIOD_ST_RECONNECT: return 0; default: break; } for (;;) { m = NULL; error = SMB_TRAN_RECV(vcp, &m, td); if (error == EWOULDBLOCK) break; if (SMB_TRAN_FATAL(vcp, error)) { smb_iod_dead(iod); break; } if (error) break; if (m == NULL) { SMBERROR("tran return NULL without error\n"); error = EPIPE; continue; } m = m_pullup(m, SMB_HDRLEN); if (m == NULL) continue; /* wait for a good packet */ /* * Now we got an entire and possibly invalid SMB packet. * Be careful while parsing it. */ m_dumpm(m); hp = mtod(m, u_char*); if (bcmp(hp, SMB_SIGNATURE, SMB_SIGLEN) != 0) { m_freem(m); continue; } mid = SMB_HDRMID(hp); SMBSDEBUG("mid %04x\n", (u_int)mid); SMB_IOD_RQLOCK(iod); TAILQ_FOREACH(rqp, &iod->iod_rqlist, sr_link) { if (rqp->sr_mid != mid) continue; SMBRQ_SLOCK(rqp); if (rqp->sr_rp.md_top == NULL) { md_initm(&rqp->sr_rp, m); } else { if (rqp->sr_flags & SMBR_MULTIPACKET) { md_append_record(&rqp->sr_rp, m); } else { SMBRQ_SUNLOCK(rqp); SMBERROR("duplicate response %d (ignored)\n", mid); break; } } SMBRQ_SUNLOCK(rqp); smb_iod_rqprocessed(rqp, 0); break; } SMB_IOD_RQUNLOCK(iod); if (rqp == NULL) { SMBERROR("drop resp with mid %d\n", (u_int)mid); /* smb_printrqlist(vcp);*/ m_freem(m); } } /* * check for interrupts */ SMB_IOD_RQLOCK(iod); TAILQ_FOREACH(rqp, &iod->iod_rqlist, sr_link) { if (smb_td_intr(rqp->sr_cred->scr_td)) { smb_iod_rqprocessed(rqp, EINTR); } } SMB_IOD_RQUNLOCK(iod); return 0; } int smb_iod_request(struct smbiod *iod, int event, void *ident) { struct smbiod_event *evp; int error; SMBIODEBUG("\n"); evp = smb_zmalloc(sizeof(*evp), M_SMBIOD, M_WAITOK); evp->ev_type = event; evp->ev_ident = ident; SMB_IOD_EVLOCK(iod); STAILQ_INSERT_TAIL(&iod->iod_evlist, evp, ev_link); if ((event & SMBIOD_EV_SYNC) == 0) { SMB_IOD_EVUNLOCK(iod); smb_iod_wakeup(iod); return 0; } smb_iod_wakeup(iod); msleep(evp, SMB_IOD_EVLOCKPTR(iod), PWAIT | PDROP, "90evw", 0); error = evp->ev_error; free(evp, M_SMBIOD); return error; } /* * Place request in the queue. * Request from smbiod have a high priority. 
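The priority described in the comment above is implemented purely by queue position: requests issued by the iod thread itself are inserted at the head of the request list, everything else at the tail. The queue(3) idiom in isolation, with a hypothetical element type:

    #include <sys/queue.h>

    struct req {
            int internal;           /* nonzero: issued by the daemon itself */
            TAILQ_ENTRY(req) link;
    };
    TAILQ_HEAD(reqlist, req);

    /* Internal requests jump the queue; ordinary ones wait their turn. */
    static void
    enqueue(struct reqlist *q, struct req *r)
    {
            if (r->internal)
                    TAILQ_INSERT_HEAD(q, r, link);
            else
                    TAILQ_INSERT_TAIL(q, r, link);
    }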
*/ int smb_iod_addrq(struct smb_rq *rqp) { struct smb_vc *vcp = rqp->sr_vc; struct smbiod *iod = vcp->vc_iod; int error; SMBIODEBUG("\n"); if (rqp->sr_cred->scr_td != NULL && rqp->sr_cred->scr_td->td_proc == iod->iod_p) { rqp->sr_flags |= SMBR_INTERNAL; SMB_IOD_RQLOCK(iod); TAILQ_INSERT_HEAD(&iod->iod_rqlist, rqp, sr_link); SMB_IOD_RQUNLOCK(iod); for (;;) { if (smb_iod_sendrq(iod, rqp) != 0) { smb_iod_dead(iod); break; } /* * we don't need to lock state field here */ if (rqp->sr_state != SMBRQ_NOTSENT) break; tsleep(&iod->iod_flags, PWAIT, "90sndw", hz); } if (rqp->sr_lerror) smb_iod_removerq(rqp); return rqp->sr_lerror; } switch (iod->iod_state) { case SMBIOD_ST_NOTCONN: return ENOTCONN; case SMBIOD_ST_DEAD: error = smb_iod_request(vcp->vc_iod, SMBIOD_EV_CONNECT | SMBIOD_EV_SYNC, NULL); if (error) return error; return EXDEV; default: break; } SMB_IOD_RQLOCK(iod); for (;;) { if (vcp->vc_maxmux == 0) { SMBERROR("maxmux == 0\n"); break; } if (iod->iod_muxcnt < vcp->vc_maxmux) break; iod->iod_muxwant++; msleep(&iod->iod_muxwant, SMB_IOD_RQLOCKPTR(iod), PWAIT, "90mux", 0); } iod->iod_muxcnt++; TAILQ_INSERT_TAIL(&iod->iod_rqlist, rqp, sr_link); SMB_IOD_RQUNLOCK(iod); smb_iod_wakeup(iod); return 0; } int smb_iod_removerq(struct smb_rq *rqp) { struct smb_vc *vcp = rqp->sr_vc; struct smbiod *iod = vcp->vc_iod; SMBIODEBUG("\n"); if (rqp->sr_flags & SMBR_INTERNAL) { SMB_IOD_RQLOCK(iod); TAILQ_REMOVE(&iod->iod_rqlist, rqp, sr_link); SMB_IOD_RQUNLOCK(iod); return 0; } SMB_IOD_RQLOCK(iod); while (rqp->sr_flags & SMBR_XLOCK) { rqp->sr_flags |= SMBR_XLOCKWANT; msleep(rqp, SMB_IOD_RQLOCKPTR(iod), PWAIT, "90xrm", 0); } TAILQ_REMOVE(&iod->iod_rqlist, rqp, sr_link); iod->iod_muxcnt--; if (iod->iod_muxwant) { iod->iod_muxwant--; wakeup(&iod->iod_muxwant); } SMB_IOD_RQUNLOCK(iod); return 0; } int smb_iod_waitrq(struct smb_rq *rqp) { struct smbiod *iod = rqp->sr_vc->vc_iod; int error; SMBIODEBUG("\n"); if (rqp->sr_flags & SMBR_INTERNAL) { for (;;) { smb_iod_sendall(iod); smb_iod_recvall(iod); if (rqp->sr_rpgen != rqp->sr_rplast) break; tsleep(&iod->iod_flags, PWAIT, "90irq", hz); } smb_iod_removerq(rqp); return rqp->sr_lerror; } SMBRQ_SLOCK(rqp); if (rqp->sr_rpgen == rqp->sr_rplast) msleep(&rqp->sr_state, SMBRQ_SLOCKPTR(rqp), PWAIT, "90wrq", 0); rqp->sr_rplast++; SMBRQ_SUNLOCK(rqp); error = rqp->sr_lerror; if (rqp->sr_flags & SMBR_MULTIPACKET) { /* * If request should stay in the list, then reinsert it * at the end of queue so other waiters have chance to concur */ SMB_IOD_RQLOCK(iod); TAILQ_REMOVE(&iod->iod_rqlist, rqp, sr_link); TAILQ_INSERT_TAIL(&iod->iod_rqlist, rqp, sr_link); SMB_IOD_RQUNLOCK(iod); } else smb_iod_removerq(rqp); return error; } static int smb_iod_sendall(struct smbiod *iod) { struct smb_vc *vcp = iod->iod_vc; struct smb_rq *rqp; struct timespec ts, tstimeout; int herror; herror = 0; /* * Loop through the list of requests and send them if possible */ SMB_IOD_RQLOCK(iod); TAILQ_FOREACH(rqp, &iod->iod_rqlist, sr_link) { switch (rqp->sr_state) { case SMBRQ_NOTSENT: rqp->sr_flags |= SMBR_XLOCK; SMB_IOD_RQUNLOCK(iod); herror = smb_iod_sendrq(iod, rqp); SMB_IOD_RQLOCK(iod); rqp->sr_flags &= ~SMBR_XLOCK; if (rqp->sr_flags & SMBR_XLOCKWANT) { rqp->sr_flags &= ~SMBR_XLOCKWANT; wakeup(rqp); } break; case SMBRQ_SENT: SMB_TRAN_GETPARAM(vcp, SMBTP_TIMEOUT, &tstimeout); timespecadd(&tstimeout, &tstimeout); getnanotime(&ts); timespecsub(&ts, &tstimeout); if (timespeccmp(&ts, &rqp->sr_timesent, >)) { smb_iod_rqprocessed(rqp, ETIMEDOUT); } break; default: break; } if (herror) break; } SMB_IOD_RQUNLOCK(iod); 
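In the SMBRQ_SENT arm above, the transport timeout is doubled with timespecadd() and compared against the time the request went out. That deadline test, reduced to one helper using the same legacy two-argument timespec macros this file relies on (later FreeBSD switched to three-argument forms):

    /* Kernel-context sketch: has 'sent' aged past twice 'timeout'? */
    static int
    request_timed_out(const struct timespec *sent, struct timespec timeout)
    {
            struct timespec now;

            timespecadd(&timeout, &timeout);        /* timeout *= 2 */
            getnanotime(&now);
            timespecsub(&now, &timeout);            /* now -= 2 * timeout */
            return (timespeccmp(&now, sent, >));    /* deadline passed? */
    }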
if (herror == ENOTCONN) smb_iod_dead(iod); return 0; } /* * "main" function for smbiod daemon */ static __inline void smb_iod_main(struct smbiod *iod) { /* struct smb_vc *vcp = iod->iod_vc;*/ struct smbiod_event *evp; /* struct timespec tsnow;*/ int error; SMBIODEBUG("\n"); error = 0; /* * Check all interesting events */ for (;;) { SMB_IOD_EVLOCK(iod); evp = STAILQ_FIRST(&iod->iod_evlist); if (evp == NULL) { SMB_IOD_EVUNLOCK(iod); break; } STAILQ_REMOVE_HEAD(&iod->iod_evlist, ev_link); evp->ev_type |= SMBIOD_EV_PROCESSING; SMB_IOD_EVUNLOCK(iod); switch (evp->ev_type & SMBIOD_EV_MASK) { case SMBIOD_EV_CONNECT: iod->iod_state = SMBIOD_ST_RECONNECT; evp->ev_error = smb_iod_connect(iod); break; case SMBIOD_EV_DISCONNECT: evp->ev_error = smb_iod_disconnect(iod); break; case SMBIOD_EV_TREECONNECT: evp->ev_error = smb_iod_treeconnect(iod, evp->ev_ident); break; case SMBIOD_EV_SHUTDOWN: iod->iod_flags |= SMBIOD_SHUTDOWN; break; case SMBIOD_EV_NEWRQ: break; } if (evp->ev_type & SMBIOD_EV_SYNC) { SMB_IOD_EVLOCK(iod); wakeup(evp); SMB_IOD_EVUNLOCK(iod); } else free(evp, M_SMBIOD); } #if 0 if (iod->iod_state == SMBIOD_ST_VCACTIVE) { getnanotime(&tsnow); timespecsub(&tsnow, &iod->iod_pingtimo); if (timespeccmp(&tsnow, &iod->iod_lastrqsent, >)) { smb_smb_echo(vcp, &iod->iod_scred); } } #endif smb_iod_sendall(iod); smb_iod_recvall(iod); return; } void smb_iod_thread(void *arg) { struct smbiod *iod = arg; mtx_lock(&Giant); /* * Here we assume that the thread structure will be the same * for an entire kthread (kproc, to be more precise) life. */ iod->iod_td = curthread; smb_makescred(&iod->iod_scred, iod->iod_td, NULL); while ((iod->iod_flags & SMBIOD_SHUTDOWN) == 0) { smb_iod_main(iod); SMBIODEBUG("going to sleep for %d ticks\n", iod->iod_sleeptimo); if (iod->iod_flags & SMBIOD_SHUTDOWN) break; tsleep(&iod->iod_flags, PWAIT, "90idle", iod->iod_sleeptimo); } + + /* We can now safely destroy the mutexes and free the iod structure. */ + smb_sl_destroy(&iod->iod_rqlock); + smb_sl_destroy(&iod->iod_evlock); + free(iod, M_SMBIOD); mtx_unlock(&Giant); kproc_exit(0); } int smb_iod_create(struct smb_vc *vcp) { struct smbiod *iod; int error; iod = smb_zmalloc(sizeof(*iod), M_SMBIOD, M_WAITOK); iod->iod_id = smb_iod_next++; iod->iod_state = SMBIOD_ST_NOTCONN; iod->iod_vc = vcp; iod->iod_sleeptimo = hz * SMBIOD_SLEEP_TIMO; iod->iod_pingtimo.tv_sec = SMBIOD_PING_TIMO; getnanotime(&iod->iod_lastrqsent); vcp->vc_iod = iod; smb_sl_init(&iod->iod_rqlock, "90rql"); TAILQ_INIT(&iod->iod_rqlist); smb_sl_init(&iod->iod_evlock, "90evl"); STAILQ_INIT(&iod->iod_evlist); error = kproc_create(smb_iod_thread, iod, &iod->iod_p, RFNOWAIT, 0, "smbiod%d", iod->iod_id); if (error) { SMBERROR("can't start smbiod: %d", error); free(iod, M_SMBIOD); return error; } return 0; } int smb_iod_destroy(struct smbiod *iod) { smb_iod_request(iod, SMBIOD_EV_SHUTDOWN | SMBIOD_EV_SYNC, NULL); - smb_sl_destroy(&iod->iod_rqlock); - smb_sl_destroy(&iod->iod_evlock); - free(iod, M_SMBIOD); return 0; } int smb_iod_init(void) { return 0; } int smb_iod_done(void) { return 0; } Index: projects/powernv/opencrypto/xform.c =================================================================== --- projects/powernv/opencrypto/xform.c (revision 290990) +++ projects/powernv/opencrypto/xform.c (revision 290991) @@ -1,985 +1,985 @@ /* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */ /*- * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. 
Keromytis (kermit@csd.uch.gr), * Niels Provos (provos@physnet.uni-hamburg.de) and * Damien Miller (djm@mindrot.org). * * This code was written by John Ioannidis for BSD/OS in Athens, Greece, * in November 1995. * * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, * by Angelos D. Keromytis. * * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis * and Niels Provos. * * Additional features in 1999 by Angelos D. Keromytis. * * AES XTS implementation in 2008 by Damien Miller * * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, * Angelos D. Keromytis and Niels Provos. * * Copyright (C) 2001, Angelos D. Keromytis. * * Copyright (C) 2008, Damien Miller * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by John-Mark Gurney * under sponsorship of the FreeBSD Foundation and * Rubicon Communications, LLC (Netgate). * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all copies of any software which is or includes a copy or * modification of this software. * You may use this code under the GNU public license if you so wish. Please * contribute changes back to the authors under this freer than GPL license * so that we may further the use of strong encryption without limitations to * all. * * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR * PURPOSE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int null_setkey(u_int8_t **, u_int8_t *, int); static int des1_setkey(u_int8_t **, u_int8_t *, int); static int des3_setkey(u_int8_t **, u_int8_t *, int); static int blf_setkey(u_int8_t **, u_int8_t *, int); static int cast5_setkey(u_int8_t **, u_int8_t *, int); static int skipjack_setkey(u_int8_t **, u_int8_t *, int); static int rijndael128_setkey(u_int8_t **, u_int8_t *, int); static int aes_icm_setkey(u_int8_t **, u_int8_t *, int); static int aes_xts_setkey(u_int8_t **, u_int8_t *, int); static int cml_setkey(u_int8_t **, u_int8_t *, int); static void null_encrypt(caddr_t, u_int8_t *); static void des1_encrypt(caddr_t, u_int8_t *); static void des3_encrypt(caddr_t, u_int8_t *); static void blf_encrypt(caddr_t, u_int8_t *); static void cast5_encrypt(caddr_t, u_int8_t *); static void skipjack_encrypt(caddr_t, u_int8_t *); static void rijndael128_encrypt(caddr_t, u_int8_t *); static void aes_xts_encrypt(caddr_t, u_int8_t *); static void cml_encrypt(caddr_t, u_int8_t *); static void null_decrypt(caddr_t, u_int8_t *); static void des1_decrypt(caddr_t, u_int8_t *); static void des3_decrypt(caddr_t, u_int8_t *); static void blf_decrypt(caddr_t, u_int8_t *); static void cast5_decrypt(caddr_t, u_int8_t *); static void skipjack_decrypt(caddr_t, u_int8_t *); static void rijndael128_decrypt(caddr_t, u_int8_t *); static void aes_xts_decrypt(caddr_t, u_int8_t *); static void cml_decrypt(caddr_t, u_int8_t *); static void aes_icm_crypt(caddr_t, u_int8_t *); static void null_zerokey(u_int8_t **); static void des1_zerokey(u_int8_t **); static void des3_zerokey(u_int8_t **); static void blf_zerokey(u_int8_t **); static void 
cast5_zerokey(u_int8_t **); static void skipjack_zerokey(u_int8_t **); static void rijndael128_zerokey(u_int8_t **); static void aes_icm_zerokey(u_int8_t **); static void aes_xts_zerokey(u_int8_t **); static void cml_zerokey(u_int8_t **); static void aes_icm_reinit(caddr_t, u_int8_t *); static void aes_xts_reinit(caddr_t, u_int8_t *); static void aes_gcm_reinit(caddr_t, u_int8_t *); static void null_init(void *); static void null_reinit(void *ctx, const u_int8_t *buf, u_int16_t len); static int null_update(void *, const u_int8_t *, u_int16_t); static void null_final(u_int8_t *, void *); static int MD5Update_int(void *, const u_int8_t *, u_int16_t); static void SHA1Init_int(void *); static int SHA1Update_int(void *, const u_int8_t *, u_int16_t); static void SHA1Final_int(u_int8_t *, void *); static int RMD160Update_int(void *, const u_int8_t *, u_int16_t); static int SHA256Update_int(void *, const u_int8_t *, u_int16_t); static int SHA384Update_int(void *, const u_int8_t *, u_int16_t); static int SHA512Update_int(void *, const u_int8_t *, u_int16_t); static u_int32_t deflate_compress(u_int8_t *, u_int32_t, u_int8_t **); static u_int32_t deflate_decompress(u_int8_t *, u_int32_t, u_int8_t **); #define AESICM_BLOCKSIZE AES_BLOCK_LEN struct aes_icm_ctx { u_int32_t ac_ek[4*(RIJNDAEL_MAXNR + 1)]; /* ac_block is initalized to IV */ u_int8_t ac_block[AESICM_BLOCKSIZE]; int ac_nr; }; MALLOC_DEFINE(M_XDATA, "xform", "xform data buffers"); /* Encryption instances */ struct enc_xform enc_xform_null = { CRYPTO_NULL_CBC, "NULL", /* NB: blocksize of 4 is to generate a properly aligned ESP header */ - NULL_BLOCK_LEN, NULL_BLOCK_LEN, NULL_MIN_KEY, NULL_MAX_KEY, + NULL_BLOCK_LEN, 0, NULL_MIN_KEY, NULL_MAX_KEY, null_encrypt, null_decrypt, null_setkey, null_zerokey, NULL, }; struct enc_xform enc_xform_des = { CRYPTO_DES_CBC, "DES", DES_BLOCK_LEN, DES_BLOCK_LEN, DES_MIN_KEY, DES_MAX_KEY, des1_encrypt, des1_decrypt, des1_setkey, des1_zerokey, NULL, }; struct enc_xform enc_xform_3des = { CRYPTO_3DES_CBC, "3DES", DES3_BLOCK_LEN, DES3_BLOCK_LEN, TRIPLE_DES_MIN_KEY, TRIPLE_DES_MAX_KEY, des3_encrypt, des3_decrypt, des3_setkey, des3_zerokey, NULL, }; struct enc_xform enc_xform_blf = { CRYPTO_BLF_CBC, "Blowfish", BLOWFISH_BLOCK_LEN, BLOWFISH_BLOCK_LEN, BLOWFISH_MIN_KEY, BLOWFISH_MAX_KEY, blf_encrypt, blf_decrypt, blf_setkey, blf_zerokey, NULL, }; struct enc_xform enc_xform_cast5 = { CRYPTO_CAST_CBC, "CAST-128", CAST128_BLOCK_LEN, CAST128_BLOCK_LEN, CAST_MIN_KEY, CAST_MAX_KEY, cast5_encrypt, cast5_decrypt, cast5_setkey, cast5_zerokey, NULL, }; struct enc_xform enc_xform_skipjack = { CRYPTO_SKIPJACK_CBC, "Skipjack", SKIPJACK_BLOCK_LEN, SKIPJACK_BLOCK_LEN, SKIPJACK_MIN_KEY, SKIPJACK_MAX_KEY, skipjack_encrypt, skipjack_decrypt, skipjack_setkey, skipjack_zerokey, NULL, }; struct enc_xform enc_xform_rijndael128 = { CRYPTO_RIJNDAEL128_CBC, "Rijndael-128/AES", RIJNDAEL128_BLOCK_LEN, RIJNDAEL128_BLOCK_LEN, RIJNDAEL_MIN_KEY, RIJNDAEL_MAX_KEY, rijndael128_encrypt, rijndael128_decrypt, rijndael128_setkey, rijndael128_zerokey, NULL, }; struct enc_xform enc_xform_aes_icm = { CRYPTO_AES_ICM, "AES-ICM", AES_BLOCK_LEN, AES_BLOCK_LEN, AES_MIN_KEY, AES_MAX_KEY, aes_icm_crypt, aes_icm_crypt, aes_icm_setkey, rijndael128_zerokey, aes_icm_reinit, }; struct enc_xform enc_xform_aes_nist_gcm = { CRYPTO_AES_NIST_GCM_16, "AES-GCM", AES_ICM_BLOCK_LEN, AES_GCM_IV_LEN, AES_MIN_KEY, AES_MAX_KEY, aes_icm_crypt, aes_icm_crypt, aes_icm_setkey, aes_icm_zerokey, aes_gcm_reinit, }; struct enc_xform enc_xform_aes_nist_gmac = { CRYPTO_AES_NIST_GMAC, 
"AES-GMAC", AES_ICM_BLOCK_LEN, AES_GCM_IV_LEN, AES_MIN_KEY, AES_MAX_KEY, NULL, NULL, NULL, NULL, NULL, }; struct enc_xform enc_xform_aes_xts = { CRYPTO_AES_XTS, "AES-XTS", AES_BLOCK_LEN, AES_XTS_IV_LEN, AES_XTS_MIN_KEY, AES_XTS_MAX_KEY, aes_xts_encrypt, aes_xts_decrypt, aes_xts_setkey, aes_xts_zerokey, aes_xts_reinit }; struct enc_xform enc_xform_arc4 = { CRYPTO_ARC4, "ARC4", ARC4_BLOCK_LEN, ARC4_IV_LEN, ARC4_MIN_KEY, ARC4_MAX_KEY, NULL, NULL, NULL, NULL, NULL, }; struct enc_xform enc_xform_camellia = { CRYPTO_CAMELLIA_CBC, "Camellia", CAMELLIA_BLOCK_LEN, CAMELLIA_BLOCK_LEN, CAMELLIA_MIN_KEY, CAMELLIA_MAX_KEY, cml_encrypt, cml_decrypt, cml_setkey, cml_zerokey, NULL, }; /* Authentication instances */ struct auth_hash auth_hash_null = { /* NB: context isn't used */ CRYPTO_NULL_HMAC, "NULL-HMAC", NULL_HMAC_KEY_LEN, NULL_HASH_LEN, sizeof(int), NULL_HMAC_BLOCK_LEN, null_init, null_reinit, null_reinit, null_update, null_final }; struct auth_hash auth_hash_hmac_md5 = { CRYPTO_MD5_HMAC, "HMAC-MD5", MD5_HMAC_KEY_LEN, MD5_HASH_LEN, sizeof(MD5_CTX), MD5_HMAC_BLOCK_LEN, (void (*) (void *)) MD5Init, NULL, NULL, MD5Update_int, (void (*) (u_int8_t *, void *)) MD5Final }; struct auth_hash auth_hash_hmac_sha1 = { CRYPTO_SHA1_HMAC, "HMAC-SHA1", SHA1_HMAC_KEY_LEN, SHA1_HASH_LEN, sizeof(SHA1_CTX), SHA1_HMAC_BLOCK_LEN, SHA1Init_int, NULL, NULL, SHA1Update_int, SHA1Final_int }; struct auth_hash auth_hash_hmac_ripemd_160 = { CRYPTO_RIPEMD160_HMAC, "HMAC-RIPEMD-160", RIPEMD160_HMAC_KEY_LEN, RIPEMD160_HASH_LEN, sizeof(RMD160_CTX), RIPEMD160_HMAC_BLOCK_LEN, (void (*)(void *)) RMD160Init, NULL, NULL, RMD160Update_int, (void (*)(u_int8_t *, void *)) RMD160Final }; struct auth_hash auth_hash_key_md5 = { CRYPTO_MD5_KPDK, "Keyed MD5", NULL_HMAC_KEY_LEN, MD5_KPDK_HASH_LEN, sizeof(MD5_CTX), 0, (void (*)(void *)) MD5Init, NULL, NULL, MD5Update_int, (void (*)(u_int8_t *, void *)) MD5Final }; struct auth_hash auth_hash_key_sha1 = { CRYPTO_SHA1_KPDK, "Keyed SHA1", NULL_HMAC_KEY_LEN, SHA1_KPDK_HASH_LEN, sizeof(SHA1_CTX), 0, SHA1Init_int, NULL, NULL, SHA1Update_int, SHA1Final_int }; struct auth_hash auth_hash_hmac_sha2_256 = { CRYPTO_SHA2_256_HMAC, "HMAC-SHA2-256", SHA2_256_HMAC_KEY_LEN, SHA2_256_HASH_LEN, sizeof(SHA256_CTX), SHA2_256_HMAC_BLOCK_LEN, (void (*)(void *)) SHA256_Init, NULL, NULL, SHA256Update_int, (void (*)(u_int8_t *, void *)) SHA256_Final }; struct auth_hash auth_hash_hmac_sha2_384 = { CRYPTO_SHA2_384_HMAC, "HMAC-SHA2-384", SHA2_384_HMAC_KEY_LEN, SHA2_384_HASH_LEN, sizeof(SHA384_CTX), SHA2_384_HMAC_BLOCK_LEN, (void (*)(void *)) SHA384_Init, NULL, NULL, SHA384Update_int, (void (*)(u_int8_t *, void *)) SHA384_Final }; struct auth_hash auth_hash_hmac_sha2_512 = { CRYPTO_SHA2_512_HMAC, "HMAC-SHA2-512", SHA2_512_HMAC_KEY_LEN, SHA2_512_HASH_LEN, sizeof(SHA512_CTX), SHA2_512_HMAC_BLOCK_LEN, (void (*)(void *)) SHA512_Init, NULL, NULL, SHA512Update_int, (void (*)(u_int8_t *, void *)) SHA512_Final }; struct auth_hash auth_hash_nist_gmac_aes_128 = { CRYPTO_AES_128_NIST_GMAC, "GMAC-AES-128", AES_128_GMAC_KEY_LEN, AES_GMAC_HASH_LEN, sizeof(struct aes_gmac_ctx), GMAC_BLOCK_LEN, (void (*)(void *)) AES_GMAC_Init, (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey, (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit, (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update, (void (*)(u_int8_t *, void *)) AES_GMAC_Final }; struct auth_hash auth_hash_nist_gmac_aes_192 = { CRYPTO_AES_192_NIST_GMAC, "GMAC-AES-192", AES_192_GMAC_KEY_LEN, AES_GMAC_HASH_LEN, sizeof(struct aes_gmac_ctx), GMAC_BLOCK_LEN, 
(void (*)(void *)) AES_GMAC_Init, (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey, (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit, (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update, (void (*)(u_int8_t *, void *)) AES_GMAC_Final }; struct auth_hash auth_hash_nist_gmac_aes_256 = { CRYPTO_AES_256_NIST_GMAC, "GMAC-AES-256", AES_256_GMAC_KEY_LEN, AES_GMAC_HASH_LEN, sizeof(struct aes_gmac_ctx), GMAC_BLOCK_LEN, (void (*)(void *)) AES_GMAC_Init, (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey, (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit, (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update, (void (*)(u_int8_t *, void *)) AES_GMAC_Final }; /* Compression instance */ struct comp_algo comp_algo_deflate = { CRYPTO_DEFLATE_COMP, "Deflate", 90, deflate_compress, deflate_decompress }; /* * Encryption wrapper routines. */ static void null_encrypt(caddr_t key, u_int8_t *blk) { } static void null_decrypt(caddr_t key, u_int8_t *blk) { } static int null_setkey(u_int8_t **sched, u_int8_t *key, int len) { *sched = NULL; return 0; } static void null_zerokey(u_int8_t **sched) { *sched = NULL; } static void des1_encrypt(caddr_t key, u_int8_t *blk) { des_cblock *cb = (des_cblock *) blk; des_key_schedule *p = (des_key_schedule *) key; des_ecb_encrypt(cb, cb, p[0], DES_ENCRYPT); } static void des1_decrypt(caddr_t key, u_int8_t *blk) { des_cblock *cb = (des_cblock *) blk; des_key_schedule *p = (des_key_schedule *) key; des_ecb_encrypt(cb, cb, p[0], DES_DECRYPT); } static int des1_setkey(u_int8_t **sched, u_int8_t *key, int len) { des_key_schedule *p; int err; p = malloc(sizeof (des_key_schedule), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (p != NULL) { des_set_key((des_cblock *) key, p[0]); err = 0; } else err = ENOMEM; *sched = (u_int8_t *) p; return err; } static void des1_zerokey(u_int8_t **sched) { bzero(*sched, sizeof (des_key_schedule)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } static void des3_encrypt(caddr_t key, u_int8_t *blk) { des_cblock *cb = (des_cblock *) blk; des_key_schedule *p = (des_key_schedule *) key; des_ecb3_encrypt(cb, cb, p[0], p[1], p[2], DES_ENCRYPT); } static void des3_decrypt(caddr_t key, u_int8_t *blk) { des_cblock *cb = (des_cblock *) blk; des_key_schedule *p = (des_key_schedule *) key; des_ecb3_encrypt(cb, cb, p[0], p[1], p[2], DES_DECRYPT); } static int des3_setkey(u_int8_t **sched, u_int8_t *key, int len) { des_key_schedule *p; int err; p = malloc(3*sizeof (des_key_schedule), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (p != NULL) { des_set_key((des_cblock *)(key + 0), p[0]); des_set_key((des_cblock *)(key + 8), p[1]); des_set_key((des_cblock *)(key + 16), p[2]); err = 0; } else err = ENOMEM; *sched = (u_int8_t *) p; return err; } static void des3_zerokey(u_int8_t **sched) { bzero(*sched, 3*sizeof (des_key_schedule)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } static void blf_encrypt(caddr_t key, u_int8_t *blk) { BF_LONG t[2]; memcpy(t, blk, sizeof (t)); t[0] = ntohl(t[0]); t[1] = ntohl(t[1]); /* NB: BF_encrypt expects the block in host order! */ BF_encrypt(t, (BF_KEY *) key); t[0] = htonl(t[0]); t[1] = htonl(t[1]); memcpy(blk, t, sizeof (t)); } static void blf_decrypt(caddr_t key, u_int8_t *blk) { BF_LONG t[2]; memcpy(t, blk, sizeof (t)); t[0] = ntohl(t[0]); t[1] = ntohl(t[1]); /* NB: BF_decrypt expects the block in host order! 
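The byte-order round trip in these Blowfish wrappers is what keeps the on-the-wire block format stable regardless of host endianness: convert to host order, run the cipher, convert back. The shape of that wrapper, abstracted away from the BF_KEY plumbing:

    #include <stdint.h>
    #include <sys/endian.h>   /* be32toh()/htobe32(); userland could use ntohl()/htonl() */

    /* Apply a host-order transform to an 8-byte big-endian block. */
    static void
    swap_transform_swap(uint32_t t[2], void (*transform)(uint32_t *))
    {
            t[0] = be32toh(t[0]);
            t[1] = be32toh(t[1]);
            transform(t);           /* stand-in for BF_encrypt()/BF_decrypt() */
            t[0] = htobe32(t[0]);
            t[1] = htobe32(t[1]);
    }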
*/ BF_decrypt(t, (BF_KEY *) key); t[0] = htonl(t[0]); t[1] = htonl(t[1]); memcpy(blk, t, sizeof (t)); } static int blf_setkey(u_int8_t **sched, u_int8_t *key, int len) { int err; *sched = malloc(sizeof(BF_KEY), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (*sched != NULL) { BF_set_key((BF_KEY *) *sched, len, key); err = 0; } else err = ENOMEM; return err; } static void blf_zerokey(u_int8_t **sched) { bzero(*sched, sizeof(BF_KEY)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } static void cast5_encrypt(caddr_t key, u_int8_t *blk) { cast_encrypt((cast_key *) key, blk, blk); } static void cast5_decrypt(caddr_t key, u_int8_t *blk) { cast_decrypt((cast_key *) key, blk, blk); } static int cast5_setkey(u_int8_t **sched, u_int8_t *key, int len) { int err; *sched = malloc(sizeof(cast_key), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (*sched != NULL) { cast_setkey((cast_key *)*sched, key, len); err = 0; } else err = ENOMEM; return err; } static void cast5_zerokey(u_int8_t **sched) { bzero(*sched, sizeof(cast_key)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } static void skipjack_encrypt(caddr_t key, u_int8_t *blk) { skipjack_forwards(blk, blk, (u_int8_t **) key); } static void skipjack_decrypt(caddr_t key, u_int8_t *blk) { skipjack_backwards(blk, blk, (u_int8_t **) key); } static int skipjack_setkey(u_int8_t **sched, u_int8_t *key, int len) { int err; /* NB: allocate all the memory that's needed at once */ *sched = malloc(10 * (sizeof(u_int8_t *) + 0x100), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (*sched != NULL) { u_int8_t** key_tables = (u_int8_t**) *sched; u_int8_t* table = (u_int8_t*) &key_tables[10]; int k; for (k = 0; k < 10; k++) { key_tables[k] = table; table += 0x100; } subkey_table_gen(key, (u_int8_t **) *sched); err = 0; } else err = ENOMEM; return err; } static void skipjack_zerokey(u_int8_t **sched) { bzero(*sched, 10 * (sizeof(u_int8_t *) + 0x100)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } static void rijndael128_encrypt(caddr_t key, u_int8_t *blk) { rijndael_encrypt((rijndael_ctx *) key, (u_char *) blk, (u_char *) blk); } static void rijndael128_decrypt(caddr_t key, u_int8_t *blk) { rijndael_decrypt(((rijndael_ctx *) key), (u_char *) blk, (u_char *) blk); } static int rijndael128_setkey(u_int8_t **sched, u_int8_t *key, int len) { int err; if (len != 16 && len != 24 && len != 32) return (EINVAL); *sched = malloc(sizeof(rijndael_ctx), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (*sched != NULL) { rijndael_set_key((rijndael_ctx *) *sched, (u_char *) key, len * 8); err = 0; } else err = ENOMEM; return err; } static void rijndael128_zerokey(u_int8_t **sched) { bzero(*sched, sizeof(rijndael_ctx)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } void aes_icm_reinit(caddr_t key, u_int8_t *iv) { struct aes_icm_ctx *ctx; ctx = (struct aes_icm_ctx *)key; bcopy(iv, ctx->ac_block, AESICM_BLOCKSIZE); } void aes_gcm_reinit(caddr_t key, u_int8_t *iv) { struct aes_icm_ctx *ctx; aes_icm_reinit(key, iv); ctx = (struct aes_icm_ctx *)key; /* GCM starts with 2 as counter 1 is used for final xor of tag. 
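aes_gcm_reinit() below zeroes the low 32 counter bits and starts them at 2, and aes_icm_crypt() then advances the block as a big-endian integer. The ripple-carry increment on its own:

    #define BLKLEN 16   /* AES block size */

    /* Increment a big-endian counter block; the carry ripples toward byte 0. */
    static void
    ctr_increment(unsigned char blk[BLKLEN])
    {
            int i;

            for (i = BLKLEN - 1; i >= 0; i--)
                    if (++blk[i] != 0)      /* stop once a byte doesn't wrap */
                            break;
    }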
*/ bzero(&ctx->ac_block[AESICM_BLOCKSIZE - 4], 4); ctx->ac_block[AESICM_BLOCKSIZE - 1] = 2; } void aes_icm_crypt(caddr_t key, u_int8_t *data) { struct aes_icm_ctx *ctx; u_int8_t keystream[AESICM_BLOCKSIZE]; int i; ctx = (struct aes_icm_ctx *)key; rijndaelEncrypt(ctx->ac_ek, ctx->ac_nr, ctx->ac_block, keystream); for (i = 0; i < AESICM_BLOCKSIZE; i++) data[i] ^= keystream[i]; explicit_bzero(keystream, sizeof(keystream)); /* increment counter */ for (i = AESICM_BLOCKSIZE - 1; i >= 0; i--) if (++ctx->ac_block[i]) /* continue on overflow */ break; } int aes_icm_setkey(u_int8_t **sched, u_int8_t *key, int len) { struct aes_icm_ctx *ctx; if (len != 16 && len != 24 && len != 32) return EINVAL; *sched = malloc(sizeof(struct aes_icm_ctx), M_CRYPTO_DATA, M_NOWAIT | M_ZERO); if (*sched == NULL) return ENOMEM; ctx = (struct aes_icm_ctx *)*sched; ctx->ac_nr = rijndaelKeySetupEnc(ctx->ac_ek, (u_char *)key, len * 8); return 0; } void aes_icm_zerokey(u_int8_t **sched) { bzero(*sched, sizeof(struct aes_icm_ctx)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } #define AES_XTS_BLOCKSIZE 16 #define AES_XTS_IVSIZE 8 #define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */ struct aes_xts_ctx { rijndael_ctx key1; rijndael_ctx key2; u_int8_t tweak[AES_XTS_BLOCKSIZE]; }; void aes_xts_reinit(caddr_t key, u_int8_t *iv) { struct aes_xts_ctx *ctx = (struct aes_xts_ctx *)key; u_int64_t blocknum; u_int i; /* * Prepare tweak as E_k2(IV). IV is specified as LE representation * of a 64-bit block number which we allow to be passed in directly. */ bcopy(iv, &blocknum, AES_XTS_IVSIZE); for (i = 0; i < AES_XTS_IVSIZE; i++) { ctx->tweak[i] = blocknum & 0xff; blocknum >>= 8; } /* Last 64 bits of IV are always zero */ bzero(ctx->tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE); rijndael_encrypt(&ctx->key2, ctx->tweak, ctx->tweak); } static void aes_xts_crypt(struct aes_xts_ctx *ctx, u_int8_t *data, u_int do_encrypt) { u_int8_t block[AES_XTS_BLOCKSIZE]; u_int i, carry_in, carry_out; for (i = 0; i < AES_XTS_BLOCKSIZE; i++) block[i] = data[i] ^ ctx->tweak[i]; if (do_encrypt) rijndael_encrypt(&ctx->key1, block, data); else rijndael_decrypt(&ctx->key1, block, data); for (i = 0; i < AES_XTS_BLOCKSIZE; i++) data[i] ^= ctx->tweak[i]; /* Exponentiate tweak */ carry_in = 0; for (i = 0; i < AES_XTS_BLOCKSIZE; i++) { carry_out = ctx->tweak[i] & 0x80; ctx->tweak[i] = (ctx->tweak[i] << 1) | (carry_in ? 
1 : 0); carry_in = carry_out; } if (carry_in) ctx->tweak[0] ^= AES_XTS_ALPHA; bzero(block, sizeof(block)); } void aes_xts_encrypt(caddr_t key, u_int8_t *data) { aes_xts_crypt((struct aes_xts_ctx *)key, data, 1); } void aes_xts_decrypt(caddr_t key, u_int8_t *data) { aes_xts_crypt((struct aes_xts_ctx *)key, data, 0); } int aes_xts_setkey(u_int8_t **sched, u_int8_t *key, int len) { struct aes_xts_ctx *ctx; if (len != 32 && len != 64) return EINVAL; *sched = malloc(sizeof(struct aes_xts_ctx), M_CRYPTO_DATA, M_NOWAIT | M_ZERO); if (*sched == NULL) return ENOMEM; ctx = (struct aes_xts_ctx *)*sched; rijndael_set_key(&ctx->key1, key, len * 4); rijndael_set_key(&ctx->key2, key + (len / 2), len * 4); return 0; } void aes_xts_zerokey(u_int8_t **sched) { bzero(*sched, sizeof(struct aes_xts_ctx)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } static void cml_encrypt(caddr_t key, u_int8_t *blk) { camellia_encrypt((camellia_ctx *) key, (u_char *) blk, (u_char *) blk); } static void cml_decrypt(caddr_t key, u_int8_t *blk) { camellia_decrypt(((camellia_ctx *) key), (u_char *) blk, (u_char *) blk); } static int cml_setkey(u_int8_t **sched, u_int8_t *key, int len) { int err; if (len != 16 && len != 24 && len != 32) return (EINVAL); *sched = malloc(sizeof(camellia_ctx), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (*sched != NULL) { camellia_set_key((camellia_ctx *) *sched, (u_char *) key, len * 8); err = 0; } else err = ENOMEM; return err; } static void cml_zerokey(u_int8_t **sched) { bzero(*sched, sizeof(camellia_ctx)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } /* * And now for auth. */ static void null_init(void *ctx) { } static void null_reinit(void *ctx, const u_int8_t *buf, u_int16_t len) { } static int null_update(void *ctx, const u_int8_t *buf, u_int16_t len) { return 0; } static void null_final(u_int8_t *buf, void *ctx) { if (buf != (u_int8_t *) 0) bzero(buf, 12); } static int RMD160Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { RMD160Update(ctx, buf, len); return 0; } static int MD5Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { MD5Update(ctx, buf, len); return 0; } static void SHA1Init_int(void *ctx) { SHA1Init(ctx); } static int SHA1Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA1Update(ctx, buf, len); return 0; } static void SHA1Final_int(u_int8_t *blk, void *ctx) { SHA1Final(blk, ctx); } static int SHA256Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA256_Update(ctx, buf, len); return 0; } static int SHA384Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA384_Update(ctx, buf, len); return 0; } static int SHA512Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA512_Update(ctx, buf, len); return 0; } /* * And compression */ static u_int32_t deflate_compress(data, size, out) u_int8_t *data; u_int32_t size; u_int8_t **out; { return deflate_global(data, size, 0, out); } static u_int32_t deflate_decompress(data, size, out) u_int8_t *data; u_int32_t size; u_int8_t **out; { return deflate_global(data, size, 1, out); } Index: projects/powernv/powerpc/aim/mmu_oea64.c =================================================================== --- projects/powernv/powerpc/aim/mmu_oea64.c (revision 290990) +++ projects/powernv/powerpc/aim/mmu_oea64.c (revision 290991) @@ -1,2725 +1,2725 @@ /*- * Copyright (c) 2008-2015 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is also stored by the * logical address mapping module, this module may throw away valid virtual * to physical mappings at almost any time. However, invalidations of * mappings must be done as requested. * * In order to cope with hardware architectures which make virtual to * physical map invalidates expensive, this module may delay invalidate * reduced protection operations until such time as they are actually * necessary. This module is given full information as to which processors * are currently using which maps, and to when physical maps must be made * correct. */ #include "opt_compat.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu_oea64.h" #include "mmu_if.h" #include "moea64_if.h" void moea64_release_vsid(uint64_t vsid); uintptr_t moea64_get_unique_vsid(void); #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) #define ENABLE_TRANS(msr) mtmsr(msr) #define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4)) #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) #define VSID_HASH_MASK 0x0000007fffffffffULL /* * Locking semantics: * * There are two locks of interest: the page locks and the pmap locks, which * protect their individual PVO lists and are locked in that order. The contents * of all PVO entries are protected by the locks of their respective pmaps. * The pmap of any PVO is guaranteed not to change so long as the PVO is linked * into any list. 
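The two-level discipline in the comment above (page lock first, pmap lock second) is the usual fixed lock order that keeps PVO list manipulation deadlock-free; the pv_lock[] array defined just below then spreads the page side across PA_LOCK_COUNT*3 slots hashed by physical address. A minimal mtx(9) sketch of honoring that order:

    /* Kernel-context sketch: always take the page (PV) lock before the pmap lock. */
    static void
    pvo_update_locked(struct mtx *pv_lock, struct mtx *pmap_lock)
    {
            mtx_lock(pv_lock);              /* level 1: page lock */
            mtx_lock(pmap_lock);            /* level 2: pmap lock */
            /* ... manipulate the PVO lists ... */
            mtx_unlock(pmap_lock);          /* release in reverse order */
            mtx_unlock(pv_lock);
    }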
* */ #define PV_LOCK_COUNT PA_LOCK_COUNT*3 static struct mtx_padalign pv_lock[PV_LOCK_COUNT]; #define PV_LOCKPTR(pa) ((struct mtx *)(&pv_lock[pa_index(pa) % PV_LOCK_COUNT])) #define PV_LOCK(pa) mtx_lock(PV_LOCKPTR(pa)) #define PV_UNLOCK(pa) mtx_unlock(PV_LOCKPTR(pa)) #define PV_LOCKASSERT(pa) mtx_assert(PV_LOCKPTR(pa), MA_OWNED) #define PV_PAGE_LOCK(m) PV_LOCK(VM_PAGE_TO_PHYS(m)) #define PV_PAGE_UNLOCK(m) PV_UNLOCK(VM_PAGE_TO_PHYS(m)) #define PV_PAGE_LOCKASSERT(m) PV_LOCKASSERT(VM_PAGE_TO_PHYS(m)) struct ofw_map { cell_t om_va; cell_t om_len; uint64_t om_pa; cell_t om_mode; }; extern unsigned char _etext[]; extern unsigned char _end[]; /* * Map of physical memory regions. */ static struct mem_region *regions; static struct mem_region *pregions; static u_int phys_avail_count; static int regions_sz, pregions_sz; extern void bs_remap_earlyboot(void); /* * Lock for the SLB tables. */ struct mtx moea64_slb_mutex; /* * PTEG data. */ u_int moea64_pteg_count; u_int moea64_pteg_mask; /* * PVO data. */ uma_zone_t moea64_pvo_zone; /* zone for pvo entries */ static struct pvo_entry *moea64_bpvo_pool; static int moea64_bpvo_pool_index = 0; static int moea64_bpvo_pool_size = 327680; TUNABLE_INT("machdep.moea64_bpvo_pool_size", &moea64_bpvo_pool_size); SYSCTL_INT(_machdep, OID_AUTO, moea64_allocated_bpvo_entries, CTLFLAG_RD, &moea64_bpvo_pool_index, 0, ""); #define VSID_NBPW (sizeof(u_int32_t) * 8) #ifdef __powerpc64__ #define NVSIDS (NPMAPS * 16) #define VSID_HASHMASK 0xffffffffUL #else #define NVSIDS NPMAPS #define VSID_HASHMASK 0xfffffUL #endif static u_int moea64_vsid_bitmap[NVSIDS / VSID_NBPW]; static boolean_t moea64_initialized = FALSE; /* * Statistics. */ u_int moea64_pte_valid = 0; u_int moea64_pte_overflow = 0; u_int moea64_pvo_entries = 0; u_int moea64_pvo_enter_calls = 0; u_int moea64_pvo_remove_calls = 0; SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD, &moea64_pte_valid, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD, &moea64_pte_overflow, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD, &moea64_pvo_entries, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD, &moea64_pvo_enter_calls, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD, &moea64_pvo_remove_calls, 0, ""); vm_offset_t moea64_scratchpage_va[2]; struct pvo_entry *moea64_scratchpage_pvo[2]; struct mtx moea64_scratchpage_mtx; uint64_t moea64_large_page_mask = 0; uint64_t moea64_large_page_size = 0; int moea64_large_page_shift = 0; /* * PVO calls. */ static int moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo, struct pvo_head *pvo_head); static void moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo); static void moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo); static struct pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t); /* * Utility routines. 
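The moea64 statistics above are exported as read-only integers under the machdep tree; the sysctl(9) idiom, reduced to a single hypothetical counter:

    #include <sys/param.h>
    #include <sys/kernel.h>
    #include <sys/sysctl.h>

    static u_int my_counter = 0;
    SYSCTL_UINT(_debug, OID_AUTO, my_counter, CTLFLAG_RD,
        &my_counter, 0, "example read-only counter (hypothetical knob)");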
*/ static boolean_t moea64_query_bit(mmu_t, vm_page_t, uint64_t); static u_int moea64_clear_bit(mmu_t, vm_page_t, uint64_t); static void moea64_kremove(mmu_t, vm_offset_t); static void moea64_syncicache(mmu_t, pmap_t pmap, vm_offset_t va, vm_paddr_t pa, vm_size_t sz); static void moea64_pmap_init_qpages(void); /* * Kernel MMU interface */ void moea64_clear_modify(mmu_t, vm_page_t); void moea64_copy_page(mmu_t, vm_page_t, vm_page_t); void moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize); int moea64_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, u_int flags, int8_t psind); void moea64_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t, vm_prot_t); void moea64_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t); vm_paddr_t moea64_extract(mmu_t, pmap_t, vm_offset_t); vm_page_t moea64_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); void moea64_init(mmu_t); boolean_t moea64_is_modified(mmu_t, vm_page_t); boolean_t moea64_is_prefaultable(mmu_t, pmap_t, vm_offset_t); boolean_t moea64_is_referenced(mmu_t, vm_page_t); int moea64_ts_referenced(mmu_t, vm_page_t); vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, int); boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t); int moea64_page_wired_mappings(mmu_t, vm_page_t); void moea64_pinit(mmu_t, pmap_t); void moea64_pinit0(mmu_t, pmap_t); void moea64_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); void moea64_qenter(mmu_t, vm_offset_t, vm_page_t *, int); void moea64_qremove(mmu_t, vm_offset_t, int); void moea64_release(mmu_t, pmap_t); void moea64_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea64_remove_pages(mmu_t, pmap_t); void moea64_remove_all(mmu_t, vm_page_t); void moea64_remove_write(mmu_t, vm_page_t); void moea64_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea64_zero_page(mmu_t, vm_page_t); void moea64_zero_page_area(mmu_t, vm_page_t, int, int); void moea64_zero_page_idle(mmu_t, vm_page_t); void moea64_activate(mmu_t, struct thread *); void moea64_deactivate(mmu_t, struct thread *); void *moea64_mapdev(mmu_t, vm_paddr_t, vm_size_t); void *moea64_mapdev_attr(mmu_t, vm_paddr_t, vm_size_t, vm_memattr_t); void moea64_unmapdev(mmu_t, vm_offset_t, vm_size_t); vm_paddr_t moea64_kextract(mmu_t, vm_offset_t); void moea64_page_set_memattr(mmu_t, vm_page_t m, vm_memattr_t ma); void moea64_kenter_attr(mmu_t, vm_offset_t, vm_paddr_t, vm_memattr_t ma); void moea64_kenter(mmu_t, vm_offset_t, vm_paddr_t); boolean_t moea64_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); static void moea64_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t); void moea64_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va); void moea64_scan_init(mmu_t mmu); vm_offset_t moea64_quick_enter_page(mmu_t mmu, vm_page_t m); void moea64_quick_remove_page(mmu_t mmu, vm_offset_t addr); static mmu_method_t moea64_methods[] = { MMUMETHOD(mmu_clear_modify, moea64_clear_modify), MMUMETHOD(mmu_copy_page, moea64_copy_page), MMUMETHOD(mmu_copy_pages, moea64_copy_pages), MMUMETHOD(mmu_enter, moea64_enter), MMUMETHOD(mmu_enter_object, moea64_enter_object), MMUMETHOD(mmu_enter_quick, moea64_enter_quick), MMUMETHOD(mmu_extract, moea64_extract), MMUMETHOD(mmu_extract_and_hold, moea64_extract_and_hold), MMUMETHOD(mmu_init, moea64_init), MMUMETHOD(mmu_is_modified, moea64_is_modified), MMUMETHOD(mmu_is_prefaultable, moea64_is_prefaultable), MMUMETHOD(mmu_is_referenced, moea64_is_referenced), MMUMETHOD(mmu_ts_referenced, 
moea64_ts_referenced), MMUMETHOD(mmu_map, moea64_map), MMUMETHOD(mmu_page_exists_quick,moea64_page_exists_quick), MMUMETHOD(mmu_page_wired_mappings,moea64_page_wired_mappings), MMUMETHOD(mmu_pinit, moea64_pinit), MMUMETHOD(mmu_pinit0, moea64_pinit0), MMUMETHOD(mmu_protect, moea64_protect), MMUMETHOD(mmu_qenter, moea64_qenter), MMUMETHOD(mmu_qremove, moea64_qremove), MMUMETHOD(mmu_release, moea64_release), MMUMETHOD(mmu_remove, moea64_remove), MMUMETHOD(mmu_remove_pages, moea64_remove_pages), MMUMETHOD(mmu_remove_all, moea64_remove_all), MMUMETHOD(mmu_remove_write, moea64_remove_write), MMUMETHOD(mmu_sync_icache, moea64_sync_icache), MMUMETHOD(mmu_unwire, moea64_unwire), MMUMETHOD(mmu_zero_page, moea64_zero_page), MMUMETHOD(mmu_zero_page_area, moea64_zero_page_area), MMUMETHOD(mmu_zero_page_idle, moea64_zero_page_idle), MMUMETHOD(mmu_activate, moea64_activate), MMUMETHOD(mmu_deactivate, moea64_deactivate), MMUMETHOD(mmu_page_set_memattr, moea64_page_set_memattr), MMUMETHOD(mmu_quick_enter_page, moea64_quick_enter_page), MMUMETHOD(mmu_quick_remove_page, moea64_quick_remove_page), /* Internal interfaces */ MMUMETHOD(mmu_mapdev, moea64_mapdev), MMUMETHOD(mmu_mapdev_attr, moea64_mapdev_attr), MMUMETHOD(mmu_unmapdev, moea64_unmapdev), MMUMETHOD(mmu_kextract, moea64_kextract), MMUMETHOD(mmu_kenter, moea64_kenter), MMUMETHOD(mmu_kenter_attr, moea64_kenter_attr), MMUMETHOD(mmu_dev_direct_mapped,moea64_dev_direct_mapped), MMUMETHOD(mmu_scan_init, moea64_scan_init), MMUMETHOD(mmu_dumpsys_map, moea64_dumpsys_map), { 0, 0 } }; MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods, 0); static struct pvo_head * vm_page_to_pvoh(vm_page_t m) { mtx_assert(PV_LOCKPTR(VM_PAGE_TO_PHYS(m)), MA_OWNED); return (&m->md.mdpg_pvoh); } static struct pvo_entry * alloc_pvo_entry(int bootstrap) { struct pvo_entry *pvo; if (!moea64_initialized || bootstrap) { if (moea64_bpvo_pool_index >= moea64_bpvo_pool_size) { panic("moea64_enter: bpvo pool exhausted, %d, %d, %zd", moea64_bpvo_pool_index, moea64_bpvo_pool_size, moea64_bpvo_pool_size * sizeof(struct pvo_entry)); } pvo = &moea64_bpvo_pool[ atomic_fetchadd_int(&moea64_bpvo_pool_index, 1)]; bzero(pvo, sizeof(*pvo)); pvo->pvo_vaddr = PVO_BOOTSTRAP; } else { pvo = uma_zalloc(moea64_pvo_zone, M_NOWAIT); bzero(pvo, sizeof(*pvo)); } return (pvo); } static void init_pvo_entry(struct pvo_entry *pvo, pmap_t pmap, vm_offset_t va) { uint64_t vsid; uint64_t hash; int shift; PMAP_LOCK_ASSERT(pmap, MA_OWNED); pvo->pvo_pmap = pmap; va &= ~ADDR_POFF; pvo->pvo_vaddr |= va; vsid = va_to_vsid(pmap, va); pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT) | (vsid << 16); shift = (pvo->pvo_vaddr & PVO_LARGE) ? 
moea64_large_page_shift : ADDR_PIDX_SHFT; hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)va & ADDR_PIDX) >> shift); pvo->pvo_pte.slot = (hash & moea64_pteg_mask) << 3; } static void free_pvo_entry(struct pvo_entry *pvo) { if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP)) uma_zfree(moea64_pvo_zone, pvo); } void moea64_pte_from_pvo(const struct pvo_entry *pvo, struct lpte *lpte) { lpte->pte_hi = (pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) & LPTE_AVPN_MASK; lpte->pte_hi |= LPTE_VALID; if (pvo->pvo_vaddr & PVO_LARGE) lpte->pte_hi |= LPTE_BIG; if (pvo->pvo_vaddr & PVO_WIRED) lpte->pte_hi |= LPTE_WIRED; if (pvo->pvo_vaddr & PVO_HID) lpte->pte_hi |= LPTE_HID; lpte->pte_lo = pvo->pvo_pte.pa; /* Includes WIMG bits */ if (pvo->pvo_pte.prot & VM_PROT_WRITE) lpte->pte_lo |= LPTE_BW; else lpte->pte_lo |= LPTE_BR; if (!(pvo->pvo_pte.prot & VM_PROT_EXECUTE)) lpte->pte_lo |= LPTE_NOEXEC; } static __inline uint64_t moea64_calc_wimg(vm_paddr_t pa, vm_memattr_t ma) { uint64_t pte_lo; int i; if (ma != VM_MEMATTR_DEFAULT) { switch (ma) { case VM_MEMATTR_UNCACHEABLE: return (LPTE_I | LPTE_G); case VM_MEMATTR_WRITE_COMBINING: case VM_MEMATTR_WRITE_BACK: case VM_MEMATTR_PREFETCHABLE: return (LPTE_I); case VM_MEMATTR_WRITE_THROUGH: return (LPTE_W | LPTE_M); } } /* * Assume the page is cache inhibited and access is guarded unless * it's in our available memory array. */ pte_lo = LPTE_I | LPTE_G; for (i = 0; i < pregions_sz; i++) { if ((pa >= pregions[i].mr_start) && (pa < (pregions[i].mr_start + pregions[i].mr_size))) { pte_lo &= ~(LPTE_I | LPTE_G); pte_lo |= LPTE_M; break; } } return pte_lo; } /* * Quick sort callout for comparing memory regions. */ static int om_cmp(const void *a, const void *b); static int om_cmp(const void *a, const void *b) { const struct ofw_map *mapa; const struct ofw_map *mapb; mapa = a; mapb = b; if (mapa->om_pa < mapb->om_pa) return (-1); else if (mapa->om_pa > mapb->om_pa) return (1); else return (0); } static void moea64_add_ofw_mappings(mmu_t mmup, phandle_t mmu, size_t sz) { struct ofw_map translations[sz/(4*sizeof(cell_t))]; /*>= 4 cells per */ pcell_t acells, trans_cells[sz/sizeof(cell_t)]; struct pvo_entry *pvo; register_t msr; vm_offset_t off; vm_paddr_t pa_base; int i, j; bzero(translations, sz); - OF_getprop(OF_finddevice("/"), "#address-cells", &acells, + OF_getencprop(OF_finddevice("/"), "#address-cells", &acells, sizeof(acells)); - if (OF_getprop(mmu, "translations", trans_cells, sz) == -1) + if (OF_getencprop(mmu, "translations", trans_cells, sz) == -1) panic("moea64_bootstrap: can't get ofw translations"); CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations"); sz /= sizeof(cell_t); for (i = 0, j = 0; i < sz; j++) { translations[j].om_va = trans_cells[i++]; translations[j].om_len = trans_cells[i++]; translations[j].om_pa = trans_cells[i++]; if (acells == 2) { translations[j].om_pa <<= 32; translations[j].om_pa |= trans_cells[i++]; } translations[j].om_mode = trans_cells[i++]; } KASSERT(i == sz, ("Translations map has incorrect cell count (%d/%zd)", i, sz)); sz = j; qsort(translations, sz, sizeof (*translations), om_cmp); for (i = 0; i < sz; i++) { pa_base = translations[i].om_pa; #ifndef __powerpc64__ if ((translations[i].om_pa >> 32) != 0) panic("OFW translations above 32-bit boundary!"); #endif if (pa_base % PAGE_SIZE) panic("OFW translation not page-aligned (phys)!"); if (translations[i].om_va % PAGE_SIZE) panic("OFW translation not page-aligned (virt)!"); CTR3(KTR_PMAP, "translation: pa=%#zx va=%#x len=%#x", pa_base, translations[i].om_va, translations[i].om_len); /* Now 
enter the pages for this mapping */ DISABLE_TRANS(msr); for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) { /* If this address is direct-mapped, skip remapping */ if (hw_direct_map && translations[i].om_va == pa_base && moea64_calc_wimg(pa_base + off, VM_MEMATTR_DEFAULT) == LPTE_M) continue; PMAP_LOCK(kernel_pmap); pvo = moea64_pvo_find_va(kernel_pmap, translations[i].om_va + off); PMAP_UNLOCK(kernel_pmap); if (pvo != NULL) continue; moea64_kenter(mmup, translations[i].om_va + off, pa_base + off); } ENABLE_TRANS(msr); } } #ifdef __powerpc64__ static void moea64_probe_large_page(void) { uint16_t pvr = mfpvr() >> 16; switch (pvr) { case IBM970: case IBM970FX: case IBM970MP: powerpc_sync(); isync(); mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG); powerpc_sync(); isync(); /* FALLTHROUGH */ default: moea64_large_page_size = 0x1000000; /* 16 MB */ moea64_large_page_shift = 24; } moea64_large_page_mask = moea64_large_page_size - 1; } static void moea64_bootstrap_slb_prefault(vm_offset_t va, int large) { struct slb *cache; struct slb entry; uint64_t esid, slbe; uint64_t i; cache = PCPU_GET(slb); esid = va >> ADDR_SR_SHFT; slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID; for (i = 0; i < 64; i++) { if (cache[i].slbe == (slbe | i)) return; } entry.slbe = slbe; entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT; if (large) entry.slbv |= SLBV_L; slb_insert_kernel(entry.slbe, entry.slbv); } #endif static void moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { struct pvo_entry *pvo; register_t msr; vm_paddr_t pa; vm_offset_t size, off; uint64_t pte_lo; int i; if (moea64_large_page_size == 0) hw_direct_map = 0; DISABLE_TRANS(msr); if (hw_direct_map) { PMAP_LOCK(kernel_pmap); for (i = 0; i < pregions_sz; i++) { for (pa = pregions[i].mr_start; pa < pregions[i].mr_start + pregions[i].mr_size; pa += moea64_large_page_size) { pte_lo = LPTE_M; pvo = alloc_pvo_entry(1 /* bootstrap */); pvo->pvo_vaddr |= PVO_WIRED | PVO_LARGE; init_pvo_entry(pvo, kernel_pmap, pa); /* * Set memory access as guarded if prefetch within * the page could exit the available physmem area. */ if (pa & moea64_large_page_mask) { pa &= moea64_large_page_mask; pte_lo |= LPTE_G; } if (pa + moea64_large_page_size > pregions[i].mr_start + pregions[i].mr_size) pte_lo |= LPTE_G; pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; pvo->pvo_pte.pa = pa | pte_lo; moea64_pvo_enter(mmup, pvo, NULL); } } PMAP_UNLOCK(kernel_pmap); } else { size = moea64_bpvo_pool_size*sizeof(struct pvo_entry); off = (vm_offset_t)(moea64_bpvo_pool); for (pa = off; pa < off + size; pa += PAGE_SIZE) moea64_kenter(mmup, pa, pa); /* * Map certain important things, like ourselves. * * NOTE: We do not map the exception vector space. That code is * used only in real mode, and leaving it unmapped allows us to * catch NULL pointer deferences, instead of making NULL a valid * address. */ for (pa = kernelstart & ~PAGE_MASK; pa < kernelend; pa += PAGE_SIZE) moea64_kenter(mmup, pa, pa); } ENABLE_TRANS(msr); /* * Allow user to override unmapped_buf_allowed for testing. * XXXKIB Only direct map implementation was tested. 
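The fetch that follows is the standard bootstrap idiom for loader(8) tunables: TUNABLE_INT_FETCH() returns nonzero and overwrites the variable only when the knob was actually set, so a computed default survives otherwise. Generically, with a hypothetical knob name:

    #include <sys/param.h>
    #include <sys/kernel.h>

    static int feature_enabled;

    static void
    feature_init(int computed_default)
    {
            feature_enabled = computed_default;
            /* Overrides the default only if the loader set the knob. */
            TUNABLE_INT_FETCH("hw.myfeature.enabled", &feature_enabled);
    }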
*/ if (!TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed", &unmapped_buf_allowed)) unmapped_buf_allowed = hw_direct_map; } void moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { int i, j; vm_size_t physsz, hwphyssz; #ifndef __powerpc64__ /* We don't have a direct map since there is no BAT */ hw_direct_map = 0; /* Make sure battable is zero, since we have no BAT */ for (i = 0; i < 16; i++) { battable[i].batu = 0; battable[i].batl = 0; } #else moea64_probe_large_page(); /* Use a direct map if we have large page support */ if (moea64_large_page_size > 0) hw_direct_map = 1; else hw_direct_map = 0; #endif /* Get physical memory regions from firmware */ mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); CTR0(KTR_PMAP, "moea64_bootstrap: physical memory"); if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz) panic("moea64_bootstrap: phys_avail too small"); phys_avail_count = 0; physsz = 0; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); for (i = 0, j = 0; i < regions_sz; i++, j += 2) { CTR3(KTR_PMAP, "region: %#zx - %#zx (%#zx)", regions[i].mr_start, regions[i].mr_start + regions[i].mr_size, regions[i].mr_size); if (hwphyssz != 0 && (physsz + regions[i].mr_size) >= hwphyssz) { if (physsz < hwphyssz) { phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + hwphyssz - physsz; physsz = hwphyssz; phys_avail_count++; } break; } phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size; phys_avail_count++; physsz += regions[i].mr_size; } /* Check for overlap with the kernel and exception vectors */ for (j = 0; j < 2*phys_avail_count; j+=2) { if (phys_avail[j] < EXC_LAST) phys_avail[j] += EXC_LAST; if (kernelstart >= phys_avail[j] && kernelstart < phys_avail[j+1]) { if (kernelend < phys_avail[j+1]) { phys_avail[2*phys_avail_count] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; phys_avail[2*phys_avail_count + 1] = phys_avail[j+1]; phys_avail_count++; } phys_avail[j+1] = kernelstart & ~PAGE_MASK; } if (kernelend >= phys_avail[j] && kernelend < phys_avail[j+1]) { if (kernelstart > phys_avail[j]) { phys_avail[2*phys_avail_count] = phys_avail[j]; phys_avail[2*phys_avail_count + 1] = kernelstart & ~PAGE_MASK; phys_avail_count++; } phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; } } physmem = btoc(physsz); #ifdef PTEGCOUNT moea64_pteg_count = PTEGCOUNT; #else moea64_pteg_count = 0x1000; while (moea64_pteg_count < physmem) moea64_pteg_count <<= 1; moea64_pteg_count >>= 1; #endif /* PTEGCOUNT */ } void moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { int i; /* * Set PTEG mask */ moea64_pteg_mask = moea64_pteg_count - 1; /* * Initialize SLB table lock and page locks */ mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF); for (i = 0; i < PV_LOCK_COUNT; i++) mtx_init(&pv_lock[i], "page pv", NULL, MTX_DEF); /* * Initialise the bootstrap pvo pool. */ moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc( moea64_bpvo_pool_size*sizeof(struct pvo_entry), 0); moea64_bpvo_pool_index = 0; /* * Make sure kernel vsid is allocated as well as VSID 0. */ #ifndef __powerpc64__ moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW] |= 1 << (KERNEL_VSIDBITS % VSID_NBPW); moea64_vsid_bitmap[0] |= 1; #endif /* * Initialize the kernel pmap (which is statically allocated). 
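 *
 * Review aside (not part of this change; the helper name and plain
 * uint64_t types are hypothetical): before this point,
 * moea64_early_bootstrap() above sized the page table. Its loop picks
 * the largest power of two below physmem, clamped to at least 0x800
 * PTEGs:
 *
 *	static uint64_t
 *	pteg_count_for(uint64_t physmem_pages)
 *	{
 *		uint64_t n = 0x1000;
 *
 *		while (n < physmem_pages)
 *			n <<= 1;
 *		return (n >> 1);
 *	}
 *
 * For example, 0x30000 pages (3 GB of 4 KB pages) yields 0x20000 PTEGs.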
*/ #ifdef __powerpc64__ for (i = 0; i < 64; i++) { pcpup->pc_slb[i].slbv = 0; pcpup->pc_slb[i].slbe = 0; } #else for (i = 0; i < 16; i++) kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; #endif kernel_pmap->pmap_phys = kernel_pmap; CPU_FILL(&kernel_pmap->pm_active); RB_INIT(&kernel_pmap->pmap_pvo); PMAP_LOCK_INIT(kernel_pmap); /* * Now map in all the other buffers we allocated earlier */ moea64_setup_direct_map(mmup, kernelstart, kernelend); } void moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { ihandle_t mmui; phandle_t chosen; phandle_t mmu; ssize_t sz; int i; vm_offset_t pa, va; void *dpcpu; /* * Set up the Open Firmware pmap and add its mappings if not in real * mode. */ chosen = OF_finddevice("/chosen"); - if (chosen != -1 && OF_getprop(chosen, "mmu", &mmui, 4) != -1) { + if (chosen != -1 && OF_getencprop(chosen, "mmu", &mmui, 4) != -1) { mmu = OF_instance_to_package(mmui); if (mmu == -1 || (sz = OF_getproplen(mmu, "translations")) == -1) sz = 0; if (sz > 6144 /* tmpstksz - 2 KB headroom */) panic("moea64_bootstrap: too many ofw translations"); if (sz > 0) moea64_add_ofw_mappings(mmup, mmu, sz); } /* * Calculate the last available physical address. */ for (i = 0; phys_avail[i + 2] != 0; i += 2) ; Maxmem = powerpc_btop(phys_avail[i + 1]); /* * Initialize MMU and remap early physical mappings */ MMU_CPU_BOOTSTRAP(mmup,0); mtmsr(mfmsr() | PSL_DR | PSL_IR); pmap_bootstrapped++; bs_remap_earlyboot(); /* * Set the start and end of kva. */ virtual_avail = VM_MIN_KERNEL_ADDRESS; virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS; /* * Map the entire KVA range into the SLB. We must not fault there. */ #ifdef __powerpc64__ for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH) moea64_bootstrap_slb_prefault(va, 0); #endif /* * Figure out how far we can extend virtual_end into segment 16 * without running into existing mappings. Segment 16 is guaranteed * to contain neither RAM nor devices (at least on Apple hardware), * but will generally contain some OFW mappings we should not * step on. */ #ifndef __powerpc64__ /* KVA is in high memory on PPC64 */ PMAP_LOCK(kernel_pmap); while (virtual_end < VM_MAX_KERNEL_ADDRESS && moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL) virtual_end += PAGE_SIZE; PMAP_UNLOCK(kernel_pmap); #endif /* * Allocate a kernel stack with a guard page for thread0 and map it * into the kernel page map. */ pa = moea64_bootstrap_alloc(kstack_pages * PAGE_SIZE, PAGE_SIZE); va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; virtual_avail = va + kstack_pages * PAGE_SIZE; CTR2(KTR_PMAP, "moea64_bootstrap: kstack0 at %#x (%#x)", pa, va); thread0.td_kstack = va; thread0.td_kstack_pages = kstack_pages; for (i = 0; i < kstack_pages; i++) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the message buffer. */ pa = msgbuf_phys = moea64_bootstrap_alloc(msgbufsize, PAGE_SIZE); msgbufp = (struct msgbuf *)virtual_avail; va = virtual_avail; virtual_avail += round_page(msgbufsize); while (va < virtual_avail) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the dynamic percpu area. */ pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); dpcpu = (void *)virtual_avail; va = virtual_avail; virtual_avail += DPCPU_SIZE; while (va < virtual_avail) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } dpcpu_init(dpcpu, 0); /* * Allocate some things for page zeroing. 
We put this directly * in the page table and use MOEA64_PTE_REPLACE to avoid any * of the PVO book-keeping or other parts of the VM system * from even knowing that this hack exists. */ if (!hw_direct_map) { mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL, MTX_DEF); for (i = 0; i < 2; i++) { moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE; virtual_end -= PAGE_SIZE; moea64_kenter(mmup, moea64_scratchpage_va[i], 0); PMAP_LOCK(kernel_pmap); moea64_scratchpage_pvo[i] = moea64_pvo_find_va( kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]); PMAP_UNLOCK(kernel_pmap); } } } static void moea64_pmap_init_qpages(void) { struct pcpu *pc; int i; if (hw_direct_map) return; CPU_FOREACH(i) { pc = pcpu_find(i); pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); if (pc->pc_qmap_addr == 0) panic("pmap_init_qpages: unable to allocate KVA"); PMAP_LOCK(kernel_pmap); pc->pc_qmap_pvo = moea64_pvo_find_va(kernel_pmap, pc->pc_qmap_addr); PMAP_UNLOCK(kernel_pmap); mtx_init(&pc->pc_qmap_lock, "qmap lock", NULL, MTX_DEF); } } SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, moea64_pmap_init_qpages, NULL); /* * Activate a user pmap. This mostly involves setting some non-CPU * state. */ void moea64_activate(mmu_t mmu, struct thread *td) { pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; CPU_SET(PCPU_GET(cpuid), &pm->pm_active); #ifdef __powerpc64__ PCPU_SET(userslb, pm->pm_slb); __asm __volatile("slbmte %0, %1; isync" :: "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); #else PCPU_SET(curpmap, pm->pmap_phys); mtsrin(USER_SR << ADDR_SR_SHFT, td->td_pcb->pcb_cpu.aim.usr_vsid); #endif } void moea64_deactivate(mmu_t mmu, struct thread *td) { pmap_t pm; __asm __volatile("isync; slbie %0" :: "r"(USER_ADDR)); pm = &td->td_proc->p_vmspace->vm_pmap; CPU_CLR(PCPU_GET(cpuid), &pm->pm_active); #ifdef __powerpc64__ PCPU_SET(userslb, NULL); #else PCPU_SET(curpmap, NULL); #endif } void moea64_unwire(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) { struct pvo_entry key, *pvo; vm_page_t m; int64_t refchg; key.pvo_vaddr = sva; PMAP_LOCK(pm); for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { if ((pvo->pvo_vaddr & PVO_WIRED) == 0) panic("moea64_unwire: pvo %p is missing PVO_WIRED", pvo); pvo->pvo_vaddr &= ~PVO_WIRED; refchg = MOEA64_PTE_REPLACE(mmu, pvo, 0 /* No invalidation */); if ((pvo->pvo_vaddr & PVO_MANAGED) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { if (refchg < 0) refchg = LPTE_CHG; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); refchg |= atomic_readandclear_32(&m->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(m); if (refchg & LPTE_REF) vm_page_aflag_set(m, PGA_REFERENCED); } pm->pm_stats.wired_count--; } PMAP_UNLOCK(pm); } /* * This goes through and sets the physical address of our * special scratch PTE to the PA we want to zero or copy. 
Because * of locking issues (this can get called in pvo_enter() by * the UMA allocator), we can't use most other utility functions here */ static __inline void moea64_set_scratchpage_pa(mmu_t mmup, int which, vm_paddr_t pa) { KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!")); mtx_assert(&moea64_scratchpage_mtx, MA_OWNED); moea64_scratchpage_pvo[which]->pvo_pte.pa = moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa; MOEA64_PTE_REPLACE(mmup, moea64_scratchpage_pvo[which], MOEA64_PTE_INVALIDATE); isync(); } void moea64_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) { vm_offset_t dst; vm_offset_t src; dst = VM_PAGE_TO_PHYS(mdst); src = VM_PAGE_TO_PHYS(msrc); if (hw_direct_map) { bcopy((void *)src, (void *)dst, PAGE_SIZE); } else { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, src); moea64_set_scratchpage_pa(mmu, 1, dst); bcopy((void *)moea64_scratchpage_va[0], (void *)moea64_scratchpage_va[1], PAGE_SIZE); mtx_unlock(&moea64_scratchpage_mtx); } } static inline void moea64_copy_pages_dmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; int cnt; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); a_cp = (char *)VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]) + a_pg_offset; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); b_cp = (char *)VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]) + b_pg_offset; bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } static inline void moea64_copy_pages_nodmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; int cnt; mtx_lock(&moea64_scratchpage_mtx); while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); moea64_set_scratchpage_pa(mmu, 0, VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); a_cp = (char *)moea64_scratchpage_va[0] + a_pg_offset; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); moea64_set_scratchpage_pa(mmu, 1, VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); b_cp = (char *)moea64_scratchpage_va[1] + b_pg_offset; bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } mtx_unlock(&moea64_scratchpage_mtx); } void moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { if (hw_direct_map) { moea64_copy_pages_dmap(mmu, ma, a_offset, mb, b_offset, xfersize); } else { moea64_copy_pages_nodmap(mmu, ma, a_offset, mb, b_offset, xfersize); } } void moea64_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) { vm_paddr_t pa = VM_PAGE_TO_PHYS(m); if (size + off > PAGE_SIZE) panic("moea64_zero_page: size + off > PAGE_SIZE"); if (hw_direct_map) { bzero((caddr_t)pa + off, size); } else { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, pa); bzero((caddr_t)moea64_scratchpage_va[0] + off, size); mtx_unlock(&moea64_scratchpage_mtx); } } /* * Zero a page of physical memory by temporarily mapping it */ void moea64_zero_page(mmu_t mmu, vm_page_t m) { vm_paddr_t pa = VM_PAGE_TO_PHYS(m); vm_offset_t va, off; if (!hw_direct_map) { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, pa); va = moea64_scratchpage_va[0]; } else { va = pa; } for (off = 0; off < PAGE_SIZE; off += cacheline_size) __asm __volatile("dcbz 
0,%0" :: "r"(va + off)); if (!hw_direct_map) mtx_unlock(&moea64_scratchpage_mtx); } void moea64_zero_page_idle(mmu_t mmu, vm_page_t m) { moea64_zero_page(mmu, m); } vm_offset_t moea64_quick_enter_page(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; vm_paddr_t pa = VM_PAGE_TO_PHYS(m); if (hw_direct_map) return (pa); /* * MOEA64_PTE_REPLACE does some locking, so we can't just grab * a critical section and access the PCPU data like on i386. * Instead, pin the thread and grab the PCPU lock to prevent * a preempting thread from using the same PCPU data. */ sched_pin(); mtx_assert(PCPU_PTR(qmap_lock), MA_NOTOWNED); pvo = PCPU_GET(qmap_pvo); mtx_lock(PCPU_PTR(qmap_lock)); pvo->pvo_pte.pa = moea64_calc_wimg(pa, pmap_page_get_memattr(m)) | (uint64_t)pa; MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_INVALIDATE); isync(); return (PCPU_GET(qmap_addr)); } void moea64_quick_remove_page(mmu_t mmu, vm_offset_t addr) { if (hw_direct_map) return; mtx_assert(PCPU_PTR(qmap_lock), MA_OWNED); KASSERT(PCPU_GET(qmap_addr) == addr, ("moea64_quick_remove_page: invalid address")); mtx_unlock(PCPU_PTR(qmap_lock)); sched_unpin(); } /* * Map the given physical page at the specified virtual address in the * target pmap with the protection requested. If specified the page * will be wired down. */ int moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { struct pvo_entry *pvo, *oldpvo; struct pvo_head *pvo_head; uint64_t pte_lo; int error; if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); pvo = alloc_pvo_entry(0); pvo->pvo_pmap = NULL; /* to be filled in later */ pvo->pvo_pte.prot = prot; pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | pte_lo; if ((flags & PMAP_ENTER_WIRED) != 0) pvo->pvo_vaddr |= PVO_WIRED; if ((m->oflags & VPO_UNMANAGED) != 0 || !moea64_initialized) { pvo_head = NULL; } else { pvo_head = &m->md.mdpg_pvoh; pvo->pvo_vaddr |= PVO_MANAGED; } for (;;) { PV_PAGE_LOCK(m); PMAP_LOCK(pmap); if (pvo->pvo_pmap == NULL) init_pvo_entry(pvo, pmap, va); if (prot & VM_PROT_WRITE) if (pmap_bootstrapped && (m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_WRITEABLE); oldpvo = moea64_pvo_find_va(pmap, va); if (oldpvo != NULL) { if (oldpvo->pvo_vaddr == pvo->pvo_vaddr && oldpvo->pvo_pte.pa == pvo->pvo_pte.pa && oldpvo->pvo_pte.prot == prot) { /* Identical mapping already exists */ error = 0; /* If not in page table, reinsert it */ if (MOEA64_PTE_SYNCH(mmu, oldpvo) < 0) { moea64_pte_overflow--; MOEA64_PTE_INSERT(mmu, oldpvo); } /* Then just clean up and go home */ PV_PAGE_UNLOCK(m); PMAP_UNLOCK(pmap); free_pvo_entry(pvo); break; } /* Otherwise, need to kill it first */ KASSERT(oldpvo->pvo_pmap == pmap, ("pmap of old " "mapping does not match new mapping")); moea64_pvo_remove_from_pmap(mmu, oldpvo); } error = moea64_pvo_enter(mmu, pvo, pvo_head); PV_PAGE_UNLOCK(m); PMAP_UNLOCK(pmap); /* Free any dead pages */ if (oldpvo != NULL) { PV_LOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); moea64_pvo_remove_from_page(mmu, oldpvo); PV_UNLOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); free_pvo_entry(oldpvo); } if (error != ENOMEM) break; if ((flags & PMAP_ENTER_NOSLEEP) != 0) return (KERN_RESOURCE_SHORTAGE); VM_OBJECT_ASSERT_UNLOCKED(m->object); VM_WAIT; } /* * Flush the page from the instruction cache if this page is * mapped executable and cacheable. 
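 *
 * Review restatement (not part of this change; the predicate name is
 * hypothetical) of the test below: the sync happens only on the first
 * executable-capable mapping of a cacheable page; cache-inhibited or
 * guarded mappings are skipped, as are LPTE_NOEXEC ones:
 *
 *	static int
 *	needs_icache_sync(int user_pmap, int already_executable,
 *	    uint64_t pte_lo)
 *	{
 *		return (user_pmap && !already_executable &&
 *		    (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0);
 *	}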
*/ if (pmap != kernel_pmap && !(m->aflags & PGA_EXECUTABLE) && (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { vm_page_aflag_set(m, PGA_EXECUTABLE); moea64_syncicache(mmu, pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE); } return (KERN_SUCCESS); } static void moea64_syncicache(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_paddr_t pa, vm_size_t sz) { /* * This is much trickier than on older systems because * we can't sync the icache on physical addresses directly * without a direct map. Instead we check a couple of cases * where the memory is already mapped in and, failing that, * use the same trick we use for page zeroing to create * a temporary mapping for this physical address. */ if (!pmap_bootstrapped) { /* * If PMAP is not bootstrapped, we are likely to be * in real mode. */ __syncicache((void *)pa, sz); } else if (pmap == kernel_pmap) { __syncicache((void *)va, sz); } else if (hw_direct_map) { __syncicache((void *)pa, sz); } else { /* Use the scratch page to set up a temp mapping */ mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 1, pa & ~ADDR_POFF); __syncicache((void *)(moea64_scratchpage_va[1] + (va & ADDR_POFF)), sz); mtx_unlock(&moea64_scratchpage_mtx); } } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void moea64_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_page_t m; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); m = m_start; while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { moea64_enter(mmu, pm, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP, 0); m = TAILQ_NEXT(m, listq); } } void moea64_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot) { moea64_enter(mmu, pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP, 0); } vm_paddr_t moea64_extract(mmu_t mmu, pmap_t pm, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; PMAP_LOCK(pm); pvo = moea64_pvo_find_va(pm, va); if (pvo == NULL) pa = 0; else pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo)); PMAP_UNLOCK(pm); return (pa); } /* * Atomically extract and hold the physical page with the given * pmap and virtual address pair if that mapping permits the given * protection. 
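 *
 * Review note (not part of this change): the physical address is
 * rebuilt exactly as in moea64_extract() above, page frame bits from
 * the PTE plus the byte offset of the VA within the mapping. A sketch
 * with a hypothetical name and plain uint64_t types:
 *
 *	static uint64_t
 *	pvo_to_pa(uint64_t pte_pa, uint64_t pvo_va, uint64_t va)
 *	{
 *		return ((pte_pa & LPTE_RPGN) | (va - pvo_va));
 *	}
 *
 * The vm_page_pa_tryrelock() loop below simply restarts the lookup
 * whenever the page lock could not be taken without dropping the pmap
 * lock.
 */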
*/ vm_page_t moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; vm_paddr_t pa; m = NULL; pa = 0; PMAP_LOCK(pmap); retry: pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) { if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pa & LPTE_RPGN, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); vm_page_hold(m); } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } static mmu_t installed_mmu; static void * moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) { struct pvo_entry *pvo; vm_offset_t va; vm_page_t m; int pflags, needed_lock; /* * This entire routine is a horrible hack to avoid bothering kmem * for new KVA addresses. Because this can get called from inside * kmem allocation routines, calling kmem for a new address here * can lead to multiply locking non-recursive mutexes. */ *flags = UMA_SLAB_PRIV; needed_lock = !PMAP_LOCKED(kernel_pmap); pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED; for (;;) { m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ); if (m == NULL) { if (wait & M_NOWAIT) return (NULL); VM_WAIT; } else break; } va = VM_PAGE_TO_PHYS(m); pvo = alloc_pvo_entry(1 /* bootstrap */); pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE; pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | LPTE_M; if (needed_lock) PMAP_LOCK(kernel_pmap); init_pvo_entry(pvo, kernel_pmap, va); pvo->pvo_vaddr |= PVO_WIRED; moea64_pvo_enter(installed_mmu, pvo, NULL); if (needed_lock) PMAP_UNLOCK(kernel_pmap); if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) bzero((void *)va, PAGE_SIZE); return (void *)va; } extern int elf32_nxstack; void moea64_init(mmu_t mmu) { CTR0(KTR_PMAP, "moea64_init"); moea64_pvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); if (!hw_direct_map) { installed_mmu = mmu; uma_zone_set_allocf(moea64_pvo_zone,moea64_uma_page_alloc); } #ifdef COMPAT_FREEBSD32 elf32_nxstack = 1; #endif moea64_initialized = TRUE; } boolean_t moea64_is_referenced(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_is_referenced: page %p is not managed", m)); return (moea64_query_bit(mmu, m, LPTE_REF)); } boolean_t moea64_is_modified(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have LPTE_CHG set. */ VM_OBJECT_ASSERT_LOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (moea64_query_bit(mmu, m, LPTE_CHG)); } boolean_t moea64_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) { struct pvo_entry *pvo; boolean_t rv = TRUE; PMAP_LOCK(pmap); pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); if (pvo != NULL) rv = FALSE; PMAP_UNLOCK(pmap); return (rv); } void moea64_clear_modify(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("moea64_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have LPTE_CHG * set. If the object containing the page is locked and the page is * not exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 
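 *
 * Review note (not part of this change): modified/referenced queries
 * in this file are cache-then-scan (see moea64_query_bit() later in
 * the file). Roughly:
 *
 *	if (m->md.mdpg_attrs & ptebit)	// bit already latched
 *		return (TRUE);
 *	// otherwise powerpc_sync(), MOEA64_PTE_SYNCH() each live PVO,
 *	// and atomic_set_32() the RC bits back into mdpg_attrs.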
*/ if ((m->aflags & PGA_WRITEABLE) == 0) return; moea64_clear_bit(mmu, m, LPTE_CHG); } /* * Clear the write and modified bits in each of the given page's mappings. */ void moea64_remove_write(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; int64_t refchg, ret; pmap_t pmap; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; powerpc_sync(); PV_PAGE_LOCK(m); refchg = 0; LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); if (!(pvo->pvo_vaddr & PVO_DEAD) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { pvo->pvo_pte.prot &= ~VM_PROT_WRITE; ret = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_PROT_UPDATE); if (ret < 0) ret = LPTE_CHG; refchg |= ret; if (pvo->pvo_pmap == kernel_pmap) isync(); } PMAP_UNLOCK(pmap); } if ((refchg | atomic_readandclear_32(&m->md.mdpg_attrs)) & LPTE_CHG) vm_page_dirty(m); vm_page_aflag_clear(m, PGA_WRITEABLE); PV_PAGE_UNLOCK(m); } /* * moea64_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int moea64_ts_referenced(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_ts_referenced: page %p is not managed", m)); return (moea64_clear_bit(mmu, m, LPTE_REF)); } /* * Modify the WIMG settings of all mappings for a page. */ void moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) { struct pvo_entry *pvo; int64_t refchg; pmap_t pmap; uint64_t lo; if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; } lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma); PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); if (!(pvo->pvo_vaddr & PVO_DEAD)) { pvo->pvo_pte.pa &= ~LPTE_WIMG; pvo->pvo_pte.pa |= lo; refchg = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_INVALIDATE); if (refchg < 0) refchg = (pvo->pvo_pte.prot & VM_PROT_WRITE) ? LPTE_CHG : 0; if ((pvo->pvo_vaddr & PVO_MANAGED) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { refchg |= atomic_readandclear_32(&m->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(m); if (refchg & LPTE_REF) vm_page_aflag_set(m, PGA_REFERENCED); } if (pvo->pvo_pmap == kernel_pmap) isync(); } PMAP_UNLOCK(pmap); } m->md.mdpg_cache_attrs = ma; PV_PAGE_UNLOCK(m); } /* * Map a wired page into kernel virtual address space. 
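 *
 * Review usage sketch (not part of this change), assuming a free
 * kernel VA 'va' and a page-aligned physical address 'pa':
 *
 *	moea64_kenter(mmu, va, pa);		// wire pa at va
 *	KASSERT(moea64_kextract(mmu, va) == pa,
 *	    ("kenter/kextract mismatch"));
 *	moea64_kremove(mmu, va);		// tear the mapping down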
*/ void moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma) { int error; struct pvo_entry *pvo, *oldpvo; pvo = alloc_pvo_entry(0); pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; pvo->pvo_pte.pa = (pa & ~ADDR_POFF) | moea64_calc_wimg(pa, ma); pvo->pvo_vaddr |= PVO_WIRED; PMAP_LOCK(kernel_pmap); oldpvo = moea64_pvo_find_va(kernel_pmap, va); if (oldpvo != NULL) moea64_pvo_remove_from_pmap(mmu, oldpvo); init_pvo_entry(pvo, kernel_pmap, va); error = moea64_pvo_enter(mmu, pvo, NULL); PMAP_UNLOCK(kernel_pmap); /* Free any dead pages */ if (oldpvo != NULL) { PV_LOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); moea64_pvo_remove_from_page(mmu, oldpvo); PV_UNLOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); free_pvo_entry(oldpvo); } if (error != 0 && error != ENOENT) panic("moea64_kenter: failed to enter va %#zx pa %#zx: %d", va, pa, error); } void moea64_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) { moea64_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); } /* * Extract the physical page address associated with the given kernel virtual * address. */ vm_paddr_t moea64_kextract(mmu_t mmu, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; /* * Shortcut the direct-mapped case when applicable. We never put * anything but 1:1 mappings below VM_MIN_KERNEL_ADDRESS. */ if (va < VM_MIN_KERNEL_ADDRESS) return (va); PMAP_LOCK(kernel_pmap); pvo = moea64_pvo_find_va(kernel_pmap, va); KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR, va)); pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo)); PMAP_UNLOCK(kernel_pmap); return (pa); } /* * Remove a wired page from kernel virtual address space. */ void moea64_kremove(mmu_t mmu, vm_offset_t va) { moea64_remove(mmu, kernel_pmap, va, va + PAGE_SIZE); } /* * Map a range of physical addresses into kernel virtual address space. * * The value passed in *virt is a suggested virtual address for the mapping. * Architectures which can support a direct-mapped physical to virtual region * can return the appropriate address within that region, leaving '*virt' * unchanged. Other architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped region. */ vm_offset_t moea64_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, vm_paddr_t pa_end, int prot) { vm_offset_t sva, va; if (hw_direct_map) { /* * Check if every page in the region is covered by the direct * map. The direct map covers all of physical memory. Use * moea64_calc_wimg() as a shortcut to see if the page is in * physical memory as a way to see if the direct map covers it. */ for (va = pa_start; va < pa_end; va += PAGE_SIZE) if (moea64_calc_wimg(va, VM_MEMATTR_DEFAULT) != LPTE_M) break; if (va == pa_end) return (pa_start); } sva = *virt; va = sva; /* XXX respect prot argument */ for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE) moea64_kenter(mmu, va, pa_start); *virt = va; return (sva); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. 
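 *
 * Review sketch (not part of this change; the struct and helper are
 * hypothetical stand-ins for the PVO list) of the capped scan below:
 *
 *	struct pv { struct pv *next; void *pmap; int dead; };
 *
 *	static int
 *	pv_in_first_16(struct pv *head, void *pmap)
 *	{
 *		struct pv *pv;
 *		int loops = 0;
 *
 *		for (pv = head; pv != NULL; pv = pv->next) {
 *			if (!pv->dead && pv->pmap == pmap)
 *				return (1);
 *			if (++loops >= 16)
 *				break;
 *		}
 *		return (0);
 *	}
 */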
boolean_t moea64_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) { int loops; struct pvo_entry *pvo; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_page_exists_quick: page %p is not managed", m)); loops = 0; rv = FALSE; PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { if (!(pvo->pvo_vaddr & PVO_DEAD) && pvo->pvo_pmap == pmap) { rv = TRUE; break; } if (++loops >= 16) break; } PV_PAGE_UNLOCK(m); return (rv); } /* * Return the number of managed mappings to the given physical page * that are wired. */ int moea64_page_wired_mappings(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) if ((pvo->pvo_vaddr & (PVO_DEAD | PVO_WIRED)) == PVO_WIRED) count++; PV_PAGE_UNLOCK(m); return (count); } static uintptr_t moea64_vsidcontext; uintptr_t moea64_get_unique_vsid(void) { u_int entropy; register_t hash; uint32_t mask; int i; entropy = 0; __asm __volatile("mftb %0" : "=r"(entropy)); mtx_lock(&moea64_slb_mutex); for (i = 0; i < NVSIDS; i += VSID_NBPW) { u_int n; /* * Create a new value by multiplying by a prime and adding in * entropy from the timebase register. This is to make the * VSID more random so that the PT hash function collides * less often. (Note that the prime causes gcc to do shifts * instead of a multiply.) */ moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy; hash = moea64_vsidcontext & (NVSIDS - 1); if (hash == 0) /* 0 is special, avoid it */ continue; n = hash >> 5; mask = 1 << (hash & (VSID_NBPW - 1)); hash = (moea64_vsidcontext & VSID_HASHMASK); if (moea64_vsid_bitmap[n] & mask) { /* collision? */ /* anything free in this bucket? */ if (moea64_vsid_bitmap[n] == 0xffffffff) { entropy = (moea64_vsidcontext >> 20); continue; } i = ffs(~moea64_vsid_bitmap[n]) - 1; mask = 1 << i; hash &= VSID_HASHMASK & ~(VSID_NBPW - 1); hash |= i; } if (hash == VSID_VRMA) /* also special, avoid this too */ continue; KASSERT(!(moea64_vsid_bitmap[n] & mask), ("Allocating in-use VSID %#zx\n", hash)); moea64_vsid_bitmap[n] |= mask; mtx_unlock(&moea64_slb_mutex); return (hash); } mtx_unlock(&moea64_slb_mutex); panic("%s: out of segments",__func__); } #ifdef __powerpc64__ void moea64_pinit(mmu_t mmu, pmap_t pmap) { RB_INIT(&pmap->pmap_pvo); pmap->pm_slb_tree_root = slb_alloc_tree(); pmap->pm_slb = slb_alloc_user_cache(); pmap->pm_slb_len = 0; } #else void moea64_pinit(mmu_t mmu, pmap_t pmap) { int i; uint32_t hash; RB_INIT(&pmap->pmap_pvo); if (pmap_bootstrapped) pmap->pmap_phys = (pmap_t)moea64_kextract(mmu, (vm_offset_t)pmap); else pmap->pmap_phys = pmap; /* * Allocate some segment registers for this pmap. */ hash = moea64_get_unique_vsid(); for (i = 0; i < 16; i++) pmap->pm_sr[i] = VSID_MAKE(i, hash); KASSERT(pmap->pm_sr[0] != 0, ("moea64_pinit: pm_sr[0] = 0")); } #endif /* * Initialize the pmap associated with process 0. */ void moea64_pinit0(mmu_t mmu, pmap_t pm) { PMAP_LOCK_INIT(pm); moea64_pinit(mmu, pm); bzero(&pm->pm_stats, sizeof(pm->pm_stats)); } /* * Set the physical protection on the specified range of this map as requested. */ static void moea64_pvo_protect(mmu_t mmu, pmap_t pm, struct pvo_entry *pvo, vm_prot_t prot) { struct vm_page *pg; vm_prot_t oldprot; int32_t refchg; PMAP_LOCK_ASSERT(pm, MA_OWNED); /* * Change the protection of the page.
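 *
 * Review sketch (not part of this change; the helper name is
 * hypothetical): the ref/chg convention used here and throughout the
 * file. A negative return from the PTE layer means the bits were lost
 * to eviction, so assume the worst for a formerly writable mapping:
 *
 *	static int64_t
 *	fold_refchg(int64_t refchg, int was_writable)
 *	{
 *		if (refchg < 0)
 *			return (was_writable ? LPTE_CHG : 0);
 *		return (refchg & (LPTE_REF | LPTE_CHG));
 *	}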
*/ oldprot = pvo->pvo_pte.prot; pvo->pvo_pte.prot = prot; pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); /* * If the PVO is in the page table, update mapping */ refchg = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_PROT_UPDATE); if (refchg < 0) refchg = (oldprot & VM_PROT_WRITE) ? LPTE_CHG : 0; if (pm != kernel_pmap && pg != NULL && !(pg->aflags & PGA_EXECUTABLE) && (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { if ((pg->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(pg, PGA_EXECUTABLE); moea64_syncicache(mmu, pm, PVO_VADDR(pvo), pvo->pvo_pte.pa & LPTE_RPGN, PAGE_SIZE); } /* * Update vm about the REF/CHG bits if the page is managed and we have * removed write access. */ if (pg != NULL && (pvo->pvo_vaddr & PVO_MANAGED) && (oldprot & VM_PROT_WRITE)) { refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(pg); if (refchg & LPTE_REF) vm_page_aflag_set(pg, PGA_REFERENCED); } } void moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { struct pvo_entry *pvo, *tpvo, key; CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm, sva, eva, prot); KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap, ("moea64_protect: non current pmap")); if ((prot & VM_PROT_READ) == VM_PROT_NONE) { moea64_remove(mmu, pm, sva, eva); return; } PMAP_LOCK(pm); key.pvo_vaddr = sva; for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); moea64_pvo_protect(mmu, pm, pvo, prot); } PMAP_UNLOCK(pm); } /* * Map a list of wired pages into kernel virtual address space. This is * intended for temporary mappings which do not need page modification or * references recorded. Existing mappings in the region are overwritten. */ void moea64_qenter(mmu_t mmu, vm_offset_t va, vm_page_t *m, int count) { while (count-- > 0) { moea64_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); va += PAGE_SIZE; m++; } } /* * Remove page mappings from kernel virtual address space. Intended for * temporary mappings entered by moea64_qenter. 
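 *
 * Review usage sketch (not part of this change; the wrapper is a
 * hypothetical illustration): a temporary window pairs the two calls
 * around the access:
 *
 *	static void
 *	zero_pages_via_window(mmu_t mmu, vm_offset_t va, vm_page_t *m,
 *	    int npages)
 *	{
 *		moea64_qenter(mmu, va, m, npages);	// map the batch
 *		bzero((void *)va, (size_t)npages * PAGE_SIZE);
 *		moea64_qremove(mmu, va, npages);	// unmap it again
 *	}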
*/ void moea64_qremove(mmu_t mmu, vm_offset_t va, int count) { while (count-- > 0) { moea64_kremove(mmu, va); va += PAGE_SIZE; } } void moea64_release_vsid(uint64_t vsid) { int idx, mask; mtx_lock(&moea64_slb_mutex); idx = vsid & (NVSIDS-1); mask = 1 << (idx % VSID_NBPW); idx /= VSID_NBPW; KASSERT(moea64_vsid_bitmap[idx] & mask, ("Freeing unallocated VSID %#jx", vsid)); moea64_vsid_bitmap[idx] &= ~mask; mtx_unlock(&moea64_slb_mutex); } void moea64_release(mmu_t mmu, pmap_t pmap) { /* * Free segment registers' VSIDs */ #ifdef __powerpc64__ slb_free_tree(pmap); slb_free_user_cache(pmap->pm_slb); #else KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0")); moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0])); #endif } /* * Remove all pages mapped by the specified pmap */ void moea64_remove_pages(mmu_t mmu, pmap_t pm) { struct pvo_entry *pvo, *tpvo; struct pvo_tree tofree; RB_INIT(&tofree); PMAP_LOCK(pm); RB_FOREACH_SAFE(pvo, pvo_tree, &pm->pmap_pvo, tpvo) { if (pvo->pvo_vaddr & PVO_WIRED) continue; /* * For locking reasons, remove this from the page table and * pmap, but save delinking from the vm_page for a second * pass */ moea64_pvo_remove_from_pmap(mmu, pvo); RB_INSERT(pvo_tree, &tofree, pvo); } PMAP_UNLOCK(pm); RB_FOREACH_SAFE(pvo, pvo_tree, &tofree, tpvo) { PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN); moea64_pvo_remove_from_page(mmu, pvo); PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN); RB_REMOVE(pvo_tree, &tofree, pvo); free_pvo_entry(pvo); } } /* * Remove the given range of addresses from the specified map. */ void moea64_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) { struct pvo_entry *pvo, *tpvo, key; struct pvo_tree tofree; /* * Perform an unsynchronized read. This is, however, safe. */ if (pm->pm_stats.resident_count == 0) return; key.pvo_vaddr = sva; RB_INIT(&tofree); PMAP_LOCK(pm); for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); /* * For locking reasons, remove this from the page table and * pmap, but save delinking from the vm_page for a second * pass */ moea64_pvo_remove_from_pmap(mmu, pvo); RB_INSERT(pvo_tree, &tofree, pvo); } PMAP_UNLOCK(pm); RB_FOREACH_SAFE(pvo, pvo_tree, &tofree, tpvo) { PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN); moea64_pvo_remove_from_page(mmu, pvo); PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN); RB_REMOVE(pvo_tree, &tofree, pvo); free_pvo_entry(pvo); } } /* * Remove physical page from all pmaps in which it resides. moea64_pvo_remove() * will reflect changes in pte's back to the vm_page. */ void moea64_remove_all(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo, *next_pvo; struct pvo_head freequeue; int wasdead; pmap_t pmap; LIST_INIT(&freequeue); PV_PAGE_LOCK(m); LIST_FOREACH_SAFE(pvo, vm_page_to_pvoh(m), pvo_vlink, next_pvo) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); wasdead = (pvo->pvo_vaddr & PVO_DEAD); if (!wasdead) moea64_pvo_remove_from_pmap(mmu, pvo); moea64_pvo_remove_from_page(mmu, pvo); if (!wasdead) LIST_INSERT_HEAD(&freequeue, pvo, pvo_vlink); PMAP_UNLOCK(pmap); } KASSERT(!pmap_page_is_mapped(m), ("Page still has mappings")); KASSERT(!(m->aflags & PGA_WRITEABLE), ("Page still writable")); PV_PAGE_UNLOCK(m); /* Clean up UMA allocations */ LIST_FOREACH_SAFE(pvo, &freequeue, pvo_vlink, next_pvo) free_pvo_entry(pvo); } /* * Allocate a physical page of memory directly from the phys_avail map. * Can only be called from moea64_bootstrap before avail start and end are * calculated. 
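 *
 * Review worked example (not part of this change): with
 * phys_avail = { 0x3000, 0x20000 } and a request for 0x4000 bytes
 * aligned to 0x4000, the start rounds up to
 * s = (0x3000 + 0x4000 - 1) & ~(0x4000 - 1) = 0x4000 and e = 0x8000;
 * since s != 0x3000 and e != 0x20000, the entry is split in place into
 * { 0x3000, 0x4000 } and { 0x8000, 0x20000 }.
 */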
vm_offset_t moea64_bootstrap_alloc(vm_size_t size, u_int align) { vm_offset_t s, e; int i, j; size = round_page(size); for (i = 0; phys_avail[i + 1] != 0; i += 2) { if (align != 0) s = (phys_avail[i] + align - 1) & ~(align - 1); else s = phys_avail[i]; e = s + size; if (s < phys_avail[i] || e > phys_avail[i + 1]) continue; if (s + size > platform_real_maxaddr()) continue; if (s == phys_avail[i]) { phys_avail[i] += size; } else if (e == phys_avail[i + 1]) { phys_avail[i + 1] -= size; } else { for (j = phys_avail_count * 2; j > i; j -= 2) { phys_avail[j] = phys_avail[j - 2]; phys_avail[j + 1] = phys_avail[j - 1]; } phys_avail[i + 3] = phys_avail[i + 1]; phys_avail[i + 1] = s; phys_avail[i + 2] = e; phys_avail_count++; } return (s); } panic("moea64_bootstrap_alloc: could not allocate memory"); } static int moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo, struct pvo_head *pvo_head) { int first = 0, err; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); KASSERT(moea64_pvo_find_va(pvo->pvo_pmap, PVO_VADDR(pvo)) == NULL, ("Existing mapping for VA %#jx", (uintmax_t)PVO_VADDR(pvo))); moea64_pvo_enter_calls++; /* * Add to pmap list */ RB_INSERT(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo); /* * Remember if the list was empty and therefore will be the first * item. */ if (pvo_head != NULL) { if (LIST_FIRST(pvo_head) == NULL) first = 1; LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink); } if (pvo->pvo_vaddr & PVO_WIRED) pvo->pvo_pmap->pm_stats.wired_count++; pvo->pvo_pmap->pm_stats.resident_count++; /* * Insert it into the hardware page table */ err = MOEA64_PTE_INSERT(mmu, pvo); if (err != 0) { panic("moea64_pvo_enter: overflow"); } moea64_pvo_entries++; if (pvo->pvo_pmap == kernel_pmap) isync(); #ifdef __powerpc64__ /* * Make sure all our bootstrap mappings are in the SLB as soon * as virtual memory is switched on. */ if (!pmap_bootstrapped) moea64_bootstrap_slb_prefault(PVO_VADDR(pvo), pvo->pvo_vaddr & PVO_LARGE); #endif return (first ? ENOENT : 0); } static void moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo) { struct vm_page *pg; int32_t refchg; KASSERT(pvo->pvo_pmap != NULL, ("Trying to remove PVO with no pmap")); PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); KASSERT(!(pvo->pvo_vaddr & PVO_DEAD), ("Trying to remove dead PVO")); /* * If there is an active pte entry, we need to deactivate it */ refchg = MOEA64_PTE_UNSET(mmu, pvo); if (refchg < 0) { /* * If it was evicted from the page table, be pessimistic and * dirty the page. */ if (pvo->pvo_pte.prot & VM_PROT_WRITE) refchg = LPTE_CHG; else refchg = 0; } /* * Update our statistics. */ pvo->pvo_pmap->pm_stats.resident_count--; if (pvo->pvo_vaddr & PVO_WIRED) pvo->pvo_pmap->pm_stats.wired_count--; /* * Remove this PVO from the pmap list.
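 *
 * Review note (not part of this change): removal is deliberately
 * two-phase, as the callers above show; this function runs under the
 * pmap lock, and delinking from the vm_page happens afterwards under
 * the PV lock:
 *
 *	moea64_pvo_remove_from_pmap(mmu, pvo);	// pmap and page table
 *	PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN);
 *	moea64_pvo_remove_from_page(mmu, pvo);	// vm_page side
 *	PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN);
 *	free_pvo_entry(pvo);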
*/ RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo); /* * Mark this for the next sweep */ pvo->pvo_vaddr |= PVO_DEAD; /* Send RC bits to VM */ if ((pvo->pvo_vaddr & PVO_MANAGED) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); if (pg != NULL) { refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(pg); if (refchg & LPTE_REF) vm_page_aflag_set(pg, PGA_REFERENCED); } } } static void moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo) { struct vm_page *pg; KASSERT(pvo->pvo_vaddr & PVO_DEAD, ("Trying to delink live page")); /* Use NULL pmaps as a sentinel for races in page deletion */ if (pvo->pvo_pmap == NULL) return; pvo->pvo_pmap = NULL; /* * Update vm about page writeability/executability if managed */ PV_LOCKASSERT(pvo->pvo_pte.pa & LPTE_RPGN); pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); if ((pvo->pvo_vaddr & PVO_MANAGED) && pg != NULL) { LIST_REMOVE(pvo, pvo_vlink); if (LIST_EMPTY(vm_page_to_pvoh(pg))) vm_page_aflag_clear(pg, PGA_WRITEABLE | PGA_EXECUTABLE); } moea64_pvo_entries--; moea64_pvo_remove_calls++; } static struct pvo_entry * moea64_pvo_find_va(pmap_t pm, vm_offset_t va) { struct pvo_entry key; PMAP_LOCK_ASSERT(pm, MA_OWNED); key.pvo_vaddr = va & ~ADDR_POFF; return (RB_FIND(pvo_tree, &pm->pmap_pvo, &key)); } static boolean_t moea64_query_bit(mmu_t mmu, vm_page_t m, uint64_t ptebit) { struct pvo_entry *pvo; int64_t ret; boolean_t rv; /* * See if this bit is stored in the page already. */ if (m->md.mdpg_attrs & ptebit) return (TRUE); /* * Examine each PTE. Sync so that any pending REF/CHG bits are * flushed to the PTEs. */ rv = FALSE; powerpc_sync(); PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { ret = 0; /* * See if this pvo has a valid PTE. if so, fetch the * REF/CHG bits from the valid PTE. If the appropriate * ptebit is set, return success. */ PMAP_LOCK(pvo->pvo_pmap); if (!(pvo->pvo_vaddr & PVO_DEAD)) ret = MOEA64_PTE_SYNCH(mmu, pvo); PMAP_UNLOCK(pvo->pvo_pmap); if (ret > 0) { atomic_set_32(&m->md.mdpg_attrs, ret & (LPTE_CHG | LPTE_REF)); if (ret & ptebit) { rv = TRUE; break; } } } PV_PAGE_UNLOCK(m); return (rv); } static u_int moea64_clear_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit) { u_int count; struct pvo_entry *pvo; int64_t ret; /* * Sync so that any pending REF/CHG bits are flushed to the PTEs (so * we can reset the right ones). */ powerpc_sync(); /* * For each pvo entry, clear the pte's ptebit. */ count = 0; PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { ret = 0; PMAP_LOCK(pvo->pvo_pmap); if (!(pvo->pvo_vaddr & PVO_DEAD)) ret = MOEA64_PTE_CLEAR(mmu, pvo, ptebit); PMAP_UNLOCK(pvo->pvo_pmap); if (ret > 0 && (ret & ptebit)) count++; } atomic_clear_32(&m->md.mdpg_attrs, ptebit); PV_PAGE_UNLOCK(m); return (count); } boolean_t moea64_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { struct pvo_entry *pvo, key; vm_offset_t ppa; int error = 0; PMAP_LOCK(kernel_pmap); key.pvo_vaddr = ppa = pa & ~ADDR_POFF; for (pvo = RB_FIND(pvo_tree, &kernel_pmap->pmap_pvo, &key); ppa < pa + size; ppa += PAGE_SIZE, pvo = RB_NEXT(pvo_tree, &kernel_pmap->pmap_pvo, pvo)) { if (pvo == NULL || (pvo->pvo_pte.pa & LPTE_RPGN) != ppa) { error = EFAULT; break; } } PMAP_UNLOCK(kernel_pmap); return (error); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. 
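 *
 * Review sketch (not part of this change; the helper name and plain
 * uint64_t types are hypothetical): the rounding below maps whole
 * pages but hands back a pointer that keeps the sub-page offset:
 *
 *	static uint64_t			// returns the span to map
 *	mapdev_span(uint64_t pa, uint64_t size, uint64_t *base,
 *	    uint64_t *off)
 *	{
 *		*base = pa & ~(uint64_t)PAGE_MASK;	// trunc_page(pa)
 *		*off = pa & PAGE_MASK;
 *		return ((*off + size + PAGE_MASK) &
 *		    ~(uint64_t)PAGE_MASK);		// roundup2()
 *	}
 *
 * E.g. pa = 0x1fff with size = 2 crosses a page boundary: base 0x1000,
 * off 0xfff, span 0x2000.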
*/ void * moea64_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) { vm_offset_t va, tmpva, ppa, offset; ppa = trunc_page(pa); offset = pa & PAGE_MASK; size = roundup2(offset + size, PAGE_SIZE); va = kva_alloc(size); if (!va) panic("moea64_mapdev: Couldn't alloc kernel virtual memory"); for (tmpva = va; size > 0;) { moea64_kenter_attr(mmu, tmpva, ppa, ma); size -= PAGE_SIZE; tmpva += PAGE_SIZE; ppa += PAGE_SIZE; } return ((void *)(va + offset)); } void * moea64_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { return moea64_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT); } void moea64_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) { vm_offset_t base, offset; base = trunc_page(va); offset = va & PAGE_MASK; size = roundup2(offset + size, PAGE_SIZE); kva_free(base, size); } void moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) { struct pvo_entry *pvo; vm_offset_t lim; vm_paddr_t pa; vm_size_t len; PMAP_LOCK(pm); while (sz > 0) { lim = round_page(va); len = MIN(lim - va, sz); pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); if (pvo != NULL && !(pvo->pvo_pte.pa & LPTE_I)) { pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va & ADDR_POFF); moea64_syncicache(mmu, pm, va, pa, len); } va += len; sz -= len; } PMAP_UNLOCK(pm); } void moea64_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va) { *va = (void *)pa; } extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1]; void moea64_scan_init(mmu_t mmu) { struct pvo_entry *pvo; vm_offset_t va; int i; if (!do_minidump) { /* Initialize phys. segments for dumpsys(). */ memset(&dump_map, 0, sizeof(dump_map)); mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); for (i = 0; i < pregions_sz; i++) { dump_map[i].pa_start = pregions[i].mr_start; dump_map[i].pa_size = pregions[i].mr_size; } return; } /* Virtual segments for minidumps: */ memset(&dump_map, 0, sizeof(dump_map)); /* 1st: kernel .data and .bss. */ dump_map[0].pa_start = trunc_page((uintptr_t)_etext); dump_map[0].pa_size = round_page((uintptr_t)_end) - dump_map[0].pa_start; /* 2nd: msgbuf and tables (see pmap_bootstrap()). */ dump_map[1].pa_start = (vm_paddr_t)msgbufp->msg_ptr; dump_map[1].pa_size = round_page(msgbufp->msg_size); /* 3rd: kernel VM. */ va = dump_map[1].pa_start + dump_map[1].pa_size; /* Find start of next chunk (from va). */ while (va < virtual_end) { /* Don't dump the buffer cache. */ if (va >= kmi.buffer_sva && va < kmi.buffer_eva) { va = kmi.buffer_eva; continue; } pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF); if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD)) break; va += PAGE_SIZE; } if (va < virtual_end) { dump_map[2].pa_start = va; va += PAGE_SIZE; /* Find last page in chunk. */ while (va < virtual_end) { /* Don't run into the buffer cache. */ if (va == kmi.buffer_sva) break; pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF); if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD)) break; va += PAGE_SIZE; } dump_map[2].pa_size = va - dump_map[2].pa_start; } } Index: projects/powernv/powerpc/aim/moea64_native.c =================================================================== --- projects/powernv/powerpc/aim/moea64_native.c (revision 290990) +++ projects/powernv/powerpc/aim/moea64_native.c (revision 290991) @@ -1,658 +1,662 @@ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Matt Thomas of Allegro Networks, Inc. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $ */ /*- * Copyright (C) 2001 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Native 64-bit page table operations for running without a hypervisor. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu_oea64.h" #include "mmu_if.h" #include "moea64_if.h" #define PTESYNC() __asm __volatile("ptesync"); #define TLBSYNC() __asm __volatile("tlbsync; ptesync"); #define SYNC() __asm __volatile("sync"); #define EIEIO() __asm __volatile("eieio"); #define VSID_HASH_MASK 0x0000007fffffffffULL static __inline void TLBIE(uint64_t vpn) { #ifndef __powerpc64__ register_t vpn_hi, vpn_lo; register_t msr; register_t scratch, intr; #endif static volatile u_int tlbie_lock = 0; vpn <<= ADDR_PIDX_SHFT; vpn &= ~(0xffffULL << 48); /* Hobo spinlock: we need stronger guarantees than mutexes provide */ while (!atomic_cmpset_int(&tlbie_lock, 0, 1)); isync(); /* Flush instruction queue once lock acquired */ #ifdef __powerpc64__ __asm __volatile("tlbie %0" :: "r"(vpn) : "memory"); __asm __volatile("eieio; tlbsync; ptesync" ::: "memory"); #else vpn_hi = (uint32_t)(vpn >> 32); vpn_lo = (uint32_t)vpn; intr = intr_disable(); __asm __volatile("\ mfmsr %0; \ mr %1, %0; \ insrdi %1,%5,1,0; \ mtmsrd %1; isync; \ \ sld %1,%2,%4; \ or %1,%1,%3; \ tlbie %1; \ \ mtmsrd %0; isync; \ eieio; \ tlbsync; \ ptesync;" : "=r"(msr), "=r"(scratch) : "r"(vpn_hi), "r"(vpn_lo), "r"(32), "r"(1) : "memory"); intr_restore(intr); #endif /* No barriers or special ops -- taken care of by ptesync above */ tlbie_lock = 0; } #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) #define ENABLE_TRANS(msr) mtmsr(msr) /* * PTEG data. */ static volatile struct lpte *moea64_pteg_table; static struct rwlock moea64_eviction_lock; /* * PTE calls. */ static int moea64_pte_insert_native(mmu_t, struct pvo_entry *); static int64_t moea64_pte_synch_native(mmu_t, struct pvo_entry *); static int64_t moea64_pte_clear_native(mmu_t, struct pvo_entry *, uint64_t); static int64_t moea64_pte_replace_native(mmu_t, struct pvo_entry *, int); static int64_t moea64_pte_unset_native(mmu_t mmu, struct pvo_entry *); /* * Utility routines. 
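 *
 * Review sketch (not part of this change; the function names are
 * hypothetical): the "hobo spinlock" serializing TLBIE() above,
 * reduced to its acquire/release pattern. The release can be a plain
 * store only because the ptesync issued inside the critical section
 * already ordered everything before it:
 *
 *	static volatile u_int tlbie_lk;
 *
 *	static void
 *	tlbie_lk_acquire(void)
 *	{
 *		while (!atomic_cmpset_int(&tlbie_lk, 0, 1))
 *			;		// spin
 *		isync();		// flush the instruction queue
 *	}
 *
 *	static void
 *	tlbie_lk_release(void)
 *	{
 *		tlbie_lk = 0;
 *	}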
*/ static void moea64_bootstrap_native(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend); static void moea64_cpu_bootstrap_native(mmu_t, int ap); static void tlbia(void); static mmu_method_t moea64_native_methods[] = { /* Internal interfaces */ MMUMETHOD(mmu_bootstrap, moea64_bootstrap_native), MMUMETHOD(mmu_cpu_bootstrap, moea64_cpu_bootstrap_native), MMUMETHOD(moea64_pte_synch, moea64_pte_synch_native), MMUMETHOD(moea64_pte_clear, moea64_pte_clear_native), MMUMETHOD(moea64_pte_unset, moea64_pte_unset_native), MMUMETHOD(moea64_pte_replace, moea64_pte_replace_native), MMUMETHOD(moea64_pte_insert, moea64_pte_insert_native), { 0, 0 } }; MMU_DEF_INHERIT(oea64_mmu_native, MMU_TYPE_G5, moea64_native_methods, 0, oea64_mmu); static int64_t moea64_pte_synch_native(mmu_t mmu, struct pvo_entry *pvo) { volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; struct lpte properpt; uint64_t ptelo; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); moea64_pte_from_pvo(pvo, &properpt); rw_rlock(&moea64_eviction_lock); - if ((pt->pte_hi & LPTE_AVPN_MASK) != + if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != (properpt.pte_hi & LPTE_AVPN_MASK)) { /* Evicted */ rw_runlock(&moea64_eviction_lock); return (-1); } PTESYNC(); ptelo = be64toh(pt->pte_lo); rw_runlock(&moea64_eviction_lock); return (ptelo & (LPTE_REF | LPTE_CHG)); } static int64_t moea64_pte_clear_native(mmu_t mmu, struct pvo_entry *pvo, uint64_t ptebit) { volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; struct lpte properpt; uint64_t ptelo; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); moea64_pte_from_pvo(pvo, &properpt); rw_rlock(&moea64_eviction_lock); - if ((pt->pte_hi & LPTE_AVPN_MASK) != + if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != (properpt.pte_hi & LPTE_AVPN_MASK)) { /* Evicted */ rw_runlock(&moea64_eviction_lock); return (-1); } if (ptebit == LPTE_REF) { /* See "Resetting the Reference Bit" in arch manual */ PTESYNC(); /* 2-step here safe: precision is not guaranteed */ - ptelo = pt->pte_lo; + ptelo = be64toh(pt->pte_lo); /* One-byte store to avoid touching the C bit */ ((volatile uint8_t *)(&pt->pte_lo))[6] = +#if BYTE_ORDER == BIG_ENDIAN ((uint8_t *)(&properpt.pte_lo))[6]; +#else + ((uint8_t *)(&properpt.pte_lo))[1]; +#endif rw_runlock(&moea64_eviction_lock); critical_enter(); TLBIE(pvo->pvo_vpn); critical_exit(); } else { rw_runlock(&moea64_eviction_lock); ptelo = moea64_pte_unset_native(mmu, pvo); moea64_pte_insert_native(mmu, pvo); } return (ptelo & (LPTE_REF | LPTE_CHG)); } static int64_t moea64_pte_unset_native(mmu_t mmu, struct pvo_entry *pvo) { volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; struct lpte properpt; uint64_t ptelo; moea64_pte_from_pvo(pvo, &properpt); rw_rlock(&moea64_eviction_lock); - if ((pt->pte_hi & LPTE_AVPN_MASK) != + if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != (properpt.pte_hi & LPTE_AVPN_MASK)) { /* Evicted */ moea64_pte_overflow--; rw_runlock(&moea64_eviction_lock); return (-1); } /* * Invalidate the pte, briefly locking it to collect RC bits. No * atomics needed since this is protected against eviction by the lock.
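 *
 * Review restatement (not part of this change) of the sequence below,
 * in host byte order (the table entry itself is stored big-endian):
 *  1. clear LPTE_VALID and set LPTE_LOCKED in pte_hi;
 *  2. ptesync, then TLBIE the VPN so no stale translation remains;
 *  3. read the now-stable RC bits out of pte_lo;
 *  4. zero the low-order word of pte_hi, dropping LPTE_LOCKED.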
*/ isync(); critical_enter(); - pt->pte_hi = (pt->pte_hi & ~LPTE_VALID) | LPTE_LOCKED; + pt->pte_hi = be64toh((pt->pte_hi & ~LPTE_VALID) | LPTE_LOCKED); PTESYNC(); TLBIE(pvo->pvo_vpn); ptelo = be64toh(pt->pte_lo); *((volatile int32_t *)(&pt->pte_hi) + 1) = 0; /* Release lock */ critical_exit(); rw_runlock(&moea64_eviction_lock); /* Keep statistics */ moea64_pte_valid--; return (ptelo & (LPTE_CHG | LPTE_REF)); } static int64_t moea64_pte_replace_native(mmu_t mmu, struct pvo_entry *pvo, int flags) { volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; struct lpte properpt; int64_t ptelo; if (flags == 0) { /* Just some software bits changing. */ moea64_pte_from_pvo(pvo, &properpt); rw_rlock(&moea64_eviction_lock); - if ((pt->pte_hi & LPTE_AVPN_MASK) != + if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != (properpt.pte_hi & LPTE_AVPN_MASK)) { rw_runlock(&moea64_eviction_lock); return (-1); } - pt->pte_hi = properpt.pte_hi; - ptelo = pt->pte_lo; + pt->pte_hi = htobe64(properpt.pte_hi); + ptelo = be64toh(pt->pte_lo); rw_runlock(&moea64_eviction_lock); } else { /* Otherwise, need reinsertion and deletion */ ptelo = moea64_pte_unset_native(mmu, pvo); moea64_pte_insert_native(mmu, pvo); } return (ptelo); } static void moea64_cpu_bootstrap_native(mmu_t mmup, int ap) { int i = 0; #ifdef __powerpc64__ struct slb *slb = PCPU_GET(slb); register_t seg0; #endif /* * Initialize segment registers and MMU */ mtmsr(mfmsr() & ~PSL_DR & ~PSL_IR); /* * Install kernel SLB entries */ #ifdef __powerpc64__ __asm __volatile ("slbia"); __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0)); for (i = 0; i < 64; i++) { if (!(slb[i].slbe & SLBE_VALID)) continue; __asm __volatile ("slbmte %0, %1" :: "r"(slb[i].slbv), "r"(slb[i].slbe)); } #else for (i = 0; i < 16; i++) mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]); #endif /* * Install page table */ __asm __volatile ("ptesync; mtsdr1 %0; isync" :: "r"((uintptr_t)moea64_pteg_table | (uintptr_t)(flsl(moea64_pteg_mask >> 11)))); tlbia(); } static void moea64_bootstrap_native(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { vm_size_t size; vm_offset_t off; vm_paddr_t pa; register_t msr; moea64_early_bootstrap(mmup, kernelstart, kernelend); /* * Allocate PTEG table. */ size = moea64_pteg_count * sizeof(struct lpteg); CTR2(KTR_PMAP, "moea64_bootstrap: %d PTEGs, %d bytes", moea64_pteg_count, size); rw_init(&moea64_eviction_lock, "pte eviction"); /* * We now need to allocate memory. This memory, to be allocated, * has to reside in a page table. The page table we are about to * allocate. We don't have BAT. So drop to data real mode for a minute * as a measure of last resort. We do this a couple times. */ moea64_pteg_table = (struct lpte *)moea64_bootstrap_alloc(size, size); DISABLE_TRANS(msr); bzero(__DEVOLATILE(void *, moea64_pteg_table), moea64_pteg_count * sizeof(struct lpteg)); ENABLE_TRANS(msr); CTR1(KTR_PMAP, "moea64_bootstrap: PTEG table at %p", moea64_pteg_table); moea64_mid_bootstrap(mmup, kernelstart, kernelend); /* * Add a mapping for the page table itself if there is no direct map. 
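 *
 * Note that both the bzero() above and the pmap_kenter() loop below run
 * bracketed by DISABLE_TRANS()/ENABLE_TRANS(), since the table is not
 * mapped yet. The usage pattern, as a sketch (illustrative only):
 *
 *	register_t msr;
 *
 *	DISABLE_TRANS(msr);	/* msr = mfmsr(); mtmsr(msr & ~PSL_DR) */
 *	/* ... touch physical addresses directly ... */
 *	ENABLE_TRANS(msr);	/* mtmsr(msr) */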
*/ if (!hw_direct_map) { size = moea64_pteg_count * sizeof(struct lpteg); off = (vm_offset_t)(moea64_pteg_table); DISABLE_TRANS(msr); for (pa = off; pa < off + size; pa += PAGE_SIZE) pmap_kenter(pa, pa); ENABLE_TRANS(msr); } /* Bring up virtual memory */ moea64_late_bootstrap(mmup, kernelstart, kernelend); } static void tlbia(void) { vm_offset_t i; #ifndef __powerpc64__ register_t msr, scratch; #endif TLBSYNC(); for (i = 0; i < 0xFF000; i += 0x00001000) { #ifdef __powerpc64__ __asm __volatile("tlbiel %0" :: "r"(i)); #else __asm __volatile("\ mfmsr %0; \ mr %1, %0; \ insrdi %1,%3,1,0; \ mtmsrd %1; \ isync; \ \ tlbiel %2; \ \ mtmsrd %0; \ isync;" : "=r"(msr), "=r"(scratch) : "r"(i), "r"(1)); #endif } EIEIO(); TLBSYNC(); } static int atomic_pte_lock(volatile struct lpte *pte, uint64_t bitmask, uint64_t *oldhi) { int ret; uint32_t oldhihalf; /* * Note: in principle, if just the locked bit were set here, we * could avoid needing the eviction lock. However, eviction occurs * so rarely that it isn't worth bothering about in practice. */ __asm __volatile ( "1:\tlwarx %1, 0, %3\n\t" /* load old value */ "and. %0,%1,%4\n\t" /* check if any bits set */ "bne 2f\n\t" /* exit if any set */ "stwcx. %5, 0, %3\n\t" /* attempt to store */ "bne- 1b\n\t" /* spin if failed */ "li %0, 1\n\t" /* success - retval = 1 */ "b 3f\n\t" /* we've succeeded */ "2:\n\t" "stwcx. %1, 0, %3\n\t" /* clear reservation (74xx) */ "li %0, 0\n\t" /* failure - retval = 0 */ "3:\n\t" : "=&r" (ret), "=&r"(oldhihalf), "=m" (pte->pte_hi) : "r" ((volatile char *)&pte->pte_hi + 4), "r" ((uint32_t)bitmask), "r" ((uint32_t)LPTE_LOCKED), "m" (pte->pte_hi) : "cr0", "cr1", "cr2", "memory"); *oldhi = (pte->pte_hi & 0xffffffff00000000ULL) | oldhihalf; return (ret); } static uintptr_t moea64_insert_to_pteg_native(struct lpte *pvo_pt, uintptr_t slotbase, uint64_t mask) { volatile struct lpte *pt; uint64_t oldptehi, va; uintptr_t k; int i, j; /* Start at a random slot */ i = mftb() % 8; for (j = 0; j < 8; j++) { k = slotbase + (i + j) % 8; pt = &moea64_pteg_table[k]; /* Invalidate and seize lock only if no bits in mask set */ if (atomic_pte_lock(pt, mask, &oldptehi)) /* Lock obtained */ break; } if (j == 8) return (-1); if (oldptehi & LPTE_VALID) { KASSERT(!(oldptehi & LPTE_WIRED), ("Unmapped wired entry")); /* * Need to invalidate old entry completely: see * "Modifying a Page Table Entry". Need to reconstruct * the virtual address for the outgoing entry to do that. */ if (oldptehi & LPTE_BIG) va = oldptehi >> moea64_large_page_shift; else va = oldptehi >> ADDR_PIDX_SHFT; if (oldptehi & LPTE_HID) va = (((k >> 3) ^ moea64_pteg_mask) ^ va) & VSID_HASH_MASK; else va = ((k >> 3) ^ va) & VSID_HASH_MASK; va |= (oldptehi & LPTE_AVPN_MASK) << (ADDR_API_SHFT64 - ADDR_PIDX_SHFT); PTESYNC(); TLBIE(va); moea64_pte_valid--; moea64_pte_overflow++; } /* * Update the PTE as per "Adding a Page Table Entry". Lock is released * by setting the high doubleword. */ - pt->pte_lo = pvo_pt->pte_lo; + pt->pte_lo = htobe64(pvo_pt->pte_lo); EIEIO(); - pt->pte_hi = pvo_pt->pte_hi; + pt->pte_hi = htobe64(pvo_pt->pte_hi); PTESYNC(); /* Keep statistics */ moea64_pte_valid++; return (k); } static int moea64_pte_insert_native(mmu_t mmu, struct pvo_entry *pvo) { struct lpte insertpt; uintptr_t slot; /* Initialize PTE */ moea64_pte_from_pvo(pvo, &insertpt); /* Make sure further insertion is locked out during evictions */ rw_rlock(&moea64_eviction_lock); /* * First try primary hash.
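 *
 * Every PTE hashes to two candidate PTEGs; the secondary group and its
 * hash ID are derived from the primary by XOR, which is exactly what
 * the fallback path below does. In sketch form (illustrative only):
 *
 *	insertpt.pte_hi ^= LPTE_HID;			/* flip hash ID */
 *	pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);	/* other PTEG */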
*/ pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */ slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, LPTE_VALID | LPTE_WIRED | LPTE_LOCKED); if (slot != -1) { rw_runlock(&moea64_eviction_lock); pvo->pvo_pte.slot = slot; return (0); } /* * Now try secondary hash. */ pvo->pvo_vaddr ^= PVO_HID; insertpt.pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, LPTE_VALID | LPTE_WIRED | LPTE_LOCKED); if (slot != -1) { rw_runlock(&moea64_eviction_lock); pvo->pvo_pte.slot = slot; return (0); } /* * Out of luck. Find a PTE to sacrifice. */ /* Lock out all insertions for a bit */ if (!rw_try_upgrade(&moea64_eviction_lock)) { rw_runlock(&moea64_eviction_lock); rw_wlock(&moea64_eviction_lock); } slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, LPTE_WIRED | LPTE_LOCKED); if (slot != -1) { rw_wunlock(&moea64_eviction_lock); pvo->pvo_pte.slot = slot; return (0); } /* Try other hash table. Now we're getting desperate... */ pvo->pvo_vaddr ^= PVO_HID; insertpt.pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, LPTE_WIRED | LPTE_LOCKED); if (slot != -1) { rw_wunlock(&moea64_eviction_lock); pvo->pvo_pte.slot = slot; return (0); } /* No freeable slots in either PTEG? We're hosed. */ rw_wunlock(&moea64_eviction_lock); panic("moea64_pte_insert: overflow"); return (-1); } Index: projects/powernv/powerpc/ofw/ofw_machdep.c =================================================================== --- projects/powernv/powerpc/ofw/ofw_machdep.c (revision 290990) +++ projects/powernv/powerpc/ofw/ofw_machdep.c (revision 290991) @@ -1,699 +1,699 @@ /*- * Copyright (C) 1996 Wolfgang Solfrank. * Copyright (C) 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $NetBSD: ofw_machdep.c,v 1.5 2000/05/23 13:25:43 tsubai Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void *fdt; int ofw_real_mode; #ifdef AIM extern register_t ofmsr[5]; extern void *openfirmware_entry; char save_trap_init[0x2f00]; /* EXC_LAST */ char save_trap_of[0x2f00]; /* EXC_LAST */ int ofwcall(void *); static int openfirmware(void *args); __inline void ofw_save_trap_vec(char *save_trap_vec) { if (!ofw_real_mode) return; bcopy((void *)EXC_RST, save_trap_vec, EXC_LAST - EXC_RST); } static __inline void ofw_restore_trap_vec(char *restore_trap_vec) { if (!ofw_real_mode) return; bcopy(restore_trap_vec, (void *)EXC_RST, EXC_LAST - EXC_RST); __syncicache(EXC_RSVD, EXC_LAST - EXC_RSVD); } /* * Saved SPRG0-3 from OpenFirmware. Will be restored prior to the callback. */ register_t ofw_sprg0_save; static __inline void ofw_sprg_prepare(void) { if (ofw_real_mode) return; /* * Assume that interrupts are disabled at this point, or * SPRG1-3 could be trashed */ __asm __volatile("mfsprg0 %0\n\t" "mtsprg0 %1\n\t" "mtsprg1 %2\n\t" "mtsprg2 %3\n\t" "mtsprg3 %4\n\t" : "=&r"(ofw_sprg0_save) : "r"(ofmsr[1]), "r"(ofmsr[2]), "r"(ofmsr[3]), "r"(ofmsr[4])); } static __inline void ofw_sprg_restore(void) { if (ofw_real_mode) return; /* * Note that SPRG1-3 contents are irrelevant. They are scratch * registers used in the early portion of trap handling when * interrupts are disabled. * * PCPU data cannot be used until this routine is called! */ __asm __volatile("mtsprg0 %0" :: "r"(ofw_sprg0_save)); } #endif static int parse_ofw_memory(phandle_t node, const char *prop, struct mem_region *output) { cell_t address_cells, size_cells; cell_t OFmem[4 * PHYS_AVAIL_SZ]; int sz, i, j; phandle_t phandle; sz = 0; /* * Get #address-cells from root node, defaulting to 1 if it cannot * be found. */ phandle = OF_finddevice("/"); - if (OF_getprop(phandle, "#address-cells", &address_cells, + if (OF_getencprop(phandle, "#address-cells", &address_cells, sizeof(address_cells)) < (ssize_t)sizeof(address_cells)) address_cells = 1; - if (OF_getprop(phandle, "#size-cells", &size_cells, + if (OF_getencprop(phandle, "#size-cells", &size_cells, sizeof(size_cells)) < (ssize_t)sizeof(size_cells)) size_cells = 1; /* * Get memory. */ - if (node == -1 || (sz = OF_getprop(node, prop, + if (node == -1 || (sz = OF_getencprop(node, prop, OFmem, sizeof(OFmem))) <= 0) panic("Physical memory map not found"); i = 0; j = 0; while (i < sz/sizeof(cell_t)) { #ifndef __powerpc64__ /* On 32-bit PPC, ignore regions starting above 4 GB */ if (address_cells > 1 && OFmem[i] > 0) { i += address_cells + size_cells; continue; } #endif output[j].mr_start = OFmem[i++]; if (address_cells == 2) { #ifdef __powerpc64__ output[j].mr_start <<= 32; #endif output[j].mr_start += OFmem[i++]; } output[j].mr_size = OFmem[i++]; if (size_cells == 2) { #ifdef __powerpc64__ output[j].mr_size <<= 32; #endif output[j].mr_size += OFmem[i++]; } #ifndef __powerpc64__ /* * Check for memory regions extending above 32-bit * memory space, and restrict them to stay there.
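 *
 * A worked example with hypothetical values: a region with
 * mr_start = 0xc0000000 and mr_size = 0x80000000 would end at
 * 0x140000000, beyond BUS_SPACE_MAXADDR_32BIT, so the clamp below
 * shrinks mr_size to 0xffffffff - 0xc0000000 = 0x3fffffff.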
*/ if (((uint64_t)output[j].mr_start + (uint64_t)output[j].mr_size) > BUS_SPACE_MAXADDR_32BIT) { output[j].mr_size = BUS_SPACE_MAXADDR_32BIT - output[j].mr_start; } #endif j++; } sz = j*sizeof(output[0]); return (sz); } static int excise_fdt_reserved(struct mem_region *avail, int asz) { struct { uint64_t address; uint64_t size; } fdtmap[16]; ssize_t fdtmapsize; phandle_t chosen; int i, j, k; chosen = OF_finddevice("/chosen"); fdtmapsize = OF_getprop(chosen, "fdtmemreserv", fdtmap, sizeof(fdtmap)); for (j = 0; j < fdtmapsize/sizeof(fdtmap[0]); j++) { fdtmap[j].address = be64toh(fdtmap[j].address); fdtmap[j].size = be64toh(fdtmap[j].size); } for (i = 0; i < asz; i++) { for (j = 0; j < fdtmapsize/sizeof(fdtmap[0]); j++) { /* * Case 1: Exclusion region encloses complete * available entry. Drop it and move on. */ if (fdtmap[j].address <= avail[i].mr_start && fdtmap[j].address + fdtmap[j].size >= avail[i].mr_start + avail[i].mr_size) { for (k = i+1; k < asz; k++) avail[k-1] = avail[k]; asz--; i--; /* Repeat some entries */ continue; } /* * Case 2: Exclusion region starts in available entry. * Trim it to where the entry begins and append * a new available entry with the region after * the excluded region, if any. */ if (fdtmap[j].address >= avail[i].mr_start && fdtmap[j].address < avail[i].mr_start + avail[i].mr_size) { if (fdtmap[j].address + fdtmap[j].size < avail[i].mr_start + avail[i].mr_size) { avail[asz].mr_start = fdtmap[j].address + fdtmap[j].size; avail[asz].mr_size = avail[i].mr_start + avail[i].mr_size - avail[asz].mr_start; asz++; } avail[i].mr_size = fdtmap[j].address - avail[i].mr_start; } /* * Case 3: Exclusion region ends in available entry. * Move start point to where the exclusion zone ends. * The case of a contained exclusion zone has already * been caught in case 2. */ if (fdtmap[j].address + fdtmap[j].size >= avail[i].mr_start && fdtmap[j].address + fdtmap[j].size < avail[i].mr_start + avail[i].mr_size) { avail[i].mr_size += avail[i].mr_start; avail[i].mr_start = fdtmap[j].address + fdtmap[j].size; avail[i].mr_size -= avail[i].mr_start; } } } return (asz); } /* * This is called during powerpc_init, before the system is really initialized. * It shall provide the total and the available regions of RAM. * The available regions need not take the kernel into account. */ void ofw_mem_regions(struct mem_region *memp, int *memsz, struct mem_region *availp, int *availsz) { phandle_t phandle; int asz, msz; int res; char name[31]; asz = msz = 0; /* * Get memory from all the /memory nodes. 
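 *
 * A typical node, as a hypothetical example (2 GB of RAM with the
 * first 16 MB reserved by firmware):
 *
 *	memory@0 {
 *		device_type = "memory";
 *		reg = <0x0 0x80000000>;
 *		available = <0x01000000 0x7f000000>;
 *	};
 *
 * "reg" feeds the total-memory list; "available", when present, feeds
 * the available list. Both are decoded by parse_ofw_memory() above.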
*/ for (phandle = OF_child(OF_peer(0)); phandle != 0; phandle = OF_peer(phandle)) { if (OF_getprop(phandle, "name", name, sizeof(name)) <= 0) continue; if (strncmp(name, "memory", sizeof(name)) != 0 && strncmp(name, "memory@", strlen("memory@")) != 0) continue; res = parse_ofw_memory(phandle, "reg", &memp[msz]); msz += res/sizeof(struct mem_region); if (OF_getproplen(phandle, "available") >= 0) res = parse_ofw_memory(phandle, "available", &availp[asz]); else res = parse_ofw_memory(phandle, "reg", &availp[asz]); asz += res/sizeof(struct mem_region); } phandle = OF_finddevice("/chosen"); if (OF_hasprop(phandle, "fdtmemreserv")) asz = excise_fdt_reserved(availp, asz); *memsz = msz; *availsz = asz; } void OF_initial_setup(void *fdt_ptr, void *junk, int (*openfirm)(void *)) { #ifdef AIM ofmsr[0] = mfmsr(); #ifdef __powerpc64__ ofmsr[0] &= ~PSL_SF; #endif __asm __volatile("mfsprg0 %0" : "=&r"(ofmsr[1])); __asm __volatile("mfsprg1 %0" : "=&r"(ofmsr[2])); __asm __volatile("mfsprg2 %0" : "=&r"(ofmsr[3])); __asm __volatile("mfsprg3 %0" : "=&r"(ofmsr[4])); openfirmware_entry = openfirm; if (ofmsr[0] & PSL_DR) ofw_real_mode = 0; else ofw_real_mode = 1; ofw_save_trap_vec(save_trap_init); #else ofw_real_mode = 1; #endif fdt = fdt_ptr; #ifdef FDT_DTB_STATIC /* Check for a statically included blob */ if (fdt == NULL) fdt = &fdt_static_dtb; #endif } boolean_t OF_bootstrap() { boolean_t status = FALSE; #ifdef AIM if (openfirmware_entry != NULL) { if (ofw_real_mode) { status = OF_install(OFW_STD_REAL, 0); } else { #ifdef __powerpc64__ status = OF_install(OFW_STD_32BIT, 0); #else status = OF_install(OFW_STD_DIRECT, 0); #endif } if (status != TRUE) return status; OF_init(openfirmware); } else #endif if (fdt != NULL) { status = OF_install(OFW_FDT, 0); if (status != TRUE) return status; OF_init(fdt); OF_interpret("perform-fixup", 0); } return (status); } #ifdef AIM void ofw_quiesce(void) { struct { cell_t name; cell_t nargs; cell_t nreturns; } args; KASSERT(!pmap_bootstrapped, ("Cannot call ofw_quiesce after VM is up")); args.name = (cell_t)(uintptr_t)"quiesce"; args.nargs = 0; args.nreturns = 0; openfirmware(&args); } static int openfirmware_core(void *args) { int result; register_t oldmsr; if (openfirmware_entry == NULL) return (-1); /* * Turn off exceptions - we really don't want to end up * anywhere unexpected with PCPU set to something strange * or the stack pointer wrong. */ oldmsr = intr_disable(); ofw_sprg_prepare(); /* Save trap vectors */ ofw_save_trap_vec(save_trap_of); /* Restore initially saved trap vectors */ ofw_restore_trap_vec(save_trap_init); #if defined(AIM) && !defined(__powerpc64__) /* * Clear battable[] translations */ if (!(cpu_features & PPC_FEATURE_64)) __asm __volatile("mtdbatu 2, %0\n" "mtdbatu 3, %0" : : "r" (0)); isync(); #endif result = ofwcall(args); /* Restore trap vectors */ ofw_restore_trap_vec(save_trap_of); ofw_sprg_restore(); intr_restore(oldmsr); return (result); } #ifdef SMP struct ofw_rv_args { void *args; int retval; volatile int in_progress; }; static void ofw_rendezvous_dispatch(void *xargs) { struct ofw_rv_args *rv_args = xargs; /* NOTE: Interrupts are disabled here */ if (PCPU_GET(cpuid) == 0) { /* * Execute all OF calls on CPU 0 */ rv_args->retval = openfirmware_core(rv_args->args); rv_args->in_progress = 0; } else { /* * Spin with interrupts off on other CPUs while OF has * control of the machine.
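 *
 * in_progress is declared volatile int in struct ofw_rv_args precisely
 * so that the loop below re-reads memory on every pass; without it the
 * compiler could hoist the load and spin forever on a stale value.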
*/ while (rv_args->in_progress) cpu_spinwait(); } } #endif static int openfirmware(void *args) { int result; #ifdef SMP struct ofw_rv_args rv_args; #endif if (openfirmware_entry == NULL) return (-1); #ifdef SMP rv_args.args = args; rv_args.in_progress = 1; smp_rendezvous(smp_no_rendevous_barrier, ofw_rendezvous_dispatch, smp_no_rendevous_barrier, &rv_args); result = rv_args.retval; #else result = openfirmware_core(args); #endif return (result); } void OF_reboot() { struct { cell_t name; cell_t nargs; cell_t nreturns; cell_t arg; } args; args.name = (cell_t)(uintptr_t)"interpret"; args.nargs = 1; args.nreturns = 0; args.arg = (cell_t)(uintptr_t)"reset-all"; openfirmware_core(&args); /* Don't do rendezvous! */ for (;;); /* just in case */ } #endif /* AIM */ void OF_getetheraddr(device_t dev, u_char *addr) { phandle_t node; node = ofw_bus_get_node(dev); OF_getprop(node, "local-mac-address", addr, ETHER_ADDR_LEN); } /* * Return a bus handle and bus tag that corresponds to the register * numbered regno for the device referenced by the package handle * dev. This function is intended to be used by console drivers in * early boot only. It works by mapping the address of the device's * register in the address space of its parent and recursively walking * the device tree upward this way. */ static void OF_get_addr_props(phandle_t node, uint32_t *addrp, uint32_t *sizep, int *pcip) { char type[64]; uint32_t addr, size; int pci, res; - res = OF_getprop(node, "#address-cells", &addr, sizeof(addr)); + res = OF_getencprop(node, "#address-cells", &addr, sizeof(addr)); if (res == -1) addr = 2; - res = OF_getprop(node, "#size-cells", &size, sizeof(size)); + res = OF_getencprop(node, "#size-cells", &size, sizeof(size)); if (res == -1) size = 1; pci = 0; if (addr == 3 && size == 2) { res = OF_getprop(node, "device_type", type, sizeof(type)); if (res != -1) { type[sizeof(type) - 1] = '\0'; pci = (strcmp(type, "pci") == 0) ? 1 : 0; } } if (addrp != NULL) *addrp = addr; if (sizep != NULL) *sizep = size; if (pcip != NULL) *pcip = pci; } int OF_decode_addr(phandle_t dev, int regno, bus_space_tag_t *tag, bus_space_handle_t *handle) { uint32_t cell[32]; bus_addr_t addr, raddr, baddr; bus_size_t size, rsize; uint32_t c, nbridge, naddr, nsize; phandle_t bridge, parent; u_int spc, rspc, prefetch; int pci, pcib, res; /* Sanity checking. */ if (dev == 0) return (EINVAL); bridge = OF_parent(dev); if (bridge == 0) return (EINVAL); if (regno < 0) return (EINVAL); if (tag == NULL || handle == NULL) return (EINVAL); /* Assume big-endian unless we find a PCI device */ *tag = &bs_be_tag; /* Get the requested register. */ OF_get_addr_props(bridge, &naddr, &nsize, &pci); if (pci) *tag = &bs_le_tag; - res = OF_getprop(dev, (pci) ? "assigned-addresses" : "reg", + res = OF_getencprop(dev, (pci) ? "assigned-addresses" : "reg", cell, sizeof(cell)); if (res == -1) return (ENXIO); if (res % sizeof(cell[0])) return (ENXIO); res /= sizeof(cell[0]); regno *= naddr + nsize; if (regno + naddr + nsize > res) return (EINVAL); spc = (pci) ? cell[regno] & OFW_PCI_PHYS_HI_SPACEMASK : ~0; prefetch = (pci) ? cell[regno] & OFW_PCI_PHYS_HI_PREFETCHABLE : 0; addr = 0; for (c = 0; c < naddr; c++) addr = ((uint64_t)addr << 32) | cell[regno++]; size = 0; for (c = 0; c < nsize; c++) size = ((uint64_t)size << 32) | cell[regno++]; /* * Map the address range in the bridge's decoding window as given * by the "ranges" property. If a node doesn't have such property * then no mapping is done.
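 *
 * A worked translation with hypothetical cell values: a ranges entry
 * mapping child address 0x80000000 (raddr) onto parent address
 * 0xf2000000 (baddr) for 0x01000000 bytes (rsize) rewrites a register
 * at child address 0x80004000, since it falls in [raddr, raddr + rsize),
 * to 0x80004000 - 0x80000000 + 0xf2000000 = 0xf2004000, which is the
 * "addr = addr - raddr + baddr" step in the loop below.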
*/ parent = OF_parent(bridge); while (parent != 0) { OF_get_addr_props(parent, &nbridge, NULL, &pcib); if (pcib) *tag = &bs_le_tag; - res = OF_getprop(bridge, "ranges", cell, sizeof(cell)); + res = OF_getencprop(bridge, "ranges", cell, sizeof(cell)); if (res == -1) goto next; if (res % sizeof(cell[0])) return (ENXIO); res /= sizeof(cell[0]); regno = 0; while (regno < res) { rspc = (pci) ? cell[regno] & OFW_PCI_PHYS_HI_SPACEMASK : ~0; if (rspc != spc) { regno += naddr + nbridge + nsize; continue; } raddr = 0; for (c = 0; c < naddr; c++) raddr = ((uint64_t)raddr << 32) | cell[regno++]; rspc = (pcib) ? cell[regno] & OFW_PCI_PHYS_HI_SPACEMASK : ~0; baddr = 0; for (c = 0; c < nbridge; c++) baddr = ((uint64_t)baddr << 32) | cell[regno++]; rsize = 0; for (c = 0; c < nsize; c++) rsize = ((uint64_t)rsize << 32) | cell[regno++]; if (addr < raddr || addr >= raddr + rsize) continue; addr = addr - raddr + baddr; if (rspc != ~0) spc = rspc; } next: bridge = parent; parent = OF_parent(bridge); OF_get_addr_props(bridge, &naddr, &nsize, &pci); } return (bus_space_map(*tag, addr, size, prefetch ? BUS_SPACE_MAP_PREFETCHABLE : 0, handle)); } Index: projects/powernv/powerpc/ofw/ofw_pci.c =================================================================== --- projects/powernv/powerpc/ofw/ofw_pci.c (revision 290990) +++ projects/powernv/powerpc/ofw/ofw_pci.c (revision 290991) @@ -1,561 +1,562 @@ /*- * Copyright (c) 2011 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" /* * Bus interface. 
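 *
 * Children reach these methods through the ordinary newbus entry
 * points; a hedged usage sketch (hypothetical driver code, not part of
 * this file):
 *
 *	struct resource *res;
 *	int rid = PCIR_BAR(0);
 *
 *	res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
 *	if (res == NULL)
 *		return (ENXIO);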
*/ static int ofw_pci_read_ivar(device_t, device_t, int, uintptr_t *); static struct resource * ofw_pci_alloc_resource(device_t bus, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags); static int ofw_pci_release_resource(device_t bus, device_t child, int type, int rid, struct resource *res); static int ofw_pci_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *res); static int ofw_pci_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *res); static int ofw_pci_adjust_resource(device_t bus, device_t child, int type, struct resource *res, u_long start, u_long end); /* * pcib interface. */ static int ofw_pci_maxslots(device_t); static int ofw_pci_route_interrupt(device_t, device_t, int); /* * ofw_bus interface */ static phandle_t ofw_pci_get_node(device_t bus, device_t dev); /* * local methods */ static int ofw_pci_nranges(phandle_t node); static int ofw_pci_fill_ranges(phandle_t node, struct ofw_pci_range *ranges); /* * Driver methods. */ static device_method_t ofw_pci_methods[] = { /* Device interface */ DEVMETHOD(device_attach, ofw_pci_attach), /* Bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, ofw_pci_read_ivar), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), DEVMETHOD(bus_alloc_resource, ofw_pci_alloc_resource), DEVMETHOD(bus_release_resource, ofw_pci_release_resource), DEVMETHOD(bus_activate_resource, ofw_pci_activate_resource), DEVMETHOD(bus_deactivate_resource, ofw_pci_deactivate_resource), DEVMETHOD(bus_adjust_resource, ofw_pci_adjust_resource), /* pcib interface */ DEVMETHOD(pcib_maxslots, ofw_pci_maxslots), DEVMETHOD(pcib_route_interrupt, ofw_pci_route_interrupt), /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_node, ofw_pci_get_node), DEVMETHOD_END }; DEFINE_CLASS_0(ofw_pci, ofw_pci_driver, ofw_pci_methods, 0); int ofw_pci_init(device_t dev) { struct ofw_pci_softc *sc; phandle_t node; u_int32_t busrange[2]; struct ofw_pci_range *rp; int error; node = ofw_bus_get_node(dev); sc = device_get_softc(dev); sc->sc_initialized = 1; - if (OF_getprop(node, "reg", &sc->sc_pcir, sizeof(sc->sc_pcir)) == -1) + if (OF_getencprop(node, "reg", (pcell_t *)&sc->sc_pcir, + sizeof(sc->sc_pcir)) == -1) return (ENXIO); - if (OF_getprop(node, "bus-range", busrange, sizeof(busrange)) != 8) + if (OF_getencprop(node, "bus-range", busrange, sizeof(busrange)) != 8) busrange[0] = 0; sc->sc_dev = dev; sc->sc_node = node; sc->sc_bus = busrange[0]; if (sc->sc_quirks & OFW_PCI_QUIRK_RANGES_ON_CHILDREN) { phandle_t c; int n, i; sc->sc_nrange = 0; for (c = OF_child(node); c != 0; c = OF_peer(c)) { n = ofw_pci_nranges(c); if (n > 0) sc->sc_nrange += n; } if (sc->sc_nrange == 0) return (ENXIO); sc->sc_range = malloc(sc->sc_nrange * sizeof(sc->sc_range[0]), M_DEVBUF, M_WAITOK); i = 0; for (c = OF_child(node); c != 0; c = OF_peer(c)) { n = ofw_pci_fill_ranges(c, &sc->sc_range[i]); if (n > 0) i += n; } KASSERT(i == sc->sc_nrange, ("range count mismatch")); } else { sc->sc_nrange = ofw_pci_nranges(node); if (sc->sc_nrange <= 0) { device_printf(dev, "could not get ranges\n"); return (ENXIO); } sc->sc_range = malloc(sc->sc_nrange * sizeof(sc->sc_range[0]), M_DEVBUF, M_WAITOK); ofw_pci_fill_ranges(node, sc->sc_range); } sc->sc_io_rman.rm_type = RMAN_ARRAY; sc->sc_io_rman.rm_descr = "PCI I/O Ports"; error = rman_init(&sc->sc_io_rman); if (error) { device_printf(dev, "rman_init() failed. 
error = %d\n", error); return (error); } sc->sc_mem_rman.rm_type = RMAN_ARRAY; sc->sc_mem_rman.rm_descr = "PCI Memory"; error = rman_init(&sc->sc_mem_rman); if (error) { device_printf(dev, "rman_init() failed. error = %d\n", error); return (error); } for (rp = sc->sc_range; rp < sc->sc_range + sc->sc_nrange && rp->pci_hi != 0; rp++) { error = 0; switch (rp->pci_hi & OFW_PCI_PHYS_HI_SPACEMASK) { case OFW_PCI_PHYS_HI_SPACE_CONFIG: break; case OFW_PCI_PHYS_HI_SPACE_IO: error = rman_manage_region(&sc->sc_io_rman, rp->pci, rp->pci + rp->size - 1); break; case OFW_PCI_PHYS_HI_SPACE_MEM32: case OFW_PCI_PHYS_HI_SPACE_MEM64: error = rman_manage_region(&sc->sc_mem_rman, rp->pci, rp->pci + rp->size - 1); break; } if (error) { device_printf(dev, "rman_manage_region(%x, %#jx, %#jx) failed. " "error = %d\n", rp->pci_hi & OFW_PCI_PHYS_HI_SPACEMASK, rp->pci, rp->pci + rp->size - 1, error); return (error); } } ofw_bus_setup_iinfo(node, &sc->sc_pci_iinfo, sizeof(cell_t)); return (error); } int ofw_pci_attach(device_t dev) { struct ofw_pci_softc *sc; int error; sc = device_get_softc(dev); if (!sc->sc_initialized) { error = ofw_pci_init(dev); if (error) return (error); } device_add_child(dev, "pci", -1); return (bus_generic_attach(dev)); } static int ofw_pci_maxslots(device_t dev) { return (PCI_SLOTMAX); } static int ofw_pci_route_interrupt(device_t bus, device_t dev, int pin) { struct ofw_pci_softc *sc; struct ofw_pci_register reg; uint32_t pintr, mintr[2]; int intrcells; phandle_t iparent; sc = device_get_softc(bus); pintr = pin; /* Fabricate imap information in case this isn't an OFW device */ bzero(®, sizeof(reg)); reg.phys_hi = (pci_get_bus(dev) << OFW_PCI_PHYS_HI_BUSSHIFT) | (pci_get_slot(dev) << OFW_PCI_PHYS_HI_DEVICESHIFT) | (pci_get_function(dev) << OFW_PCI_PHYS_HI_FUNCTIONSHIFT); intrcells = ofw_bus_lookup_imap(ofw_bus_get_node(dev), &sc->sc_pci_iinfo, ®, sizeof(reg), &pintr, sizeof(pintr), mintr, sizeof(mintr), &iparent); if (intrcells) { pintr = ofw_bus_map_intr(dev, iparent, intrcells, mintr); return (pintr); } /* Maybe it's a real interrupt, not an intpin */ if (pin > 4) return (pin); device_printf(bus, "could not route pin %d for device %d.%d\n", pin, pci_get_slot(dev), pci_get_function(dev)); return (PCI_INVALID_IRQ); } static int ofw_pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct ofw_pci_softc *sc; sc = device_get_softc(dev); switch (which) { case PCIB_IVAR_DOMAIN: *result = device_get_unit(dev); return (0); case PCIB_IVAR_BUS: *result = sc->sc_bus; return (0); } return (ENOENT); } static struct resource * ofw_pci_alloc_resource(device_t bus, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct ofw_pci_softc *sc; struct resource *rv; struct rman *rm; int needactivate; needactivate = flags & RF_ACTIVE; flags &= ~RF_ACTIVE; sc = device_get_softc(bus); switch (type) { case SYS_RES_MEMORY: rm = &sc->sc_mem_rman; break; case SYS_RES_IOPORT: rm = &sc->sc_io_rman; break; case SYS_RES_IRQ: return (bus_alloc_resource(bus, type, rid, start, end, count, flags)); default: device_printf(bus, "unknown resource request from %s\n", device_get_nameunit(child)); return (NULL); } rv = rman_reserve_resource(rm, start, end, count, flags, child); if (rv == NULL) { device_printf(bus, "failed to reserve resource for %s\n", device_get_nameunit(child)); return (NULL); } rman_set_rid(rv, *rid); if (needactivate) { if (bus_activate_resource(child, type, *rid, rv) != 0) { device_printf(bus, "failed to activate resource for %s\n", 
device_get_nameunit(child)); rman_release_resource(rv); return (NULL); } } return (rv); } static int ofw_pci_release_resource(device_t bus, device_t child, int type, int rid, struct resource *res) { if (rman_get_flags(res) & RF_ACTIVE) { int error = bus_deactivate_resource(child, type, rid, res); if (error) return error; } return (rman_release_resource(res)); } static int ofw_pci_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *res) { struct ofw_pci_softc *sc; void *p; sc = device_get_softc(bus); if (type == SYS_RES_IRQ) { return (bus_activate_resource(bus, type, rid, res)); } if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) { struct ofw_pci_range *rp; vm_offset_t start; int space; start = (vm_offset_t)rman_get_start(res); /* * Map this through the ranges list */ for (rp = sc->sc_range; rp < sc->sc_range + sc->sc_nrange && rp->pci_hi != 0; rp++) { if (start < rp->pci || start >= rp->pci + rp->size) continue; switch (rp->pci_hi & OFW_PCI_PHYS_HI_SPACEMASK) { case OFW_PCI_PHYS_HI_SPACE_IO: space = SYS_RES_IOPORT; break; case OFW_PCI_PHYS_HI_SPACE_MEM32: case OFW_PCI_PHYS_HI_SPACE_MEM64: space = SYS_RES_MEMORY; break; default: space = -1; } if (type == space) { start += (rp->host - rp->pci); break; } } if (bootverbose) printf("ofw_pci mapdev: start %zx, len %ld\n", start, rman_get_size(res)); p = pmap_mapdev(start, (vm_size_t)rman_get_size(res)); if (p == NULL) return (ENOMEM); rman_set_virtual(res, p); rman_set_bustag(res, &bs_le_tag); rman_set_bushandle(res, (u_long)p); } return (rman_activate_resource(res)); } static int ofw_pci_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *res) { /* * If this is a memory resource, unmap it. */ if ((type == SYS_RES_MEMORY) || (type == SYS_RES_IOPORT)) { u_int32_t psize; psize = rman_get_size(res); pmap_unmapdev((vm_offset_t)rman_get_virtual(res), psize); } return (rman_deactivate_resource(res)); } static int ofw_pci_adjust_resource(device_t bus, device_t child, int type, struct resource *res, u_long start, u_long end) { struct rman *rm = NULL; struct ofw_pci_softc *sc = device_get_softc(bus); KASSERT(!(rman_get_flags(res) & RF_ACTIVE), ("active resources cannot be adjusted")); if (rman_get_flags(res) & RF_ACTIVE) return (EINVAL); switch (type) { case SYS_RES_MEMORY: rm = &sc->sc_mem_rman; break; case SYS_RES_IOPORT: rm = &sc->sc_io_rman; break; default: return (ENXIO); } if (!rman_is_region_manager(res, rm)) return (EINVAL); return (rman_adjust_resource(res, start, end)); } static phandle_t ofw_pci_get_node(device_t bus, device_t dev) { struct ofw_pci_softc *sc; sc = device_get_softc(bus); /* We only have one child, the PCI bus, which needs our own node. 
*/ return (sc->sc_node); } static int ofw_pci_nranges(phandle_t node) { int host_address_cells = 1, pci_address_cells = 3, size_cells = 2; ssize_t nbase_ranges; - OF_getprop(OF_parent(node), "#address-cells", &host_address_cells, + OF_getencprop(OF_parent(node), "#address-cells", &host_address_cells, sizeof(host_address_cells)); - OF_getprop(node, "#address-cells", &pci_address_cells, + OF_getencprop(node, "#address-cells", &pci_address_cells, sizeof(pci_address_cells)); - OF_getprop(node, "#size-cells", &size_cells, sizeof(size_cells)); + OF_getencprop(node, "#size-cells", &size_cells, sizeof(size_cells)); nbase_ranges = OF_getproplen(node, "ranges"); if (nbase_ranges <= 0) return (-1); return (nbase_ranges / sizeof(cell_t) / (pci_address_cells + host_address_cells + size_cells)); } static int ofw_pci_fill_ranges(phandle_t node, struct ofw_pci_range *ranges) { int host_address_cells = 1, pci_address_cells = 3, size_cells = 2; cell_t *base_ranges; ssize_t nbase_ranges; int nranges; int i, j, k; - OF_getprop(OF_parent(node), "#address-cells", &host_address_cells, + OF_getencprop(OF_parent(node), "#address-cells", &host_address_cells, sizeof(host_address_cells)); - OF_getprop(node, "#address-cells", &pci_address_cells, + OF_getencprop(node, "#address-cells", &pci_address_cells, sizeof(pci_address_cells)); - OF_getprop(node, "#size-cells", &size_cells, sizeof(size_cells)); + OF_getencprop(node, "#size-cells", &size_cells, sizeof(size_cells)); nbase_ranges = OF_getproplen(node, "ranges"); if (nbase_ranges <= 0) return (-1); nranges = nbase_ranges / sizeof(cell_t) / (pci_address_cells + host_address_cells + size_cells); base_ranges = malloc(nbase_ranges, M_DEVBUF, M_WAITOK); - OF_getprop(node, "ranges", base_ranges, nbase_ranges); + OF_getencprop(node, "ranges", base_ranges, nbase_ranges); for (i = 0, j = 0; i < nranges; i++) { ranges[i].pci_hi = base_ranges[j++]; ranges[i].pci = 0; for (k = 0; k < pci_address_cells - 1; k++) { ranges[i].pci <<= 32; ranges[i].pci |= base_ranges[j++]; } ranges[i].host = 0; for (k = 0; k < host_address_cells; k++) { ranges[i].host <<= 32; ranges[i].host |= base_ranges[j++]; } ranges[i].size = 0; for (k = 0; k < size_cells; k++) { ranges[i].size <<= 32; ranges[i].size |= base_ranges[j++]; } } free(base_ranges, M_DEVBUF); return (nranges); } Index: projects/powernv/powerpc/ofw/ofw_pcibus.c =================================================================== --- projects/powernv/powerpc/ofw/ofw_pcibus.c (revision 290990) +++ projects/powernv/powerpc/ofw/ofw_pcibus.c (revision 290991) @@ -1,355 +1,357 @@ /*- * Copyright (c) 1997, Stefan Esser * Copyright (c) 2000, Michael Smith * Copyright (c) 2000, BSDi * Copyright (c) 2003, Thomas Moestl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ofw_pcibus.h" #include "pcib_if.h" #include "pci_if.h" typedef uint32_t ofw_pci_intr_t; /* Methods */ static device_probe_t ofw_pcibus_probe; static device_attach_t ofw_pcibus_attach; static pci_assign_interrupt_t ofw_pcibus_assign_interrupt; static ofw_bus_get_devinfo_t ofw_pcibus_get_devinfo; static int ofw_pcibus_child_pnpinfo_str_method(device_t cbdev, device_t child, char *buf, size_t buflen); static void ofw_pcibus_enum_devtree(device_t dev, u_int domain, u_int busno); static void ofw_pcibus_enum_bus(device_t dev, u_int domain, u_int busno); static device_method_t ofw_pcibus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ofw_pcibus_probe), DEVMETHOD(device_attach, ofw_pcibus_attach), /* Bus interface */ DEVMETHOD(bus_child_pnpinfo_str, ofw_pcibus_child_pnpinfo_str_method), /* PCI interface */ DEVMETHOD(pci_assign_interrupt, ofw_pcibus_assign_interrupt), /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_devinfo, ofw_pcibus_get_devinfo), DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), DEVMETHOD_END }; static devclass_t pci_devclass; DEFINE_CLASS_1(pci, ofw_pcibus_driver, ofw_pcibus_methods, sizeof(struct pci_softc), pci_driver); DRIVER_MODULE(ofw_pcibus, pcib, ofw_pcibus_driver, pci_devclass, 0, 0); MODULE_VERSION(ofw_pcibus, 1); MODULE_DEPEND(ofw_pcibus, pci, 1, 1, 1); static int ofw_devices_only = 0; TUNABLE_INT("hw.pci.ofw_devices_only", &ofw_devices_only); static int ofw_pcibus_probe(device_t dev) { if (ofw_bus_get_node(dev) == -1) return (ENXIO); device_set_desc(dev, "OFW PCI bus"); return (BUS_PROBE_DEFAULT); } static int ofw_pcibus_attach(device_t dev) { u_int busno, domain; int error; error = pci_attach_common(dev); if (error) return (error); domain = pcib_get_domain(dev); busno = pcib_get_bus(dev); /* * Attach those children represented in the device tree. */ ofw_pcibus_enum_devtree(dev, domain, busno); /* * We now attach any laggard devices. FDT, for instance, allows * the device tree to enumerate only some PCI devices. Apple's * OF device tree on some Grackle-based hardware can also miss * functions on multi-function cards. 
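 *
 * The fallback bus scan below can be disabled from the loader when the
 * firmware tree is known to be complete; a hedged example
 * (loader.conf):
 *
 *	hw.pci.ofw_devices_only="1"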
*/ if (!ofw_devices_only) ofw_pcibus_enum_bus(dev, domain, busno); return (bus_generic_attach(dev)); } static void ofw_pcibus_enum_devtree(device_t dev, u_int domain, u_int busno) { device_t pcib; struct ofw_pci_register pcir; struct ofw_pcibus_devinfo *dinfo; phandle_t node, child; u_int func, slot; int intline; pcib = device_get_parent(dev); node = ofw_bus_get_node(dev); for (child = OF_child(node); child != 0; child = OF_peer(child)) { - if (OF_getprop(child, "reg", &pcir, sizeof(pcir)) == -1) + if (OF_getencprop(child, "reg", (pcell_t *)&pcir, + sizeof(pcir)) == -1) continue; slot = OFW_PCI_PHYS_HI_DEVICE(pcir.phys_hi); func = OFW_PCI_PHYS_HI_FUNCTION(pcir.phys_hi); /* Some OFW device trees contain dupes. */ if (pci_find_dbsf(domain, busno, slot, func) != NULL) continue; /* * The preset in the intline register is usually bogus. Reset * it such that the PCI code will reroute the interrupt if * needed. */ intline = PCI_INVALID_IRQ; if (OF_getproplen(child, "interrupts") > 0) intline = 0; PCIB_WRITE_CONFIG(pcib, busno, slot, func, PCIR_INTLINE, intline, 1); /* * Now set up the PCI and OFW bus layer devinfo and add it * to the PCI bus. */ dinfo = (struct ofw_pcibus_devinfo *)pci_read_device(pcib, domain, busno, slot, func, sizeof(*dinfo)); if (dinfo == NULL) continue; if (ofw_bus_gen_setup_devinfo(&dinfo->opd_obdinfo, child) != 0) { pci_freecfg((struct pci_devinfo *)dinfo); continue; } dinfo->opd_dma_tag = NULL; pci_add_child(dev, (struct pci_devinfo *)dinfo); /* * Some devices don't have an intpin set, but do have * interrupts. These are fully specified, and set in the * interrupts property, so add that value to the device's * resource list. */ if (dinfo->opd_dinfo.cfg.intpin == 0) ofw_bus_intr_to_rl(dev, child, &dinfo->opd_dinfo.resources, NULL); } } /* * The following is an almost exact clone of pci_add_children(), with the * addition that it (a) will not add children that have already been added, * and (b) will set up the OFW devinfo to point to invalid values. This is * to handle non-enumerated PCI children as exist in FDT and on the second * function of the Rage 128 in my Blue & White G3. */ static void ofw_pcibus_enum_bus(device_t dev, u_int domain, u_int busno) { device_t pcib; struct ofw_pcibus_devinfo *dinfo; int maxslots; int s, f, pcifunchigh; uint8_t hdrtype; pcib = device_get_parent(dev); maxslots = PCIB_MAXSLOTS(pcib); for (s = 0; s <= maxslots; s++) { pcifunchigh = 0; f = 0; DELAY(1); hdrtype = PCIB_READ_CONFIG(pcib, busno, s, f, PCIR_HDRTYPE, 1); if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) continue; if (hdrtype & PCIM_MFDEV) pcifunchigh = PCI_FUNCMAX; for (f = 0; f <= pcifunchigh; f++) { /* Filter devices we have already added */ if (pci_find_dbsf(domain, busno, s, f) != NULL) continue; dinfo = (struct ofw_pcibus_devinfo *)pci_read_device( pcib, domain, busno, s, f, sizeof(*dinfo)); if (dinfo == NULL) continue; dinfo->opd_dma_tag = NULL; dinfo->opd_obdinfo.obd_node = -1; dinfo->opd_obdinfo.obd_name = NULL; dinfo->opd_obdinfo.obd_compat = NULL; dinfo->opd_obdinfo.obd_type = NULL; dinfo->opd_obdinfo.obd_model = NULL; /* * For non OFW-devices, don't believe 0 * for an interrupt. 
*/ if (dinfo->opd_dinfo.cfg.intline == 0) { dinfo->opd_dinfo.cfg.intline = PCI_INVALID_IRQ; PCIB_WRITE_CONFIG(pcib, busno, s, f, PCIR_INTLINE, PCI_INVALID_IRQ, 1); } pci_add_child(dev, (struct pci_devinfo *)dinfo); } } } static int ofw_pcibus_child_pnpinfo_str_method(device_t cbdev, device_t child, char *buf, size_t buflen) { pci_child_pnpinfo_str_method(cbdev, child, buf, buflen); if (ofw_bus_get_node(child) != -1) { strlcat(buf, " ", buflen); /* Separate info */ ofw_bus_gen_child_pnpinfo_str(cbdev, child, buf, buflen); } return (0); } static int ofw_pcibus_assign_interrupt(device_t dev, device_t child) { ofw_pci_intr_t intr[2]; phandle_t node, iparent; int isz, icells; node = ofw_bus_get_node(child); if (node == -1) { /* Non-firmware enumerated child, use standard routing */ intr[0] = pci_get_intpin(child); return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child, intr[0])); } /* * Try to determine the node's interrupt parent so we know which * PIC to use. */ iparent = -1; - if (OF_getprop(node, "interrupt-parent", &iparent, sizeof(iparent)) < 0) + if (OF_getencprop(node, "interrupt-parent", &iparent, + sizeof(iparent)) < 0) iparent = -1; icells = 1; if (iparent != -1) - OF_getprop(OF_node_from_xref(iparent), "#interrupt-cells", + OF_getencprop(OF_node_from_xref(iparent), "#interrupt-cells", &icells, sizeof(icells)); /* * Any AAPL,interrupts property gets priority and is * fully specified (i.e. does not need routing) */ - isz = OF_getprop(node, "AAPL,interrupts", intr, sizeof(intr)); + isz = OF_getencprop(node, "AAPL,interrupts", intr, sizeof(intr)); if (isz == sizeof(intr[0])*icells) return ((iparent == -1) ? intr[0] : ofw_bus_map_intr(dev, iparent, icells, intr)); - isz = OF_getprop(node, "interrupts", intr, sizeof(intr)); + isz = OF_getencprop(node, "interrupts", intr, sizeof(intr)); if (isz == sizeof(intr[0])*icells) { if (iparent != -1) intr[0] = ofw_bus_map_intr(dev, iparent, icells, intr); } else { /* No property: our best guess is the intpin. */ intr[0] = pci_get_intpin(child); } /* * If we got intr from a property, it may or may not be an intpin. * For on-board devices, it frequently is not, and is completely out * of the valid intpin range. For PCI slots, it hopefully is, * otherwise we will have trouble interfacing with non-OFW buses * such as cardbus. * Since we cannot tell which it is without violating layering, we * will always use the route_interrupt method, and treat exceptions * on the level they become apparent. */ return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child, intr[0])); } static const struct ofw_bus_devinfo * ofw_pcibus_get_devinfo(device_t bus, device_t dev) { struct ofw_pcibus_devinfo *dinfo; dinfo = device_get_ivars(dev); return (&dinfo->opd_obdinfo); } Index: projects/powernv/powerpc/ofw/openpic_ofw.c =================================================================== --- projects/powernv/powerpc/ofw/openpic_ofw.c (revision 290990) +++ projects/powernv/powerpc/ofw/openpic_ofw.c (revision 290991) @@ -1,168 +1,168 @@ /*- * Copyright 2003 by Peter Grehan. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pic_if.h" /* * OFW interface */ static int openpic_ofw_probe(device_t); static int openpic_ofw_attach(device_t); static void openpic_ofw_translate_code(device_t, u_int irq, int code, enum intr_trigger *trig, enum intr_polarity *pol); static device_method_t openpic_ofw_methods[] = { /* Device interface */ DEVMETHOD(device_probe, openpic_ofw_probe), DEVMETHOD(device_attach, openpic_ofw_attach), DEVMETHOD(device_suspend, openpic_suspend), DEVMETHOD(device_resume, openpic_resume), /* PIC interface */ DEVMETHOD(pic_bind, openpic_bind), DEVMETHOD(pic_config, openpic_config), DEVMETHOD(pic_dispatch, openpic_dispatch), DEVMETHOD(pic_enable, openpic_enable), DEVMETHOD(pic_eoi, openpic_eoi), DEVMETHOD(pic_ipi, openpic_ipi), DEVMETHOD(pic_mask, openpic_mask), DEVMETHOD(pic_unmask, openpic_unmask), DEVMETHOD(pic_translate_code, openpic_ofw_translate_code), DEVMETHOD_END }; static driver_t openpic_ofw_driver = { "openpic", openpic_ofw_methods, sizeof(struct openpic_softc), }; DRIVER_MODULE(openpic, ofwbus, openpic_ofw_driver, openpic_devclass, 0, 0); DRIVER_MODULE(openpic, simplebus, openpic_ofw_driver, openpic_devclass, 0, 0); DRIVER_MODULE(openpic, macio, openpic_ofw_driver, openpic_devclass, 0, 0); static int openpic_ofw_probe(device_t dev) { const char *type = ofw_bus_get_type(dev); if (type == NULL) return (ENXIO); if (!ofw_bus_is_compatible(dev, "chrp,open-pic") && strcmp(type, "open-pic") != 0) return (ENXIO); /* * On some U4 systems, there is a phantom MPIC in the mac-io cell. * The uninorth driver will pick up the real PIC, so ignore it here. 
*/ if (OF_finddevice("/u4") != (phandle_t)-1) return (ENXIO); device_set_desc(dev, OPENPIC_DEVSTR); return (0); } static int openpic_ofw_attach(device_t dev) { phandle_t xref, node; node = ofw_bus_get_node(dev); - if (OF_getprop(node, "phandle", &xref, sizeof(xref)) == -1 && - OF_getprop(node, "ibm,phandle", &xref, sizeof(xref)) == -1 && - OF_getprop(node, "linux,phandle", &xref, sizeof(xref)) == -1) + if (OF_getencprop(node, "phandle", &xref, sizeof(xref)) == -1 && + OF_getencprop(node, "ibm,phandle", &xref, sizeof(xref)) == -1 && + OF_getencprop(node, "linux,phandle", &xref, sizeof(xref)) == -1) xref = node; return (openpic_common_attach(dev, xref)); } static void openpic_ofw_translate_code(device_t dev, u_int irq, int code, enum intr_trigger *trig, enum intr_polarity *pol) { switch (code) { case 0: /* L to H edge */ *trig = INTR_TRIGGER_EDGE; *pol = INTR_POLARITY_HIGH; break; case 1: /* Active L level */ *trig = INTR_TRIGGER_LEVEL; *pol = INTR_POLARITY_LOW; break; case 2: /* Active H level */ *trig = INTR_TRIGGER_LEVEL; *pol = INTR_POLARITY_HIGH; break; case 3: /* H to L edge */ *trig = INTR_TRIGGER_EDGE; *pol = INTR_POLARITY_LOW; break; default: *trig = INTR_TRIGGER_CONFORM; *pol = INTR_POLARITY_CONFORM; } } Index: projects/powernv/powerpc/powermac/cpcht.c =================================================================== --- projects/powernv/powerpc/powermac/cpcht.c (revision 290990) +++ projects/powernv/powerpc/powermac/cpcht.c (revision 290991) @@ -1,735 +1,735 @@ /*- * Copyright (C) 2008-2010 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" #include "pic_if.h" /* * IBM CPC9X5 Hypertransport Device interface. */ static int cpcht_probe(device_t); static int cpcht_attach(device_t); static void cpcht_configure_htbridge(device_t, phandle_t); /* * pcib interface. 
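 *
 * Config space on this bridge is memory-mapped, so cpcht_read_config()
 * and cpcht_write_config() compute register addresses directly. A
 * worked example with hypothetical values: bus 0, slot 2, function 1,
 * register 0x3c yields sc_data + ((((2 << 3) | 1) << 8) | 0x3c) =
 * sc_data + 0x113c.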
*/ static u_int32_t cpcht_read_config(device_t, u_int, u_int, u_int, u_int, int); static void cpcht_write_config(device_t, u_int, u_int, u_int, u_int, u_int32_t, int); static int cpcht_route_interrupt(device_t, device_t, int); static int cpcht_alloc_msi(device_t dev, device_t child, int count, int maxcount, int *irqs); static int cpcht_release_msi(device_t dev, device_t child, int count, int *irqs); static int cpcht_alloc_msix(device_t dev, device_t child, int *irq); static int cpcht_release_msix(device_t dev, device_t child, int irq); static int cpcht_map_msi(device_t dev, device_t child, int irq, uint64_t *addr, uint32_t *data); /* * Driver methods. */ static device_method_t cpcht_methods[] = { /* Device interface */ DEVMETHOD(device_probe, cpcht_probe), DEVMETHOD(device_attach, cpcht_attach), /* pcib interface */ DEVMETHOD(pcib_read_config, cpcht_read_config), DEVMETHOD(pcib_write_config, cpcht_write_config), DEVMETHOD(pcib_route_interrupt, cpcht_route_interrupt), DEVMETHOD(pcib_alloc_msi, cpcht_alloc_msi), DEVMETHOD(pcib_release_msi, cpcht_release_msi), DEVMETHOD(pcib_alloc_msix, cpcht_alloc_msix), DEVMETHOD(pcib_release_msix, cpcht_release_msix), DEVMETHOD(pcib_map_msi, cpcht_map_msi), DEVMETHOD_END }; struct cpcht_irq { enum { IRQ_NONE, IRQ_HT, IRQ_MSI, IRQ_INTERNAL } irq_type; int ht_source; vm_offset_t ht_base; vm_offset_t apple_eoi; uint32_t eoi_data; int edge; }; static struct cpcht_irq *cpcht_irqmap = NULL; uint32_t cpcht_msipic = 0; struct cpcht_softc { struct ofw_pci_softc pci_sc; vm_offset_t sc_data; uint64_t sc_populated_slots; struct cpcht_irq htirq_map[128]; struct mtx htirq_mtx; }; static devclass_t cpcht_devclass; DEFINE_CLASS_1(pcib, cpcht_driver, cpcht_methods, sizeof(struct cpcht_softc), ofw_pci_driver); DRIVER_MODULE(cpcht, ofwbus, cpcht_driver, cpcht_devclass, 0, 0); #define CPCHT_IOPORT_BASE 0xf4000000UL /* Hardwired */ #define CPCHT_IOPORT_SIZE 0x00400000UL #define HTAPIC_REQUEST_EOI 0x20 #define HTAPIC_TRIGGER_LEVEL 0x02 #define HTAPIC_MASK 0x01 static int cpcht_probe(device_t dev) { const char *type, *compatible; type = ofw_bus_get_type(dev); compatible = ofw_bus_get_compat(dev); if (type == NULL || compatible == NULL) return (ENXIO); if (strcmp(type, "ht") != 0) return (ENXIO); if (strcmp(compatible, "u3-ht") != 0) return (ENXIO); device_set_desc(dev, "IBM CPC9X5 HyperTransport Tunnel"); return (0); } static int cpcht_attach(device_t dev) { struct cpcht_softc *sc; phandle_t node, child; u_int32_t reg[3]; int i; node = ofw_bus_get_node(dev); sc = device_get_softc(dev); - if (OF_getprop(node, "reg", reg, sizeof(reg)) < 12) + if (OF_getencprop(node, "reg", reg, sizeof(reg)) < 12) return (ENXIO); if (OF_getproplen(node, "ranges") <= 0) sc->pci_sc.sc_quirks = OFW_PCI_QUIRK_RANGES_ON_CHILDREN; sc->sc_populated_slots = 0; sc->sc_data = (vm_offset_t)pmap_mapdev(reg[1], reg[2]); /* * Set up the resource manager and the HT->MPIC mapping. For cpcht, * the ranges are properties of the child bridges, and this is also * where we get the HT interrupts properties. 
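 *
 * cpcht_configure_htbridge() below finds those interrupt properties by
 * the standard PCI capability-list walk; in sketch form (illustrative
 * only):
 *
 *	ptr = PCIB_READ_CONFIG(dev, b, s, f, PCIR_CAP_PTR, 1);
 *	while (ptr != 0) {
 *		if (PCIB_READ_CONFIG(dev, b, s, f, ptr + PCICAP_ID, 1) ==
 *		    PCIY_HT)
 *			; /* examine the HT interrupt capability */
 *		ptr = PCIB_READ_CONFIG(dev, b, s, f,
 *		    ptr + PCICAP_NEXTPTR, 1);
 *	}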
*/ #if 0 /* I/O port mappings are usually not in the device tree */ rman_manage_region(&sc->pci_sc.sc_io_rman, 0, CPCHT_IOPORT_SIZE - 1); #endif bzero(sc->htirq_map, sizeof(sc->htirq_map)); mtx_init(&sc->htirq_mtx, "cpcht irq", NULL, MTX_DEF); for (i = 0; i < 8; i++) sc->htirq_map[i].irq_type = IRQ_INTERNAL; for (child = OF_child(node); child != 0; child = OF_peer(child)) cpcht_configure_htbridge(dev, child); /* Now make the mapping table available to the MPIC */ cpcht_irqmap = sc->htirq_map; return (ofw_pci_attach(dev)); } static void cpcht_configure_htbridge(device_t dev, phandle_t child) { struct cpcht_softc *sc; struct ofw_pci_register pcir; int ptr, nextptr; uint32_t vend, val; int i, nirq, irq; u_int b, f, s; sc = device_get_softc(dev); - if (OF_getprop(child, "reg", &pcir, sizeof(pcir)) == -1) + if (OF_getencprop(child, "reg", (pcell_t *)&pcir, sizeof(pcir)) == -1) return; b = OFW_PCI_PHYS_HI_BUS(pcir.phys_hi); s = OFW_PCI_PHYS_HI_DEVICE(pcir.phys_hi); f = OFW_PCI_PHYS_HI_FUNCTION(pcir.phys_hi); /* * Mark this slot as populated. The remote south bridge does * not like us talking to unpopulated slots on the root bus. */ sc->sc_populated_slots |= (1 << s); /* * Next build up any HT->MPIC mappings for this sub-bus. One would * naively hope that enabling, disabling, and EOIing interrupts would * cause the appropriate HT bus transactions to that effect. This is * not the case. * * Instead, we have to muck about on the HT peer's root PCI bridges, * figure out what interrupts they send, enable them, and cache * the location of their WaitForEOI registers so that we can * send EOIs later. */ /* All the devices we are interested in have caps */ if (!(PCIB_READ_CONFIG(dev, b, s, f, PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)) return; nextptr = PCIB_READ_CONFIG(dev, b, s, f, PCIR_CAP_PTR, 1); while (nextptr != 0) { ptr = nextptr; nextptr = PCIB_READ_CONFIG(dev, b, s, f, ptr + PCICAP_NEXTPTR, 1); /* Find the HT IRQ capabilities */ if (PCIB_READ_CONFIG(dev, b, s, f, ptr + PCICAP_ID, 1) != PCIY_HT) continue; val = PCIB_READ_CONFIG(dev, b, s, f, ptr + PCIR_HT_COMMAND, 2); if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_INTERRUPT) continue; /* Ask for the IRQ count */ PCIB_WRITE_CONFIG(dev, b, s, f, ptr + PCIR_HT_COMMAND, 0x1, 1); nirq = PCIB_READ_CONFIG(dev, b, s, f, ptr + 4, 4); nirq = ((nirq >> 16) & 0xff) + 1; device_printf(dev, "%d HT IRQs on device %d.%d\n", nirq, s, f); for (i = 0; i < nirq; i++) { PCIB_WRITE_CONFIG(dev, b, s, f, ptr + PCIR_HT_COMMAND, 0x10 + (i << 1), 1); irq = PCIB_READ_CONFIG(dev, b, s, f, ptr + 4, 4); /* * Mask this interrupt for now. */ PCIB_WRITE_CONFIG(dev, b, s, f, ptr + 4, irq | HTAPIC_MASK, 4); irq = (irq >> 16) & 0xff; sc->htirq_map[irq].irq_type = IRQ_HT; sc->htirq_map[irq].ht_source = i; sc->htirq_map[irq].ht_base = sc->sc_data + (((((s & 0x1f) << 3) | (f & 0x07)) << 8) | (ptr)); PCIB_WRITE_CONFIG(dev, b, s, f, ptr + PCIR_HT_COMMAND, 0x11 + (i << 1), 1); sc->htirq_map[irq].eoi_data = PCIB_READ_CONFIG(dev, b, s, f, ptr + 4, 4) | 0x80000000; /* * Apple uses a non-compliant IO/APIC that differs * in how we signal EOIs. Check if this device was * made by Apple, and act accordingly.
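 *
 * Compliant parts take EOIs through the HT WaitForEOI data port (see
 * openpic_cpcht_eoi()); Apple parts, identified by vendor ID 0x106b,
 * instead expose a bitmap EOI register 0x60 past the function's
 * config-space base, which is what apple_eoi caches.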
*/ vend = PCIB_READ_CONFIG(dev, b, s, f, PCIR_DEVVENDOR, 4); if ((vend & 0xffff) == 0x106b) sc->htirq_map[irq].apple_eoi = (sc->htirq_map[irq].ht_base - ptr) + 0x60; } } } static u_int32_t cpcht_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, int width) { struct cpcht_softc *sc; vm_offset_t caoff; sc = device_get_softc(dev); caoff = sc->sc_data + (((((slot & 0x1f) << 3) | (func & 0x07)) << 8) | reg); if (bus == 0 && (!(sc->sc_populated_slots & (1 << slot)) || func > 0)) return (0xffffffff); if (bus > 0) caoff += 0x01000000UL + (bus << 16); switch (width) { case 1: return (in8rb(caoff)); break; case 2: return (in16rb(caoff)); break; case 4: return (in32rb(caoff)); break; } return (0xffffffff); } static void cpcht_write_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, u_int32_t val, int width) { struct cpcht_softc *sc; vm_offset_t caoff; sc = device_get_softc(dev); caoff = sc->sc_data + (((((slot & 0x1f) << 3) | (func & 0x07)) << 8) | reg); if (bus == 0 && (!(sc->sc_populated_slots & (1 << slot)) || func > 0)) return; if (bus > 0) caoff += 0x01000000UL + (bus << 16); switch (width) { case 1: out8rb(caoff, val); break; case 2: out16rb(caoff, val); break; case 4: out32rb(caoff, val); break; } } static int cpcht_route_interrupt(device_t bus, device_t dev, int pin) { return (pin); } static int cpcht_alloc_msi(device_t dev, device_t child, int count, int maxcount, int *irqs) { struct cpcht_softc *sc; int i, j; sc = device_get_softc(dev); j = 0; /* Bail if no MSI PIC yet */ if (cpcht_msipic == 0) return (ENXIO); mtx_lock(&sc->htirq_mtx); for (i = 8; i < 124 - count; i++) { for (j = 0; j < count; j++) { if (sc->htirq_map[i+j].irq_type != IRQ_NONE) break; } if (j == count) break; i += j; /* We know there isn't a large enough run */ } if (j != count) { mtx_unlock(&sc->htirq_mtx); return (ENXIO); } for (j = 0; j < count; j++) { irqs[j] = MAP_IRQ(cpcht_msipic, i+j); sc->htirq_map[i+j].irq_type = IRQ_MSI; } mtx_unlock(&sc->htirq_mtx); return (0); } static int cpcht_release_msi(device_t dev, device_t child, int count, int *irqs) { struct cpcht_softc *sc; int i; sc = device_get_softc(dev); mtx_lock(&sc->htirq_mtx); for (i = 0; i < count; i++) sc->htirq_map[irqs[i] & 0xff].irq_type = IRQ_NONE; mtx_unlock(&sc->htirq_mtx); return (0); } static int cpcht_alloc_msix(device_t dev, device_t child, int *irq) { struct cpcht_softc *sc; int i; sc = device_get_softc(dev); /* Bail if no MSI PIC yet */ if (cpcht_msipic == 0) return (ENXIO); mtx_lock(&sc->htirq_mtx); for (i = 8; i < 124; i++) { if (sc->htirq_map[i].irq_type == IRQ_NONE) { sc->htirq_map[i].irq_type = IRQ_MSI; *irq = MAP_IRQ(cpcht_msipic, i); mtx_unlock(&sc->htirq_mtx); return (0); } } mtx_unlock(&sc->htirq_mtx); return (ENXIO); } static int cpcht_release_msix(device_t dev, device_t child, int irq) { struct cpcht_softc *sc; sc = device_get_softc(dev); mtx_lock(&sc->htirq_mtx); sc->htirq_map[irq & 0xff].irq_type = IRQ_NONE; mtx_unlock(&sc->htirq_mtx); return (0); } static int cpcht_map_msi(device_t dev, device_t child, int irq, uint64_t *addr, uint32_t *data) { device_t pcib; struct pci_devinfo *dinfo; struct pcicfg_ht *ht = NULL; for (pcib = child; pcib != dev; pcib = device_get_parent(device_get_parent(pcib))) { dinfo = device_get_ivars(pcib); ht = &dinfo->cfg.ht; if (ht == NULL) continue; } if (ht == NULL) return (ENXIO); *addr = ht->ht_msiaddr; *data = irq & 0xff; return (0); } /* * Driver for the integrated MPIC on U3/U4 (CPC925/CPC945) */ static int openpic_cpcht_probe(device_t); static int 
openpic_cpcht_attach(device_t); static void openpic_cpcht_config(device_t, u_int irq, enum intr_trigger trig, enum intr_polarity pol); static void openpic_cpcht_enable(device_t, u_int irq, u_int vector); static void openpic_cpcht_unmask(device_t, u_int irq); static void openpic_cpcht_eoi(device_t, u_int irq); static device_method_t openpic_cpcht_methods[] = { /* Device interface */ DEVMETHOD(device_probe, openpic_cpcht_probe), DEVMETHOD(device_attach, openpic_cpcht_attach), /* PIC interface */ DEVMETHOD(pic_bind, openpic_bind), DEVMETHOD(pic_config, openpic_cpcht_config), DEVMETHOD(pic_dispatch, openpic_dispatch), DEVMETHOD(pic_enable, openpic_cpcht_enable), DEVMETHOD(pic_eoi, openpic_cpcht_eoi), DEVMETHOD(pic_ipi, openpic_ipi), DEVMETHOD(pic_mask, openpic_mask), DEVMETHOD(pic_unmask, openpic_cpcht_unmask), { 0, 0 }, }; struct openpic_cpcht_softc { struct openpic_softc sc_openpic; struct mtx sc_ht_mtx; }; static driver_t openpic_cpcht_driver = { "htpic", openpic_cpcht_methods, sizeof(struct openpic_cpcht_softc), }; DRIVER_MODULE(openpic, unin, openpic_cpcht_driver, openpic_devclass, 0, 0); static int openpic_cpcht_probe(device_t dev) { const char *type = ofw_bus_get_type(dev); if (strcmp(type, "open-pic") != 0) return (ENXIO); device_set_desc(dev, OPENPIC_DEVSTR); return (0); } static int openpic_cpcht_attach(device_t dev) { struct openpic_cpcht_softc *sc; phandle_t node; int err, irq; node = ofw_bus_get_node(dev); err = openpic_common_attach(dev, node); if (err != 0) return (err); /* * The HT APIC stuff is not thread-safe, so we need a mutex to * protect it. */ sc = device_get_softc(dev); mtx_init(&sc->sc_ht_mtx, "htpic", NULL, MTX_SPIN); /* * Interrupts 0-3 are internally sourced and are level triggered * active low. Interrupts 4-123 are connected to a pulse generator * and should be programmed as edge triggered low-to-high. * * IBM CPC945 Manual, Section 9.3. */ for (irq = 0; irq < 4; irq++) openpic_config(dev, irq, INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW); for (irq = 4; irq < 124; irq++) openpic_config(dev, irq, INTR_TRIGGER_EDGE, INTR_POLARITY_LOW); /* * Use this PIC for MSI only if it is the root PIC. This may not * be necessary, but Linux does it, and I cannot find any U3 machines * with MSI devices to test. */ if (dev == root_pic) cpcht_msipic = node; return (0); } static void openpic_cpcht_config(device_t dev, u_int irq, enum intr_trigger trig, enum intr_polarity pol) { struct openpic_cpcht_softc *sc; uint32_t ht_irq; /* * The interrupt settings for the MPIC are completely determined * by the internal wiring in the northbridge. Real changes to these * settings need to be negotiated with the remote IO-APIC on the HT * link. 
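 *
 * Those negotiations use the interrupt-definition registers behind the
 * bridge's HT interrupt capability: writing (0x10 + 2 * source) to the
 * select byte at ht_base + PCIR_HT_COMMAND exposes a 32-bit register at
 * ht_base + 4, in which we toggle
 *
 *	HTAPIC_MASK		0x01	interrupt masked
 *	HTAPIC_TRIGGER_LEVEL	0x02	level- rather than edge-triggered
 *	HTAPIC_REQUEST_EOI	0x20	remote APIC wants an explicit EOI
 *
 * (a restatement of the constants defined earlier in this file).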
*/ sc = device_get_softc(dev); if (cpcht_irqmap != NULL && irq < 128 && cpcht_irqmap[irq].ht_base > 0 && !cpcht_irqmap[irq].edge) { mtx_lock_spin(&sc->sc_ht_mtx); /* Program the data port */ out8rb(cpcht_irqmap[irq].ht_base + PCIR_HT_COMMAND, 0x10 + (cpcht_irqmap[irq].ht_source << 1)); /* Grab the IRQ config register */ ht_irq = in32rb(cpcht_irqmap[irq].ht_base + 4); /* Mask the IRQ while we fiddle settings */ out32rb(cpcht_irqmap[irq].ht_base + 4, ht_irq | HTAPIC_MASK); /* Program the interrupt sense */ ht_irq &= ~(HTAPIC_TRIGGER_LEVEL | HTAPIC_REQUEST_EOI); if (trig == INTR_TRIGGER_EDGE) { cpcht_irqmap[irq].edge = 1; } else { cpcht_irqmap[irq].edge = 0; ht_irq |= HTAPIC_TRIGGER_LEVEL | HTAPIC_REQUEST_EOI; } out32rb(cpcht_irqmap[irq].ht_base + 4, ht_irq); mtx_unlock_spin(&sc->sc_ht_mtx); } } static void openpic_cpcht_enable(device_t dev, u_int irq, u_int vec) { struct openpic_cpcht_softc *sc; uint32_t ht_irq; openpic_enable(dev, irq, vec); sc = device_get_softc(dev); if (cpcht_irqmap != NULL && irq < 128 && cpcht_irqmap[irq].ht_base > 0) { mtx_lock_spin(&sc->sc_ht_mtx); /* Program the data port */ out8rb(cpcht_irqmap[irq].ht_base + PCIR_HT_COMMAND, 0x10 + (cpcht_irqmap[irq].ht_source << 1)); /* Unmask the interrupt */ ht_irq = in32rb(cpcht_irqmap[irq].ht_base + 4); ht_irq &= ~HTAPIC_MASK; out32rb(cpcht_irqmap[irq].ht_base + 4, ht_irq); mtx_unlock_spin(&sc->sc_ht_mtx); } openpic_cpcht_eoi(dev, irq); } static void openpic_cpcht_unmask(device_t dev, u_int irq) { struct openpic_cpcht_softc *sc; uint32_t ht_irq; openpic_unmask(dev, irq); sc = device_get_softc(dev); if (cpcht_irqmap != NULL && irq < 128 && cpcht_irqmap[irq].ht_base > 0) { mtx_lock_spin(&sc->sc_ht_mtx); /* Program the data port */ out8rb(cpcht_irqmap[irq].ht_base + PCIR_HT_COMMAND, 0x10 + (cpcht_irqmap[irq].ht_source << 1)); /* Unmask the interrupt */ ht_irq = in32rb(cpcht_irqmap[irq].ht_base + 4); ht_irq &= ~HTAPIC_MASK; out32rb(cpcht_irqmap[irq].ht_base + 4, ht_irq); mtx_unlock_spin(&sc->sc_ht_mtx); } openpic_cpcht_eoi(dev, irq); } static void openpic_cpcht_eoi(device_t dev, u_int irq) { struct openpic_cpcht_softc *sc; uint32_t off, mask; if (irq == 255) return; sc = device_get_softc(dev); if (cpcht_irqmap != NULL && irq < 128 && cpcht_irqmap[irq].ht_base > 0 && !cpcht_irqmap[irq].edge) { /* If this is an HT IRQ, acknowledge it at the remote APIC */ if (cpcht_irqmap[irq].apple_eoi) { off = (cpcht_irqmap[irq].ht_source >> 3) & ~3; mask = 1 << (cpcht_irqmap[irq].ht_source & 0x1f); out32rb(cpcht_irqmap[irq].apple_eoi + off, mask); } else { mtx_lock_spin(&sc->sc_ht_mtx); out8rb(cpcht_irqmap[irq].ht_base + PCIR_HT_COMMAND, 0x11 + (cpcht_irqmap[irq].ht_source << 1)); out32rb(cpcht_irqmap[irq].ht_base + 4, cpcht_irqmap[irq].eoi_data); mtx_unlock_spin(&sc->sc_ht_mtx); } } openpic_eoi(dev, irq); } Index: projects/powernv/powerpc/powermac/kiic.c =================================================================== --- projects/powernv/powerpc/powermac/kiic.c (revision 290990) +++ projects/powernv/powerpc/powermac/kiic.c (revision 290991) @@ -1,444 +1,444 @@ /*- * Copyright (c) 2001 Tsubai Masanari. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ * NetBSD: ki2c.c,v 1.11 2007/12/06 17:00:33 ad Exp * Id: ki2c.c,v 1.7 2002/10/05 09:56:05 tsubai Exp */ /* * Support routines for the Keywest I2C controller. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "iicbus_if.h" /* Keywest I2C Register offsets */ #define MODE 0 #define CONTROL 1 #define STATUS 2 #define ISR 3 #define IER 4 #define ADDR 5 #define SUBADDR 6 #define DATA 7 #define REV 8 /* MODE */ #define I2C_SPEED 0x03 /* Speed mask */ #define I2C_100kHz 0x00 #define I2C_50kHz 0x01 #define I2C_25kHz 0x02 #define I2C_MODE 0x0c /* Mode mask */ #define I2C_DUMBMODE 0x00 /* Dumb mode */ #define I2C_STDMODE 0x04 /* Standard mode */ #define I2C_STDSUBMODE 0x08 /* Standard mode + sub address */ #define I2C_COMBMODE 0x0c /* Combined mode */ #define I2C_PORT 0xf0 /* Port mask */ /* CONTROL */ #define I2C_CT_AAK 0x01 /* Send AAK */ #define I2C_CT_ADDR 0x02 /* Send address(es) */ #define I2C_CT_STOP 0x04 /* Send STOP */ #define I2C_CT_START 0x08 /* Send START */ /* STATUS */ #define I2C_ST_BUSY 0x01 /* Busy */ #define I2C_ST_LASTAAK 0x02 /* Last AAK */ #define I2C_ST_LASTRW 0x04 /* Last R/W */ #define I2C_ST_SDA 0x08 /* SDA */ #define I2C_ST_SCL 0x10 /* SCL */ /* ISR/IER */ #define I2C_INT_DATA 0x01 /* Data byte sent/received */ #define I2C_INT_ADDR 0x02 /* Address sent */ #define I2C_INT_STOP 0x04 /* STOP condition sent */ #define I2C_INT_START 0x08 /* START condition sent */ /* I2C flags */ #define I2C_BUSY 0x01 #define I2C_READING 0x02 #define I2C_ERROR 0x04 #define I2C_SELECTED 0x08 struct kiic_softc { device_t sc_dev; phandle_t sc_node; struct mtx sc_mutex; struct resource *sc_reg; int sc_irqrid; struct resource *sc_irq; void *sc_ih; u_int sc_regstep; u_int sc_flags; u_char *sc_data; int sc_resid; uint16_t sc_i2c_base; device_t sc_iicbus; }; static int kiic_probe(device_t dev); static int kiic_attach(device_t dev); static void kiic_writereg(struct kiic_softc *sc, u_int, u_int); static u_int kiic_readreg(struct kiic_softc *, u_int); static void kiic_setport(struct kiic_softc *, u_int); static void kiic_setmode(struct kiic_softc *, u_int); static void kiic_setspeed(struct kiic_softc *, u_int); static void kiic_intr(void *xsc); static int kiic_transfer(device_t dev, struct iic_msg *msgs, uint32_t nmsgs); static phandle_t kiic_get_node(device_t bus, device_t dev); static device_method_t kiic_methods[] = { /* device interface */ 
DEVMETHOD(device_probe, kiic_probe), DEVMETHOD(device_attach, kiic_attach), /* iicbus interface */ DEVMETHOD(iicbus_callback, iicbus_null_callback), DEVMETHOD(iicbus_transfer, kiic_transfer), /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_node, kiic_get_node), { 0, 0 } }; static driver_t kiic_driver = { "iichb", kiic_methods, sizeof(struct kiic_softc) }; static devclass_t kiic_devclass; DRIVER_MODULE(kiic, macio, kiic_driver, kiic_devclass, 0, 0); DRIVER_MODULE(kiic, unin, kiic_driver, kiic_devclass, 0, 0); static int kiic_probe(device_t self) { const char *name; name = ofw_bus_get_name(self); if (name && strcmp(name, "i2c") == 0) { device_set_desc(self, "Keywest I2C controller"); return (0); } return (ENXIO); } static int kiic_attach(device_t self) { struct kiic_softc *sc = device_get_softc(self); int rid, rate; phandle_t node; char name[64]; bzero(sc, sizeof(*sc)); sc->sc_dev = self; node = ofw_bus_get_node(self); if (node == 0 || node == -1) { return (EINVAL); } rid = 0; sc->sc_reg = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->sc_reg == NULL) { return (ENOMEM); } - if (OF_getprop(node, "AAPL,i2c-rate", &rate, 4) != 4) { + if (OF_getencprop(node, "AAPL,i2c-rate", &rate, 4) != 4) { device_printf(self, "cannot get i2c-rate\n"); return (ENXIO); } - if (OF_getprop(node, "AAPL,address-step", &sc->sc_regstep, 4) != 4) { + if (OF_getencprop(node, "AAPL,address-step", &sc->sc_regstep, 4) != 4) { device_printf(self, "unable to find i2c address step\n"); return (ENXIO); } /* * Some Keywest I2C devices have their children attached directly * underneath them. Some have a single 'iicbus' child with the * devices underneath that. Sort this out, and make sure that the * OFW I2C layer has the correct node. * * Note: the I2C children of the Uninorth bridges have two ports. * In general, the port is designated in the 9th bit of the I2C * address. However, for kiic devices with children attached below * an i2c-bus node, the port is indicated in the 'reg' property * of the i2c-bus node. 
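 *
 * For example, assuming a hypothetical device at 8-bit slave address
 * 0xD2 behind port 1 (so sc_i2c_base = 1 << 8): kiic_transfer()
 * computes addr = 0xD2 | 0x100 = 0x1D2, selects port
 * (0x1D2 & 0x100) >> 8 = 1 and writes 0xD2 into the ADDR register.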
*/ sc->sc_node = node; node = OF_child(node); if (OF_getprop(node, "name", name, sizeof(name)) > 0) { if (strcmp(name,"i2c-bus") == 0) { phandle_t reg; if (OF_getprop(node, "reg", &reg, sizeof(reg)) > 0) sc->sc_i2c_base = reg << 8; sc->sc_node = node; } } mtx_init(&sc->sc_mutex, "kiic", NULL, MTX_DEF); sc->sc_irq = bus_alloc_resource_any(self, SYS_RES_IRQ, &sc->sc_irqrid, RF_ACTIVE); bus_setup_intr(self, sc->sc_irq, INTR_TYPE_MISC | INTR_MPSAFE, NULL, kiic_intr, sc, &sc->sc_ih); kiic_writereg(sc, ISR, kiic_readreg(sc, ISR)); kiic_writereg(sc, STATUS, 0); kiic_writereg(sc, IER, 0); kiic_setmode(sc, I2C_STDMODE); kiic_setspeed(sc, I2C_100kHz); /* XXX rate */ kiic_writereg(sc, IER, I2C_INT_DATA | I2C_INT_ADDR | I2C_INT_STOP); if (bootverbose) device_printf(self, "Revision: %02X\n", kiic_readreg(sc, REV)); /* Add the IIC bus layer */ sc->sc_iicbus = device_add_child(self, "iicbus", -1); return (bus_generic_attach(self)); } static void kiic_writereg(struct kiic_softc *sc, u_int reg, u_int val) { bus_write_4(sc->sc_reg, sc->sc_regstep * reg, val); DELAY(100); /* register access delay */ } static u_int kiic_readreg(struct kiic_softc *sc, u_int reg) { return bus_read_4(sc->sc_reg, sc->sc_regstep * reg) & 0xff; } static void kiic_setmode(struct kiic_softc *sc, u_int mode) { u_int x; KASSERT((mode & ~I2C_MODE) == 0, ("bad mode")); x = kiic_readreg(sc, MODE); x &= ~I2C_MODE; x |= mode; kiic_writereg(sc, MODE, x); } static void kiic_setport(struct kiic_softc *sc, u_int port) { u_int x; KASSERT(port == 1 || port == 0, ("bad port")); x = kiic_readreg(sc, MODE); x &= ~I2C_PORT; x |= (port << 4); kiic_writereg(sc, MODE, x); } static void kiic_setspeed(struct kiic_softc *sc, u_int speed) { u_int x; KASSERT((speed & ~I2C_SPEED) == 0, ("bad speed")); x = kiic_readreg(sc, MODE); x &= ~I2C_SPEED; x |= speed; kiic_writereg(sc, MODE, x); } static void kiic_intr(void *xsc) { struct kiic_softc *sc = xsc; u_int isr; uint32_t x; mtx_lock(&sc->sc_mutex); isr = kiic_readreg(sc, ISR); if (isr & I2C_INT_ADDR) { sc->sc_flags |= I2C_SELECTED; if (sc->sc_flags & I2C_READING) { if (sc->sc_resid > 1) { x = kiic_readreg(sc, CONTROL); x |= I2C_CT_AAK; kiic_writereg(sc, CONTROL, x); } } else { kiic_writereg(sc, DATA, *sc->sc_data++); sc->sc_resid--; } } if (isr & I2C_INT_DATA) { if (sc->sc_flags & I2C_READING) { if (sc->sc_resid > 0) { *sc->sc_data++ = kiic_readreg(sc, DATA); sc->sc_resid--; } if (sc->sc_resid == 0) /* done */ kiic_writereg(sc, CONTROL, 0); } else { if (sc->sc_resid == 0) { x = kiic_readreg(sc, CONTROL); x |= I2C_CT_STOP; kiic_writereg(sc, CONTROL, x); } else { kiic_writereg(sc, DATA, *sc->sc_data++); sc->sc_resid--; } } } if (isr & I2C_INT_STOP) { kiic_writereg(sc, CONTROL, 0); sc->sc_flags &= ~I2C_SELECTED; wakeup(sc->sc_dev); } kiic_writereg(sc, ISR, isr); mtx_unlock(&sc->sc_mutex); } static int kiic_transfer(device_t dev, struct iic_msg *msgs, uint32_t nmsgs) { struct kiic_softc *sc; int i, x, timo, err; uint16_t addr; uint8_t subaddr; sc = device_get_softc(dev); timo = 100; subaddr = 0; mtx_lock(&sc->sc_mutex); if (sc->sc_flags & I2C_BUSY) mtx_sleep(dev, &sc->sc_mutex, 0, "kiic", timo); if (sc->sc_flags & I2C_BUSY) { mtx_unlock(&sc->sc_mutex); return (ETIMEDOUT); } sc->sc_flags = I2C_BUSY; /* Clear pending interrupts, and reset controller */ kiic_writereg(sc, ISR, kiic_readreg(sc, ISR)); kiic_writereg(sc, STATUS, 0); for (i = 0; i < nmsgs; i++) { if (msgs[i].flags & IIC_M_NOSTOP) { if (msgs[i+1].flags & IIC_M_RD) kiic_setmode(sc, I2C_COMBMODE); else kiic_setmode(sc, I2C_STDSUBMODE); KASSERT(msgs[i].len == 1,
("oversize I2C message")); subaddr = msgs[i].buf[0]; i++; } else { kiic_setmode(sc, I2C_STDMODE); } sc->sc_data = msgs[i].buf; sc->sc_resid = msgs[i].len; sc->sc_flags = I2C_BUSY; addr = msgs[i].slave; timo = 1000 + sc->sc_resid * 200; timo += 100000; if (msgs[i].flags & IIC_M_RD) { sc->sc_flags |= I2C_READING; addr |= 1; } addr |= sc->sc_i2c_base; kiic_setport(sc, (addr & 0x100) >> 8); kiic_writereg(sc, ADDR, addr & 0xff); kiic_writereg(sc, SUBADDR, subaddr); x = kiic_readreg(sc, CONTROL) | I2C_CT_ADDR; kiic_writereg(sc, CONTROL, x); err = mtx_sleep(dev, &sc->sc_mutex, 0, "kiic", timo); msgs[i].len -= sc->sc_resid; if ((sc->sc_flags & I2C_ERROR) || err == EWOULDBLOCK) { device_printf(sc->sc_dev, "I2C error\n"); sc->sc_flags = 0; mtx_unlock(&sc->sc_mutex); return (EIO); } } sc->sc_flags = 0; mtx_unlock(&sc->sc_mutex); return (0); } static phandle_t kiic_get_node(device_t bus, device_t dev) { struct kiic_softc *sc; sc = device_get_softc(bus); /* We only have one child, the I2C bus, which needs our own node. */ return sc->sc_node; } Index: projects/powernv/powerpc/powermac/macgpio.c =================================================================== --- projects/powernv/powerpc/powermac/macgpio.c (revision 290990) +++ projects/powernv/powerpc/powermac/macgpio.c (revision 290991) @@ -1,403 +1,403 @@ /*- * Copyright 2008 by Nathan Whitehorn. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ /* * Driver for MacIO GPIO controller */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Macgpio softc */ struct macgpio_softc { phandle_t sc_node; struct resource *sc_gpios; int sc_gpios_rid; uint32_t sc_saved_gpio_levels[2]; uint32_t sc_saved_gpios[GPIO_COUNT]; uint32_t sc_saved_extint_gpios[GPIO_EXTINT_COUNT]; }; static MALLOC_DEFINE(M_MACGPIO, "macgpio", "macgpio device information"); static int macgpio_probe(device_t); static int macgpio_attach(device_t); static int macgpio_print_child(device_t dev, device_t child); static void macgpio_probe_nomatch(device_t, device_t); static struct resource *macgpio_alloc_resource(device_t, device_t, int, int *, u_long, u_long, u_long, u_int); static int macgpio_activate_resource(device_t, device_t, int, int, struct resource *); static int macgpio_deactivate_resource(device_t, device_t, int, int, struct resource *); static ofw_bus_get_devinfo_t macgpio_get_devinfo; static int macgpio_suspend(device_t dev); static int macgpio_resume(device_t dev); /* * Bus interface definition */ static device_method_t macgpio_methods[] = { /* Device interface */ DEVMETHOD(device_probe, macgpio_probe), DEVMETHOD(device_attach, macgpio_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, macgpio_suspend), DEVMETHOD(device_resume, macgpio_resume), /* Bus interface */ DEVMETHOD(bus_print_child, macgpio_print_child), DEVMETHOD(bus_probe_nomatch, macgpio_probe_nomatch), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), DEVMETHOD(bus_alloc_resource, macgpio_alloc_resource), DEVMETHOD(bus_activate_resource, macgpio_activate_resource), DEVMETHOD(bus_deactivate_resource, macgpio_deactivate_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_child_pnpinfo_str, ofw_bus_gen_child_pnpinfo_str), /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_devinfo, macgpio_get_devinfo), DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), { 0, 0 } }; static driver_t macgpio_pci_driver = { "macgpio", macgpio_methods, sizeof(struct macgpio_softc) }; devclass_t macgpio_devclass; DRIVER_MODULE(macgpio, macio, macgpio_pci_driver, macgpio_devclass, 0, 0); struct macgpio_devinfo { struct ofw_bus_devinfo mdi_obdinfo; struct resource_list mdi_resources; int gpio_num; }; static int macgpio_probe(device_t dev) { const char *name; name = ofw_bus_get_name(dev); if (name && strcmp(name, "gpio") == 0) { device_set_desc(dev, "MacIO GPIO Controller"); return (0); } return (ENXIO); } /* * Scan Open Firmware child nodes, and attach these as children * of the macgpio bus */ static int macgpio_attach(device_t dev) { struct macgpio_softc *sc; struct macgpio_devinfo *dinfo; phandle_t root, child, iparent; device_t cdev; uint32_t irq; sc = device_get_softc(dev); root = sc->sc_node = ofw_bus_get_node(dev); sc->sc_gpios = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->sc_gpios_rid, RF_ACTIVE); /* * Iterate through the sub-devices */ for (child = OF_child(root); child != 0; child = OF_peer(child)) { dinfo = malloc(sizeof(*dinfo), M_MACGPIO, M_WAITOK | M_ZERO); if (ofw_bus_gen_setup_devinfo(&dinfo->mdi_obdinfo, 
child) != 0) { free(dinfo, M_MACGPIO); continue; } - if (OF_getprop(child,"reg",&dinfo->gpio_num, + if (OF_getencprop(child, "reg", &dinfo->gpio_num, sizeof(dinfo->gpio_num)) != sizeof(dinfo->gpio_num)) { /* * Some early GPIO controllers don't provide GPIO * numbers for GPIOs designed only to provide * interrupt resources. We should still allow these * to attach, but with caution. */ dinfo->gpio_num = -1; } resource_list_init(&dinfo->mdi_resources); - if (OF_getprop(child, "interrupts", &irq, sizeof(irq)) == + if (OF_getencprop(child, "interrupts", &irq, sizeof(irq)) == sizeof(irq)) { - OF_searchprop(child, "interrupt-parent", &iparent, + OF_searchencprop(child, "interrupt-parent", &iparent, sizeof(iparent)); resource_list_add(&dinfo->mdi_resources, SYS_RES_IRQ, 0, MAP_IRQ(iparent, irq), MAP_IRQ(iparent, irq), 1); } /* Fix messed-up offsets */ if (dinfo->gpio_num > 0x50) dinfo->gpio_num -= 0x50; cdev = device_add_child(dev, NULL, -1); if (cdev == NULL) { device_printf(dev, "<%s>: device_add_child failed\n", dinfo->mdi_obdinfo.obd_name); ofw_bus_gen_destroy_devinfo(&dinfo->mdi_obdinfo); free(dinfo, M_MACGPIO); continue; } device_set_ivars(cdev, dinfo); } return (bus_generic_attach(dev)); } static int macgpio_print_child(device_t dev, device_t child) { struct macgpio_devinfo *dinfo; int retval = 0; dinfo = device_get_ivars(child); retval += bus_print_child_header(dev, child); if (dinfo->gpio_num >= GPIO_BASE) printf(" gpio %d", dinfo->gpio_num - GPIO_BASE); else if (dinfo->gpio_num >= GPIO_EXTINT_BASE) printf(" extint-gpio %d", dinfo->gpio_num - GPIO_EXTINT_BASE); else if (dinfo->gpio_num >= 0) printf(" addr 0x%02x", dinfo->gpio_num); /* should not happen */ resource_list_print_type(&dinfo->mdi_resources, "irq", SYS_RES_IRQ, "%ld"); retval += bus_print_child_footer(dev, child); return (retval); } static void macgpio_probe_nomatch(device_t dev, device_t child) { struct macgpio_devinfo *dinfo; const char *type; if (bootverbose) { dinfo = device_get_ivars(child); if ((type = ofw_bus_get_type(child)) == NULL) type = "(unknown)"; device_printf(dev, "<%s, %s>", type, ofw_bus_get_name(child)); if (dinfo->gpio_num >= 0) printf(" gpio %d",dinfo->gpio_num); resource_list_print_type(&dinfo->mdi_resources, "irq", SYS_RES_IRQ, "%ld"); printf(" (no driver attached)\n"); } } static struct resource * macgpio_alloc_resource(device_t bus, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct macgpio_devinfo *dinfo; dinfo = device_get_ivars(child); if (type != SYS_RES_IRQ) return (NULL); return (resource_list_alloc(&dinfo->mdi_resources, bus, child, type, rid, start, end, count, flags)); } static int macgpio_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *res) { struct macgpio_softc *sc; struct macgpio_devinfo *dinfo; u_char val; sc = device_get_softc(bus); dinfo = device_get_ivars(child); if (type != SYS_RES_IRQ) return ENXIO; if (dinfo->gpio_num >= 0) { val = bus_read_1(sc->sc_gpios,dinfo->gpio_num); val |= 0x80; bus_write_1(sc->sc_gpios,dinfo->gpio_num,val); } return (bus_activate_resource(bus, type, rid, res)); } static int macgpio_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *res) { struct macgpio_softc *sc; struct macgpio_devinfo *dinfo; u_char val; sc = device_get_softc(bus); dinfo = device_get_ivars(child); if (type != SYS_RES_IRQ) return ENXIO; if (dinfo->gpio_num >= 0) { val = bus_read_1(sc->sc_gpios,dinfo->gpio_num); val &= ~0x80; bus_write_1(sc->sc_gpios,dinfo->gpio_num,val); } 
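	/*
	 * Clearing bit 0x80 mirrors macgpio_activate_resource() above,
	 * which sets it when the IRQ resource is activated; the bit
	 * appears to gate whether the pin raises its interrupt.
	 */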
return (bus_deactivate_resource(bus, type, rid, res)); } uint8_t macgpio_read(device_t dev) { struct macgpio_softc *sc; struct macgpio_devinfo *dinfo; sc = device_get_softc(device_get_parent(dev)); dinfo = device_get_ivars(dev); if (dinfo->gpio_num < 0) return (0); return (bus_read_1(sc->sc_gpios,dinfo->gpio_num)); } void macgpio_write(device_t dev, uint8_t val) { struct macgpio_softc *sc; struct macgpio_devinfo *dinfo; sc = device_get_softc(device_get_parent(dev)); dinfo = device_get_ivars(dev); if (dinfo->gpio_num < 0) return; bus_write_1(sc->sc_gpios,dinfo->gpio_num,val); } static const struct ofw_bus_devinfo * macgpio_get_devinfo(device_t dev, device_t child) { struct macgpio_devinfo *dinfo; dinfo = device_get_ivars(child); return (&dinfo->mdi_obdinfo); } static int macgpio_suspend(device_t dev) { struct macgpio_softc *sc; int i; sc = device_get_softc(dev); sc->sc_saved_gpio_levels[0] = bus_read_4(sc->sc_gpios, GPIO_LEVELS_0); sc->sc_saved_gpio_levels[1] = bus_read_4(sc->sc_gpios, GPIO_LEVELS_1); for (i = 0; i < GPIO_COUNT; i++) sc->sc_saved_gpios[i] = bus_read_1(sc->sc_gpios, GPIO_BASE + i); for (i = 0; i < GPIO_EXTINT_COUNT; i++) sc->sc_saved_extint_gpios[i] = bus_read_1(sc->sc_gpios, GPIO_EXTINT_BASE + i); return (0); } static int macgpio_resume(device_t dev) { struct macgpio_softc *sc; int i; sc = device_get_softc(dev); bus_write_4(sc->sc_gpios, GPIO_LEVELS_0, sc->sc_saved_gpio_levels[0]); bus_write_4(sc->sc_gpios, GPIO_LEVELS_1, sc->sc_saved_gpio_levels[1]); for (i = 0; i < GPIO_COUNT; i++) bus_write_1(sc->sc_gpios, GPIO_BASE + i, sc->sc_saved_gpios[i]); for (i = 0; i < GPIO_EXTINT_COUNT; i++) bus_write_1(sc->sc_gpios, GPIO_EXTINT_BASE + i, sc->sc_saved_extint_gpios[i]); return (0); } Index: projects/powernv/powerpc/pseries/mmu_phyp.c =================================================================== --- projects/powernv/powerpc/pseries/mmu_phyp.c (revision 290990) +++ projects/powernv/powerpc/pseries/mmu_phyp.c (revision 290991) @@ -1,474 +1,474 @@ /* * Copyright (C) 2010 Andreas Tobler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu_if.h" #include "moea64_if.h" #include "phyp-hvcall.h" extern int n_slbs; static struct rmlock mphyp_eviction_lock; /* * Kernel MMU interface */ static void mphyp_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend); static void mphyp_cpu_bootstrap(mmu_t mmup, int ap); static int64_t mphyp_pte_synch(mmu_t, struct pvo_entry *pvo); static int64_t mphyp_pte_clear(mmu_t, struct pvo_entry *pvo, uint64_t ptebit); static int64_t mphyp_pte_unset(mmu_t, struct pvo_entry *pvo); static int mphyp_pte_insert(mmu_t, struct pvo_entry *pvo); static mmu_method_t mphyp_methods[] = { MMUMETHOD(mmu_bootstrap, mphyp_bootstrap), MMUMETHOD(mmu_cpu_bootstrap, mphyp_cpu_bootstrap), MMUMETHOD(moea64_pte_synch, mphyp_pte_synch), MMUMETHOD(moea64_pte_clear, mphyp_pte_clear), MMUMETHOD(moea64_pte_unset, mphyp_pte_unset), MMUMETHOD(moea64_pte_insert, mphyp_pte_insert), /* XXX: pmap_copy_page, pmap_init_page with H_PAGE_INIT */ { 0, 0 } }; MMU_DEF_INHERIT(pseries_mmu, "mmu_phyp", mphyp_methods, 0, oea64_mmu); static int brokenkvm = 0; static void print_kvm_bug_warning(void *data) { if (brokenkvm) printf("WARNING: Running on a broken hypervisor that does " "not support mandatory H_CLEAR_MOD and H_CLEAR_REF " "hypercalls. Performance will be suboptimal.\n"); } SYSINIT(kvmbugwarn1, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1, print_kvm_bug_warning, NULL); SYSINIT(kvmbugwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1, print_kvm_bug_warning, NULL); static void mphyp_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { uint64_t final_pteg_count = 0; char buf[8]; uint32_t prop[2]; uint32_t nptlp, shift = 0, slb_encoding = 0; uint32_t lp_size, lp_encoding; struct lpte old; uint64_t vsid; phandle_t dev, node, root; int idx, len, res; rm_init(&mphyp_eviction_lock, "pte eviction"); moea64_early_bootstrap(mmup, kernelstart, kernelend); root = OF_peer(0); dev = OF_child(root); while (dev != 0) { res = OF_getprop(dev, "name", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpus") == 0) break; dev = OF_peer(dev); } node = OF_child(dev); while (node != 0) { res = OF_getprop(node, "device_type", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpu") == 0) break; node = OF_peer(node); } - res = OF_getprop(node, "ibm,pft-size", prop, sizeof(prop)); + res = OF_getencprop(node, "ibm,pft-size", prop, sizeof(prop)); if (res <= 0) panic("mmu_phyp: unknown PFT size"); final_pteg_count = 1 << prop[1]; - res = OF_getprop(node, "ibm,slb-size", prop, sizeof(prop[0])); + res = OF_getencprop(node, "ibm,slb-size", prop, sizeof(prop[0])); if (res > 0) n_slbs = prop[0]; moea64_pteg_count = final_pteg_count / sizeof(struct lpteg); /* Clear any old page table entries */ for (idx = 0; idx < moea64_pteg_count*8; idx++) { phyp_pft_hcall(H_READ, 0, idx, 0, 0, &old.pte_hi, &old.pte_lo, &old.pte_lo); vsid = (old.pte_hi << (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) >> 28; if (vsid == VSID_VRMA || vsid == 0 /* Older VRMA */) continue; if (old.pte_hi & LPTE_VALID) phyp_hcall(H_REMOVE, 0, idx, 0); } /* * Scan the large page size property for PAPR compatible machines. * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties' * for the encoding of the property. 
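 *
 * The property is a flat list of cells of the form
 *
 *	{ shift, slb_encoding, nptlp, { lp_size, lp_encoding } * nptlp } ...
 *
 * so a hypothetical 16 MB large-page entry might read
 * { 24, SLBV_L, 1, 24, 0 }. The loop below hunts for exactly that
 * shape: SLB[L] set with a zero PTE[LP] encoding.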
*/ len = OF_getproplen(node, "ibm,segment-page-sizes"); if (len > 0) { /* * We have to use a variable length array on the stack * since we have very limited stack space. */ pcell_t arr[len/sizeof(cell_t)]; res = OF_getencprop(node, "ibm,segment-page-sizes", arr, sizeof(arr)); len /= 4; idx = 0; while (len > 0) { shift = arr[idx]; slb_encoding = arr[idx + 1]; nptlp = arr[idx + 2]; idx += 3; len -= 3; while (len > 0 && nptlp) { lp_size = arr[idx]; lp_encoding = arr[idx+1]; if (slb_encoding == SLBV_L && lp_encoding == 0) break; idx += 2; len -= 2; nptlp--; } if (nptlp && slb_encoding == SLBV_L && lp_encoding == 0) break; } if (len == 0) panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) " "not supported by this system. Please enable huge " "page backing if running under PowerKVM."); moea64_large_page_shift = shift; moea64_large_page_size = 1ULL << lp_size; } moea64_mid_bootstrap(mmup, kernelstart, kernelend); moea64_late_bootstrap(mmup, kernelstart, kernelend); /* Test for broken versions of KVM that don't conform to the spec */ if (phyp_hcall(H_CLEAR_MOD, 0, 0) == H_FUNCTION) brokenkvm = 1; } static void mphyp_cpu_bootstrap(mmu_t mmup, int ap) { struct slb *slb = PCPU_GET(slb); register_t seg0; int i; /* * Install kernel SLB entries */ __asm __volatile ("slbia"); __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0)); for (i = 0; i < 64; i++) { if (!(slb[i].slbe & SLBE_VALID)) continue; __asm __volatile ("slbmte %0, %1" :: "r"(slb[i].slbv), "r"(slb[i].slbe)); } } static int64_t mphyp_pte_synch(mmu_t mmu, struct pvo_entry *pvo) { struct lpte pte; uint64_t junk; __asm __volatile("ptesync"); phyp_pft_hcall(H_READ, 0, pvo->pvo_pte.slot, 0, 0, &pte.pte_hi, &pte.pte_lo, &junk); if ((pte.pte_hi & LPTE_AVPN_MASK) != ((pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) & LPTE_AVPN_MASK)) return (-1); if (!(pte.pte_hi & LPTE_VALID)) return (-1); return (pte.pte_lo & (LPTE_CHG | LPTE_REF)); } static int64_t mphyp_pte_clear(mmu_t mmu, struct pvo_entry *pvo, uint64_t ptebit) { struct rm_priotracker track; int64_t refchg; uint64_t ptelo, junk; int err; /* * This involves two steps (synch and clear) so we need the entry * not to change in the middle. We are protected against deliberate * unset by virtue of holding the pmap lock. Protection against * incidental unset (page table eviction) comes from holding the * shared eviction lock. */ PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); rm_rlock(&mphyp_eviction_lock, &track); refchg = mphyp_pte_synch(mmu, pvo); if (refchg < 0) { rm_runlock(&mphyp_eviction_lock, &track); return (refchg); } if (brokenkvm) { /* * No way to clear either bit, which is total madness. * Pessimistically claim that, once modified, it stays so * forever and that it is never referenced. 
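 *
 * Concretely: keep whatever LPTE_CHG bit the synch above returned,
 * and strip LPTE_REF from the result unconditionally.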
*/ rm_runlock(&mphyp_eviction_lock, &track); return (refchg & ~LPTE_REF); } if (ptebit & LPTE_CHG) { err = phyp_pft_hcall(H_CLEAR_MOD, 0, pvo->pvo_pte.slot, 0, 0, &ptelo, &junk, &junk); KASSERT(err == H_SUCCESS, ("Error clearing page change bit: %d", err)); refchg |= (ptelo & LPTE_CHG); } if (ptebit & LPTE_REF) { err = phyp_pft_hcall(H_CLEAR_REF, 0, pvo->pvo_pte.slot, 0, 0, &ptelo, &junk, &junk); KASSERT(err == H_SUCCESS, ("Error clearing page reference bit: %d", err)); refchg |= (ptelo & LPTE_REF); } rm_runlock(&mphyp_eviction_lock, &track); return (refchg); } static int64_t mphyp_pte_unset(mmu_t mmu, struct pvo_entry *pvo) { struct lpte pte; uint64_t junk; int err; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); moea64_pte_from_pvo(pvo, &pte); err = phyp_pft_hcall(H_REMOVE, H_AVPN, pvo->pvo_pte.slot, pte.pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi, &pte.pte_lo, &junk); KASSERT(err == H_SUCCESS || err == H_NOT_FOUND, ("Error removing page: %d", err)); if (err == H_NOT_FOUND) { moea64_pte_overflow--; return (-1); } return (pte.pte_lo & (LPTE_REF | LPTE_CHG)); } static uintptr_t mphyp_pte_spillable_ident(uintptr_t ptegbase, struct lpte *to_evict) { uint64_t slot, junk, k; struct lpte pt; int i, j; /* Start at a random slot */ i = mftb() % 8; k = -1; for (j = 0; j < 8; j++) { slot = ptegbase + (i + j) % 8; phyp_pft_hcall(H_READ, 0, slot, 0, 0, &pt.pte_hi, &pt.pte_lo, &junk); if (pt.pte_hi & LPTE_WIRED) continue; /* This is a candidate, so remember it */ k = slot; /* Try to get a page that has not been used lately */ if (!(pt.pte_hi & LPTE_VALID) || !(pt.pte_lo & LPTE_REF)) { memcpy(to_evict, &pt, sizeof(struct lpte)); return (k); } } if (k == -1) return (k); phyp_pft_hcall(H_READ, 0, k, 0, 0, &to_evict->pte_hi, &to_evict->pte_lo, &junk); return (k); } static int mphyp_pte_insert(mmu_t mmu, struct pvo_entry *pvo) { struct rm_priotracker track; int64_t result; struct lpte evicted, pte; uint64_t index, junk, lastptelo; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); /* Initialize PTE */ moea64_pte_from_pvo(pvo, &pte); evicted.pte_hi = 0; /* Make sure further insertion is locked out during evictions */ rm_rlock(&mphyp_eviction_lock, &track); /* * First try primary hash. */ pvo->pvo_pte.slot &= ~7UL; /* Base slot address */ result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte.pte_hi, pte.pte_lo, &index, &evicted.pte_lo, &junk); if (result == H_SUCCESS) { rm_runlock(&mphyp_eviction_lock, &track); pvo->pvo_pte.slot = index; return (0); } KASSERT(result == H_PTEG_FULL, ("Page insertion error: %ld " "(ptegidx: %#zx/%#x, PTE %#lx/%#lx", result, pvo->pvo_pte.slot, moea64_pteg_count, pte.pte_hi, pte.pte_lo)); /* * Next try secondary hash. */ pvo->pvo_vaddr ^= PVO_HID; pte.pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte.pte_hi, pte.pte_lo, &index, &evicted.pte_lo, &junk); if (result == H_SUCCESS) { rm_runlock(&mphyp_eviction_lock, &track); pvo->pvo_pte.slot = index; return (0); } KASSERT(result == H_PTEG_FULL, ("Secondary page insertion error: %ld", result)); /* * Out of luck. Find a PTE to sacrifice. */ /* Lock out all insertions for a bit */ rm_runlock(&mphyp_eviction_lock, &track); rm_wlock(&mphyp_eviction_lock); index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted); if (index == -1L) { /* Try other hash table? 
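 * Flipping PVO_HID/LPTE_HID and XORing the slot with
 * (moea64_pteg_mask << 3) moves us to the secondary PTEG, the same
 * dance the H_ENTER retry above performed.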
*/ pvo->pvo_vaddr ^= PVO_HID; pte.pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted); } if (index == -1L) { /* No freeable slots in either PTEG? We're hosed. */ rm_wunlock(&mphyp_eviction_lock); panic("mphyp_pte_insert: overflow"); return (-1); } /* Victim acquired: update page before waving goodbye */ if (evicted.pte_hi & LPTE_VALID) { result = phyp_pft_hcall(H_REMOVE, H_AVPN, index, evicted.pte_hi & LPTE_AVPN_MASK, 0, &junk, &lastptelo, &junk); moea64_pte_overflow++; KASSERT(result == H_SUCCESS, ("Error evicting page: %d", (int)result)); } /* * Set the new PTE. */ result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pte.pte_hi, pte.pte_lo, &index, &evicted.pte_lo, &junk); rm_wunlock(&mphyp_eviction_lock); /* All clear */ pvo->pvo_pte.slot = index; if (result == H_SUCCESS) return (0); panic("Page replacement error: %ld", result); return (result); } Index: projects/powernv/powerpc/pseries/phyp_console.c =================================================================== --- projects/powernv/powerpc/pseries/phyp_console.c (revision 290990) +++ projects/powernv/powerpc/pseries/phyp_console.c (revision 290991) @@ -1,433 +1,433 @@ /*- * Copyright (C) 2011 by Nathan Whitehorn. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "phyp-hvcall.h" #include "uart_if.h" struct uart_phyp_softc { device_t dev; phandle_t node; int vtermid; struct tty *tp; struct resource *irqres; int irqrid; struct callout callout; void *sc_icookie; int polltime; struct mtx sc_mtx; int protocol; union { uint64_t u64[2]; char str[16]; } phyp_inbuf; uint64_t inbuflen; uint8_t outseqno; }; static struct uart_phyp_softc *console_sc = NULL; #if defined(KDB) static int alt_break_state; #endif enum { HVTERM1, HVTERMPROT }; #define VS_DATA_PACKET_HEADER 0xff #define VS_CONTROL_PACKET_HEADER 0xfe #define VSV_SET_MODEM_CTL 0x01 #define VSV_MODEM_CTL_UPDATE 0x02 #define VSV_RENEGOTIATE_CONNECTION 0x03 #define VS_QUERY_PACKET_HEADER 0xfd #define VSV_SEND_VERSION_NUMBER 0x01 #define VSV_SEND_MODEM_CTL_STATUS 0x02 #define VS_QUERY_RESPONSE_PACKET_HEADER 0xfc static int uart_phyp_probe(device_t dev); static int uart_phyp_attach(device_t dev); static void uart_phyp_intr(void *v); static device_method_t uart_phyp_methods[] = { /* Device interface */ DEVMETHOD(device_probe, uart_phyp_probe), DEVMETHOD(device_attach, uart_phyp_attach), DEVMETHOD_END }; static driver_t uart_phyp_driver = { "uart", uart_phyp_methods, sizeof(struct uart_phyp_softc), }; DRIVER_MODULE(uart_phyp, vdevice, uart_phyp_driver, uart_devclass, 0, 0); static cn_probe_t uart_phyp_cnprobe; static cn_init_t uart_phyp_cninit; static cn_term_t uart_phyp_cnterm; static cn_getc_t uart_phyp_cngetc; static cn_putc_t uart_phyp_cnputc; static cn_grab_t uart_phyp_cngrab; static cn_ungrab_t uart_phyp_cnungrab; CONSOLE_DRIVER(uart_phyp); static void uart_phyp_ttyoutwakeup(struct tty *tp); static struct ttydevsw uart_phyp_tty_class = { .tsw_flags = TF_INITLOCK|TF_CALLOUT, .tsw_outwakeup = uart_phyp_ttyoutwakeup, }; static int uart_phyp_probe_node(struct uart_phyp_softc *sc) { phandle_t node = sc->node; uint32_t reg; char buf[64]; sc->inbuflen = 0; sc->outseqno = 0; if (OF_getprop(node, "name", buf, sizeof(buf)) <= 0) return (ENXIO); if (strcmp(buf, "vty") != 0) return (ENXIO); if (OF_getprop(node, "device_type", buf, sizeof(buf)) <= 0) return (ENXIO); if (strcmp(buf, "serial") != 0) return (ENXIO); reg = -1; - OF_getprop(node, "reg", &reg, sizeof(reg)); + OF_getencprop(node, "reg", &reg, sizeof(reg)); if (reg == -1) return (ENXIO); sc->vtermid = reg; sc->node = node; if (OF_getprop(node, "compatible", buf, sizeof(buf)) <= 0) return (ENXIO); if (strcmp(buf, "hvterm1") == 0) { sc->protocol = HVTERM1; return (0); } else if (strcmp(buf, "hvterm-protocol") == 0) { sc->protocol = HVTERMPROT; return (0); } return (ENXIO); } static int uart_phyp_probe(device_t dev) { const char *name; struct uart_phyp_softc sc; int err; name = ofw_bus_get_name(dev); if (name == NULL || strcmp(name, "vty") != 0) return (ENXIO); sc.node = ofw_bus_get_node(dev); err = uart_phyp_probe_node(&sc); if (err != 0) return (err); device_set_desc(dev, "POWER Hypervisor Virtual Serial Port"); return (err); } static void uart_phyp_cnprobe(struct consdev *cp) { char buf[64]; ihandle_t stdout; phandle_t input, chosen; static struct uart_phyp_softc sc; if ((chosen = OF_finddevice("/chosen")) == -1) goto fail; /* Check if OF has an active stdin/stdout */ input = -1; - if (OF_getprop(chosen, "stdout", &stdout, + if (OF_getencprop(chosen, "stdout", &stdout, sizeof(stdout)) == sizeof(stdout) && stdout != 0) input = OF_instance_to_package(stdout); if (input ==
-1) goto fail; if (OF_getprop(input, "device_type", buf, sizeof(buf)) == -1) goto fail; if (strcmp(buf, "serial") != 0) goto fail; sc.node = input; if (uart_phyp_probe_node(&sc) != 0) goto fail; mtx_init(&sc.sc_mtx, "uart_phyp", NULL, MTX_SPIN | MTX_QUIET | MTX_NOWITNESS); cp->cn_pri = CN_NORMAL; console_sc = &sc; return; fail: cp->cn_pri = CN_DEAD; return; } static int uart_phyp_attach(device_t dev) { struct uart_phyp_softc *sc; int unit; sc = device_get_softc(dev); sc->dev = dev; sc->node = ofw_bus_get_node(dev); uart_phyp_probe_node(sc); unit = device_get_unit(dev); sc->tp = tty_alloc(&uart_phyp_tty_class, sc); mtx_init(&sc->sc_mtx, device_get_nameunit(dev), NULL, MTX_SPIN | MTX_QUIET | MTX_NOWITNESS); if (console_sc != NULL && console_sc->vtermid == sc->vtermid) { sc->outseqno = console_sc->outseqno; console_sc = sc; sprintf(uart_phyp_consdev.cn_name, "ttyu%r", unit); tty_init_console(sc->tp, 0); } sc->irqrid = 0; sc->irqres = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irqrid, RF_ACTIVE | RF_SHAREABLE); if (sc->irqres != NULL) { bus_setup_intr(dev, sc->irqres, INTR_TYPE_TTY | INTR_MPSAFE, NULL, uart_phyp_intr, sc, &sc->sc_icookie); } else { callout_init(&sc->callout, 1); sc->polltime = hz / 20; if (sc->polltime < 1) sc->polltime = 1; callout_reset(&sc->callout, sc->polltime, uart_phyp_intr, sc); } tty_makedev(sc->tp, NULL, "u%r", unit); return (0); } static void uart_phyp_cninit(struct consdev *cp) { strcpy(cp->cn_name, "phypcons"); } static void uart_phyp_cnterm(struct consdev *cp) { } static int uart_phyp_get(struct uart_phyp_softc *sc, void *buffer, size_t bufsize) { int err; int hdr = 0; uart_lock(&sc->sc_mtx); if (sc->inbuflen == 0) { err = phyp_pft_hcall(H_GET_TERM_CHAR, sc->vtermid, 0, 0, 0, &sc->inbuflen, &sc->phyp_inbuf.u64[0], &sc->phyp_inbuf.u64[1]); if (err != H_SUCCESS) { uart_unlock(&sc->sc_mtx); return (-1); } hdr = 1; } if (sc->inbuflen == 0) { uart_unlock(&sc->sc_mtx); return (0); } if (bufsize > sc->inbuflen) bufsize = sc->inbuflen; if ((sc->protocol == HVTERMPROT) && (hdr == 1)) { sc->inbuflen = sc->inbuflen - 4; /* The VTERM protocol has a 4 byte header, skip it here.
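 * Data packets in the hvterm-protocol variant are framed as
 *
 *	byte 0:    VS_DATA_PACKET_HEADER (0xff)
 *	byte 1:    total length, header included (4 + payload, max 16)
 *	bytes 2-3: big-endian sequence number
 *	bytes 4+:  payload, at most 12 bytes
 *
 * so, e.g., sending "ok" as sequence 1 puts ff 06 00 01 'o' 'k' on
 * the wire; uart_phyp_put() below builds exactly this frame.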
*/ memmove(&sc->phyp_inbuf.str[0], &sc->phyp_inbuf.str[4], sc->inbuflen); } memcpy(buffer, sc->phyp_inbuf.str, bufsize); sc->inbuflen -= bufsize; if (sc->inbuflen > 0) memmove(&sc->phyp_inbuf.str[0], &sc->phyp_inbuf.str[bufsize], sc->inbuflen); uart_unlock(&sc->sc_mtx); return (bufsize); } static int uart_phyp_put(struct uart_phyp_softc *sc, void *buffer, size_t bufsize) { uint16_t seqno; uint64_t len = 0; int err; union { uint64_t u64[2]; char bytes[16]; } cbuf; uart_lock(&sc->sc_mtx); switch (sc->protocol) { case HVTERM1: if (bufsize > 16) bufsize = 16; memcpy(&cbuf, buffer, bufsize); len = bufsize; break; case HVTERMPROT: if (bufsize > 12) bufsize = 12; seqno = sc->outseqno++; cbuf.bytes[0] = VS_DATA_PACKET_HEADER; cbuf.bytes[1] = 4 + bufsize; /* total length, max 16 bytes */ cbuf.bytes[2] = (seqno >> 8) & 0xff; cbuf.bytes[3] = seqno & 0xff; memcpy(&cbuf.bytes[4], buffer, bufsize); len = 4 + bufsize; break; } do { err = phyp_hcall(H_PUT_TERM_CHAR, sc->vtermid, len, cbuf.u64[0], cbuf.u64[1]); DELAY(100); } while (err == H_BUSY); uart_unlock(&sc->sc_mtx); return (bufsize); } static int uart_phyp_cngetc(struct consdev *cp) { unsigned char c; int retval; retval = uart_phyp_get(console_sc, &c, 1); if (retval != 1) return (-1); #if defined(KDB) kdb_alt_break(c, &alt_break_state); #endif return (c); } static void uart_phyp_cnputc(struct consdev *cp, int c) { unsigned char ch = c; uart_phyp_put(console_sc, &ch, 1); } static void uart_phyp_cngrab(struct consdev *cp) { } static void uart_phyp_cnungrab(struct consdev *cp) { } static void uart_phyp_ttyoutwakeup(struct tty *tp) { struct uart_phyp_softc *sc; char buffer[8]; int len; sc = tty_softc(tp); while ((len = ttydisc_getc(tp, buffer, sizeof(buffer))) != 0) uart_phyp_put(sc, buffer, len); } static void uart_phyp_intr(void *v) { struct uart_phyp_softc *sc = v; struct tty *tp = sc->tp; unsigned char c; int len; tty_lock(tp); while ((len = uart_phyp_get(sc, &c, 1)) > 0) ttydisc_rint(tp, c, 0); ttydisc_rint_done(tp); tty_unlock(tp); if (sc->irqres == NULL) callout_reset(&sc->callout, sc->polltime, uart_phyp_intr, sc); } Index: projects/powernv/powerpc/pseries/phyp_llan.c =================================================================== --- projects/powernv/powerpc/pseries/phyp_llan.c (revision 290990) +++ projects/powernv/powerpc/pseries/phyp_llan.c (revision 290991) @@ -1,512 +1,512 @@ /*- * Copyright 2013 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LLAN_MAX_RX_PACKETS 100 #define LLAN_MAX_TX_PACKETS 100 #define LLAN_RX_BUF_LEN 8*PAGE_SIZE #define LLAN_BUFDESC_VALID (1ULL << 63) #define LLAN_ADD_MULTICAST 0x1 #define LLAN_DEL_MULTICAST 0x2 #define LLAN_CLEAR_MULTICAST 0x3 struct llan_xfer { struct mbuf *rx_mbuf; bus_dmamap_t rx_dmamap; uint64_t rx_bufdesc; }; struct llan_receive_queue_entry { /* PAPR page 539 */ uint8_t control; uint8_t reserved; uint16_t offset; uint32_t length; uint64_t handle; } __packed; struct llan_softc { device_t dev; struct mtx io_lock; cell_t unit; uint8_t mac_address[8]; int irqid; struct resource *irq; void *irq_cookie; bus_dma_tag_t rx_dma_tag; bus_dma_tag_t rxbuf_dma_tag; bus_dma_tag_t tx_dma_tag; bus_dmamap_t tx_dma_map; struct llan_receive_queue_entry *rx_buf; int rx_dma_slot; int rx_valid_val; bus_dmamap_t rx_buf_map; bus_addr_t rx_buf_phys; bus_size_t rx_buf_len; bus_addr_t input_buf_phys; bus_addr_t filter_buf_phys; struct llan_xfer rx_xfer[LLAN_MAX_RX_PACKETS]; struct ifnet *ifp; }; static int llan_probe(device_t); static int llan_attach(device_t); static void llan_intr(void *xsc); static void llan_init(void *xsc); static void llan_start(struct ifnet *ifp); static int llan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static void llan_rx_load_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int err); static int llan_add_rxbuf(struct llan_softc *sc, struct llan_xfer *rx); static int llan_set_multicast(struct llan_softc *sc); static devclass_t llan_devclass; static device_method_t llan_methods[] = { DEVMETHOD(device_probe, llan_probe), DEVMETHOD(device_attach, llan_attach), DEVMETHOD_END }; static driver_t llan_driver = { "llan", llan_methods, sizeof(struct llan_softc) }; DRIVER_MODULE(llan, vdevice, llan_driver, llan_devclass, 0, 0); static int llan_probe(device_t dev) { if (!ofw_bus_is_compatible(dev,"IBM,l-lan")) return (ENXIO); device_set_desc(dev, "POWER Hypervisor Virtual Ethernet"); return (0); } static int llan_attach(device_t dev) { struct llan_softc *sc; phandle_t node; int error, i; sc = device_get_softc(dev); sc->dev = dev; /* Get firmware properties */ node = ofw_bus_get_node(dev); OF_getprop(node, "local-mac-address", sc->mac_address, sizeof(sc->mac_address)); - OF_getprop(node, "reg", &sc->unit, sizeof(sc->unit)); + OF_getencprop(node, "reg", &sc->unit, sizeof(sc->unit)); mtx_init(&sc->io_lock, "llan", NULL, MTX_DEF); /* Setup interrupt */ sc->irqid = 0; sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irqid, RF_ACTIVE); if (!sc->irq) { device_printf(dev, "Could not allocate IRQ\n"); mtx_destroy(&sc->io_lock); return (ENXIO); } bus_setup_intr(dev, sc->irq, INTR_TYPE_MISC | INTR_MPSAFE | INTR_ENTROPY, NULL, llan_intr, sc, &sc->irq_cookie); /* Setup DMA */ error = 
bus_dma_tag_create(bus_get_dma_tag(dev), 16, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, LLAN_RX_BUF_LEN, 1, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->rx_dma_tag); error = bus_dma_tag_create(bus_get_dma_tag(dev), 4, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 1, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->rxbuf_dma_tag); error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 6, BUS_SPACE_MAXSIZE_32BIT, 0, busdma_lock_mutex, &sc->io_lock, &sc->tx_dma_tag); error = bus_dmamem_alloc(sc->rx_dma_tag, (void **)&sc->rx_buf, BUS_DMA_WAITOK | BUS_DMA_ZERO, &sc->rx_buf_map); error = bus_dmamap_load(sc->rx_dma_tag, sc->rx_buf_map, sc->rx_buf, LLAN_RX_BUF_LEN, llan_rx_load_cb, sc, 0); /* TX DMA maps */ bus_dmamap_create(sc->tx_dma_tag, 0, &sc->tx_dma_map); /* RX DMA */ for (i = 0; i < LLAN_MAX_RX_PACKETS; i++) { error = bus_dmamap_create(sc->rxbuf_dma_tag, 0, &sc->rx_xfer[i].rx_dmamap); sc->rx_xfer[i].rx_mbuf = NULL; } /* Attach to network stack */ sc->ifp = if_alloc(IFT_ETHER); sc->ifp->if_softc = sc; if_initname(sc->ifp, device_get_name(dev), device_get_unit(dev)); sc->ifp->if_mtu = ETHERMTU; /* XXX max-frame-size from OF? */ sc->ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; sc->ifp->if_hwassist = 0; /* XXX: ibm,illan-options */ sc->ifp->if_capabilities = 0; sc->ifp->if_capenable = 0; sc->ifp->if_start = llan_start; sc->ifp->if_ioctl = llan_ioctl; sc->ifp->if_init = llan_init; IFQ_SET_MAXLEN(&sc->ifp->if_snd, LLAN_MAX_TX_PACKETS); sc->ifp->if_snd.ifq_drv_maxlen = LLAN_MAX_TX_PACKETS; IFQ_SET_READY(&sc->ifp->if_snd); ether_ifattach(sc->ifp, &sc->mac_address[2]); return (0); } static void llan_rx_load_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int err) { struct llan_softc *sc = xsc; sc->rx_buf_phys = segs[0].ds_addr; sc->rx_buf_len = segs[0].ds_len - 2*PAGE_SIZE; sc->input_buf_phys = segs[0].ds_addr + segs[0].ds_len - PAGE_SIZE; sc->filter_buf_phys = segs[0].ds_addr + segs[0].ds_len - 2*PAGE_SIZE; } static void llan_init(void *xsc) { struct llan_softc *sc = xsc; uint64_t rx_buf_desc; uint64_t macaddr; int err, i; mtx_lock(&sc->io_lock); phyp_hcall(H_FREE_LOGICAL_LAN, sc->unit); /* Create buffers (page 539) */ sc->rx_dma_slot = 0; sc->rx_valid_val = 1; rx_buf_desc = LLAN_BUFDESC_VALID; rx_buf_desc |= (sc->rx_buf_len << 32); rx_buf_desc |= sc->rx_buf_phys; memcpy(&macaddr, sc->mac_address, 8); err = phyp_hcall(H_REGISTER_LOGICAL_LAN, sc->unit, sc->input_buf_phys, rx_buf_desc, sc->filter_buf_phys, macaddr); for (i = 0; i < LLAN_MAX_RX_PACKETS; i++) llan_add_rxbuf(sc, &sc->rx_xfer[i]); phyp_hcall(H_VIO_SIGNAL, sc->unit, 1); /* Enable interrupts */ /* Tell stack we're up */ sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; mtx_unlock(&sc->io_lock); /* Check for pending receives scheduled before interrupt enable */ llan_intr(sc); } static int llan_add_rxbuf(struct llan_softc *sc, struct llan_xfer *rx) { struct mbuf *m; bus_dma_segment_t segs[1]; int error, nsegs; mtx_assert(&sc->io_lock, MA_OWNED); m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = m->m_ext.ext_size; if (rx->rx_mbuf != NULL) { bus_dmamap_sync(sc->rxbuf_dma_tag, rx->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rxbuf_dma_tag, rx->rx_dmamap); } /* Save pointer to buffer structure */ m_copyback(m, 0, 8, (void *)&rx); error = bus_dmamap_load_mbuf_sg(sc->rxbuf_dma_tag, rx->rx_dmamap, m, segs, &nsegs, 
BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->dev, "cannot load RX DMA map %p, error = %d\n", rx, error); m_freem(m); return (error); } /* If nsegs is wrong then the stack is corrupt. */ KASSERT(nsegs == 1, ("%s: too many DMA segments (%d)", __func__, nsegs)); rx->rx_mbuf = m; bus_dmamap_sync(sc->rxbuf_dma_tag, rx->rx_dmamap, BUS_DMASYNC_PREREAD); rx->rx_bufdesc = LLAN_BUFDESC_VALID; rx->rx_bufdesc |= (((uint64_t)segs[0].ds_len) << 32); rx->rx_bufdesc |= segs[0].ds_addr; error = phyp_hcall(H_ADD_LOGICAL_LAN_BUFFER, sc->unit, rx->rx_bufdesc); if (error != 0) { m_freem(m); rx->rx_mbuf = NULL; return (ENOBUFS); } return (0); } static void llan_intr(void *xsc) { struct llan_softc *sc = xsc; struct llan_xfer *rx; struct mbuf *m; mtx_lock(&sc->io_lock); restart: phyp_hcall(H_VIO_SIGNAL, sc->unit, 0); while ((sc->rx_buf[sc->rx_dma_slot].control >> 7) == sc->rx_valid_val) { rx = (struct llan_xfer *)sc->rx_buf[sc->rx_dma_slot].handle; m = rx->rx_mbuf; m_adj(m, sc->rx_buf[sc->rx_dma_slot].offset - 8); m->m_len = sc->rx_buf[sc->rx_dma_slot].length; /* llan_add_rxbuf does DMA sync and unload as well as requeue */ if (llan_add_rxbuf(sc, rx) != 0) { if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); phyp_hcall(H_ADD_LOGICAL_LAN_BUFFER, sc->unit, rx->rx_bufdesc); continue; } if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1); m_adj(m, sc->rx_buf[sc->rx_dma_slot].offset); m->m_len = sc->rx_buf[sc->rx_dma_slot].length; m->m_pkthdr.rcvif = sc->ifp; m->m_pkthdr.len = m->m_len; sc->rx_dma_slot++; if (sc->rx_dma_slot >= sc->rx_buf_len/sizeof(sc->rx_buf[0])) { sc->rx_dma_slot = 0; sc->rx_valid_val = !sc->rx_valid_val; } mtx_unlock(&sc->io_lock); (*sc->ifp->if_input)(sc->ifp, m); mtx_lock(&sc->io_lock); } phyp_hcall(H_VIO_SIGNAL, sc->unit, 1); /* * H_VIO_SIGNAL enables interrupts for future packets only. * Make sure none were queued between the end of the loop and the * enable interrupts call. */ if ((sc->rx_buf[sc->rx_dma_slot].control >> 7) == sc->rx_valid_val) goto restart; mtx_unlock(&sc->io_lock); } static void llan_send_packet(void *xsc, bus_dma_segment_t *segs, int nsegs, bus_size_t mapsize, int error) { struct llan_softc *sc = xsc; uint64_t bufdescs[6]; int i; bzero(bufdescs, sizeof(bufdescs)); for (i = 0; i < nsegs; i++) { bufdescs[i] = LLAN_BUFDESC_VALID; bufdescs[i] |= (((uint64_t)segs[i].ds_len) << 32); bufdescs[i] |= segs[i].ds_addr; } phyp_hcall(H_SEND_LOGICAL_LAN, sc->unit, bufdescs[0], bufdescs[1], bufdescs[2], bufdescs[3], bufdescs[4], bufdescs[5], 0); /* * The hypercall returning implies completion -- or that the call will * not complete. In principle, we should try a few times if we get back * H_BUSY based on the continuation token in R4. For now, just drop * the packet in such cases. 
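 *
 * A hypothetical retry sketch (added for illustration, not part of
 * this driver): the continuation token would come back in R4 and be
 * passed in place of the trailing 0, assuming the hypercall wrapper
 * exposed that register:
 *
 *	do {
 *		err = phyp_hcall(H_SEND_LOGICAL_LAN, sc->unit,
 *		    bufdescs[0], bufdescs[1], bufdescs[2], bufdescs[3],
 *		    bufdescs[4], bufdescs[5], continue_token);
 *	} while (err == H_BUSY);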
*/ } static void llan_start_locked(struct ifnet *ifp) { struct llan_softc *sc = ifp->if_softc; bus_addr_t first; int nsegs; struct mbuf *mb_head, *m; mtx_assert(&sc->io_lock, MA_OWNED); first = 0; if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { IFQ_DRV_DEQUEUE(&ifp->if_snd, mb_head); if (mb_head == NULL) break; BPF_MTAP(ifp, mb_head); for (m = mb_head, nsegs = 0; m != NULL; m = m->m_next) nsegs++; if (nsegs > 6) { m = m_collapse(mb_head, M_NOWAIT, 6); if (m == NULL) { m_freem(mb_head); continue; } } bus_dmamap_load_mbuf(sc->tx_dma_tag, sc->tx_dma_map, mb_head, llan_send_packet, sc, 0); bus_dmamap_unload(sc->tx_dma_tag, sc->tx_dma_map); m_freem(mb_head); } } static void llan_start(struct ifnet *ifp) { struct llan_softc *sc = ifp->if_softc; mtx_lock(&sc->io_lock); llan_start_locked(ifp); mtx_unlock(&sc->io_lock); } static int llan_set_multicast(struct llan_softc *sc) { struct ifnet *ifp = sc->ifp; struct ifmultiaddr *inm; uint64_t macaddr; mtx_assert(&sc->io_lock, MA_OWNED); phyp_hcall(H_MULTICAST_CTRL, sc->unit, LLAN_CLEAR_MULTICAST, 0); if_maddr_rlock(ifp); TAILQ_FOREACH(inm, &ifp->if_multiaddrs, ifma_link) { if (inm->ifma_addr->sa_family != AF_LINK) continue; memcpy((uint8_t *)&macaddr + 2, LLADDR((struct sockaddr_dl *)inm->ifma_addr), 6); phyp_hcall(H_MULTICAST_CTRL, sc->unit, LLAN_ADD_MULTICAST, macaddr); } if_maddr_runlock(ifp); return (0); } static int llan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { int err = 0; struct llan_softc *sc = ifp->if_softc; switch (cmd) { case SIOCADDMULTI: case SIOCDELMULTI: mtx_lock(&sc->io_lock); if ((sc->ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) llan_set_multicast(sc); mtx_unlock(&sc->io_lock); break; case SIOCSIFFLAGS: default: err = ether_ioctl(ifp, cmd, data); break; } return (err); } Index: projects/powernv/powerpc/pseries/phyp_vscsi.c =================================================================== --- projects/powernv/powerpc/pseries/phyp_vscsi.c (revision 290990) +++ projects/powernv/powerpc/pseries/phyp_vscsi.c (revision 290991) @@ -1,992 +1,993 @@ /*- * Copyright 2013 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct vscsi_softc; /* VSCSI CRQ format from table 260 of PAPR spec 2.4 (page 760) */ struct vscsi_crq { uint8_t valid; uint8_t format; uint8_t reserved; uint8_t status; uint16_t timeout; uint16_t iu_length; uint64_t iu_data; }; struct vscsi_xfer { TAILQ_ENTRY(vscsi_xfer) queue; struct vscsi_softc *sc; union ccb *ccb; bus_dmamap_t dmamap; uint64_t tag; vmem_addr_t srp_iu_offset; vmem_size_t srp_iu_size; }; TAILQ_HEAD(vscsi_xferq, vscsi_xfer); struct vscsi_softc { device_t dev; struct cam_devq *devq; struct cam_sim *sim; struct cam_path *path; struct mtx io_lock; cell_t unit; int bus_initialized; int bus_logged_in; int max_transactions; int irqid; struct resource *irq; void *irq_cookie; bus_dma_tag_t crq_tag; struct vscsi_crq *crq_queue; int n_crqs, cur_crq; bus_dmamap_t crq_map; bus_addr_t crq_phys; vmem_t *srp_iu_arena; void *srp_iu_queue; bus_addr_t srp_iu_phys; bus_dma_tag_t data_tag; struct vscsi_xfer loginxp; struct vscsi_xfer *xfer; struct vscsi_xferq active_xferq; struct vscsi_xferq free_xferq; }; struct srp_login { uint8_t type; uint8_t reserved[7]; uint64_t tag; uint64_t max_cmd_length; uint32_t reserved2; uint16_t buffer_formats; uint8_t flags; uint8_t reserved3[5]; uint8_t initiator_port_id[16]; uint8_t target_port_id[16]; } __packed; struct srp_login_rsp { uint8_t type; uint8_t reserved[3]; uint32_t request_limit_delta; uint8_t tag; uint32_t max_i_to_t_len; uint32_t max_t_to_i_len; uint16_t buffer_formats; uint8_t flags; /* Some reserved bits follow */ } __packed; struct srp_cmd { uint8_t type; uint8_t flags1; uint8_t reserved[3]; uint8_t formats; uint8_t out_buffer_count; uint8_t in_buffer_count; uint64_t tag; uint32_t reserved2; uint64_t lun; uint8_t reserved3[3]; uint8_t additional_cdb; uint8_t cdb[16]; uint8_t data_payload[0]; } __packed; struct srp_rsp { uint8_t type; uint8_t reserved[3]; uint32_t request_limit_delta; uint64_t tag; uint16_t reserved2; uint8_t flags; uint8_t status; uint32_t data_out_resid; uint32_t data_in_resid; uint32_t sense_data_len; uint32_t response_data_len; uint8_t data_payload[0]; } __packed; struct srp_tsk_mgmt { uint8_t type; uint8_t reserved[7]; uint64_t tag; uint32_t reserved2; uint64_t lun; uint8_t reserved3[2]; uint8_t function; uint8_t reserved4; uint64_t manage_tag; uint64_t reserved5; } __packed; /* Message code type */ #define SRP_LOGIN_REQ 0x00 #define SRP_TSK_MGMT 0x01 #define SRP_CMD 0x02 #define SRP_I_LOGOUT 0x03 #define SRP_LOGIN_RSP 0xC0 #define SRP_RSP 0xC1 #define SRP_LOGIN_REJ 0xC2 #define SRP_T_LOGOUT 0x80 #define SRP_CRED_REQ 0x81 #define SRP_AER_REQ 0x82 #define SRP_CRED_RSP 0x41 #define SRP_AER_RSP 0x41 /* Flags for srp_rsp flags field */ #define SRP_RSPVALID 0x01 #define SRP_SNSVALID 0x02 #define SRP_DOOVER 0x04 #define SRP_DOUNDER 0x08 #define SRP_DIOVER 0x10 #define SRP_DIUNDER 0x20 #define MAD_SUCESS 0x00 #define MAD_NOT_SUPPORTED 0xf1 #define MAD_FAILED 0xf7 #define MAD_EMPTY_IU 0x01 #define MAD_ERROR_LOGGING_REQUEST 0x02 #define MAD_ADAPTER_INFO_REQUEST 0x03 #define MAD_CAPABILITIES_EXCHANGE 0x05 #define MAD_PHYS_ADAP_INFO_REQUEST 0x06 #define MAD_TAPE_PASSTHROUGH_REQUEST 0x07 #define MAD_ENABLE_FAST_FAIL 0x08 static int vscsi_probe(device_t); static int vscsi_attach(device_t); static 
int vscsi_detach(device_t); static void vscsi_cam_action(struct cam_sim *, union ccb *); static void vscsi_cam_poll(struct cam_sim *); static void vscsi_intr(void *arg); static void vscsi_check_response_queue(struct vscsi_softc *sc); static void vscsi_setup_bus(struct vscsi_softc *sc); static void vscsi_srp_login(struct vscsi_softc *sc); static void vscsi_crq_load_cb(void *, bus_dma_segment_t *, int, int); static void vscsi_scsi_command(void *xxp, bus_dma_segment_t *segs, int nsegs, int err); static void vscsi_task_management(struct vscsi_softc *sc, union ccb *ccb); static void vscsi_srp_response(struct vscsi_xfer *, struct vscsi_crq *); static devclass_t vscsi_devclass; static device_method_t vscsi_methods[] = { DEVMETHOD(device_probe, vscsi_probe), DEVMETHOD(device_attach, vscsi_attach), DEVMETHOD(device_detach, vscsi_detach), DEVMETHOD_END }; static driver_t vscsi_driver = { "vscsi", vscsi_methods, sizeof(struct vscsi_softc) }; DRIVER_MODULE(vscsi, vdevice, vscsi_driver, vscsi_devclass, 0, 0); MALLOC_DEFINE(M_VSCSI, "vscsi", "CAM device queue for VSCSI"); static int vscsi_probe(device_t dev) { if (!ofw_bus_is_compatible(dev, "IBM,v-scsi")) return (ENXIO); device_set_desc(dev, "POWER Hypervisor Virtual SCSI Bus"); return (0); } static int vscsi_attach(device_t dev) { struct vscsi_softc *sc; struct vscsi_xfer *xp; int error, i; sc = device_get_softc(dev); if (sc == NULL) return (EINVAL); sc->dev = dev; mtx_init(&sc->io_lock, "vscsi", NULL, MTX_DEF); /* Get properties */ - OF_getprop(ofw_bus_get_node(dev), "reg", &sc->unit, sizeof(sc->unit)); + OF_getencprop(ofw_bus_get_node(dev), "reg", &sc->unit, + sizeof(sc->unit)); /* Setup interrupt */ sc->irqid = 0; sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irqid, RF_ACTIVE); if (!sc->irq) { device_printf(dev, "Could not allocate IRQ\n"); mtx_destroy(&sc->io_lock); return (ENXIO); } bus_setup_intr(dev, sc->irq, INTR_TYPE_CAM | INTR_MPSAFE | INTR_ENTROPY, NULL, vscsi_intr, sc, &sc->irq_cookie); /* Data DMA */ error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 256, BUS_SPACE_MAXSIZE_32BIT, 0, busdma_lock_mutex, &sc->io_lock, &sc->data_tag); TAILQ_INIT(&sc->active_xferq); TAILQ_INIT(&sc->free_xferq); /* First XFER for login data */ sc->loginxp.sc = sc; bus_dmamap_create(sc->data_tag, 0, &sc->loginxp.dmamap); TAILQ_INSERT_TAIL(&sc->free_xferq, &sc->loginxp, queue); /* CRQ area */ error = bus_dma_tag_create(bus_get_dma_tag(dev), PAGE_SIZE, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, 8*PAGE_SIZE, 1, BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->crq_tag); error = bus_dmamem_alloc(sc->crq_tag, (void **)&sc->crq_queue, BUS_DMA_WAITOK | BUS_DMA_ZERO, &sc->crq_map); sc->crq_phys = 0; sc->n_crqs = 0; error = bus_dmamap_load(sc->crq_tag, sc->crq_map, sc->crq_queue, 8*PAGE_SIZE, vscsi_crq_load_cb, sc, 0); mtx_lock(&sc->io_lock); vscsi_setup_bus(sc); sc->xfer = malloc(sizeof(sc->xfer[0])*sc->max_transactions, M_VSCSI, M_NOWAIT); for (i = 0; i < sc->max_transactions; i++) { xp = &sc->xfer[i]; xp->sc = sc; error = bus_dmamap_create(sc->data_tag, 0, &xp->dmamap); if (error) { device_printf(dev, "Could not create DMA map (%d)\n", error); break; } TAILQ_INSERT_TAIL(&sc->free_xferq, xp, queue); } mtx_unlock(&sc->io_lock); /* Allocate CAM bits */ if ((sc->devq = cam_simq_alloc(sc->max_transactions)) == NULL) return (ENOMEM); sc->sim = cam_sim_alloc(vscsi_cam_action, vscsi_cam_poll, "vscsi", sc, device_get_unit(dev), &sc->io_lock, sc->max_transactions, sc->max_transactions, sc->devq); 
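	/*
	 * Note added for clarity: sc->max_transactions was filled in
	 * from the SRP login response (request_limit_delta) during
	 * vscsi_setup_bus() above, so the CAM SIM queue depth matches
	 * the command limit the target granted.
	 */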
if (sc->sim == NULL) { cam_simq_free(sc->devq); sc->devq = NULL; device_printf(dev, "CAM SIM attach failed\n"); return (EINVAL); } mtx_lock(&sc->io_lock); if (xpt_bus_register(sc->sim, dev, 0) != 0) { device_printf(dev, "XPT bus registration failed\n"); cam_sim_free(sc->sim, FALSE); sc->sim = NULL; cam_simq_free(sc->devq); sc->devq = NULL; mtx_unlock(&sc->io_lock); return (EINVAL); } mtx_unlock(&sc->io_lock); return (0); } static int vscsi_detach(device_t dev) { struct vscsi_softc *sc; sc = device_get_softc(dev); if (sc == NULL) return (EINVAL); if (sc->sim != NULL) { mtx_lock(&sc->io_lock); xpt_bus_deregister(cam_sim_path(sc->sim)); cam_sim_free(sc->sim, FALSE); sc->sim = NULL; mtx_unlock(&sc->io_lock); } if (sc->devq != NULL) { cam_simq_free(sc->devq); sc->devq = NULL; } mtx_destroy(&sc->io_lock); return (0); } static void vscsi_cam_action(struct cam_sim *sim, union ccb *ccb) { struct vscsi_softc *sc = cam_sim_softc(sim); mtx_assert(&sc->io_lock, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE; cpi->hba_misc = PIM_EXTLUNS; cpi->target_sprt = 0; cpi->hba_eng_cnt = 0; cpi->max_target = 0; cpi->max_lun = 0; cpi->initiator_id = ~0; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, "IBM", HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 150000; cpi->transport = XPORT_SRP; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC4; cpi->ccb_h.status = CAM_REQ_CMP; break; } case XPT_RESET_BUS: ccb->ccb_h.status = CAM_REQ_CMP; break; case XPT_RESET_DEV: ccb->ccb_h.status = CAM_REQ_INPROG; vscsi_task_management(sc, ccb); return; case XPT_GET_TRAN_SETTINGS: ccb->cts.protocol = PROTO_SCSI; ccb->cts.protocol_version = SCSI_REV_SPC4; ccb->cts.transport = XPORT_SRP; ccb->cts.transport_version = 0; ccb->cts.proto_specific.valid = 0; ccb->cts.xport_specific.valid = 0; ccb->ccb_h.status = CAM_REQ_CMP; break; case XPT_SET_TRAN_SETTINGS: ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; break; case XPT_SCSI_IO: { struct vscsi_xfer *xp; ccb->ccb_h.status = CAM_REQ_INPROG; xp = TAILQ_FIRST(&sc->free_xferq); if (xp == NULL) panic("SCSI queue flooded"); xp->ccb = ccb; TAILQ_REMOVE(&sc->free_xferq, xp, queue); TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue); bus_dmamap_load_ccb(sc->data_tag, xp->dmamap, ccb, vscsi_scsi_command, xp, 0); return; } default: ccb->ccb_h.status = CAM_REQ_INVALID; break; } xpt_done(ccb); return; } static void vscsi_srp_login(struct vscsi_softc *sc) { struct vscsi_xfer *xp; struct srp_login *login; struct vscsi_crq crq; int err; mtx_assert(&sc->io_lock, MA_OWNED); xp = TAILQ_FIRST(&sc->free_xferq); if (xp == NULL) panic("SCSI queue flooded"); xp->ccb = NULL; TAILQ_REMOVE(&sc->free_xferq, xp, queue); TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue); /* Set up command */ xp->srp_iu_size = crq.iu_length = 64; err = vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size, M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset); if (err) panic("Error during VMEM allocation (%d)", err); login = (struct srp_login *)((uint8_t *)xp->sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset); bzero(login, xp->srp_iu_size); login->type = SRP_LOGIN_REQ; login->tag = (uint64_t)(xp); login->max_cmd_length = htobe64(256); login->buffer_formats = htobe16(0x1 | 0x2); /* Direct and indirect */ login->flags = 0; /* Create CRQ entry */ crq.valid = 0x80; crq.format = 0x01; 
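	/*
	 * Added note: H_SEND_CRQ takes the 16-byte CRQ entry as two
	 * 64-bit register arguments. A minimal sketch of the packing
	 * performed by the call below (assuming struct vscsi_crq is
	 * exactly 16 bytes in native big-endian layout):
	 *
	 *	uint64_t *w = (uint64_t *)&crq;
	 *	err = phyp_hcall(H_SEND_CRQ, xp->sc->unit, w[0], w[1]);
	 */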
crq.iu_data = xp->sc->srp_iu_phys + xp->srp_iu_offset; bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE); err = phyp_hcall(H_SEND_CRQ, xp->sc->unit, ((uint64_t *)(&crq))[0], ((uint64_t *)(&crq))[1]); if (err != 0) panic("CRQ send failure (%d)", err); } static void vscsi_task_management(struct vscsi_softc *sc, union ccb *ccb) { struct srp_tsk_mgmt *cmd; struct vscsi_xfer *xp; struct vscsi_crq crq; int err; mtx_assert(&sc->io_lock, MA_OWNED); xp = TAILQ_FIRST(&sc->free_xferq); if (xp == NULL) panic("SCSI queue flooded"); xp->ccb = ccb; TAILQ_REMOVE(&sc->free_xferq, xp, queue); TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue); xp->srp_iu_size = crq.iu_length = sizeof(*cmd); err = vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size, M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset); if (err) panic("Error during VMEM allocation (%d)", err); cmd = (struct srp_tsk_mgmt *)((uint8_t *)xp->sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset); bzero(cmd, xp->srp_iu_size); cmd->type = SRP_TSK_MGMT; cmd->tag = (uint64_t)xp; cmd->lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun)); switch (ccb->ccb_h.func_code) { case XPT_RESET_DEV: cmd->function = 0x08; break; default: panic("Unimplemented code %d", ccb->ccb_h.func_code); break; } bus_dmamap_sync(xp->sc->crq_tag, xp->sc->crq_map, BUS_DMASYNC_PREWRITE); /* Create CRQ entry */ crq.valid = 0x80; crq.format = 0x01; crq.iu_data = xp->sc->srp_iu_phys + xp->srp_iu_offset; err = phyp_hcall(H_SEND_CRQ, xp->sc->unit, ((uint64_t *)(&crq))[0], ((uint64_t *)(&crq))[1]); if (err != 0) panic("CRQ send failure (%d)", err); } static void vscsi_scsi_command(void *xxp, bus_dma_segment_t *segs, int nsegs, int err) { struct vscsi_xfer *xp = xxp; uint8_t *cdb; union ccb *ccb = xp->ccb; struct srp_cmd *cmd; uint64_t chunk_addr; uint32_t chunk_size; int desc_start, i; struct vscsi_crq crq; KASSERT(err == 0, ("DMA error %d\n", err)); mtx_assert(&xp->sc->io_lock, MA_OWNED); cdb = (ccb->ccb_h.flags & CAM_CDB_POINTER) ? ccb->csio.cdb_io.cdb_ptr : ccb->csio.cdb_io.cdb_bytes; /* Command format from Table 20, page 37 of SRP spec */ crq.iu_length = 48 + ((nsegs > 1) ? 20 : 16) + ((ccb->csio.cdb_len > 16) ? (ccb->csio.cdb_len - 16) : 0); xp->srp_iu_size = crq.iu_length; if (nsegs > 1) xp->srp_iu_size += nsegs*16; xp->srp_iu_size = roundup(xp->srp_iu_size, 16); err = vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size, M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset); if (err) panic("Error during VMEM allocation (%d)", err); cmd = (struct srp_cmd *)((uint8_t *)xp->sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset); bzero(cmd, xp->srp_iu_size); cmd->type = SRP_CMD; if (ccb->csio.cdb_len > 16) cmd->additional_cdb = (ccb->csio.cdb_len - 16) << 2; memcpy(cmd->cdb, cdb, ccb->csio.cdb_len); cmd->tag = (uint64_t)(xp); /* Let the responder find this again */ cmd->lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun)); if (nsegs > 1) { /* Use indirect descriptors */ switch (ccb->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_OUT: cmd->formats = (2 << 4); break; case CAM_DIR_IN: cmd->formats = 2; break; default: panic("Does not support bidirectional commands (%d)", ccb->ccb_h.flags & CAM_DIR_MASK); break; } desc_start = ((ccb->csio.cdb_len > 16) ? 
ccb->csio.cdb_len - 16 : 0); chunk_addr = xp->sc->srp_iu_phys + xp->srp_iu_offset + 20 + desc_start + sizeof(*cmd); chunk_size = 16*nsegs; memcpy(&cmd->data_payload[desc_start], &chunk_addr, 8); memcpy(&cmd->data_payload[desc_start+12], &chunk_size, 4); chunk_size = 0; for (i = 0; i < nsegs; i++) chunk_size += segs[i].ds_len; memcpy(&cmd->data_payload[desc_start+16], &chunk_size, 4); desc_start += 20; for (i = 0; i < nsegs; i++) { chunk_addr = segs[i].ds_addr; chunk_size = segs[i].ds_len; memcpy(&cmd->data_payload[desc_start + 16*i], &chunk_addr, 8); /* Set handle tag to 0 */ memcpy(&cmd->data_payload[desc_start + 16*i + 12], &chunk_size, 4); } } else if (nsegs == 1) { switch (ccb->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_OUT: cmd->formats = (1 << 4); break; case CAM_DIR_IN: cmd->formats = 1; break; default: panic("Does not support bidirectional commands (%d)", ccb->ccb_h.flags & CAM_DIR_MASK); break; } /* * Memory descriptor: * 8 byte address * 4 byte handle * 4 byte length */ chunk_addr = segs[0].ds_addr; chunk_size = segs[0].ds_len; desc_start = ((ccb->csio.cdb_len > 16) ? ccb->csio.cdb_len - 16 : 0); memcpy(&cmd->data_payload[desc_start], &chunk_addr, 8); /* Set handle tag to 0 */ memcpy(&cmd->data_payload[desc_start+12], &chunk_size, 4); KASSERT(xp->srp_iu_size >= 48 + ((ccb->csio.cdb_len > 16) ? ccb->csio.cdb_len : 16), ("SRP IU command length")); } else { cmd->formats = 0; } bus_dmamap_sync(xp->sc->crq_tag, xp->sc->crq_map, BUS_DMASYNC_PREWRITE); /* Create CRQ entry */ crq.valid = 0x80; crq.format = 0x01; crq.iu_data = xp->sc->srp_iu_phys + xp->srp_iu_offset; err = phyp_hcall(H_SEND_CRQ, xp->sc->unit, ((uint64_t *)(&crq))[0], ((uint64_t *)(&crq))[1]); if (err != 0) panic("CRQ send failure (%d)", err); } static void vscsi_crq_load_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int err) { struct vscsi_softc *sc = xsc; sc->crq_phys = segs[0].ds_addr; sc->n_crqs = PAGE_SIZE/sizeof(struct vscsi_crq); sc->srp_iu_queue = (uint8_t *)(sc->crq_queue); sc->srp_iu_phys = segs[0].ds_addr; sc->srp_iu_arena = vmem_create("VSCSI SRP IU", PAGE_SIZE, segs[0].ds_len - PAGE_SIZE, 16, 0, M_BESTFIT | M_NOWAIT); } static void vscsi_setup_bus(struct vscsi_softc *sc) { struct vscsi_crq crq; struct vscsi_xfer *xp; int error; struct { uint32_t type; uint16_t status; uint16_t length; uint64_t tag; uint64_t buffer; struct { char srp_version[8]; char partition_name[96]; uint32_t partition_number; uint32_t mad_version; uint32_t os_type; uint32_t port_max_txu[8]; } payload; } mad_adapter_info; bzero(&crq, sizeof(crq)); /* Init message */ crq.valid = 0xc0; crq.format = 0x01; do { error = phyp_hcall(H_FREE_CRQ, sc->unit); } while (error == H_BUSY); /* See initialization sequence page 757 */ bzero(sc->crq_queue, sc->n_crqs*sizeof(sc->crq_queue[0])); sc->cur_crq = 0; sc->bus_initialized = 0; sc->bus_logged_in = 0; bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE); error = phyp_hcall(H_REG_CRQ, sc->unit, sc->crq_phys, sc->n_crqs*sizeof(sc->crq_queue[0])); KASSERT(error == 0, ("CRQ registration success")); error = phyp_hcall(H_SEND_CRQ, sc->unit, ((uint64_t *)(&crq))[0], ((uint64_t *)(&crq))[1]); if (error != 0) panic("CRQ setup failure (%d)", error); while (sc->bus_initialized == 0) vscsi_check_response_queue(sc); /* Send MAD adapter info */ mad_adapter_info.type = MAD_ADAPTER_INFO_REQUEST; mad_adapter_info.status = 0; mad_adapter_info.length = sizeof(mad_adapter_info.payload); strcpy(mad_adapter_info.payload.srp_version, "16.a"); strcpy(mad_adapter_info.payload.partition_name, "UNKNOWN"); 
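	/*
	 * Added note: the "buffer" field set below points 24 bytes past
	 * the start of the IU, i.e. just past the MAD header (4-byte
	 * type + 2-byte status + 2-byte length + 8-byte tag + 8-byte
	 * buffer), so the hypervisor writes its reply directly over the
	 * payload struct.
	 */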
mad_adapter_info.payload.partition_number = -1; mad_adapter_info.payload.mad_version = 1; mad_adapter_info.payload.os_type = 2; /* Claim we are Linux */ mad_adapter_info.payload.port_max_txu[0] = 0; /* If this fails, we get the defaults above */ OF_getprop(OF_finddevice("/"), "ibm,partition-name", mad_adapter_info.payload.partition_name, sizeof(mad_adapter_info.payload.partition_name)); OF_getprop(OF_finddevice("/"), "ibm,partition-no", &mad_adapter_info.payload.partition_number, sizeof(mad_adapter_info.payload.partition_number)); xp = TAILQ_FIRST(&sc->free_xferq); xp->ccb = NULL; TAILQ_REMOVE(&sc->free_xferq, xp, queue); TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue); xp->srp_iu_size = crq.iu_length = sizeof(mad_adapter_info); vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size, M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset); mad_adapter_info.buffer = xp->sc->srp_iu_phys + xp->srp_iu_offset + 24; mad_adapter_info.tag = (uint64_t)xp; memcpy((uint8_t *)xp->sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset, &mad_adapter_info, sizeof(mad_adapter_info)); crq.valid = 0x80; crq.format = 0x02; crq.iu_data = xp->sc->srp_iu_phys + xp->srp_iu_offset; bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE); phyp_hcall(H_SEND_CRQ, xp->sc->unit, ((uint64_t *)(&crq))[0], ((uint64_t *)(&crq))[1]); while (TAILQ_EMPTY(&sc->free_xferq)) vscsi_check_response_queue(sc); /* Send SRP login */ vscsi_srp_login(sc); while (sc->bus_logged_in == 0) vscsi_check_response_queue(sc); error = phyp_hcall(H_VIO_SIGNAL, sc->unit, 1); /* Enable interrupts */ } static void vscsi_intr(void *xsc) { struct vscsi_softc *sc = xsc; mtx_lock(&sc->io_lock); vscsi_check_response_queue(sc); mtx_unlock(&sc->io_lock); } static void vscsi_srp_response(struct vscsi_xfer *xp, struct vscsi_crq *crq) { union ccb *ccb = xp->ccb; struct vscsi_softc *sc = xp->sc; struct srp_rsp *rsp; uint32_t sense_len; /* SRP response packet in original request */ rsp = (struct srp_rsp *)((uint8_t *)sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset); ccb->csio.scsi_status = rsp->status; if (ccb->csio.scsi_status == SCSI_STATUS_OK) ccb->ccb_h.status = CAM_REQ_CMP; else ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR; #ifdef NOTYET /* Collect fast fail codes */ if (crq->status != 0) ccb->ccb_h.status = CAM_REQ_CMP_ERR; #endif if (ccb->ccb_h.status != CAM_REQ_CMP) { ccb->ccb_h.status |= CAM_DEV_QFRZN; xpt_freeze_devq(ccb->ccb_h.path, /*count*/ 1); } if (!(rsp->flags & SRP_RSPVALID)) rsp->response_data_len = 0; if (!(rsp->flags & SRP_SNSVALID)) rsp->sense_data_len = 0; if (!(rsp->flags & (SRP_DOOVER | SRP_DOUNDER))) rsp->data_out_resid = 0; if (!(rsp->flags & (SRP_DIOVER | SRP_DIUNDER))) rsp->data_in_resid = 0; if (rsp->flags & SRP_SNSVALID) { bzero(&ccb->csio.sense_data, sizeof(struct scsi_sense_data)); ccb->ccb_h.status |= CAM_AUTOSNS_VALID; sense_len = min(be32toh(rsp->sense_data_len), ccb->csio.sense_len); memcpy(&ccb->csio.sense_data, &rsp->data_payload[be32toh(rsp->response_data_len)], sense_len); ccb->csio.sense_resid = ccb->csio.sense_len - be32toh(rsp->sense_data_len); } switch (ccb->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_OUT: ccb->csio.resid = rsp->data_out_resid; break; case CAM_DIR_IN: ccb->csio.resid = rsp->data_in_resid; break; } bus_dmamap_sync(sc->data_tag, xp->dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->data_tag, xp->dmamap); xpt_done(ccb); xp->ccb = NULL; } static void vscsi_login_response(struct vscsi_xfer *xp, struct vscsi_crq *crq) { struct vscsi_softc *sc = xp->sc; struct srp_login_rsp *rsp; /* SRP response packet in original request */ 
rsp = (struct srp_login_rsp *)((uint8_t *)sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset); KASSERT(be16toh(rsp->buffer_formats) & 0x3, ("Both direct and indirect " "buffers supported")); sc->max_transactions = be32toh(rsp->request_limit_delta); device_printf(sc->dev, "Queue depth %d commands\n", sc->max_transactions); sc->bus_logged_in = 1; } static void vscsi_cam_poll(struct cam_sim *sim) { struct vscsi_softc *sc = cam_sim_softc(sim); vscsi_check_response_queue(sc); } static void vscsi_check_response_queue(struct vscsi_softc *sc) { struct vscsi_crq *crq; struct vscsi_xfer *xp; int code; mtx_assert(&sc->io_lock, MA_OWNED); while (sc->crq_queue[sc->cur_crq].valid != 0) { /* The hypercalls at both ends of this are not optimal */ phyp_hcall(H_VIO_SIGNAL, sc->unit, 0); bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_POSTREAD); crq = &sc->crq_queue[sc->cur_crq]; switch (crq->valid) { case 0xc0: if (crq->format == 0x02) sc->bus_initialized = 1; break; case 0x80: /* IU data is set to tag pointer (the XP) */ xp = (struct vscsi_xfer *)crq->iu_data; switch (crq->format) { case 0x01: code = *((uint8_t *)sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset); switch (code) { case SRP_RSP: vscsi_srp_response(xp, crq); break; case SRP_LOGIN_RSP: vscsi_login_response(xp, crq); break; default: device_printf(sc->dev, "Unknown SRP " "response code %d\n", code); break; } break; case 0x02: /* Ignore management datagrams */ break; default: panic("Unknown CRQ format %d\n", crq->format); break; } vmem_free(sc->srp_iu_arena, xp->srp_iu_offset, xp->srp_iu_size); TAILQ_REMOVE(&sc->active_xferq, xp, queue); TAILQ_INSERT_TAIL(&sc->free_xferq, xp, queue); break; default: device_printf(sc->dev, "Unknown CRQ message type %d\n", crq->valid); break; } crq->valid = 0; sc->cur_crq = (sc->cur_crq + 1) % sc->n_crqs; bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE); phyp_hcall(H_VIO_SIGNAL, sc->unit, 1); } } Index: projects/powernv/powerpc/pseries/platform_chrp.c =================================================================== --- projects/powernv/powerpc/pseries/platform_chrp.c (revision 290990) +++ projects/powernv/powerpc/pseries/platform_chrp.c (revision 290991) @@ -1,516 +1,517 @@ /*- * Copyright (c) 2008 Marcel Moolenaar * Copyright (c) 2009 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "platform_if.h" #ifdef SMP extern void *ap_pcpu; #endif #ifdef __powerpc64__ static uint8_t splpar_vpa[MAXCPU][640] __aligned(128); /* XXX: dpcpu */ #endif static vm_offset_t realmaxaddr = VM_MAX_ADDRESS; static int chrp_probe(platform_t); static int chrp_attach(platform_t); void chrp_mem_regions(platform_t, struct mem_region *phys, int *physsz, struct mem_region *avail, int *availsz); static vm_offset_t chrp_real_maxaddr(platform_t); static u_long chrp_timebase_freq(platform_t, struct cpuref *cpuref); static int chrp_smp_first_cpu(platform_t, struct cpuref *cpuref); static int chrp_smp_next_cpu(platform_t, struct cpuref *cpuref); static int chrp_smp_get_bsp(platform_t, struct cpuref *cpuref); static void chrp_smp_ap_init(platform_t); #ifdef SMP static int chrp_smp_start_cpu(platform_t, struct pcpu *cpu); static struct cpu_group *chrp_smp_topo(platform_t plat); #endif static void chrp_reset(platform_t); #ifdef __powerpc64__ #include "phyp-hvcall.h" static void phyp_cpu_idle(sbintime_t sbt); #endif static platform_method_t chrp_methods[] = { PLATFORMMETHOD(platform_probe, chrp_probe), PLATFORMMETHOD(platform_attach, chrp_attach), PLATFORMMETHOD(platform_mem_regions, chrp_mem_regions), PLATFORMMETHOD(platform_real_maxaddr, chrp_real_maxaddr), PLATFORMMETHOD(platform_timebase_freq, chrp_timebase_freq), PLATFORMMETHOD(platform_smp_ap_init, chrp_smp_ap_init), PLATFORMMETHOD(platform_smp_first_cpu, chrp_smp_first_cpu), PLATFORMMETHOD(platform_smp_next_cpu, chrp_smp_next_cpu), PLATFORMMETHOD(platform_smp_get_bsp, chrp_smp_get_bsp), #ifdef SMP PLATFORMMETHOD(platform_smp_start_cpu, chrp_smp_start_cpu), PLATFORMMETHOD(platform_smp_topo, chrp_smp_topo), #endif PLATFORMMETHOD(platform_reset, chrp_reset), { 0, 0 } }; static platform_def_t chrp_platform = { "chrp", chrp_methods, 0 }; PLATFORM_DEF(chrp_platform); static int chrp_probe(platform_t plat) { if (OF_finddevice("/memory") != -1 || OF_finddevice("/memory@0") != -1) return (BUS_PROBE_GENERIC); return (ENXIO); } static int chrp_attach(platform_t plat) { #ifdef __powerpc64__ int i; /* XXX: check for /rtas/ibm,hypertas-functions? */ if (!(mfmsr() & PSL_HV)) { struct mem_region *phys, *avail; int nphys, navail; mem_regions(&phys, &nphys, &avail, &navail); realmaxaddr = phys[0].mr_size; pmap_mmu_install("mmu_phyp", BUS_PROBE_SPECIFIC); cpu_idle_hook = phyp_cpu_idle; /* Set up important VPA fields */ for (i = 0; i < MAXCPU; i++) { bzero(splpar_vpa[i], sizeof(splpar_vpa)); /* First two: VPA size */ splpar_vpa[i][4] = (uint8_t)((sizeof(splpar_vpa[i]) >> 8) & 0xff); splpar_vpa[i][5] = (uint8_t)(sizeof(splpar_vpa[i]) & 0xff); splpar_vpa[i][0xba] = 1; /* Maintain FPRs */ splpar_vpa[i][0xbb] = 1; /* Maintain PMCs */ splpar_vpa[i][0xfc] = 0xff; /* Maintain full SLB */ splpar_vpa[i][0xfd] = 0xff; splpar_vpa[i][0xff] = 1; /* Maintain Altivec */ } mb(); /* Set up hypervisor CPU stuff */ chrp_smp_ap_init(plat); } #endif /* Some systems (e.g. 
QEMU) need Open Firmware to stand down */ ofw_quiesce(); return (0); } static int parse_drconf_memory(struct mem_region *ofmem, int *msz, struct mem_region *ofavail, int *asz) { phandle_t phandle; vm_offset_t base; int i, idx, len, lasz, lmsz, res; uint32_t flags, lmb_size[2]; - uint64_t *dmem; + uint32_t *dmem; lmsz = *msz; lasz = *asz; phandle = OF_finddevice("/ibm,dynamic-reconfiguration-memory"); if (phandle == -1) /* No drconf node, return. */ return (0); - res = OF_getprop(phandle, "ibm,lmb-size", lmb_size, sizeof(lmb_size)); + res = OF_getencprop(phandle, "ibm,lmb-size", lmb_size, + sizeof(lmb_size)); if (res == -1) return (0); printf("Logical Memory Block size: %d MB\n", lmb_size[1] >> 20); /* Parse the /ibm,dynamic-memory. The first position gives the # of entries. The next two words reflect the address of the memory block. The next four words are the DRC index, reserved, list index and flags. (see PAPR C.6.6.2 ibm,dynamic-reconfiguration-memory) #el Addr DRC-idx res list-idx flags ------------------------------------------------- | 4 | 8 | 4 | 4 | 4 | 4 |.... ------------------------------------------------- */ len = OF_getproplen(phandle, "ibm,dynamic-memory"); if (len > 0) { /* We have to use a variable length array on the stack since we have very limited stack space. */ cell_t arr[len/sizeof(cell_t)]; - res = OF_getprop(phandle, "ibm,dynamic-memory", &arr, - sizeof(arr)); + res = OF_getencprop(phandle, "ibm,dynamic-memory", arr, + sizeof(arr)); if (res == -1) return (0); /* Number of elements */ idx = arr[0]; /* First address, in arr[1], arr[2]*/ - dmem = (uint64_t*)&arr[1]; + dmem = &arr[1]; for (i = 0; i < idx; i++) { - base = *dmem; - dmem += 2; - flags = *dmem; + base = ((uint64_t)dmem[0] << 32) + dmem[1]; + dmem += 4; + flags = dmem[1]; /* Use region only if available and not reserved. */ if ((flags & 0x8) && !(flags & 0x80)) { ofmem[lmsz].mr_start = base; ofmem[lmsz].mr_size = (vm_size_t)lmb_size[1]; ofavail[lasz].mr_start = base; ofavail[lasz].mr_size = (vm_size_t)lmb_size[1]; lmsz++; lasz++; } - dmem++; + dmem += 2; } } *msz = lmsz; *asz = lasz; return (1); } void chrp_mem_regions(platform_t plat, struct mem_region *phys, int *physsz, struct mem_region *avail, int *availsz) { vm_offset_t maxphysaddr; int i; ofw_mem_regions(phys, physsz, avail, availsz); parse_drconf_memory(phys, physsz, avail, availsz); /* * On some firmwares (SLOF), some memory may be marked available that * doesn't actually exist. This manifests as an extension of the last * available segment past the end of physical memory, so truncate that * one. 
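 *
 * Worked example (values assumed for illustration): if the last
 * available region is { start 0x60000000, size 0x20000000 } but
 * physical memory ends at 0x70000000, the loop below truncates its
 * size to 0x10000000.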
*/ maxphysaddr = 0; for (i = 0; i < *physsz; i++) if (phys[i].mr_start + phys[i].mr_size > maxphysaddr) maxphysaddr = phys[i].mr_start + phys[i].mr_size; for (i = 0; i < *availsz; i++) if (avail[i].mr_start + avail[i].mr_size > maxphysaddr) avail[i].mr_size = maxphysaddr - avail[i].mr_start; } static vm_offset_t chrp_real_maxaddr(platform_t plat) { return (realmaxaddr); } static u_long chrp_timebase_freq(platform_t plat, struct cpuref *cpuref) { phandle_t phandle; int32_t ticks = -1; phandle = cpuref->cr_hwref; - OF_getprop(phandle, "timebase-frequency", &ticks, sizeof(ticks)); + OF_getencprop(phandle, "timebase-frequency", &ticks, sizeof(ticks)); if (ticks <= 0) panic("Unable to determine timebase frequency!"); return (ticks); } static int chrp_smp_first_cpu(platform_t plat, struct cpuref *cpuref) { char buf[8]; phandle_t cpu, dev, root; int res, cpuid; root = OF_peer(0); dev = OF_child(root); while (dev != 0) { res = OF_getprop(dev, "name", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpus") == 0) break; dev = OF_peer(dev); } if (dev == 0) { /* * psim doesn't have a name property on the /cpus node, * but it can be found directly */ dev = OF_finddevice("/cpus"); if (dev == 0) return (ENOENT); } cpu = OF_child(dev); while (cpu != 0) { res = OF_getprop(cpu, "device_type", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpu") == 0) break; cpu = OF_peer(cpu); } if (cpu == 0) return (ENOENT); cpuref->cr_hwref = cpu; - res = OF_getprop(cpu, "ibm,ppc-interrupt-server#s", &cpuid, + res = OF_getencprop(cpu, "ibm,ppc-interrupt-server#s", &cpuid, sizeof(cpuid)); if (res <= 0) - res = OF_getprop(cpu, "reg", &cpuid, sizeof(cpuid)); + res = OF_getencprop(cpu, "reg", &cpuid, sizeof(cpuid)); if (res <= 0) cpuid = 0; cpuref->cr_cpuid = cpuid; return (0); } static int chrp_smp_next_cpu(platform_t plat, struct cpuref *cpuref) { char buf[8]; phandle_t cpu; int i, res, cpuid; /* Check for whether it should be the next thread */ res = OF_getproplen(cpuref->cr_hwref, "ibm,ppc-interrupt-server#s"); if (res > 0) { cell_t interrupt_servers[res/sizeof(cell_t)]; - OF_getprop(cpuref->cr_hwref, "ibm,ppc-interrupt-server#s", + OF_getencprop(cpuref->cr_hwref, "ibm,ppc-interrupt-server#s", interrupt_servers, res); for (i = 0; i < res/sizeof(cell_t) - 1; i++) { if (interrupt_servers[i] == cpuref->cr_cpuid) { cpuref->cr_cpuid = interrupt_servers[i+1]; return (0); } } } /* Next CPU core/package */ cpu = OF_peer(cpuref->cr_hwref); while (cpu != 0) { res = OF_getprop(cpu, "device_type", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpu") == 0) break; cpu = OF_peer(cpu); } if (cpu == 0) return (ENOENT); cpuref->cr_hwref = cpu; - res = OF_getprop(cpu, "ibm,ppc-interrupt-server#s", &cpuid, + res = OF_getencprop(cpu, "ibm,ppc-interrupt-server#s", &cpuid, sizeof(cpuid)); if (res <= 0) - res = OF_getprop(cpu, "reg", &cpuid, sizeof(cpuid)); + res = OF_getencprop(cpu, "reg", &cpuid, sizeof(cpuid)); if (res <= 0) cpuid = 0; cpuref->cr_cpuid = cpuid; return (0); } static int chrp_smp_get_bsp(platform_t plat, struct cpuref *cpuref) { ihandle_t inst; phandle_t bsp, chosen; int res, cpuid; chosen = OF_finddevice("/chosen"); if (chosen == 0) return (ENXIO); - res = OF_getprop(chosen, "cpu", &inst, sizeof(inst)); + res = OF_getencprop(chosen, "cpu", &inst, sizeof(inst)); if (res < 0) return (ENXIO); bsp = OF_instance_to_package(inst); /* Pick the primary thread. Can it be any other? 
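 *
 * (Added note: ibm,ppc-interrupt-server#s carries one cell per SMT
 * thread, e.g. an assumed { 8, 9, 10, 11 } for a four-thread core;
 * reading a single cell below therefore selects the first, primary
 * thread.)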
*/ cpuref->cr_hwref = bsp; - res = OF_getprop(bsp, "ibm,ppc-interrupt-server#s", &cpuid, + res = OF_getencprop(bsp, "ibm,ppc-interrupt-server#s", &cpuid, sizeof(cpuid)); if (res <= 0) - res = OF_getprop(bsp, "reg", &cpuid, sizeof(cpuid)); + res = OF_getencprop(bsp, "reg", &cpuid, sizeof(cpuid)); if (res <= 0) cpuid = 0; cpuref->cr_cpuid = cpuid; return (0); } #ifdef SMP static int chrp_smp_start_cpu(platform_t plat, struct pcpu *pc) { cell_t start_cpu; int result, err, timeout; if (!rtas_exists()) { printf("RTAS uninitialized: unable to start AP %d\n", pc->pc_cpuid); return (ENXIO); } start_cpu = rtas_token_lookup("start-cpu"); if (start_cpu == -1) { printf("RTAS unknown method: unable to start AP %d\n", pc->pc_cpuid); return (ENXIO); } ap_pcpu = pc; powerpc_sync(); result = rtas_call_method(start_cpu, 3, 1, pc->pc_cpuid, EXC_RST, pc, &err); if (result < 0 || err != 0) { printf("RTAS error (%d/%d): unable to start AP %d\n", result, err, pc->pc_cpuid); return (ENXIO); } timeout = 10000; while (!pc->pc_awake && timeout--) DELAY(100); return ((pc->pc_awake) ? 0 : EBUSY); } static struct cpu_group * chrp_smp_topo(platform_t plat) { struct pcpu *pc, *last_pc; int i, ncores, ncpus; ncores = ncpus = 0; last_pc = NULL; for (i = 0; i <= mp_maxid; i++) { pc = pcpu_find(i); if (pc == NULL) continue; if (last_pc == NULL || pc->pc_hwref != last_pc->pc_hwref) ncores++; last_pc = pc; ncpus++; } if (ncpus % ncores != 0) { printf("WARNING: Irregular SMP topology. Performance may be " "suboptimal (%d CPUS, %d cores)\n", ncpus, ncores); return (smp_topo_none()); } /* Don't do anything fancier for non-threaded SMP */ if (ncpus == ncores) return (smp_topo_none()); return (smp_topo_1level(CG_SHARE_L1, ncpus / ncores, CG_FLAG_SMT)); } #endif static void chrp_reset(platform_t platform) { OF_reboot(); } #ifdef __powerpc64__ static void phyp_cpu_idle(sbintime_t sbt) { phyp_hcall(H_CEDE); } static void chrp_smp_ap_init(platform_t platform) { if (!(mfmsr() & PSL_HV)) { /* Register VPA */ phyp_hcall(H_REGISTER_VPA, 1UL, PCPU_GET(cpuid), splpar_vpa[PCPU_GET(cpuid)]); /* Set interrupt priority */ phyp_hcall(H_CPPR, 0xff); } } #else static void chrp_smp_ap_init(platform_t platform) { } #endif Index: projects/powernv/powerpc/pseries/plpar_iommu.c =================================================================== --- projects/powernv/powerpc/pseries/plpar_iommu.c (revision 290990) +++ projects/powernv/powerpc/pseries/plpar_iommu.c (revision 290991) @@ -1,244 +1,245 @@ /*- * Copyright (c) 2013, Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_PHYPIOMMU, "iommu", "IOMMU data for PAPR LPARs"); struct papr_iommu_map { uint32_t iobn; vmem_t *vmem; struct papr_iommu_map *next; }; static SLIST_HEAD(iommu_maps, iommu_map) iommu_map_head = SLIST_HEAD_INITIALIZER(iommu_map_head); static int papr_supports_stuff_tce = -1; struct iommu_map { uint32_t iobn; vmem_t *vmem; SLIST_ENTRY(iommu_map) entries; }; struct dma_window { struct iommu_map *map; bus_addr_t start; bus_addr_t end; }; int phyp_iommu_set_dma_tag(device_t bus, device_t dev, bus_dma_tag_t tag) { device_t p; phandle_t node; cell_t dma_acells, dma_scells, dmawindow[6]; struct iommu_map *i; int cell; for (p = dev; device_get_parent(p) != NULL; p = device_get_parent(p)) { if (ofw_bus_has_prop(p, "ibm,my-dma-window")) break; if (ofw_bus_has_prop(p, "ibm,dma-window")) break; } if (p == NULL) return (ENXIO); node = ofw_bus_get_node(p); - if (OF_getprop(node, "ibm,#dma-size-cells", &dma_scells, + if (OF_getencprop(node, "ibm,#dma-size-cells", &dma_scells, sizeof(cell_t)) <= 0) - OF_searchprop(node, "#size-cells", &dma_scells, sizeof(cell_t)); - if (OF_getprop(node, "ibm,#dma-address-cells", &dma_acells, + OF_searchencprop(node, "#size-cells", &dma_scells, + sizeof(cell_t)); + if (OF_getencprop(node, "ibm,#dma-address-cells", &dma_acells, sizeof(cell_t)) <= 0) - OF_searchprop(node, "#address-cells", &dma_acells, + OF_searchencprop(node, "#address-cells", &dma_acells, sizeof(cell_t)); if (ofw_bus_has_prop(p, "ibm,my-dma-window")) - OF_getprop(node, "ibm,my-dma-window", dmawindow, + OF_getencprop(node, "ibm,my-dma-window", dmawindow, sizeof(cell_t)*(dma_scells + dma_acells + 1)); else - OF_getprop(node, "ibm,dma-window", dmawindow, + OF_getencprop(node, "ibm,dma-window", dmawindow, sizeof(cell_t)*(dma_scells + dma_acells + 1)); struct dma_window *window = malloc(sizeof(struct dma_window), M_PHYPIOMMU, M_WAITOK); window->start = 0; for (cell = 1; cell < 1 + dma_acells; cell++) { window->start <<= 32; window->start |= dmawindow[cell]; } window->end = 0; for (; cell < 1 + dma_acells + dma_scells; cell++) { window->end <<= 32; window->end |= dmawindow[cell]; } window->end += window->start; if (bootverbose) device_printf(dev, "Mapping IOMMU domain %#x\n", dmawindow[0]); window->map = NULL; SLIST_FOREACH(i, &iommu_map_head, entries) { if (i->iobn == dmawindow[0]) { window->map = i; break; } } if (window->map == NULL) { window->map = malloc(sizeof(struct iommu_map), M_PHYPIOMMU, M_WAITOK); window->map->iobn = dmawindow[0]; /* * Allocate IOMMU range beginning at PAGE_SIZE. Some drivers * (em(4), for example) do not like getting mappings at 0. */ window->map->vmem = vmem_create("IOMMU mappings", PAGE_SIZE, trunc_page(VMEM_ADDR_MAX) - PAGE_SIZE, PAGE_SIZE, 0, M_BESTFIT | M_NOWAIT); SLIST_INSERT_HEAD(&iommu_map_head, window->map, entries); } /* * Check experimentally whether we can use H_STUFF_TCE. It is required * by the spec but some firmware (e.g. 
QEMU) does not actually support * it */ if (papr_supports_stuff_tce == -1) papr_supports_stuff_tce = !(phyp_hcall(H_STUFF_TCE, window->map->iobn, 0, 0, 0) == H_FUNCTION); bus_dma_tag_set_iommu(tag, bus, window); return (0); } int phyp_iommu_map(device_t dev, bus_dma_segment_t *segs, int *nsegs, bus_addr_t min, bus_addr_t max, bus_size_t alignment, bus_addr_t boundary, void *cookie) { struct dma_window *window = cookie; bus_addr_t minaddr, maxaddr; bus_addr_t alloced; bus_size_t allocsize; int error, i, j; uint64_t tce; minaddr = window->start; maxaddr = window->end; /* XXX: handle exclusion range in a more useful way */ if (min < maxaddr) maxaddr = min; /* XXX: consolidate segs? */ for (i = 0; i < *nsegs; i++) { allocsize = round_page(segs[i].ds_len + (segs[i].ds_addr & PAGE_MASK)); error = vmem_xalloc(window->map->vmem, allocsize, (alignment < PAGE_SIZE) ? PAGE_SIZE : alignment, 0, boundary, minaddr, maxaddr, M_BESTFIT | M_NOWAIT, &alloced); if (error != 0) { panic("VMEM failure: %d\n", error); return (error); } KASSERT(alloced % PAGE_SIZE == 0, ("Alloc not page aligned")); KASSERT((alloced + (segs[i].ds_addr & PAGE_MASK)) % alignment == 0, ("Allocated segment does not match alignment constraint")); tce = trunc_page(segs[i].ds_addr); tce |= 0x3; /* read/write */ for (j = 0; j < allocsize; j += PAGE_SIZE) { error = phyp_hcall(H_PUT_TCE, window->map->iobn, alloced + j, tce + j); if (error < 0) { panic("IOMMU mapping error: %d\n", error); return (ENOMEM); } } segs[i].ds_addr = alloced + (segs[i].ds_addr & PAGE_MASK); KASSERT(segs[i].ds_addr > 0, ("Address needs to be positive")); KASSERT(segs[i].ds_addr + segs[i].ds_len < maxaddr, ("Address not in range")); if (error < 0) { panic("IOMMU mapping error: %d\n", error); return (ENOMEM); } } return (0); } int phyp_iommu_unmap(device_t dev, bus_dma_segment_t *segs, int nsegs, void *cookie) { struct dma_window *window = cookie; bus_addr_t pageround; bus_size_t roundedsize; int i; bus_addr_t j; for (i = 0; i < nsegs; i++) { pageround = trunc_page(segs[i].ds_addr); roundedsize = round_page(segs[i].ds_len + (segs[i].ds_addr & PAGE_MASK)); if (papr_supports_stuff_tce) { phyp_hcall(H_STUFF_TCE, window->map->iobn, pageround, 0, roundedsize/PAGE_SIZE); } else { for (j = 0; j < roundedsize; j += PAGE_SIZE) phyp_hcall(H_PUT_TCE, window->map->iobn, pageround + j, 0); } vmem_xfree(window->map->vmem, pageround, roundedsize); } return (0); } Index: projects/powernv/powerpc/pseries/rtas_pci.c =================================================================== --- projects/powernv/powerpc/pseries/rtas_pci.c (revision 290990) +++ projects/powernv/powerpc/pseries/rtas_pci.c (revision 290991) @@ -1,205 +1,205 @@ /*- * Copyright (c) 2011 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" #include "iommu_if.h" /* * Device interface. */ static int rtaspci_probe(device_t); static int rtaspci_attach(device_t); /* * pcib interface. */ static u_int32_t rtaspci_read_config(device_t, u_int, u_int, u_int, u_int, int); static void rtaspci_write_config(device_t, u_int, u_int, u_int, u_int, u_int32_t, int); /* * Driver methods. */ static device_method_t rtaspci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, rtaspci_probe), DEVMETHOD(device_attach, rtaspci_attach), /* pcib interface */ DEVMETHOD(pcib_read_config, rtaspci_read_config), DEVMETHOD(pcib_write_config, rtaspci_write_config), DEVMETHOD_END }; struct rtaspci_softc { struct ofw_pci_softc pci_sc; cell_t read_pci_config, write_pci_config; cell_t ex_read_pci_config, ex_write_pci_config; int sc_extended_config; }; static devclass_t rtaspci_devclass; DEFINE_CLASS_1(pcib, rtaspci_driver, rtaspci_methods, sizeof(struct rtaspci_softc), ofw_pci_driver); DRIVER_MODULE(rtaspci, ofwbus, rtaspci_driver, rtaspci_devclass, 0, 0); static int rtaspci_probe(device_t dev) { const char *type; if (!rtas_exists()) return (ENXIO); type = ofw_bus_get_type(dev); if (OF_getproplen(ofw_bus_get_node(dev), "used-by-rtas") < 0) return (ENXIO); if (type == NULL || strcmp(type, "pci") != 0) return (ENXIO); device_set_desc(dev, "RTAS Host-PCI bridge"); return (BUS_PROBE_GENERIC); } static int rtaspci_attach(device_t dev) { struct rtaspci_softc *sc; sc = device_get_softc(dev); sc->read_pci_config = rtas_token_lookup("read-pci-config"); sc->write_pci_config = rtas_token_lookup("write-pci-config"); sc->ex_read_pci_config = rtas_token_lookup("ibm,read-pci-config"); sc->ex_write_pci_config = rtas_token_lookup("ibm,write-pci-config"); sc->sc_extended_config = 0; - OF_getprop(ofw_bus_get_node(dev), "ibm,pci-config-space-type", + OF_getencprop(ofw_bus_get_node(dev), "ibm,pci-config-space-type", &sc->sc_extended_config, sizeof(sc->sc_extended_config)); return (ofw_pci_attach(dev)); } static uint32_t rtaspci_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, int width) { struct rtaspci_softc *sc; uint32_t retval = 0xffffffff; uint32_t config_addr; int error, pcierror; sc = device_get_softc(dev); config_addr = ((bus & 0xff) << 16) | ((slot & 0x1f) << 11) | ((func & 0x7) << 8) | (reg & 0xff); if (sc->sc_extended_config) config_addr |= (reg & 0xf00) << 16; if (sc->ex_read_pci_config != -1) error = rtas_call_method(sc->ex_read_pci_config, 4, 2, config_addr, sc->pci_sc.sc_pcir.phys_hi, sc->pci_sc.sc_pcir.phys_mid, width, &pcierror, &retval); else error = rtas_call_method(sc->read_pci_config, 2, 2, config_addr, width, &pcierror, &retval); /* Sign-extend output */ switch (width) { case 1: retval = (int32_t)(int8_t)(retval); break; case 2: retval = (int32_t)(int16_t)(retval); 
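		/*
		 * Added example: an assumed 16-bit read returning 0x8000
		 * becomes 0xffff8000 after this sign-extension.
		 */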
break; } if (error < 0 || pcierror != 0) retval = 0xffffffff; return (retval); } static void rtaspci_write_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, uint32_t val, int width) { struct rtaspci_softc *sc; uint32_t config_addr; int pcierror; sc = device_get_softc(dev); config_addr = ((bus & 0xff) << 16) | ((slot & 0x1f) << 11) | ((func & 0x7) << 8) | (reg & 0xff); if (sc->sc_extended_config) config_addr |= (reg & 0xf00) << 16; if (sc->ex_write_pci_config != -1) rtas_call_method(sc->ex_write_pci_config, 5, 1, config_addr, sc->pci_sc.sc_pcir.phys_hi, sc->pci_sc.sc_pcir.phys_mid, width, val, &pcierror); else rtas_call_method(sc->write_pci_config, 3, 1, config_addr, width, val, &pcierror); } Index: projects/powernv/sparc64/include/dump.h =================================================================== --- projects/powernv/sparc64/include/dump.h (revision 290990) +++ projects/powernv/sparc64/include/dump.h (revision 290991) @@ -1,76 +1,68 @@ /*- * Copyright (c) 2014 EMC Corp. * Author: Conrad Meyer * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_DUMP_H_ #define _MACHINE_DUMP_H_ #define DUMPSYS_MD_PA_NPAIRS 128 #define DUMPSYS_NUM_AUX_HDRS 0 #define KERNELDUMP_ARCH_VERSION KERNELDUMP_SPARC64_VERSION #define EM_VALUE EM_SPARCV9 -void dumpsys_pa_init(void); int dumpsys(struct dumperinfo *); static inline struct dump_pa * dumpsys_pa_next(struct dump_pa *p) { return (dumpsys_gen_pa_next(p)); } static inline void dumpsys_wbinv_all(void) { dumpsys_gen_wbinv_all(); } static inline void dumpsys_unmap_chunk(vm_paddr_t pa, size_t s, void *va) { dumpsys_gen_unmap_chunk(pa, s, va); -} - -static inline int -dumpsys_write_aux_headers(struct dumperinfo *di) -{ - - return (dumpsys_gen_write_aux_headers(di)); } static inline int minidumpsys(struct dumperinfo *di) { return (-ENOSYS); } #endif /* !_MACHINE_DUMP_H_ */ Index: projects/powernv/sparc64/sparc64/dump_machdep.c =================================================================== --- projects/powernv/sparc64/sparc64/dump_machdep.c (revision 290990) +++ projects/powernv/sparc64/sparc64/dump_machdep.c (revision 290991) @@ -1,172 +1,162 @@ /*- * Copyright (c) 2002 Marcel Moolenaar * Copyright (c) 2002 Thomas Moestl * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static off_t fileofs; extern off_t dumplo; extern struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS]; int do_minidump = 0; void -dumpsys_pa_init(void) -{ - int i; - - memset(dump_map, 0, sizeof(dump_map)); - for (i = 0; i < sparc64_nmemreg; i++) { - dump_map[i].pa_start = sparc64_memreg[i].mr_start; - dump_map[i].pa_size = sparc64_memreg[i].mr_size; - } -} - -void dumpsys_map_chunk(vm_paddr_t pa, size_t chunk __unused, void **va) { *va = (void *)TLB_PHYS_TO_DIRECT(pa); } static int reg_write(struct dumperinfo *di, vm_paddr_t pa, vm_size_t size) { struct sparc64_dump_reg r; r.dr_pa = pa; r.dr_size = size; r.dr_offs = fileofs; fileofs += size; return (dumpsys_buf_write(di, (char *)&r, sizeof(r))); } int dumpsys(struct dumperinfo *di) { static struct kerneldumpheader kdh; - struct sparc64_dump_hdr hdr; vm_size_t size, totsize, hdrsize; int error, i, nreg; - /* Calculate dump size. */ + /* Set up dump_map and calculate dump size. */ size = 0; nreg = sparc64_nmemreg; - for (i = 0; i < sparc64_nmemreg; i++) - size += sparc64_memreg[i].mr_size; + memset(dump_map, 0, sizeof(dump_map)); + for (i = 0; i < nreg; i++) { + dump_map[i].pa_start = sparc64_memreg[i].mr_start; + size += dump_map[i].pa_size = sparc64_memreg[i].mr_size; + } /* Account for the header size. */ hdrsize = roundup2(sizeof(hdr) + sizeof(struct sparc64_dump_reg) * nreg, DEV_BSIZE); size += hdrsize; totsize = size + 2 * sizeof(kdh); if (totsize > di->mediasize) { printf("Insufficient space on device (need %ld, have %ld), " "refusing to dump.\n", (long)totsize, (long)di->mediasize); error = ENOSPC; goto fail; } /* Determine dump offset on device. */ dumplo = di->mediaoffset + di->mediasize - totsize; mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_SPARC64_VERSION, size, di->blocksize); printf("Dumping %lu MB (%d chunks)\n", (u_long)(size >> 20), nreg); /* Dump leader */ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); if (error) goto fail; dumplo += sizeof(kdh); /* Dump the private header. 
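* Together with the writes below, the on-media layout works out to
* (illustrative sketch):
*
*	[leader kdh][sparc64_dump_hdr + nreg region descriptors,
*	 padded to DEV_BSIZE][memory chunks][trailer kdh]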
*/ hdr.dh_hdr_size = hdrsize; hdr.dh_tsb_pa = tsb_kernel_phys; hdr.dh_tsb_size = tsb_kernel_size; hdr.dh_tsb_mask = tsb_kernel_mask; hdr.dh_nregions = nreg; if (dumpsys_buf_write(di, (char *)&hdr, sizeof(hdr)) != 0) goto fail; fileofs = hdrsize; /* Now, write out the region descriptors. */ - for (i = 0; i < sparc64_nmemreg; i++) { + for (i = 0; i < nreg; i++) { error = reg_write(di, sparc64_memreg[i].mr_start, sparc64_memreg[i].mr_size); if (error != 0) goto fail; } dumpsys_buf_flush(di); /* Dump memory chunks. */ error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di); if (error < 0) goto fail; /* Dump trailer */ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); if (error) goto fail; /* Signal completion, signoff and exit stage left. */ dump_write(di, NULL, 0, 0, 0); printf("\nDump complete\n"); return (0); fail: if (error < 0) error = -error; /* XXX It should look more like VMS :-) */ printf("** DUMP FAILED (ERROR %d) **\n", error); return (error); } Index: projects/powernv/sys/jail.h =================================================================== --- projects/powernv/sys/jail.h (revision 290990) +++ projects/powernv/sys/jail.h (revision 290991) @@ -1,414 +1,415 @@ /*- * Copyright (c) 1999 Poul-Henning Kamp. * Copyright (c) 2009 James Gritton. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_JAIL_H_ #define _SYS_JAIL_H_ #ifdef _KERNEL struct jail_v0 { u_int32_t version; char *path; char *hostname; u_int32_t ip_number; }; #endif struct jail { uint32_t version; char *path; char *hostname; char *jailname; uint32_t ip4s; uint32_t ip6s; struct in_addr *ip4; struct in6_addr *ip6; }; #define JAIL_API_VERSION 2 /* * For all xprison structs, always keep the pr_version an int and * the first variable so userspace can easily distinguish them. */ #ifndef _KERNEL struct xprison_v1 { int pr_version; int pr_id; char pr_path[MAXPATHLEN]; char pr_host[MAXHOSTNAMELEN]; u_int32_t pr_ip; }; #endif struct xprison { int pr_version; int pr_id; int pr_state; cpusetid_t pr_cpusetid; char pr_path[MAXPATHLEN]; char pr_host[MAXHOSTNAMELEN]; char pr_name[MAXHOSTNAMELEN]; uint32_t pr_ip4s; uint32_t pr_ip6s; #if 0 /* * sizeof(xprison) will be malloced + size needed for all * IPv4 and IPv6 addesses. Offsets are based numbers of addresses. 
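* For example (hypothetical counts), with pr_ip4s = 2 and pr_ip6s = 1
* the caller allocates sizeof(struct xprison) + 2 * sizeof(struct
* in_addr) + 1 * sizeof(struct in6_addr) bytes, and pr_ip6[] begins
* immediately after the two in_addr entries.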
*/ struct in_addr pr_ip4[]; struct in6_addr pr_ip6[]; #endif }; #define XPRISON_VERSION 3 #define PRISON_STATE_INVALID 0 #define PRISON_STATE_ALIVE 1 #define PRISON_STATE_DYING 2 /* * Flags for jail_set and jail_get. */ #define JAIL_CREATE 0x01 /* Create jail if it doesn't exist */ #define JAIL_UPDATE 0x02 /* Update parameters of existing jail */ #define JAIL_ATTACH 0x04 /* Attach to jail upon creation */ #define JAIL_DYING 0x08 /* Allow getting a dying jail */ #define JAIL_SET_MASK 0x0f #define JAIL_GET_MASK 0x08 #define JAIL_SYS_DISABLE 0 #define JAIL_SYS_NEW 1 #define JAIL_SYS_INHERIT 2 #ifndef _KERNEL struct iovec; int jail(struct jail *); int jail_set(struct iovec *, unsigned int, int); int jail_get(struct iovec *, unsigned int, int); int jail_attach(int); int jail_remove(int); #else /* _KERNEL */ #include #include #include #include #include #define JAIL_MAX 999999 #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_PRISON); #endif #endif /* _KERNEL */ #if defined(_KERNEL) || defined(_WANT_PRISON) #include #define HOSTUUIDLEN 64 #define OSRELEASELEN 32 struct racct; struct prison_racct; /* * This structure describes a prison. It is pointed to by all struct * ucreds's of the inmates. pr_ref keeps track of them and is used to * delete the struture when the last inmate is dead. * * Lock key: * (a) allprison_lock * (p) locked by pr_mtx * (c) set only during creation before the structure is shared, no mutex * required to read * (d) set only during destruction of jail, no mutex needed */ struct prison { TAILQ_ENTRY(prison) pr_list; /* (a) all prisons */ int pr_id; /* (c) prison id */ int pr_ref; /* (p) refcount */ int pr_uref; /* (p) user (alive) refcount */ unsigned pr_flags; /* (p) PR_* flags */ LIST_HEAD(, prison) pr_children; /* (a) list of child jails */ LIST_ENTRY(prison) pr_sibling; /* (a) next in parent's list */ struct prison *pr_parent; /* (c) containing jail */ struct mtx pr_mtx; struct task pr_task; /* (d) destroy task */ struct osd pr_osd; /* (p) additional data */ struct cpuset *pr_cpuset; /* (p) cpuset */ struct vnet *pr_vnet; /* (c) network stack */ struct vnode *pr_root; /* (c) vnode to rdir */ int pr_ip4s; /* (p) number of v4 IPs */ int pr_ip6s; /* (p) number of v6 IPs */ struct in_addr *pr_ip4; /* (p) v4 IPs of jail */ struct in6_addr *pr_ip6; /* (p) v6 IPs of jail */ struct prison_racct *pr_prison_racct; /* (c) racct jail proxy */ void *pr_sparep[3]; int pr_childcount; /* (a) number of child jails */ int pr_childmax; /* (p) maximum child jails */ unsigned pr_allow; /* (p) PR_ALLOW_* flags */ int pr_securelevel; /* (p) securelevel */ int pr_enforce_statfs; /* (p) statfs permission */ int pr_devfs_rsnum; /* (p) devfs ruleset */ int pr_spare[3]; int pr_osreldate; /* (c) kern.osreldate value */ unsigned long pr_hostid; /* (p) jail hostid */ char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */ char pr_path[MAXPATHLEN]; /* (c) chroot path */ char pr_hostname[MAXHOSTNAMELEN]; /* (p) jail hostname */ char pr_domainname[MAXHOSTNAMELEN]; /* (p) jail domainname */ char pr_hostuuid[HOSTUUIDLEN]; /* (p) jail hostuuid */ char pr_osrelease[OSRELEASELEN]; /* (c) kern.osrelease value */ }; struct prison_racct { LIST_ENTRY(prison_racct) prr_next; char prr_name[MAXHOSTNAMELEN]; u_int prr_refcount; struct racct *prr_racct; }; #endif /* _KERNEL || _WANT_PRISON */ #ifdef _KERNEL /* Flag bits set via options */ #define PR_PERSIST 0x00000001 /* Can exist without processes */ #define PR_HOST 0x00000002 /* Virtualize hostname et al */ #define PR_IP4_USER 0x00000004 /* Restrict IPv4 addresses */ #define 
PR_IP6_USER 0x00000008 /* Restrict IPv6 addresses */ #define PR_VNET 0x00000010 /* Virtual network stack */ #define PR_IP4_SADDRSEL 0x00000080 /* Do IPv4 src addr sel. or use the */ /* primary jail address. */ #define PR_IP6_SADDRSEL 0x00000100 /* Do IPv6 src addr sel. or use the */ /* primary jail address. */ /* Internal flag bits */ #define PR_REMOVE 0x01000000 /* In process of being removed */ #define PR_IP4 0x02000000 /* IPv4 restricted or disabled */ /* by this jail or an ancestor */ #define PR_IP6 0x04000000 /* IPv6 restricted or disabled */ /* by this jail or an ancestor */ /* Flags for pr_allow */ #define PR_ALLOW_SET_HOSTNAME 0x0001 #define PR_ALLOW_SYSVIPC 0x0002 #define PR_ALLOW_RAW_SOCKETS 0x0004 #define PR_ALLOW_CHFLAGS 0x0008 #define PR_ALLOW_MOUNT 0x0010 #define PR_ALLOW_QUOTAS 0x0020 #define PR_ALLOW_SOCKET_AF 0x0040 #define PR_ALLOW_MOUNT_DEVFS 0x0080 #define PR_ALLOW_MOUNT_NULLFS 0x0100 #define PR_ALLOW_MOUNT_ZFS 0x0200 #define PR_ALLOW_MOUNT_PROCFS 0x0400 #define PR_ALLOW_MOUNT_TMPFS 0x0800 #define PR_ALLOW_MOUNT_FDESCFS 0x1000 #define PR_ALLOW_MOUNT_LINPROCFS 0x2000 #define PR_ALLOW_MOUNT_LINSYSFS 0x4000 #define PR_ALLOW_ALL 0x7fff /* * OSD methods */ #define PR_METHOD_CREATE 0 #define PR_METHOD_GET 1 #define PR_METHOD_SET 2 #define PR_METHOD_CHECK 3 #define PR_METHOD_ATTACH 4 #define PR_MAXMETHOD 5 /* * Lock/unlock a prison. * XXX These exist not so much for general convenience, but to be useable in * the FOREACH_PRISON_DESCENDANT_LOCKED macro which can't handle them in * non-function form as currently defined. */ static __inline void prison_lock(struct prison *pr) { mtx_lock(&pr->pr_mtx); } static __inline void prison_unlock(struct prison *pr) { mtx_unlock(&pr->pr_mtx); } /* Traverse a prison's immediate children. */ #define FOREACH_PRISON_CHILD(ppr, cpr) \ LIST_FOREACH(cpr, &(ppr)->pr_children, pr_sibling) /* * Preorder traversal of all of a prison's descendants. * This ugly loop allows the macro to be followed by a single block * as expected in a looping primitive. */ #define FOREACH_PRISON_DESCENDANT(ppr, cpr, descend) \ for ((cpr) = (ppr), (descend) = 1; \ ((cpr) = (((descend) && !LIST_EMPTY(&(cpr)->pr_children)) \ ? LIST_FIRST(&(cpr)->pr_children) \ : ((cpr) == (ppr) \ ? NULL \ : (((descend) = LIST_NEXT(cpr, pr_sibling) != NULL) \ ? LIST_NEXT(cpr, pr_sibling) \ : (cpr)->pr_parent))));) \ if (!(descend)) \ ; \ else /* * As above, but lock descendants on the way down and unlock on the way up. */ #define FOREACH_PRISON_DESCENDANT_LOCKED(ppr, cpr, descend) \ for ((cpr) = (ppr), (descend) = 1; \ ((cpr) = (((descend) && !LIST_EMPTY(&(cpr)->pr_children)) \ ? LIST_FIRST(&(cpr)->pr_children) \ : ((cpr) == (ppr) \ ? NULL \ : ((prison_unlock(cpr), \ (descend) = LIST_NEXT(cpr, pr_sibling) != NULL) \ ? LIST_NEXT(cpr, pr_sibling) \ : (cpr)->pr_parent))));) \ if ((descend) ? (prison_lock(cpr), 0) : 1) \ ; \ else /* * As above, but also keep track of the level descended to. */ #define FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(ppr, cpr, descend, level)\ for ((cpr) = (ppr), (descend) = 1, (level) = 0; \ ((cpr) = (((descend) && !LIST_EMPTY(&(cpr)->pr_children)) \ ? (level++, LIST_FIRST(&(cpr)->pr_children)) \ : ((cpr) == (ppr) \ ? NULL \ : ((prison_unlock(cpr), \ (descend) = LIST_NEXT(cpr, pr_sibling) != NULL) \ ? LIST_NEXT(cpr, pr_sibling) \ : (level--, (cpr)->pr_parent)))));) \ if ((descend) ? (prison_lock(cpr), 0) : 1) \ ; \ else /* * Attributes of the physical system, and the root of the jail tree. 
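* A sketch of walking the whole jail tree from this root with the
* traversal macros above (assuming the caller holds allprison_lock,
* per the (a) annotations):
*
*	FOREACH_PRISON_DESCENDANT_LOCKED(&prison0, cpr, descend) {
*		... cpr is locked at this point; clearing 'descend'
*		    skips cpr's subtree on the next iteration ...
*	}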
*/ extern struct prison prison0; TAILQ_HEAD(prisonlist, prison); extern struct prisonlist allprison; extern struct sx allprison_lock; /* * Sysctls to describe jail parameters. */ SYSCTL_DECL(_security_jail_param); #define SYSCTL_JAIL_PARAM(module, param, type, fmt, descr) \ SYSCTL_PROC(_security_jail_param ## module, OID_AUTO, param, \ (type) | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_param, fmt, descr) #define SYSCTL_JAIL_PARAM_STRING(module, param, access, len, descr) \ SYSCTL_PROC(_security_jail_param ## module, OID_AUTO, param, \ CTLTYPE_STRING | CTLFLAG_MPSAFE | (access), NULL, len, \ sysctl_jail_param, "A", descr) #define SYSCTL_JAIL_PARAM_STRUCT(module, param, access, len, fmt, descr)\ SYSCTL_PROC(_security_jail_param ## module, OID_AUTO, param, \ CTLTYPE_STRUCT | CTLFLAG_MPSAFE | (access), NULL, len, \ sysctl_jail_param, fmt, descr) #define SYSCTL_JAIL_PARAM_NODE(module, descr) \ SYSCTL_NODE(_security_jail_param, OID_AUTO, module, 0, 0, descr) #define SYSCTL_JAIL_PARAM_SUBNODE(parent, module, descr) \ SYSCTL_NODE(_security_jail_param_##parent, OID_AUTO, module, 0, 0, descr) #define SYSCTL_JAIL_PARAM_SYS_NODE(module, access, descr) \ SYSCTL_JAIL_PARAM_NODE(module, descr); \ SYSCTL_JAIL_PARAM(_##module, , CTLTYPE_INT | (access), "E,jailsys", \ descr) /* * Kernel support functions for jail(). */ struct ucred; struct mount; struct sockaddr; struct statfs; int jailed(struct ucred *cred); int jailed_without_vnet(struct ucred *); void getcredhostname(struct ucred *, char *, size_t); void getcreddomainname(struct ucred *, char *, size_t); void getcredhostuuid(struct ucred *, char *, size_t); void getcredhostid(struct ucred *, unsigned long *); void prison0_init(void); int prison_allow(struct ucred *, unsigned); int prison_check(struct ucred *cred1, struct ucred *cred2); int prison_owns_vnet(struct ucred *); int prison_canseemount(struct ucred *cred, struct mount *mp); void prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp); struct prison *prison_find(int prid); struct prison *prison_find_child(struct prison *, int); struct prison *prison_find_name(struct prison *, const char *); int prison_flag(struct ucred *, unsigned); void prison_free(struct prison *pr); void prison_free_locked(struct prison *pr); void prison_hold(struct prison *pr); void prison_hold_locked(struct prison *pr); void prison_proc_hold(struct prison *); void prison_proc_free(struct prison *); int prison_ischild(struct prison *, struct prison *); int prison_equal_ip4(struct prison *, struct prison *); int prison_get_ip4(struct ucred *cred, struct in_addr *ia); int prison_local_ip4(struct ucred *cred, struct in_addr *ia); int prison_remote_ip4(struct ucred *cred, struct in_addr *ia); int prison_check_ip4(const struct ucred *, const struct in_addr *); int prison_saddrsel_ip4(struct ucred *, struct in_addr *); #ifdef INET6 int prison_equal_ip6(struct prison *, struct prison *); int prison_get_ip6(struct ucred *, struct in6_addr *); int prison_local_ip6(struct ucred *, struct in6_addr *, int); int prison_remote_ip6(struct ucred *, struct in6_addr *); int prison_check_ip6(struct ucred *, struct in6_addr *); int prison_saddrsel_ip6(struct ucred *, struct in6_addr *); #endif int prison_check_af(struct ucred *cred, int af); int prison_if(struct ucred *cred, struct sockaddr *sa); char *prison_name(struct prison *, struct prison *); int prison_priv_check(struct ucred *cred, int priv); int sysctl_jail_param(SYSCTL_HANDLER_ARGS); void prison_racct_foreach(void (*callback)(struct racct *racct, - void *arg2, void 
*arg3), void *arg2, void *arg3); + void *arg2, void *arg3), void (*pre)(void), void (*post)(void), + void *arg2, void *arg3); struct prison_racct *prison_racct_find(const char *name); void prison_racct_hold(struct prison_racct *prr); void prison_racct_free(struct prison_racct *prr); #endif /* _KERNEL */ #endif /* !_SYS_JAIL_H_ */ Index: projects/powernv/sys/loginclass.h =================================================================== --- projects/powernv/sys/loginclass.h (revision 290990) +++ projects/powernv/sys/loginclass.h (revision 290991) @@ -1,53 +1,54 @@ /*- * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_LOGINCLASS_H_ #define _SYS_LOGINCLASS_H_ struct racct; /* * Exactly one of these structures exists per login class. */ struct loginclass { LIST_ENTRY(loginclass) lc_next; char lc_name[MAXLOGNAME]; u_int lc_refcount; struct racct *lc_racct; }; void loginclass_hold(struct loginclass *lc); void loginclass_free(struct loginclass *lc); struct loginclass *loginclass_find(const char *name); void loginclass_racct_foreach(void (*callback)(struct racct *racct, - void *arg2, void *arg3), void *arg2, void *arg3); + void *arg2, void *arg3), void (*pre)(void), void (*post)(void), + void *arg2, void *arg3); #endif /* !_SYS_LOGINCLASS_H_ */ Index: projects/powernv/sys/pmc.h =================================================================== --- projects/powernv/sys/pmc.h (revision 290990) +++ projects/powernv/sys/pmc.h (revision 290991) @@ -1,1158 +1,1159 @@ /*- * Copyright (c) 2003-2008, Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_PMC_H_ #define _SYS_PMC_H_ #include #include #include #define PMC_MODULE_NAME "hwpmc" #define PMC_NAME_MAX 64 /* HW counter name size */ #define PMC_CLASS_MAX 8 /* max #classes of PMCs per-system */ /* * Kernel<->userland API version number [MMmmpppp] * * Major numbers are to be incremented when an incompatible change to * the ABI occurs that older clients will not be able to handle. * * Minor numbers are incremented when a backwards compatible change * occurs that allows older correct programs to run unchanged. For * example, when support for a new PMC type is added. * * The patch version is incremented for every bug fix. */ #define PMC_VERSION_MAJOR 0x03 #define PMC_VERSION_MINOR 0x01 #define PMC_VERSION_PATCH 0x0000 #define PMC_VERSION (PMC_VERSION_MAJOR << 24 | \ PMC_VERSION_MINOR << 16 | PMC_VERSION_PATCH) /* * Kinds of CPUs known. * * We keep track of CPU variants that need to be distinguished in * some way for PMC operations. CPU names are grouped by manufacturer * and numbered sparsely in order to minimize changes to the ABI involved * when new CPUs are added. 
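* Concretely, the table below keeps AMD at 0x00-, Intel at 0x80-,
* XScale at 0x100, MIPS at 0x200-, PowerPC at 0x300- and ARMv7/ARMv8
* at 0x500-/0x600-, leaving gaps for future parts in each family.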
*/ #define __PMC_CPUS() \ __PMC_CPU(AMD_K7, 0x00, "AMD K7") \ __PMC_CPU(AMD_K8, 0x01, "AMD K8") \ __PMC_CPU(INTEL_P5, 0x80, "Intel Pentium") \ __PMC_CPU(INTEL_P6, 0x81, "Intel Pentium Pro") \ __PMC_CPU(INTEL_CL, 0x82, "Intel Celeron") \ __PMC_CPU(INTEL_PII, 0x83, "Intel Pentium II") \ __PMC_CPU(INTEL_PIII, 0x84, "Intel Pentium III") \ __PMC_CPU(INTEL_PM, 0x85, "Intel Pentium M") \ __PMC_CPU(INTEL_PIV, 0x86, "Intel Pentium IV") \ __PMC_CPU(INTEL_CORE, 0x87, "Intel Core Solo/Duo") \ __PMC_CPU(INTEL_CORE2, 0x88, "Intel Core2") \ __PMC_CPU(INTEL_CORE2EXTREME, 0x89, "Intel Core2 Extreme") \ __PMC_CPU(INTEL_ATOM, 0x8A, "Intel Atom") \ __PMC_CPU(INTEL_COREI7, 0x8B, "Intel Core i7") \ __PMC_CPU(INTEL_WESTMERE, 0x8C, "Intel Westmere") \ __PMC_CPU(INTEL_SANDYBRIDGE, 0x8D, "Intel Sandy Bridge") \ __PMC_CPU(INTEL_IVYBRIDGE, 0x8E, "Intel Ivy Bridge") \ __PMC_CPU(INTEL_SANDYBRIDGE_XEON, 0x8F, "Intel Sandy Bridge Xeon") \ __PMC_CPU(INTEL_IVYBRIDGE_XEON, 0x90, "Intel Ivy Bridge Xeon") \ __PMC_CPU(INTEL_HASWELL, 0x91, "Intel Haswell") \ __PMC_CPU(INTEL_ATOM_SILVERMONT, 0x92, "Intel Atom Silvermont") \ __PMC_CPU(INTEL_NEHALEM_EX, 0x93, "Intel Nehalem Xeon 7500") \ __PMC_CPU(INTEL_WESTMERE_EX, 0x94, "Intel Westmere Xeon E7") \ __PMC_CPU(INTEL_HASWELL_XEON, 0x95, "Intel Haswell Xeon E5 v3") \ __PMC_CPU(INTEL_BROADWELL, 0x96, "Intel Broadwell") \ __PMC_CPU(INTEL_XSCALE, 0x100, "Intel XScale") \ __PMC_CPU(MIPS_24K, 0x200, "MIPS 24K") \ __PMC_CPU(MIPS_OCTEON, 0x201, "Cavium Octeon") \ __PMC_CPU(MIPS_74K, 0x202, "MIPS 74K") \ __PMC_CPU(PPC_7450, 0x300, "PowerPC MPC7450") \ __PMC_CPU(PPC_E500, 0x340, "PowerPC e500 Core") \ __PMC_CPU(PPC_MPC85XX, 0x340, "Freescale PowerPC MPC85XX") \ __PMC_CPU(PPC_970, 0x380, "IBM PowerPC 970") \ __PMC_CPU(GENERIC, 0x400, "Generic") \ __PMC_CPU(ARMV7_CORTEX_A5, 0x500, "ARMv7 Cortex A5") \ __PMC_CPU(ARMV7_CORTEX_A7, 0x501, "ARMv7 Cortex A7") \ __PMC_CPU(ARMV7_CORTEX_A8, 0x502, "ARMv7 Cortex A8") \ __PMC_CPU(ARMV7_CORTEX_A9, 0x503, "ARMv7 Cortex A9") \ __PMC_CPU(ARMV7_CORTEX_A15, 0x504, "ARMv7 Cortex A15") \ __PMC_CPU(ARMV7_CORTEX_A17, 0x505, "ARMv7 Cortex A17") \ __PMC_CPU(ARMV8_CORTEX_A53, 0x600, "ARMv8 Cortex A53") \ __PMC_CPU(ARMV8_CORTEX_A57, 0x601, "ARMv8 Cortex A57") enum pmc_cputype { #undef __PMC_CPU #define __PMC_CPU(S,V,D) PMC_CPU_##S = V, __PMC_CPUS() }; #define PMC_CPU_FIRST PMC_CPU_AMD_K7 #define PMC_CPU_LAST PMC_CPU_GENERIC /* * Classes of PMCs */ #define __PMC_CLASSES() \ __PMC_CLASS(TSC, 0x00, "CPU Timestamp counter") \ __PMC_CLASS(K7, 0x01, "AMD K7 performance counters") \ __PMC_CLASS(K8, 0x02, "AMD K8 performance counters") \ __PMC_CLASS(P5, 0x03, "Intel Pentium counters") \ __PMC_CLASS(P6, 0x04, "Intel Pentium Pro counters") \ __PMC_CLASS(P4, 0x05, "Intel Pentium-IV counters") \ __PMC_CLASS(IAF, 0x06, "Intel Core2/Atom, fixed function") \ __PMC_CLASS(IAP, 0x07, "Intel Core...Atom, programmable") \ __PMC_CLASS(UCF, 0x08, "Intel Uncore fixed function") \ __PMC_CLASS(UCP, 0x09, "Intel Uncore programmable") \ __PMC_CLASS(XSCALE, 0x0A, "Intel XScale counters") \ __PMC_CLASS(MIPS24K, 0x0B, "MIPS 24K") \ __PMC_CLASS(OCTEON, 0x0C, "Cavium Octeon") \ __PMC_CLASS(PPC7450, 0x0D, "Motorola MPC7450 class") \ __PMC_CLASS(PPC970, 0x0E, "IBM PowerPC 970 class") \ __PMC_CLASS(SOFT, 0x0F, "Software events") \ __PMC_CLASS(ARMV7, 0x10, "ARMv7") \ __PMC_CLASS(ARMV8, 0x11, "ARMv8") \ __PMC_CLASS(MIPS74K, 0x12, "MIPS 74K") \ __PMC_CLASS(E500, 0x13, "Freescale e500 class") enum pmc_class { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) PMC_CLASS_##S = V, __PMC_CLASSES() }; #define 
PMC_CLASS_FIRST PMC_CLASS_TSC #define PMC_CLASS_LAST PMC_CLASS_E500 /* * A PMC can be in the following states: * * Hardware states: * DISABLED -- administratively prohibited from being used. * FREE -- HW available for use * Software states: * ALLOCATED -- allocated * STOPPED -- allocated, but not counting events * RUNNING -- allocated, and in operation; 'pm_runcount' * holds the number of CPUs using this PMC at * a given instant * DELETED -- being destroyed */ #define __PMC_HWSTATES() \ __PMC_STATE(DISABLED) \ __PMC_STATE(FREE) #define __PMC_SWSTATES() \ __PMC_STATE(ALLOCATED) \ __PMC_STATE(STOPPED) \ __PMC_STATE(RUNNING) \ __PMC_STATE(DELETED) #define __PMC_STATES() \ __PMC_HWSTATES() \ __PMC_SWSTATES() enum pmc_state { #undef __PMC_STATE #define __PMC_STATE(S) PMC_STATE_##S, __PMC_STATES() __PMC_STATE(MAX) }; #define PMC_STATE_FIRST PMC_STATE_DISABLED #define PMC_STATE_LAST PMC_STATE_DELETED /* * An allocated PMC may be used as a 'global' counter or as a * 'thread-private' one. Each such mode of use can be in either * statistical sampling mode or in counting mode. Thus a PMC in use * is in one of four modes: * * SS i.e., SYSTEM STATISTICAL -- system-wide statistical profiling * SC i.e., SYSTEM COUNTER -- system-wide counting mode * TS i.e., THREAD STATISTICAL -- thread virtual, statistical profiling * TC i.e., THREAD COUNTER -- thread virtual, counting mode * * Statistical profiling modes rely on the PMC periodically delivering * an interrupt to the CPU (when the configured number of events have * been measured), so the PMC must have the ability to generate * interrupts. * * In counting modes, the PMC counts its configured events, with the * value of the PMC being read whenever needed by its owner process. * * The thread specific modes "virtualize" the PMCs -- the PMCs appear * to be thread private and count events only when the profiled thread * actually executes on the CPU. * * The system-wide "global" modes keep the PMCs running all the time * and are used to measure the behaviour of the whole system.
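* For example, counting one process's instructions retired calls for
* a TC-mode PMC attached to that process, while a periodic profile
* of everything executing on all CPUs calls for an SS-mode PMC.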
*/ #define __PMC_MODES() \ __PMC_MODE(SS, 0) \ __PMC_MODE(SC, 1) \ __PMC_MODE(TS, 2) \ __PMC_MODE(TC, 3) enum pmc_mode { #undef __PMC_MODE #define __PMC_MODE(M,N) PMC_MODE_##M = N, __PMC_MODES() }; #define PMC_MODE_FIRST PMC_MODE_SS #define PMC_MODE_LAST PMC_MODE_TC #define PMC_IS_COUNTING_MODE(mode) \ ((mode) == PMC_MODE_SC || (mode) == PMC_MODE_TC) #define PMC_IS_SYSTEM_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC) #define PMC_IS_SAMPLING_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_TS) #define PMC_IS_VIRTUAL_MODE(mode) \ ((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC) /* * PMC row disposition */ #define __PMC_DISPOSITIONS(N) \ __PMC_DISP(STANDALONE) /* global/disabled counters */ \ __PMC_DISP(FREE) /* free/available */ \ __PMC_DISP(THREAD) /* thread-virtual PMCs */ \ __PMC_DISP(UNKNOWN) /* sentinel */ enum pmc_disp { #undef __PMC_DISP #define __PMC_DISP(D) PMC_DISP_##D , __PMC_DISPOSITIONS() }; #define PMC_DISP_FIRST PMC_DISP_STANDALONE #define PMC_DISP_LAST PMC_DISP_THREAD /* * Counter capabilities * * __PMC_CAPS(NAME, VALUE, DESCRIPTION) */ #define __PMC_CAPS() \ __PMC_CAP(INTERRUPT, 0, "generate interrupts") \ __PMC_CAP(USER, 1, "count user-mode events") \ __PMC_CAP(SYSTEM, 2, "count system-mode events") \ __PMC_CAP(EDGE, 3, "do edge detection of events") \ __PMC_CAP(THRESHOLD, 4, "ignore events below a threshold") \ __PMC_CAP(READ, 5, "read PMC counter") \ __PMC_CAP(WRITE, 6, "reprogram PMC counter") \ __PMC_CAP(INVERT, 7, "invert comparision sense") \ __PMC_CAP(QUALIFIER, 8, "further qualify monitored events") \ __PMC_CAP(PRECISE, 9, "perform precise sampling") \ __PMC_CAP(TAGGING, 10, "tag upstream events") \ __PMC_CAP(CASCADE, 11, "cascade counters") enum pmc_caps { #undef __PMC_CAP #define __PMC_CAP(NAME, VALUE, DESCR) PMC_CAP_##NAME = (1 << VALUE) , __PMC_CAPS() }; #define PMC_CAP_FIRST PMC_CAP_INTERRUPT #define PMC_CAP_LAST PMC_CAP_CASCADE /* * PMC Event Numbers * * These are generated from the definitions in "dev/hwpmc/pmc_events.h". */ enum pmc_event { #undef __PMC_EV #undef __PMC_EV_BLOCK #define __PMC_EV_BLOCK(C,V) PMC_EV_ ## C ## __BLOCK_START = (V) - 1 , #define __PMC_EV(C,N) PMC_EV_ ## C ## _ ## N , __PMC_EVENTS() }; /* * PMC SYSCALL INTERFACE */ /* * "PMC_OPS" -- these are the commands recognized by the kernel * module, and are used when performing a system call from userland. */ #define __PMC_OPS() \ __PMC_OP(CONFIGURELOG, "Set log file") \ __PMC_OP(FLUSHLOG, "Flush log file") \ __PMC_OP(GETCPUINFO, "Get system CPU information") \ __PMC_OP(GETDRIVERSTATS, "Get driver statistics") \ __PMC_OP(GETMODULEVERSION, "Get module version") \ __PMC_OP(GETPMCINFO, "Get per-cpu PMC information") \ __PMC_OP(PMCADMIN, "Set PMC state") \ __PMC_OP(PMCALLOCATE, "Allocate and configure a PMC") \ __PMC_OP(PMCATTACH, "Attach a PMC to a process") \ __PMC_OP(PMCDETACH, "Detach a PMC from a process") \ __PMC_OP(PMCGETMSR, "Get a PMC's hardware address") \ __PMC_OP(PMCRELEASE, "Release a PMC") \ __PMC_OP(PMCRW, "Read/Set a PMC") \ __PMC_OP(PMCSETCOUNT, "Set initial count/sampling rate") \ __PMC_OP(PMCSTART, "Start a PMC") \ __PMC_OP(PMCSTOP, "Stop a PMC") \ __PMC_OP(WRITELOG, "Write a cookie to the log file") \ __PMC_OP(CLOSELOG, "Close log file") \ __PMC_OP(GETDYNEVENTINFO, "Get dynamic events list") enum pmc_ops { #undef __PMC_OP #define __PMC_OP(N, D) PMC_OP_##N, __PMC_OPS() }; /* * Flags used in operations on PMCs. 
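* For instance, an OP PMCRW carrying both PMC_F_NEWVALUE and
* PMC_F_OLDVALUE writes pm_value into the PMC and hands the previous
* reading back in the same field (see struct pmc_op_pmcrw below).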
*/ #define PMC_F_FORCE 0x00000001 /*OP ADMIN force operation */ #define PMC_F_DESCENDANTS 0x00000002 /*OP ALLOCATE track descendants */ #define PMC_F_LOG_PROCCSW 0x00000004 /*OP ALLOCATE track ctx switches */ #define PMC_F_LOG_PROCEXIT 0x00000008 /*OP ALLOCATE log proc exits */ #define PMC_F_NEWVALUE 0x00000010 /*OP RW write new value */ #define PMC_F_OLDVALUE 0x00000020 /*OP RW get old value */ #define PMC_F_KGMON 0x00000040 /*OP ALLOCATE kgmon(8) profiling */ /* V2 API */ #define PMC_F_CALLCHAIN 0x00000080 /*OP ALLOCATE capture callchains */ /* internal flags */ #define PMC_F_ATTACHED_TO_OWNER 0x00010000 /*attached to owner*/ #define PMC_F_NEEDS_LOGFILE 0x00020000 /*needs log file */ #define PMC_F_ATTACH_DONE 0x00040000 /*attached at least once */ #define PMC_CALLCHAIN_DEPTH_MAX 128 #define PMC_CC_F_USERSPACE 0x01 /*userspace callchain*/ /* * Cookies used to denote allocated PMCs, and the values of PMCs. */ typedef uint32_t pmc_id_t; typedef uint64_t pmc_value_t; #define PMC_ID_INVALID (~ (pmc_id_t) 0) /* * PMC IDs have the following format: * * +--------+----------+-----------+-----------+ * | CPU | PMC MODE | PMC CLASS | ROW INDEX | * +--------+----------+-----------+-----------+ * * where each field is 8 bits wide. Field 'CPU' is set to the * requested CPU for system-wide PMCs or PMC_CPU_ANY for process-mode * PMCs. Field 'PMC MODE' is the allocated PMC mode. Field 'PMC * CLASS' is the class of the PMC. Field 'ROW INDEX' is the row index * for the PMC. * * The 'ROW INDEX' ranges over 0..NWPMCS where NHWPMCS is the total * number of hardware PMCs on this cpu. */ #define PMC_ID_TO_ROWINDEX(ID) ((ID) & 0xFF) #define PMC_ID_TO_CLASS(ID) (((ID) & 0xFF00) >> 8) #define PMC_ID_TO_MODE(ID) (((ID) & 0xFF0000) >> 16) #define PMC_ID_TO_CPU(ID) (((ID) & 0xFF000000) >> 24) #define PMC_ID_MAKE_ID(CPU,MODE,CLASS,ROWINDEX) \ ((((CPU) & 0xFF) << 24) | (((MODE) & 0xFF) << 16) | \ (((CLASS) & 0xFF) << 8) | ((ROWINDEX) & 0xFF)) /* * Data structures for system calls supported by the pmc driver. */ /* * OP PMCALLOCATE * * Allocate a PMC on the named CPU. */ #define PMC_CPU_ANY ~0 struct pmc_op_pmcallocate { uint32_t pm_caps; /* PMC_CAP_* */ uint32_t pm_cpu; /* CPU number or PMC_CPU_ANY */ enum pmc_class pm_class; /* class of PMC desired */ enum pmc_event pm_ev; /* [enum pmc_event] desired */ uint32_t pm_flags; /* additional modifiers PMC_F_* */ enum pmc_mode pm_mode; /* desired mode */ pmc_id_t pm_pmcid; /* [return] process pmc id */ union pmc_md_op_pmcallocate pm_md; /* MD layer extensions */ }; /* * OP PMCADMIN * * Set the administrative state (i.e., whether enabled or disabled) of * a PMC 'pm_pmc' on CPU 'pm_cpu'. Note that 'pm_pmc' specifies an * absolute PMC number and need not have been first allocated by the * calling process. */ struct pmc_op_pmcadmin { int pm_cpu; /* CPU# */ uint32_t pm_flags; /* flags */ int pm_pmc; /* PMC# */ enum pmc_state pm_state; /* desired state */ }; /* * OP PMCATTACH / OP PMCDETACH * * Attach/detach a PMC and a process. */ struct pmc_op_pmcattach { pmc_id_t pm_pmc; /* PMC to attach to */ pid_t pm_pid; /* target process */ }; /* * OP PMCSETCOUNT * * Set the sampling rate (i.e., the reload count) for statistical counters. * 'pm_pmcid' need to have been previously allocated using PMCALLOCATE. */ struct pmc_op_pmcsetcount { pmc_value_t pm_count; /* initial/sample count */ pmc_id_t pm_pmcid; /* PMC id to set */ }; /* * OP PMCRW * * Read the value of a PMC named by 'pm_pmcid'. 'pm_pmcid' needs * to have been previously allocated using PMCALLOCATE. 
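* As an illustration of the ID encoding above, a (hypothetical)
* pm_pmcid of 0x00020304 decodes via the PMC_ID_TO_*() macros as
* CPU 0, mode 2 (PMC_MODE_TS), class 3 and row index 4.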
*/ struct pmc_op_pmcrw { uint32_t pm_flags; /* PMC_F_{OLD,NEW}VALUE*/ pmc_id_t pm_pmcid; /* pmc id */ pmc_value_t pm_value; /* new&returned value */ }; /* * OP GETPMCINFO * * retrieve PMC state for a named CPU. The caller is expected to * allocate 'npmc' * 'struct pmc_info' bytes of space for the return * values. */ struct pmc_info { char pm_name[PMC_NAME_MAX]; /* pmc name */ enum pmc_class pm_class; /* enum pmc_class */ int pm_enabled; /* whether enabled */ enum pmc_disp pm_rowdisp; /* FREE, THREAD or STANDLONE */ pid_t pm_ownerpid; /* owner, or -1 */ enum pmc_mode pm_mode; /* current mode [enum pmc_mode] */ enum pmc_event pm_event; /* current event */ uint32_t pm_flags; /* current flags */ pmc_value_t pm_reloadcount; /* sampling counters only */ }; struct pmc_op_getpmcinfo { int32_t pm_cpu; /* 0 <= cpu < mp_maxid */ struct pmc_info pm_pmcs[]; /* space for 'npmc' structures */ }; /* * OP GETCPUINFO * * Retrieve system CPU information. */ struct pmc_classinfo { enum pmc_class pm_class; /* class id */ uint32_t pm_caps; /* counter capabilities */ uint32_t pm_width; /* width of the PMC */ uint32_t pm_num; /* number of PMCs in class */ }; struct pmc_op_getcpuinfo { enum pmc_cputype pm_cputype; /* what kind of CPU */ uint32_t pm_ncpu; /* max CPU number */ uint32_t pm_npmc; /* #PMCs per CPU */ uint32_t pm_nclass; /* #classes of PMCs */ struct pmc_classinfo pm_classes[PMC_CLASS_MAX]; }; /* * OP CONFIGURELOG * * Configure a log file for writing system-wide statistics to. */ struct pmc_op_configurelog { int pm_flags; int pm_logfd; /* logfile fd (or -1) */ }; /* * OP GETDRIVERSTATS * * Retrieve pmc(4) driver-wide statistics. */ struct pmc_op_getdriverstats { - int pm_intr_ignored; /* #interrupts ignored */ - int pm_intr_processed; /* #interrupts processed */ - int pm_intr_bufferfull; /* #interrupts with ENOSPC */ - int pm_syscalls; /* #syscalls */ - int pm_syscall_errors; /* #syscalls with errors */ - int pm_buffer_requests; /* #buffer requests */ - int pm_buffer_requests_failed; /* #failed buffer requests */ - int pm_log_sweeps; /* #sample buffer processing passes */ + unsigned int pm_intr_ignored; /* #interrupts ignored */ + unsigned int pm_intr_processed; /* #interrupts processed */ + unsigned int pm_intr_bufferfull; /* #interrupts with ENOSPC */ + unsigned int pm_syscalls; /* #syscalls */ + unsigned int pm_syscall_errors; /* #syscalls with errors */ + unsigned int pm_buffer_requests; /* #buffer requests */ + unsigned int pm_buffer_requests_failed; /* #failed buffer requests */ + unsigned int pm_log_sweeps; /* #sample buffer processing + passes */ }; /* * OP RELEASE / OP START / OP STOP * * Simple operations on a PMC id. */ struct pmc_op_simple { pmc_id_t pm_pmcid; }; /* * OP WRITELOG * * Flush the current log buffer and write 4 bytes of user data to it. */ struct pmc_op_writelog { uint32_t pm_userdata; }; /* * OP GETMSR * * Retrieve the machine specific address assoicated with the allocated * PMC. This number can be used subsequently with a read-performance-counter * instruction. */ struct pmc_op_getmsr { uint32_t pm_msr; /* machine specific address */ pmc_id_t pm_pmcid; /* allocated pmc id */ }; /* * OP GETDYNEVENTINFO * * Retrieve a PMC dynamic class events list. 
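* The caller names the class of interest in pm_class; the kernel
* fills in pm_nevent and that many pm_events[] entries, the array
* being sized for the worst case (PMC_EV_DYN_COUNT).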
*/ struct pmc_dyn_event_descr { char pm_ev_name[PMC_NAME_MAX]; enum pmc_event pm_ev_code; }; struct pmc_op_getdyneventinfo { enum pmc_class pm_class; unsigned int pm_nevent; struct pmc_dyn_event_descr pm_events[PMC_EV_DYN_COUNT]; }; #ifdef _KERNEL #include #include #include #include #define PMC_HASH_SIZE 1024 #define PMC_MTXPOOL_SIZE 2048 #define PMC_LOG_BUFFER_SIZE 4 #define PMC_NLOGBUFFERS 1024 #define PMC_NSAMPLES 1024 #define PMC_CALLCHAIN_DEPTH 32 #define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "." /* * Locking keys * * (b) - pmc_bufferlist_mtx (spin lock) * (k) - pmc_kthread_mtx (sleep lock) * (o) - po->po_mtx (spin lock) */ /* * PMC commands */ struct pmc_syscall_args { register_t pmop_code; /* one of PMC_OP_* */ void *pmop_data; /* syscall parameter */ }; /* * Interface to processor specific stuff */ /* * struct pmc_descr * * Machine independent (i.e., the common parts) of a human readable * PMC description. */ struct pmc_descr { char pd_name[PMC_NAME_MAX]; /* name */ uint32_t pd_caps; /* capabilities */ enum pmc_class pd_class; /* class of the PMC */ uint32_t pd_width; /* width in bits */ }; /* * struct pmc_target * * This structure records all the target processes associated with a * PMC. */ struct pmc_target { LIST_ENTRY(pmc_target) pt_next; struct pmc_process *pt_process; /* target descriptor */ }; /* * struct pmc * * Describes each allocated PMC. * * Each PMC has precisely one owner, namely the process that allocated * the PMC. * * A PMC may be attached to multiple target processes. The * 'pm_targets' field links all the target processes being monitored * by this PMC. * * The 'pm_savedvalue' field is protected by a mutex. * * On a multi-cpu machine, multiple target threads associated with a * process-virtual PMC could be concurrently executing on different * CPUs. The 'pm_runcount' field is atomically incremented every time * the PMC gets scheduled on a CPU and atomically decremented when it * gets descheduled. Deletion of a PMC is only permitted when this * field is '0'. * */ struct pmc { LIST_HEAD(,pmc_target) pm_targets; /* list of target processes */ LIST_ENTRY(pmc) pm_next; /* owner's list */ /* * System-wide PMCs are allocated on a CPU and are not moved * around. For system-wide PMCs we record the CPU the PMC was * allocated on in the 'CPU' field of the pmc ID. * * Virtual PMCs run on whichever CPU is currently executing * their targets' threads. For these PMCs we need to save * their current PMC counter values when they are taken off * CPU. */ union { pmc_value_t pm_savedvalue; /* Virtual PMCS */ } pm_gv; /* * For sampling mode PMCs, we keep track of the PMC's "reload * count", which is the counter value to be loaded in when * arming the PMC for the next counting session. For counting * modes on PMCs that are read-only (e.g., the x86 TSC), we * keep track of the initial value at the start of * counting-mode operation. */ union { pmc_value_t pm_reloadcount; /* sampling PMC modes */ pmc_value_t pm_initial; /* counting PMC modes */ } pm_sc; volatile cpuset_t pm_stalled; /* marks stalled sampling PMCs */ volatile cpuset_t pm_cpustate; /* CPUs where PMC should be active */ uint32_t pm_caps; /* PMC capabilities */ enum pmc_event pm_event; /* event being measured */ uint32_t pm_flags; /* additional flags PMC_F_... */ struct pmc_owner *pm_owner; /* owner thread state */ int pm_runcount; /* #cpus currently on */ enum pmc_state pm_state; /* current PMC state */ /* * The PMC ID field encodes the row-index for the PMC, its * mode, class and the CPU# associated with the PMC.
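* The PMC_TO_MODE()/PMC_TO_CLASS()/PMC_TO_ROWINDEX()/PMC_TO_CPU()
* accessors defined below unpack these fields so callers never
* need to open-code the bit layout.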
*/ pmc_id_t pm_id; /* allocated PMC id */ /* md extensions */ union pmc_md_pmc pm_md; }; /* * Accessor macros for 'struct pmc' */ #define PMC_TO_MODE(P) PMC_ID_TO_MODE((P)->pm_id) #define PMC_TO_CLASS(P) PMC_ID_TO_CLASS((P)->pm_id) #define PMC_TO_ROWINDEX(P) PMC_ID_TO_ROWINDEX((P)->pm_id) #define PMC_TO_CPU(P) PMC_ID_TO_CPU((P)->pm_id) /* * struct pmc_process * * Record a 'target' process being profiled. * * The target process being profiled could be different from the owner * process which allocated the PMCs. Each target process descriptor * is associated with NHWPMC 'struct pmc *' pointers. Each PMC at a * given hardware row-index 'n' will use slot 'n' of the 'pp_pmcs[]' * array. The size of this structure is thus PMC architecture * dependent. * */ struct pmc_targetstate { struct pmc *pp_pmc; /* target PMC */ pmc_value_t pp_pmcval; /* per-process value */ }; struct pmc_process { LIST_ENTRY(pmc_process) pp_next; /* hash chain */ int pp_refcnt; /* reference count */ uint32_t pp_flags; /* flags PMC_PP_* */ struct proc *pp_proc; /* target thread */ struct pmc_targetstate pp_pmcs[]; /* NHWPMCs */ }; #define PMC_PP_ENABLE_MSR_ACCESS 0x00000001 /* * struct pmc_owner * * We associate a PMC with an 'owner' process. * * A process can be associated with 0..NCPUS*NHWPMC PMCs during its * lifetime, where NCPUS is the numbers of CPUS in the system and * NHWPMC is the number of hardware PMCs per CPU. These are * maintained in the list headed by the 'po_pmcs' to save on space. * */ struct pmc_owner { LIST_ENTRY(pmc_owner) po_next; /* hash chain */ LIST_ENTRY(pmc_owner) po_ssnext; /* list of SS PMC owners */ LIST_HEAD(, pmc) po_pmcs; /* owned PMC list */ TAILQ_HEAD(, pmclog_buffer) po_logbuffers; /* (o) logbuffer list */ struct mtx po_mtx; /* spin lock for (o) */ struct proc *po_owner; /* owner proc */ uint32_t po_flags; /* (k) flags PMC_PO_* */ struct proc *po_kthread; /* (k) helper kthread */ struct pmclog_buffer *po_curbuf; /* current log buffer */ struct file *po_file; /* file reference */ int po_error; /* recorded error */ short po_sscount; /* # SS PMCs owned */ short po_logprocmaps; /* global mappings done */ }; #define PMC_PO_OWNS_LOGFILE 0x00000001 /* has a log file */ #define PMC_PO_SHUTDOWN 0x00000010 /* in the process of shutdown */ #define PMC_PO_INITIAL_MAPPINGS_DONE 0x00000020 /* * struct pmc_hw -- describe the state of the PMC hardware * * When in use, a HW PMC is associated with one allocated 'struct pmc' * pointed to by field 'phw_pmc'. When inactive, this field is NULL. * * On an SMP box, one or more HW PMC's in process virtual mode with * the same 'phw_pmc' could be executing on different CPUs. In order * to handle this case correctly, we need to ensure that only * incremental counts get added to the saved value in the associated * 'struct pmc'. The 'phw_save' field is used to keep the saved PMC * value at the time the hardware is started during this context * switch (i.e., the difference between the new (hardware) count and * the saved count is atomically added to the count field in 'struct * pmc' at context switch time). 
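* A worked example with made-up numbers: if the counter read 100
* when this thread was switched in and reads 250 at switch-out,
* only the delta of 150 is folded into pm_savedvalue, so counts
* accumulated concurrently on other CPUs are not clobbered.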
* */ struct pmc_hw { uint32_t phw_state; /* see PHW_* macros below */ struct pmc *phw_pmc; /* current thread PMC */ }; #define PMC_PHW_RI_MASK 0x000000FF #define PMC_PHW_CPU_SHIFT 8 #define PMC_PHW_CPU_MASK 0x0000FF00 #define PMC_PHW_FLAGS_SHIFT 16 #define PMC_PHW_FLAGS_MASK 0xFFFF0000 #define PMC_PHW_INDEX_TO_STATE(ri) ((ri) & PMC_PHW_RI_MASK) #define PMC_PHW_STATE_TO_INDEX(state) ((state) & PMC_PHW_RI_MASK) #define PMC_PHW_CPU_TO_STATE(cpu) (((cpu) << PMC_PHW_CPU_SHIFT) & \ PMC_PHW_CPU_MASK) #define PMC_PHW_STATE_TO_CPU(state) (((state) & PMC_PHW_CPU_MASK) >> \ PMC_PHW_CPU_SHIFT) #define PMC_PHW_FLAGS_TO_STATE(flags) (((flags) << PMC_PHW_FLAGS_SHIFT) & \ PMC_PHW_FLAGS_MASK) #define PMC_PHW_STATE_TO_FLAGS(state) (((state) & PMC_PHW_FLAGS_MASK) >> \ PMC_PHW_FLAGS_SHIFT) #define PMC_PHW_FLAG_IS_ENABLED (PMC_PHW_FLAGS_TO_STATE(0x01)) #define PMC_PHW_FLAG_IS_SHAREABLE (PMC_PHW_FLAGS_TO_STATE(0x02)) /* * struct pmc_sample * * Space for N (tunable) PC samples and associated control data. */ struct pmc_sample { uint16_t ps_nsamples; /* callchain depth */ uint8_t ps_cpu; /* cpu number */ uint8_t ps_flags; /* other flags */ pid_t ps_pid; /* process PID or -1 */ struct thread *ps_td; /* which thread */ struct pmc *ps_pmc; /* interrupting PMC */ uintptr_t *ps_pc; /* (const) callchain start */ }; #define PMC_SAMPLE_FREE ((uint16_t) 0) #define PMC_SAMPLE_INUSE ((uint16_t) 0xFFFF) struct pmc_samplebuffer { struct pmc_sample * volatile ps_read; /* read pointer */ struct pmc_sample * volatile ps_write; /* write pointer */ uintptr_t *ps_callchains; /* all saved call chains */ struct pmc_sample *ps_fence; /* one beyond ps_samples[] */ struct pmc_sample ps_samples[]; /* array of sample entries */ }; /* * struct pmc_cpustate * * A CPU is modelled as a collection of HW PMCs with space for additional * flags. */ struct pmc_cpu { uint32_t pc_state; /* physical cpu number + flags */ struct pmc_samplebuffer *pc_sb[2]; /* space for samples */ struct pmc_hw *pc_hwpmcs[]; /* 'npmc' pointers */ }; #define PMC_PCPU_CPU_MASK 0x000000FF #define PMC_PCPU_FLAGS_MASK 0xFFFFFF00 #define PMC_PCPU_FLAGS_SHIFT 8 #define PMC_PCPU_STATE_TO_CPU(S) ((S) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_STATE_TO_FLAGS(S) (((S) & PMC_PCPU_FLAGS_MASK) >> PMC_PCPU_FLAGS_SHIFT) #define PMC_PCPU_FLAGS_TO_STATE(F) (((F) << PMC_PCPU_FLAGS_SHIFT) & PMC_PCPU_FLAGS_MASK) #define PMC_PCPU_CPU_TO_STATE(C) ((C) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_FLAG_HTT (PMC_PCPU_FLAGS_TO_STATE(0x1)) /* * struct pmc_binding * * CPU binding information. */ struct pmc_binding { int pb_bound; /* is bound? */ int pb_cpu; /* if so, to which CPU */ }; struct pmc_mdep; /* * struct pmc_classdep * * PMC class-dependent operations. 
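* The row index _ri passed to these methods appears to be global
* across classes: pcd_ri records the first row owned by a class,
* so its rows would span pcd_ri .. pcd_ri + pcd_num - 1.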
*/ struct pmc_classdep { uint32_t pcd_caps; /* class capabilities */ enum pmc_class pcd_class; /* class id */ int pcd_num; /* number of PMCs */ int pcd_ri; /* row index of the first PMC in class */ int pcd_width; /* width of the PMC */ /* configuring/reading/writing the hardware PMCs */ int (*pcd_config_pmc)(int _cpu, int _ri, struct pmc *_pm); int (*pcd_get_config)(int _cpu, int _ri, struct pmc **_ppm); int (*pcd_read_pmc)(int _cpu, int _ri, pmc_value_t *_value); int (*pcd_write_pmc)(int _cpu, int _ri, pmc_value_t _value); /* pmc allocation/release */ int (*pcd_allocate_pmc)(int _cpu, int _ri, struct pmc *_t, const struct pmc_op_pmcallocate *_a); int (*pcd_release_pmc)(int _cpu, int _ri, struct pmc *_pm); /* starting and stopping PMCs */ int (*pcd_start_pmc)(int _cpu, int _ri); int (*pcd_stop_pmc)(int _cpu, int _ri); /* description */ int (*pcd_describe)(int _cpu, int _ri, struct pmc_info *_pi, struct pmc **_ppmc); /* class-dependent initialization & finalization */ int (*pcd_pcpu_init)(struct pmc_mdep *_md, int _cpu); int (*pcd_pcpu_fini)(struct pmc_mdep *_md, int _cpu); /* machine-specific interface */ int (*pcd_get_msr)(int _ri, uint32_t *_msr); }; /* * struct pmc_mdep * * Machine dependent bits needed per CPU type. */ struct pmc_mdep { uint32_t pmd_cputype; /* from enum pmc_cputype */ uint32_t pmd_npmc; /* number of PMCs per CPU */ uint32_t pmd_nclass; /* number of PMC classes present */ /* * Machine dependent methods. */ /* per-cpu initialization and finalization */ int (*pmd_pcpu_init)(struct pmc_mdep *_md, int _cpu); int (*pmd_pcpu_fini)(struct pmc_mdep *_md, int _cpu); /* thread context switch in/out */ int (*pmd_switch_in)(struct pmc_cpu *_p, struct pmc_process *_pp); int (*pmd_switch_out)(struct pmc_cpu *_p, struct pmc_process *_pp); /* handle a PMC interrupt */ int (*pmd_intr)(int _cpu, struct trapframe *_tf); /* * PMC class dependent information. */ struct pmc_classdep pmd_classdep[]; }; /* * Per-CPU state. This is an array of 'mp_ncpu' pointers * to struct pmc_cpu descriptors. 
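* The hardware state for row 'ri' on a given CPU is thus reached
* as pmc_pcpu[cpu]->pc_hwpmcs[ri] (sketch; bounds checking and
* NULL checks omitted).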
*/ extern struct pmc_cpu **pmc_pcpu; /* driver statistics */ extern struct pmc_op_getdriverstats pmc_stats; #if defined(HWPMC_DEBUG) #include /* debug flags, major flag groups */ struct pmc_debugflags { int pdb_CPU; int pdb_CSW; int pdb_LOG; int pdb_MDP; int pdb_MOD; int pdb_OWN; int pdb_PMC; int pdb_PRC; int pdb_SAM; }; extern struct pmc_debugflags pmc_debugflags; #define KTR_PMC KTR_SUBSYS #define PMC_DEBUG_STRSIZE 128 #define PMC_DEBUG_DEFAULT_FLAGS { 0, 0, 0, 0, 0, 0, 0, 0 } #define PMCDBG0(M, N, L, F) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR0(KTR_PMC, #M ":" #N ":" #L ": " F); \ } while (0) #define PMCDBG1(M, N, L, F, p1) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR1(KTR_PMC, #M ":" #N ":" #L ": " F, p1); \ } while (0) #define PMCDBG2(M, N, L, F, p1, p2) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR2(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2); \ } while (0) #define PMCDBG3(M, N, L, F, p1, p2, p3) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR3(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3); \ } while (0) #define PMCDBG4(M, N, L, F, p1, p2, p3, p4) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR4(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4);\ } while (0) #define PMCDBG5(M, N, L, F, p1, p2, p3, p4, p5) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR5(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4, \ p5); \ } while (0) #define PMCDBG6(M, N, L, F, p1, p2, p3, p4, p5, p6) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR6(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4, \ p5, p6); \ } while (0) /* Major numbers */ #define PMC_DEBUG_MAJ_CPU 0 /* cpu switches */ #define PMC_DEBUG_MAJ_CSW 1 /* context switches */ #define PMC_DEBUG_MAJ_LOG 2 /* logging */ #define PMC_DEBUG_MAJ_MDP 3 /* machine dependent */ #define PMC_DEBUG_MAJ_MOD 4 /* misc module infrastructure */ #define PMC_DEBUG_MAJ_OWN 5 /* owner */ #define PMC_DEBUG_MAJ_PMC 6 /* pmc management */ #define PMC_DEBUG_MAJ_PRC 7 /* processes */ #define PMC_DEBUG_MAJ_SAM 8 /* sampling */ /* Minor numbers */ /* Common (8 bits) */ #define PMC_DEBUG_MIN_ALL 0 /* allocation */ #define PMC_DEBUG_MIN_REL 1 /* release */ #define PMC_DEBUG_MIN_OPS 2 /* ops: start, stop, ... 
*/ #define PMC_DEBUG_MIN_INI 3 /* init */ #define PMC_DEBUG_MIN_FND 4 /* find */ /* MODULE */ #define PMC_DEBUG_MIN_PMH 14 /* pmc_hook */ #define PMC_DEBUG_MIN_PMS 15 /* pmc_syscall */ /* OWN */ #define PMC_DEBUG_MIN_ORM 8 /* owner remove */ #define PMC_DEBUG_MIN_OMR 9 /* owner maybe remove */ /* PROCESSES */ #define PMC_DEBUG_MIN_TLK 8 /* link target */ #define PMC_DEBUG_MIN_TUL 9 /* unlink target */ #define PMC_DEBUG_MIN_EXT 10 /* process exit */ #define PMC_DEBUG_MIN_EXC 11 /* process exec */ #define PMC_DEBUG_MIN_FRK 12 /* process fork */ #define PMC_DEBUG_MIN_ATT 13 /* attach/detach */ #define PMC_DEBUG_MIN_SIG 14 /* signalling */ /* CONTEXT SWITCHES */ #define PMC_DEBUG_MIN_SWI 8 /* switch in */ #define PMC_DEBUG_MIN_SWO 9 /* switch out */ /* PMC */ #define PMC_DEBUG_MIN_REG 8 /* pmc register */ #define PMC_DEBUG_MIN_ALR 9 /* allocate row */ /* MACHINE DEPENDENT LAYER */ #define PMC_DEBUG_MIN_REA 8 /* read */ #define PMC_DEBUG_MIN_WRI 9 /* write */ #define PMC_DEBUG_MIN_CFG 10 /* config */ #define PMC_DEBUG_MIN_STA 11 /* start */ #define PMC_DEBUG_MIN_STO 12 /* stop */ #define PMC_DEBUG_MIN_INT 13 /* interrupts */ /* CPU */ #define PMC_DEBUG_MIN_BND 8 /* bind */ #define PMC_DEBUG_MIN_SEL 9 /* select */ /* LOG */ #define PMC_DEBUG_MIN_GTB 8 /* get buf */ #define PMC_DEBUG_MIN_SIO 9 /* schedule i/o */ #define PMC_DEBUG_MIN_FLS 10 /* flush */ #define PMC_DEBUG_MIN_SAM 11 /* sample */ #define PMC_DEBUG_MIN_CLO 12 /* close */ #else #define PMCDBG0(M, N, L, F) /* nothing */ #define PMCDBG1(M, N, L, F, p1) #define PMCDBG2(M, N, L, F, p1, p2) #define PMCDBG3(M, N, L, F, p1, p2, p3) #define PMCDBG4(M, N, L, F, p1, p2, p3, p4) #define PMCDBG5(M, N, L, F, p1, p2, p3, p4, p5) #define PMCDBG6(M, N, L, F, p1, p2, p3, p4, p5, p6) #endif /* declare a dedicated memory pool */ MALLOC_DECLARE(M_PMC); /* * Functions */ struct pmc_mdep *pmc_md_initialize(void); /* MD init function */ void pmc_md_finalize(struct pmc_mdep *_md); /* MD fini function */ int pmc_getrowdisp(int _ri); int pmc_process_interrupt(int _cpu, int _soft, struct pmc *_pm, struct trapframe *_tf, int _inuserspace); int pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); struct pmc_mdep *pmc_mdep_alloc(int nclasses); void pmc_mdep_free(struct pmc_mdep *md); #endif /* _KERNEL */ #endif /* _SYS_PMC_H_ */ Index: projects/powernv/sys/resourcevar.h =================================================================== --- projects/powernv/sys/resourcevar.h (revision 290990) +++ projects/powernv/sys/resourcevar.h (revision 290991) @@ -1,163 +1,164 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)resourcevar.h 8.4 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _SYS_RESOURCEVAR_H_ #define _SYS_RESOURCEVAR_H_ #include #include #ifdef _KERNEL #include #include #endif /* * Kernel per-process accounting / statistics * (not necessarily resident except when running). * * Locking key: * b - created at fork, never changes * c - locked by proc mtx * k - only accessed by curthread * w - locked by proc itim lock * w2 - locked by proc prof lock */ struct pstats { #define pstat_startzero p_cru struct rusage p_cru; /* Stats for reaped children. */ struct itimerval p_timer[3]; /* (w) Virtual-time timers. */ #define pstat_endzero pstat_startcopy #define pstat_startcopy p_prof struct uprof { /* Profile arguments. */ caddr_t pr_base; /* (c + w2) Buffer base. */ u_long pr_size; /* (c + w2) Buffer size. */ u_long pr_off; /* (c + w2) PC offset. */ u_long pr_scale; /* (c + w2) PC scaling. */ } p_prof; #define pstat_endcopy p_start struct timeval p_start; /* (b) Starting time. */ }; #ifdef _KERNEL /* * Kernel shareable process resource limits. Because this structure * is moderately large but changes infrequently, it is normally * shared copy-on-write after forks. */ struct plimit { struct rlimit pl_rlimit[RLIM_NLIMITS]; int pl_refcnt; /* number of references */ }; struct racct; /*- * Per uid resource consumption. This structure is used to track * the total resource consumption (process count, socket buffer size, * etc) for the uid and impose limits. 
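To make the structure below concrete, here is a minimal editorial sketch of the atomic check-and-increment pattern behind the chg*() functions declared further down; the field names match this header, but the body is illustrative rather than the kernel's exact implementation:

	static int
	example_chgproccnt(struct uidinfo *uip, int diff, rlim_t max)
	{
		/* Don't allow the count to exceed max, but allow
		 * subtraction; ui_proccnt is keyed (b), so plain
		 * atomics suffice and no lock is taken. */
		if (diff > 0 && max != 0) {
			if (atomic_fetchadd_long(&uip->ui_proccnt, diff) +
			    diff > max) {
				atomic_subtract_long(&uip->ui_proccnt, diff);
				return (0);	/* over limit, undo charge */
			}
		} else
			atomic_add_long(&uip->ui_proccnt, diff);
		return (1);
	}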
* * Locking guide: * (a) Constant from inception * (b) Lockless, updated using atomics * (c) Locked by global uihashtbl_lock * (d) Locked by the ui_vmsize_mtx */ struct uidinfo { LIST_ENTRY(uidinfo) ui_hash; /* (c) hash chain of uidinfos */ struct mtx ui_vmsize_mtx; vm_ooffset_t ui_vmsize; /* (d) swap reservation by uid */ long ui_sbsize; /* (b) socket buffer space consumed */ long ui_proccnt; /* (b) number of processes */ long ui_ptscnt; /* (b) number of pseudo-terminals */ long ui_kqcnt; /* (b) number of kqueues */ uid_t ui_uid; /* (a) uid */ u_int ui_ref; /* (b) reference count */ #ifdef RACCT struct racct *ui_racct; /* (a) resource accounting */ #endif }; #define UIDINFO_VMSIZE_LOCK(ui) mtx_lock(&((ui)->ui_vmsize_mtx)) #define UIDINFO_VMSIZE_UNLOCK(ui) mtx_unlock(&((ui)->ui_vmsize_mtx)) struct proc; struct rusage_ext; struct thread; void addupc_intr(struct thread *td, uintfptr_t pc, u_int ticks); void addupc_task(struct thread *td, uintfptr_t pc, u_int ticks); void calccru(struct proc *p, struct timeval *up, struct timeval *sp); void calcru(struct proc *p, struct timeval *up, struct timeval *sp); int chgkqcnt(struct uidinfo *uip, int diff, rlim_t max); int chgproccnt(struct uidinfo *uip, int diff, rlim_t maxval); int chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t maxval); int chgptscnt(struct uidinfo *uip, int diff, rlim_t maxval); int fuswintr(void *base); int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which, struct rlimit *limp); struct plimit *lim_alloc(void); void lim_copy(struct plimit *dst, struct plimit *src); rlim_t lim_cur(struct thread *td, int which); rlim_t lim_cur_proc(struct proc *p, int which); void lim_fork(struct proc *p1, struct proc *p2); void lim_free(struct plimit *limp); struct plimit *lim_hold(struct plimit *limp); rlim_t lim_max(struct thread *td, int which); rlim_t lim_max_proc(struct proc *p, int which); void lim_rlimit(struct thread *td, int which, struct rlimit *rlp); void lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp); void ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2, struct rusage_ext *rux2); void rucollect(struct rusage *ru, struct rusage *ru2); void rufetch(struct proc *p, struct rusage *ru); void rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up, struct timeval *sp); void rufetchtd(struct thread *td, struct rusage *ru); void ruxagg(struct proc *p, struct thread *td); int suswintr(void *base, int word); struct uidinfo *uifind(uid_t uid); void uifree(struct uidinfo *uip); void uihashinit(void); void uihold(struct uidinfo *uip); #ifdef RACCT void ui_racct_foreach(void (*callback)(struct racct *racct, - void *arg2, void *arg3), void *arg2, void *arg3); + void *arg2, void *arg3), void (*pre)(void), void (*post)(void), + void *arg2, void *arg3); #endif #endif /* _KERNEL */ #endif /* !_SYS_RESOURCEVAR_H_ */ Index: projects/powernv/vm/vm_page.h =================================================================== --- projects/powernv/vm/vm_page.h (revision 290990) +++ projects/powernv/vm/vm_page.h (revision 290991) @@ -1,680 +1,681 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_page.h 8.2 (Berkeley) 12/13/93 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * * $FreeBSD$ */ /* * Resident memory system definitions. */ #ifndef _VM_PAGE_ #define _VM_PAGE_ #include /* * Management of resident (logical) pages. * * A small structure is kept for each resident * page, indexed by page number. Each structure * is an element of several collections: * * A radix tree used to quickly * perform object/offset lookups * * A list of all pages for a given object, * so they can be quickly deactivated at * time of deallocation. * * An ordered list of pages due for pageout. * * In addition, the structure contains the object * and offset to which this page belongs (for pageout), * and sundry status bits. * * In general, operations on this structure's mutable fields are * synchronized using either one of or a combination of the lock on the * object that the page belongs to (O), the pool lock for the page (P), * or the lock for either the free or paging queue (Q). If a field is * annotated below with two of these locks, then holding either lock is * sufficient for read access, but both locks are required for write * access. * * In contrast, the synchronization of accesses to the page's * dirty field is machine dependent (M). 
In the * machine-independent layer, the lock on the object that the * page belongs to must be held in order to operate on the field. * However, the pmap layer is permitted to set all bits within * the field without holding that lock. If the underlying * architecture does not support atomic read-modify-write * operations on the field's type, then the machine-independent * layer uses a 32-bit atomic on the aligned 32-bit word that * contains the dirty field. In the machine-independent layer, * the implementation of read-modify-write operations on the * field is encapsulated in vm_page_clear_dirty_mask(). */ #if PAGE_SIZE == 4096 #define VM_PAGE_BITS_ALL 0xffu typedef uint8_t vm_page_bits_t; #elif PAGE_SIZE == 8192 #define VM_PAGE_BITS_ALL 0xffffu typedef uint16_t vm_page_bits_t; #elif PAGE_SIZE == 16384 #define VM_PAGE_BITS_ALL 0xffffffffu typedef uint32_t vm_page_bits_t; #elif PAGE_SIZE == 32768 #define VM_PAGE_BITS_ALL 0xfffffffffffffffflu typedef uint64_t vm_page_bits_t; #endif struct vm_page { union { TAILQ_ENTRY(vm_page) q; /* page queue or free list (Q) */ struct { SLIST_ENTRY(vm_page) ss; /* private slists */ void *pv; } s; struct { u_long p; u_long v; } memguard; } plinks; TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ vm_object_t object; /* which object am I in (O,P) */ vm_pindex_t pindex; /* offset into object (O,P) */ vm_paddr_t phys_addr; /* physical address of page */ struct md_page md; /* machine dependent stuff */ u_int wire_count; /* wired down maps refs (P) */ volatile u_int busy_lock; /* busy owners lock */ uint16_t hold_count; /* page hold count (P) */ uint16_t flags; /* page PG_* flags (P) */ uint8_t aflags; /* access is atomic */ uint8_t oflags; /* page VPO_* flags (O) */ uint8_t queue; /* page queue index (P,Q) */ int8_t psind; /* pagesizes[] index (O) */ int8_t segind; uint8_t order; /* index of the buddy queue */ uint8_t pool; u_char act_count; /* page usage count (P) */ /* NOTE that these must support one bit per DEV_BSIZE in a page */ /* so, on normal X86 kernels, they must be at least 8 bits wide */ vm_page_bits_t valid; /* map of valid DEV_BSIZE chunks (O) */ vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ }; /* * Page flags stored in oflags: * * Access to these page flags is synchronized by the lock on the object * containing the page (O). * * Note: VPO_UNMANAGED (used by OBJT_DEVICE, OBJT_PHYS and OBJT_SG) * indicates that the page is not under PV management but * otherwise should be treated as a normal page. Pages not * under PV management cannot be paged out via the * object/vm_page_t because there is no knowledge of their pte * mappings, and such pages are also not on any PQ queue. * */ #define VPO_UNUSED01 0x01 /* --available-- */ #define VPO_SWAPSLEEP 0x02 /* waiting for swap to finish */ #define VPO_UNMANAGED 0x04 /* no PV management for page */ #define VPO_SWAPINPROG 0x08 /* swap I/O in progress on page */ #define VPO_NOSYNC 0x10 /* do not collect for syncer */ /* * Busy page implementation details. * The algorithm is taken mostly from the rwlock(9) and sx(9) lock * implementations, although support for owner identity has been removed * because of size constraints. Checks on lock recursion are therefore not * possible, and the effectiveness of the lock assertions is somewhat reduced.
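Stepping back to the vm_page_bits_t typedefs above: the widths fall directly out of one valid/dirty bit per DEV_BSIZE chunk. An editorial illustration, assuming the standard 512-byte DEV_BSIZE:

	/* PAGE_SIZE / DEV_BSIZE bits are needed per page:
	 *    4096 / 512 =  8  -> uint8_t  (VM_PAGE_BITS_ALL == 0xffu)
	 *    8192 / 512 = 16  -> uint16_t (VM_PAGE_BITS_ALL == 0xffffu)
	 *   16384 / 512 = 32  -> uint32_t
	 *   32768 / 512 = 64  -> uint64_t
	 * For example, marking only the first two 512-byte chunks of a
	 * 4 KB page valid uses the mask: */
	vm_page_bits_t mask = (1u << (1024 / DEV_BSIZE)) - 1;	/* 0x03 */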
*/ #define VPB_BIT_SHARED 0x01 #define VPB_BIT_EXCLUSIVE 0x02 #define VPB_BIT_WAITERS 0x04 #define VPB_BIT_FLAGMASK \ (VPB_BIT_SHARED | VPB_BIT_EXCLUSIVE | VPB_BIT_WAITERS) #define VPB_SHARERS_SHIFT 3 #define VPB_SHARERS(x) \ (((x) & ~VPB_BIT_FLAGMASK) >> VPB_SHARERS_SHIFT) #define VPB_SHARERS_WORD(x) ((x) << VPB_SHARERS_SHIFT | VPB_BIT_SHARED) #define VPB_ONE_SHARER (1 << VPB_SHARERS_SHIFT) #define VPB_SINGLE_EXCLUSIVER VPB_BIT_EXCLUSIVE #define VPB_UNBUSIED VPB_SHARERS_WORD(0) #define PQ_NONE 255 #define PQ_INACTIVE 0 #define PQ_ACTIVE 1 #define PQ_COUNT 2 TAILQ_HEAD(pglist, vm_page); SLIST_HEAD(spglist, vm_page); struct vm_pagequeue { struct mtx pq_mutex; struct pglist pq_pl; int pq_cnt; int * const pq_vcnt; const char * const pq_name; } __aligned(CACHE_LINE_SIZE); struct vm_domain { struct vm_pagequeue vmd_pagequeues[PQ_COUNT]; u_int vmd_page_count; u_int vmd_free_count; long vmd_segs; /* bitmask of the segments */ boolean_t vmd_oom; int vmd_pass; /* local pagedaemon pass */ + int vmd_oom_seq; int vmd_last_active_scan; struct vm_page vmd_marker; /* marker for pagedaemon private use */ struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */ }; extern struct vm_domain vm_dom[MAXMEMDOM]; #define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED) #define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex) #define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex) #ifdef _KERNEL static __inline void vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend) { #ifdef notyet vm_pagequeue_assert_locked(pq); #endif pq->pq_cnt += addend; atomic_add_int(pq->pq_vcnt, addend); } #define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1) #define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1) #endif /* _KERNEL */ extern struct mtx_padalign vm_page_queue_free_mtx; extern struct mtx_padalign pa_lock[]; #if defined(__arm__) #define PDRSHIFT PDR_SHIFT #elif !defined(PDRSHIFT) #define PDRSHIFT 21 #endif #define pa_index(pa) ((pa) >> PDRSHIFT) #define PA_LOCKPTR(pa) ((struct mtx *)(&pa_lock[pa_index(pa) % PA_LOCK_COUNT])) #define PA_LOCKOBJPTR(pa) ((struct lock_object *)PA_LOCKPTR((pa))) #define PA_LOCK(pa) mtx_lock(PA_LOCKPTR(pa)) #define PA_TRYLOCK(pa) mtx_trylock(PA_LOCKPTR(pa)) #define PA_UNLOCK(pa) mtx_unlock(PA_LOCKPTR(pa)) #define PA_UNLOCK_COND(pa) \ do { \ if ((pa) != 0) { \ PA_UNLOCK((pa)); \ (pa) = 0; \ } \ } while (0) #define PA_LOCK_ASSERT(pa, a) mtx_assert(PA_LOCKPTR(pa), (a)) #ifdef KLD_MODULE #define vm_page_lock(m) vm_page_lock_KBI((m), LOCK_FILE, LOCK_LINE) #define vm_page_unlock(m) vm_page_unlock_KBI((m), LOCK_FILE, LOCK_LINE) #define vm_page_trylock(m) vm_page_trylock_KBI((m), LOCK_FILE, LOCK_LINE) #else /* !KLD_MODULE */ #define vm_page_lockptr(m) (PA_LOCKPTR(VM_PAGE_TO_PHYS((m)))) #define vm_page_lock(m) mtx_lock(vm_page_lockptr((m))) #define vm_page_unlock(m) mtx_unlock(vm_page_lockptr((m))) #define vm_page_trylock(m) mtx_trylock(vm_page_lockptr((m))) #endif #if defined(INVARIANTS) #define vm_page_assert_locked(m) \ vm_page_assert_locked_KBI((m), __FILE__, __LINE__) #define vm_page_lock_assert(m, a) \ vm_page_lock_assert_KBI((m), (a), __FILE__, __LINE__) #else #define vm_page_assert_locked(m) #define vm_page_lock_assert(m, a) #endif /* * The vm_page's aflags are updated using atomic operations. To set or clear * these flags, the functions vm_page_aflag_set() and vm_page_aflag_clear() * must be used. Neither these flags nor these functions are part of the KBI. * * PGA_REFERENCED may be cleared only if the page is locked. 
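An editorial note on the busy-word encoding defined at the top of this span: the sharer count lives above the three flag bits, so the concrete values are easy to read off:

	/* VPB_SHARERS_WORD(n) == (n << VPB_SHARERS_SHIFT) | VPB_BIT_SHARED:
	 *   VPB_UNBUSIED           == VPB_SHARERS_WORD(0)   == 0x01
	 *   one shared holder      == VPB_SHARERS_WORD(1)   == 0x09
	 *   two shared holders     == VPB_SHARERS_WORD(2)   == 0x11
	 *   exclusive holder       == VPB_SINGLE_EXCLUSIVER == 0x02
	 * VPB_SHARERS() inverts the encoding: (0x11 & ~0x07) >> 3 == 2,
	 * and acquiring one more shared reference is a single atomic add
	 * of VPB_ONE_SHARER (0x08) to busy_lock. */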
It is set by * both the MI and MD VM layers. However, kernel loadable modules should not * directly set this flag. They should call vm_page_reference() instead. * * PGA_WRITEABLE is set exclusively on managed pages by pmap_enter(). * When it does so, the object must be locked, or the page must be * exclusive busied. The MI VM layer must never access this flag * directly. Instead, it should call pmap_page_is_write_mapped(). * * PGA_EXECUTABLE may be set by pmap routines, and indicates that a page has * at least one executable mapping. It is not consumed by the MI VM layer. */ #define PGA_WRITEABLE 0x01 /* page may be mapped writeable */ #define PGA_REFERENCED 0x02 /* page has been referenced */ #define PGA_EXECUTABLE 0x04 /* page may be mapped executable */ /* * Page flags. If changed at any other time than page allocation or * freeing, the modification must be protected by the vm_page lock. */ #define PG_CACHED 0x0001 /* page is cached */ #define PG_FICTITIOUS 0x0004 /* physical page doesn't exist */ #define PG_ZERO 0x0008 /* page is zeroed */ #define PG_MARKER 0x0010 /* special queue marker page */ #define PG_WINATCFLS 0x0040 /* flush dirty page on inactive q */ #define PG_NODUMP 0x0080 /* don't include this page in a dump */ #define PG_UNHOLDFREE 0x0100 /* delayed free of a held page */ /* * Misc constants. */ #define ACT_DECLINE 1 #define ACT_ADVANCE 3 #define ACT_INIT 5 #define ACT_MAX 64 #ifdef _KERNEL #include #include /* * Each pageable resident page falls into one of four lists: * * free * Available for allocation now. * * cache * Almost available for allocation. Still associated with * an object, but clean and immediately freeable. * * The following lists are LRU sorted: * * inactive * Low activity, candidates for reclamation. * This is the list of pages that should be * paged out next. * * active * Pages that are "active" i.e. they have been * recently referenced. * */ extern int vm_page_zero_count; extern vm_page_t vm_page_array; /* First resident page in table */ extern long vm_page_array_size; /* number of vm_page_t's */ extern long first_page; /* first physical page number */ #define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr) /* * PHYS_TO_VM_PAGE() returns the vm_page_t object that represents a memory * page to which the given physical address belongs. The correct vm_page_t * object is returned for addresses that are not page-aligned. */ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); /* * Page allocation parameters for vm_page for the functions * vm_page_alloc(), vm_page_grab(), vm_page_alloc_contig() and * vm_page_alloc_freelist(). Some functions support only a subset * of the flags, and ignore others, see the flags legend. * * Bits 0 - 1 define class. * Bits 2 - 15 dedicated for flags. * Legend: * (a) - vm_page_alloc() supports the flag. * (c) - vm_page_alloc_contig() supports the flag. * (f) - vm_page_alloc_freelist() supports the flag. * (g) - vm_page_grab() supports the flag. * Bits above 15 define the count of additional pages that the caller * intends to allocate. 
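Since the flags legend above is terse, a hedged usage example may help; obj and pindex are hypothetical, and the VM_ALLOC_* names are defined immediately below:

	vm_page_t m;

	/* A wired, preferably pre-zeroed page for a kernel object, with
	 * a hint that roughly seven more allocations will follow. */
	m = vm_page_alloc(obj, pindex, VM_ALLOC_SYSTEM | VM_ALLOC_WIRED |
	    VM_ALLOC_ZERO | VM_ALLOC_COUNT(7));
	if (m != NULL && (m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);	/* VM_ALLOC_ZERO only tries for one */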
*/ #define VM_ALLOC_NORMAL 0 #define VM_ALLOC_INTERRUPT 1 #define VM_ALLOC_SYSTEM 2 #define VM_ALLOC_CLASS_MASK 3 #define VM_ALLOC_WIRED 0x0020 /* (acfg) Allocate non pageable page */ #define VM_ALLOC_ZERO 0x0040 /* (acfg) Try to obtain a zeroed page */ #define VM_ALLOC_NOOBJ 0x0100 /* (acg) No associated object */ #define VM_ALLOC_NOBUSY 0x0200 /* (acg) Do not busy the page */ #define VM_ALLOC_IFCACHED 0x0400 /* (ag) Fail if page is not cached */ #define VM_ALLOC_IFNOTCACHED 0x0800 /* (ag) Fail if page is cached */ #define VM_ALLOC_IGN_SBUSY 0x1000 /* (g) Ignore shared busy flag */ #define VM_ALLOC_NODUMP 0x2000 /* (ag) don't include in dump */ #define VM_ALLOC_SBUSY 0x4000 /* (acg) Shared busy the page */ #define VM_ALLOC_NOWAIT 0x8000 /* (g) Do not sleep, return NULL */ #define VM_ALLOC_COUNT_SHIFT 16 #define VM_ALLOC_COUNT(count) ((count) << VM_ALLOC_COUNT_SHIFT) #ifdef M_NOWAIT static inline int malloc2vm_flags(int malloc_flags) { int pflags; KASSERT((malloc_flags & M_USE_RESERVE) == 0 || (malloc_flags & M_NOWAIT) != 0, ("M_USE_RESERVE requires M_NOWAIT")); pflags = (malloc_flags & M_USE_RESERVE) != 0 ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM; if ((malloc_flags & M_ZERO) != 0) pflags |= VM_ALLOC_ZERO; if ((malloc_flags & M_NODUMP) != 0) pflags |= VM_ALLOC_NODUMP; return (pflags); } #endif void vm_page_busy_downgrade(vm_page_t m); void vm_page_busy_sleep(vm_page_t m, const char *msg); void vm_page_flash(vm_page_t m); void vm_page_hold(vm_page_t mem); void vm_page_unhold(vm_page_t mem); void vm_page_free(vm_page_t m); void vm_page_free_zero(vm_page_t m); void vm_page_activate (vm_page_t); void vm_page_advise(vm_page_t m, int advice); vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int); vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr); vm_page_t vm_page_alloc_freelist(int, int); vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int); void vm_page_cache(vm_page_t); void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t); void vm_page_cache_transfer(vm_object_t, vm_pindex_t, vm_object_t); int vm_page_try_to_cache (vm_page_t); int vm_page_try_to_free (vm_page_t); void vm_page_deactivate (vm_page_t); void vm_page_deactivate_noreuse(vm_page_t); void vm_page_dequeue(vm_page_t m); void vm_page_dequeue_locked(vm_page_t m); vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t); vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); boolean_t vm_page_is_cached(vm_object_t object, vm_pindex_t pindex); vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t); vm_page_t vm_page_next(vm_page_t m); int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *); struct vm_pagequeue *vm_page_pagequeue(vm_page_t m); vm_page_t vm_page_prev(vm_page_t m); boolean_t vm_page_ps_is_valid(vm_page_t m); void vm_page_putfake(vm_page_t m); void vm_page_readahead_finish(vm_page_t m); void vm_page_reference(vm_page_t m); void vm_page_remove (vm_page_t); int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t); vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex); void vm_page_requeue(vm_page_t m); void vm_page_requeue_locked(vm_page_t m); int vm_page_sbusied(vm_page_t m); void vm_page_set_valid_range(vm_page_t m, int base, int size); int vm_page_sleep_if_busy(vm_page_t m, const char *msg); vm_offset_t 
vm_page_startup(vm_offset_t vaddr); void vm_page_sunbusy(vm_page_t m); int vm_page_trysbusy(vm_page_t m); void vm_page_unhold_pages(vm_page_t *ma, int count); boolean_t vm_page_unwire(vm_page_t m, uint8_t queue); void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_wire (vm_page_t); void vm_page_xunbusy_hard(vm_page_t m); void vm_page_set_validclean (vm_page_t, int, int); void vm_page_clear_dirty (vm_page_t, int, int); void vm_page_set_invalid (vm_page_t, int, int); int vm_page_is_valid (vm_page_t, int, int); void vm_page_test_dirty (vm_page_t); vm_page_bits_t vm_page_bits(int base, int size); void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid); void vm_page_free_toq(vm_page_t m); void vm_page_zero_idle_wakeup(void); void vm_page_dirty_KBI(vm_page_t m); void vm_page_lock_KBI(vm_page_t m, const char *file, int line); void vm_page_unlock_KBI(vm_page_t m, const char *file, int line); int vm_page_trylock_KBI(vm_page_t m, const char *file, int line); #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line); void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line); #endif #define vm_page_assert_sbusied(m) \ KASSERT(vm_page_sbusied(m), \ ("vm_page_assert_sbusied: page %p not shared busy @ %s:%d", \ (void *)m, __FILE__, __LINE__)); #define vm_page_assert_unbusied(m) \ KASSERT(!vm_page_busied(m), \ ("vm_page_assert_unbusied: page %p busy @ %s:%d", \ (void *)m, __FILE__, __LINE__)); #define vm_page_assert_xbusied(m) \ KASSERT(vm_page_xbusied(m), \ ("vm_page_assert_xbusied: page %p not exclusive busy @ %s:%d", \ (void *)m, __FILE__, __LINE__)); #define vm_page_busied(m) \ ((m)->busy_lock != VPB_UNBUSIED) #define vm_page_sbusy(m) do { \ if (!vm_page_trysbusy(m)) \ panic("%s: page %p failed shared busing", __func__, m); \ } while (0) #define vm_page_tryxbusy(m) \ (atomic_cmpset_acq_int(&m->busy_lock, VPB_UNBUSIED, \ VPB_SINGLE_EXCLUSIVER)) #define vm_page_xbusied(m) \ ((m->busy_lock & VPB_SINGLE_EXCLUSIVER) != 0) #define vm_page_xbusy(m) do { \ if (!vm_page_tryxbusy(m)) \ panic("%s: page %p failed exclusive busing", __func__, \ m); \ } while (0) #define vm_page_xunbusy(m) do { \ if (!atomic_cmpset_rel_int(&(m)->busy_lock, \ VPB_SINGLE_EXCLUSIVER, VPB_UNBUSIED)) \ vm_page_xunbusy_hard(m); \ } while (0) #ifdef INVARIANTS void vm_page_object_lock_assert(vm_page_t m); #define VM_PAGE_OBJECT_LOCK_ASSERT(m) vm_page_object_lock_assert(m) void vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits); #define VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits) \ vm_page_assert_pga_writeable(m, bits) #else #define VM_PAGE_OBJECT_LOCK_ASSERT(m) (void)0 #define VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits) (void)0 #endif /* * We want to use atomic updates for the aflags field, which is 8 bits wide. * However, not all architectures support atomic operations on 8-bit * destinations. In order that we can easily use a 32-bit operation, we * require that the aflags field be 32-bit aligned. */ CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0); /* * Clear the given bits in the specified page. */ static inline void vm_page_aflag_clear(vm_page_t m, uint8_t bits) { uint32_t *addr, val; /* * The PGA_REFERENCED flag can only be cleared if the page is locked. */ if ((bits & PGA_REFERENCED) != 0) vm_page_assert_locked(m); /* * Access the whole 32-bit word containing the aflags field with an * atomic update. 
Parallel non-atomic updates to the other fields * within this word are handled properly by the atomic update. */ addr = (void *)&m->aflags; KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0, ("vm_page_aflag_clear: aflags is misaligned")); val = bits; #if BYTE_ORDER == BIG_ENDIAN val <<= 24; #endif atomic_clear_32(addr, val); } /* * Set the given bits in the specified page. */ static inline void vm_page_aflag_set(vm_page_t m, uint8_t bits) { uint32_t *addr, val; VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits); /* * Access the whole 32-bit word containing the aflags field with an * atomic update. Parallel non-atomic updates to the other fields * within this word are handled properly by the atomic update. */ addr = (void *)&m->aflags; KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0, ("vm_page_aflag_set: aflags is misaligned")); val = bits; #if BYTE_ORDER == BIG_ENDIAN val <<= 24; #endif atomic_set_32(addr, val); } /* * vm_page_dirty: * * Set all bits in the page's dirty field. * * The object containing the specified page must be locked if the * call is made from the machine-independent layer. * * See vm_page_clear_dirty_mask(). */ static __inline void vm_page_dirty(vm_page_t m) { /* Use vm_page_dirty_KBI() under INVARIANTS to save memory. */ #if defined(KLD_MODULE) || defined(INVARIANTS) vm_page_dirty_KBI(m); #else m->dirty = VM_PAGE_BITS_ALL; #endif } /* * vm_page_remque: * * If the given page is in a page queue, then remove it from that page * queue. * * The page must be locked. */ static inline void vm_page_remque(vm_page_t m) { if (m->queue != PQ_NONE) vm_page_dequeue(m); } /* * vm_page_undirty: * * Set page to not be dirty. Note: does not clear pmap modify bits */ static __inline void vm_page_undirty(vm_page_t m) { VM_PAGE_OBJECT_LOCK_ASSERT(m); m->dirty = 0; } #endif /* _KERNEL */ #endif /* !_VM_PAGE_ */ Index: projects/powernv/vm/vm_pageout.c =================================================================== --- projects/powernv/vm/vm_pageout.c (revision 290990) +++ projects/powernv/vm/vm_pageout.c (revision 290991) @@ -1,1938 +1,2015 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2005 Yahoo! Technologies Norway AS * All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * The proverbial page-out daemon. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include "opt_kdtrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * System initialization */ /* the kernel process "vm_pageout"*/ static void vm_pageout(void); static void vm_pageout_init(void); static int vm_pageout_clean(vm_page_t m); static int vm_pageout_cluster(vm_page_t m); static void vm_pageout_scan(struct vm_domain *vmd, int pass); -static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass); +static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, + int starting_page_shortage); SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init, NULL); struct proc *pageproc; static struct kproc_desc page_kp = { "pagedaemon", vm_pageout, &pageproc }; SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &page_kp); SDT_PROVIDER_DEFINE(vm); SDT_PROBE_DEFINE(vm, , , vm__lowmem_cache); SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan); #if !defined(NO_SWAPPING) /* the kernel process "vm_daemon"*/ static void vm_daemon(void); static struct proc *vmproc; static struct kproc_desc vm_kp = { "vmdaemon", vm_daemon, &vmproc }; SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp); #endif int vm_pages_needed; /* Event on which pageout daemon sleeps */ int vm_pageout_deficit; /* Estimated number of pages deficit */ int vm_pageout_wakeup_thresh; +static int vm_pageout_oom_seq = 12; #if !defined(NO_SWAPPING) static int vm_pageout_req_swapout; /* XXX */ static int vm_daemon_needed; static struct mtx vm_daemon_mtx; /* Allow for use by vm_pageout before vm_daemon is initialized. 
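The vm_pageout_oom_seq knob declared above (default 12, exported as the vm.pageout_oom_seq sysctl just below) pairs with the vmd_oom_seq field added to struct vm_domain in vm_page.h. The body of vm_pageout_mightbe_oom() lies outside this hunk, so the following is only a hedged sketch of the intended logic:

	/* Sketch: reset the streak whenever a scan recovered any of its
	 * shortage; only after vm_pageout_oom_seq consecutive
	 * no-progress passes fall through to vm_pageout_oom(). */
	if (page_shortage <= 0 || page_shortage < starting_page_shortage)
		vmd->vmd_oom_seq = 0;
	else if (++vmd->vmd_oom_seq < vm_pageout_oom_seq)
		return;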
*/ MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); #endif static int vm_max_launder = 32; static int vm_pageout_update_period; static int defer_swap_pageouts; static int disable_swap_pageouts; static int lowmem_period = 10; static time_t lowmem_uptime; #if defined(NO_SWAPPING) static int vm_swap_enabled = 0; static int vm_swap_idle_enabled = 0; #else static int vm_swap_enabled = 1; static int vm_swap_idle_enabled = 0; #endif static int vm_panic_on_oom = 0; SYSCTL_INT(_vm, OID_AUTO, panic_on_oom, CTLFLAG_RWTUN, &vm_panic_on_oom, 0, "panic on out of memory instead of killing the largest process"); SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh, CTLFLAG_RW, &vm_pageout_wakeup_thresh, 0, "free page threshold for waking up the pageout daemon"); SYSCTL_INT(_vm, OID_AUTO, max_launder, CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout"); SYSCTL_INT(_vm, OID_AUTO, pageout_update_period, CTLFLAG_RW, &vm_pageout_update_period, 0, "Maximum active LRU update period"); SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RW, &lowmem_period, 0, "Low memory callback period"); #if defined(NO_SWAPPING) SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RD, &vm_swap_enabled, 0, "Enable entire process swapout"); SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RD, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); #else SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout"); SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); #endif SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts, CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem"); SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts, CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages"); static int pageout_lock_miss; SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss, CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout"); +SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq, + CTLFLAG_RW, &vm_pageout_oom_seq, 0, + "back-to-back calls to oom detector to start OOM"); + #define VM_PAGEOUT_PAGE_COUNT 16 int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT; int vm_page_max_wired; /* XXX max # of wired pages system-wide */ SYSCTL_INT(_vm, OID_AUTO, max_wired, CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count"); static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *); static boolean_t vm_pageout_launder(struct vm_pagequeue *pq, int, vm_paddr_t, vm_paddr_t); #if !defined(NO_SWAPPING) static void vm_pageout_map_deactivate_pages(vm_map_t, long); static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long); static void vm_req_vmdaemon(int req); #endif static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *); /* * Initialize a dummy page for marking the caller's place in the specified * paging queue. In principle, this function only needs to set the flag * PG_MARKER. Nonetheless, it write busies and initializes the hold count * to one as safety precautions. */ static void vm_pageout_init_marker(vm_page_t marker, u_short queue) { bzero(marker, sizeof(*marker)); marker->flags = PG_MARKER; marker->busy_lock = VPB_SINGLE_EXCLUSIVER; marker->queue = queue; marker->hold_count = 1; } /* * vm_pageout_fallback_object_lock: * * Lock vm object currently associated with `m'. VM_OBJECT_TRYWLOCK is * known to have failed and page queue must be either PQ_ACTIVE or * PQ_INACTIVE.
To avoid lock order violation, unlock the page queues * while locking the vm object. Use marker page to detect page queue * changes and maintain notion of next page on page queue. Return * TRUE if no changes were detected, FALSE otherwise. vm object is * locked on return. * * This function depends on both the lock portion of struct vm_object * and normal struct vm_page being type stable. */ static boolean_t vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next) { struct vm_page marker; struct vm_pagequeue *pq; boolean_t unchanged; u_short queue; vm_object_t object; queue = m->queue; vm_pageout_init_marker(&marker, queue); pq = vm_page_pagequeue(m); object = m->object; TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q); vm_pagequeue_unlock(pq); vm_page_unlock(m); VM_OBJECT_WLOCK(object); vm_page_lock(m); vm_pagequeue_lock(pq); /* * The page's object might have changed, and/or the page might * have moved from its original position in the queue. If the * page's object has changed, then the caller should abandon * processing the page because the wrong object lock was * acquired. Use the marker's plinks.q, not the page's, to * determine if the page has been moved. The state of the * page's plinks.q can be indeterminate; whereas, the marker's * plinks.q must be valid. */ *next = TAILQ_NEXT(&marker, plinks.q); unchanged = m->object == object && m == TAILQ_PREV(&marker, pglist, plinks.q); KASSERT(!unchanged || m->queue == queue, ("page %p queue %d %d", m, queue, m->queue)); TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q); return (unchanged); } /* * Lock the page while holding the page queue lock. Use marker page * to detect page queue changes and maintain notion of next page on * page queue. Return TRUE if no changes were detected, FALSE * otherwise. The page is locked on return. The page queue lock might * be dropped and reacquired. * * This function depends on normal struct vm_page being type stable. */ static boolean_t vm_pageout_page_lock(vm_page_t m, vm_page_t *next) { struct vm_page marker; struct vm_pagequeue *pq; boolean_t unchanged; u_short queue; vm_page_lock_assert(m, MA_NOTOWNED); if (vm_page_trylock(m)) return (TRUE); queue = m->queue; vm_pageout_init_marker(&marker, queue); pq = vm_page_pagequeue(m); TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q); vm_pagequeue_unlock(pq); vm_page_lock(m); vm_pagequeue_lock(pq); /* Page queue might have changed. */ *next = TAILQ_NEXT(&marker, plinks.q); unchanged = m == TAILQ_PREV(&marker, pglist, plinks.q); KASSERT(!unchanged || m->queue == queue, ("page %p queue %d %d", m, queue, m->queue)); TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q); return (unchanged); } /* * vm_pageout_clean: * * Clean the page and remove it from the laundry. * * We set the busy bit to cause potential page faults on this page to * block. Note the careful timing, however, the busy bit isn't set till * late and we cannot do anything that will mess with the page. */ static int vm_pageout_cluster(vm_page_t m) { vm_object_t object; vm_page_t mc[2*vm_pageout_page_count], pb, ps; int pageout_count; int ib, is, page_base; vm_pindex_t pindex = m->pindex; vm_page_lock_assert(m, MA_OWNED); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); /* * It doesn't cost us anything to pageout OBJT_DEFAULT or OBJT_SWAP * with the new swapper, but we could have serious problems paging * out other object types if there is insufficient memory. * * Unfortunately, checking free memory here is far too late, so the * check has been moved up a procedural level. 
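A worked editorial example of the alignment rule used by the clustering scan below, with vm_pageout_page_count = 16:

	/* Starting at pindex 37, the reverse loop gathers pages 37, 36,
	 * ..., 32 and stops because (37 - (ib - 1)) % 16 == 0 once
	 * ib == 6, i.e. at page 32, a 16-page boundary; the forward loop
	 * then fills 38, 39, ... up to the 16-page cluster limit.
	 * Aligned clusters keep the pager from punching sporadic
	 * out-of-order holes in the underlying file. */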
*/ /* * Can't clean the page if it's busy or held. */ vm_page_assert_unbusied(m); KASSERT(m->hold_count == 0, ("vm_pageout_clean: page %p is held", m)); vm_page_unlock(m); mc[vm_pageout_page_count] = pb = ps = m; pageout_count = 1; page_base = vm_pageout_page_count; ib = 1; is = 1; /* * Scan object for clusterable pages. * * We can cluster ONLY if: ->> the page is NOT * clean, wired, busy, held, or mapped into a * buffer, and one of the following: * 1) The page is inactive, or a seldom used * active page. * -or- * 2) we force the issue. * * During heavy mmap/modification loads the pageout * daemon can really fragment the underlying file * due to flushing pages out of order and not trying * to align the clusters (which leaves sporadic out-of-order * holes). To solve this problem we do the reverse scan * first and attempt to align our cluster, then do a * forward scan if room remains. */ more: while (ib && pageout_count < vm_pageout_page_count) { vm_page_t p; if (ib > pindex) { ib = 0; break; } if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) { ib = 0; break; } vm_page_test_dirty(p); if (p->dirty == 0) { ib = 0; break; } vm_page_lock(p); if (p->queue != PQ_INACTIVE || p->hold_count != 0) { /* may be undergoing I/O */ vm_page_unlock(p); ib = 0; break; } vm_page_unlock(p); mc[--page_base] = pb = p; ++pageout_count; ++ib; /* * alignment boundary, stop here and switch directions. Do * not clear ib. */ if ((pindex - (ib - 1)) % vm_pageout_page_count == 0) break; } while (pageout_count < vm_pageout_page_count && pindex + is < object->size) { vm_page_t p; if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p)) break; vm_page_test_dirty(p); if (p->dirty == 0) break; vm_page_lock(p); if (p->queue != PQ_INACTIVE || p->hold_count != 0) { /* may be undergoing I/O */ vm_page_unlock(p); break; } vm_page_unlock(p); mc[page_base + pageout_count] = ps = p; ++pageout_count; ++is; } /* * If we exhausted our forward scan, continue with the reverse scan * when possible, even past a page boundary. This catches boundary * conditions. */ if (ib && pageout_count < vm_pageout_page_count) goto more; /* * we allow reads during pageouts... */ return (vm_pageout_flush(&mc[page_base], pageout_count, 0, 0, NULL, NULL)); } /* * vm_pageout_flush() - launder the given pages * * The given pages are laundered. Note that we set up for the start of * I/O (i.e. busy the page), mark it read-only, and bump the object * reference count all in here rather than in the parent. If we want * the parent to do more sophisticated things we may have to change * the ordering. * * Returned runlen is the count of pages between mreq and the first * page after mreq with status VM_PAGER_AGAIN. * *eio is set to TRUE if the pager returned VM_PAGER_ERROR or VM_PAGER_FAIL * for any page in the runlen set. */ int vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen, boolean_t *eio) { vm_object_t object = mc[0]->object; int pageout_status[count]; int numpagedout = 0; int i, runlen; VM_OBJECT_ASSERT_WLOCKED(object); /* * Initiate I/O. Bump the vm_page_t->busy counter and * mark the pages read-only. * * We do not have to fix up the clean/dirty bits here... we can * allow the pager to do it after the I/O completes. * * NOTE! mc[i]->dirty may be partial or fragmented due to an * edge case with file fragments.
*/ for (i = 0; i < count; i++) { KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush: partially invalid page %p index %d/%d", mc[i], i, count)); vm_page_sbusy(mc[i]); pmap_remove_write(mc[i]); } vm_object_pip_add(object, count); vm_pager_put_pages(object, mc, count, flags, pageout_status); runlen = count - mreq; if (eio != NULL) *eio = FALSE; for (i = 0; i < count; i++) { vm_page_t mt = mc[i]; KASSERT(pageout_status[i] == VM_PAGER_PEND || !pmap_page_is_write_mapped(mt), ("vm_pageout_flush: page %p is not write protected", mt)); switch (pageout_status[i]) { case VM_PAGER_OK: case VM_PAGER_PEND: numpagedout++; break; case VM_PAGER_BAD: /* * Page outside of range of object. Right now we * essentially lose the changes by pretending it * worked. */ vm_page_undirty(mt); break; case VM_PAGER_ERROR: case VM_PAGER_FAIL: /* * If the page couldn't be paged out, then reactivate the * page so it doesn't clog the inactive list. (We * will try paging it out again later). */ vm_page_lock(mt); vm_page_activate(mt); vm_page_unlock(mt); if (eio != NULL && i >= mreq && i - mreq < runlen) *eio = TRUE; break; case VM_PAGER_AGAIN: if (i >= mreq && i - mreq < runlen) runlen = i - mreq; break; } /* * If the operation is still going, leave the page busy to * block all other accesses. Also, leave the paging in * progress indicator set so that we don't attempt an object * collapse. */ if (pageout_status[i] != VM_PAGER_PEND) { vm_object_pip_wakeup(object); vm_page_sunbusy(mt); } } if (prunlen != NULL) *prunlen = runlen; return (numpagedout); } static boolean_t vm_pageout_launder(struct vm_pagequeue *pq, int tries, vm_paddr_t low, vm_paddr_t high) { struct mount *mp; struct vnode *vp; vm_object_t object; vm_paddr_t pa; vm_page_t m, m_tmp, next; int lockmode; vm_pagequeue_lock(pq); TAILQ_FOREACH_SAFE(m, &pq->pq_pl, plinks.q, next) { if ((m->flags & PG_MARKER) != 0) continue; pa = VM_PAGE_TO_PHYS(m); if (pa < low || pa + PAGE_SIZE > high) continue; if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) { vm_page_unlock(m); continue; } object = m->object; if ((!VM_OBJECT_TRYWLOCK(object) && (!vm_pageout_fallback_object_lock(m, &next) || m->hold_count != 0)) || vm_page_busied(m)) { vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); continue; } vm_page_test_dirty(m); if (m->dirty == 0 && object->ref_count != 0) pmap_remove_all(m); if (m->dirty != 0) { vm_page_unlock(m); if (tries == 0 || (object->flags & OBJ_DEAD) != 0) { VM_OBJECT_WUNLOCK(object); continue; } if (object->type == OBJT_VNODE) { vm_pagequeue_unlock(pq); vp = object->handle; vm_object_reference_locked(object); VM_OBJECT_WUNLOCK(object); (void)vn_start_write(vp, &mp, V_WAIT); lockmode = MNT_SHARED_WRITES(vp->v_mount) ? LK_SHARED : LK_EXCLUSIVE; vn_lock(vp, lockmode | LK_RETRY); VM_OBJECT_WLOCK(object); vm_object_page_clean(object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(object); VOP_UNLOCK(vp, 0); vm_object_deallocate(object); vn_finished_write(mp); return (TRUE); } else if (object->type == OBJT_SWAP || object->type == OBJT_DEFAULT) { vm_pagequeue_unlock(pq); m_tmp = m; vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC, 0, NULL, NULL); VM_OBJECT_WUNLOCK(object); return (TRUE); } } else { /* * Dequeue here to prevent lock recursion in * vm_page_cache(). */ vm_page_dequeue_locked(m); vm_page_cache(m); vm_page_unlock(m); } VM_OBJECT_WUNLOCK(object); } vm_pagequeue_unlock(pq); return (FALSE); } /* * Increase the number of cached pages.
The specified value, "tries", * determines which categories of pages are cached: * * 0: All clean, inactive pages within the specified physical address range * are cached. Will not sleep. * 1: The vm_lowmem handlers are called. All inactive pages within * the specified physical address range are cached. May sleep. * 2: The vm_lowmem handlers are called. All inactive and active pages * within the specified physical address range are cached. May sleep. */ void vm_pageout_grow_cache(int tries, vm_paddr_t low, vm_paddr_t high) { int actl, actmax, inactl, inactmax, dom, initial_dom; static int start_dom = 0; if (tries > 0) { /* * Decrease registered cache sizes. The vm_lowmem handlers * may acquire locks and/or sleep, so they can only be invoked * when "tries" is greater than zero. */ SDT_PROBE0(vm, , , vm__lowmem_cache); EVENTHANDLER_INVOKE(vm_lowmem, 0); /* * We do this explicitly after the caches have been drained * above. */ uma_reclaim(); } /* * Make the next scan start on the next domain. */ initial_dom = atomic_fetchadd_int(&start_dom, 1) % vm_ndomains; inactl = 0; inactmax = vm_cnt.v_inactive_count; actl = 0; actmax = tries < 2 ? 0 : vm_cnt.v_active_count; dom = initial_dom; /* * Scan domains in round-robin order, first inactive queues, * then active. Since a domain usually owns a large physically * contiguous chunk of memory, it makes sense to completely * exhaust one domain before switching to the next, while growing * the pool of contiguous physical pages. * * Do not even start laundering a domain which cannot contain * the specified address range, as indicated by the segments * constituting the domain. */ again: if (inactl < inactmax) { if (vm_phys_domain_intersects(vm_dom[dom].vmd_segs, low, high) && vm_pageout_launder(&vm_dom[dom].vmd_pagequeues[PQ_INACTIVE], tries, low, high)) { inactl++; goto again; } if (++dom == vm_ndomains) dom = 0; if (dom != initial_dom) goto again; } if (actl < actmax) { if (vm_phys_domain_intersects(vm_dom[dom].vmd_segs, low, high) && vm_pageout_launder(&vm_dom[dom].vmd_pagequeues[PQ_ACTIVE], tries, low, high)) { actl++; goto again; } if (++dom == vm_ndomains) dom = 0; if (dom != initial_dom) goto again; } } #if !defined(NO_SWAPPING) /* * vm_pageout_object_deactivate_pages * * Deactivate enough pages to satisfy the inactive target * requirements. * * The object and map must be locked. */ static void vm_pageout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object, long desired) { vm_object_t backing_object, object; vm_page_t p; int act_delta, remove_mode; VM_OBJECT_ASSERT_LOCKED(first_object); if ((first_object->flags & OBJ_FICTITIOUS) != 0) return; for (object = first_object;; object = backing_object) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; VM_OBJECT_ASSERT_LOCKED(object); if ((object->flags & OBJ_UNMANAGED) != 0 || object->paging_in_progress != 0) goto unlock_return; remove_mode = 0; if (object->shadow_count > 1) remove_mode = 1; /* * Scan the object's entire memory queue.
*/ TAILQ_FOREACH(p, &object->memq, listq) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; if (vm_page_busied(p)) continue; PCPU_INC(cnt.v_pdpages); vm_page_lock(p); if (p->wire_count != 0 || p->hold_count != 0 || !pmap_page_exists_quick(pmap, p)) { vm_page_unlock(p); continue; } act_delta = pmap_ts_referenced(p); if ((p->aflags & PGA_REFERENCED) != 0) { if (act_delta == 0) act_delta = 1; vm_page_aflag_clear(p, PGA_REFERENCED); } if (p->queue != PQ_ACTIVE && act_delta != 0) { vm_page_activate(p); p->act_count += act_delta; } else if (p->queue == PQ_ACTIVE) { if (act_delta == 0) { p->act_count -= min(p->act_count, ACT_DECLINE); if (!remove_mode && p->act_count == 0) { pmap_remove_all(p); vm_page_deactivate(p); } else vm_page_requeue(p); } else { vm_page_activate(p); if (p->act_count < ACT_MAX - ACT_ADVANCE) p->act_count += ACT_ADVANCE; vm_page_requeue(p); } } else if (p->queue == PQ_INACTIVE) pmap_remove_all(p); vm_page_unlock(p); } if ((backing_object = object->backing_object) == NULL) goto unlock_return; VM_OBJECT_RLOCK(backing_object); if (object != first_object) VM_OBJECT_RUNLOCK(object); } unlock_return: if (object != first_object) VM_OBJECT_RUNLOCK(object); } /* * deactivate some number of pages in a map, try to do it fairly, but * that is really hard to do. */ static void vm_pageout_map_deactivate_pages(map, desired) vm_map_t map; long desired; { vm_map_entry_t tmpe; vm_object_t obj, bigobj; int nothingwired; if (!vm_map_trylock(map)) return; bigobj = NULL; nothingwired = TRUE; /* * first, search out the biggest object, and try to free pages from * that. */ tmpe = map->header.next; while (tmpe != &map->header) { if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { obj = tmpe->object.vm_object; if (obj != NULL && VM_OBJECT_TRYRLOCK(obj)) { if (obj->shadow_count <= 1 && (bigobj == NULL || bigobj->resident_page_count < obj->resident_page_count)) { if (bigobj != NULL) VM_OBJECT_RUNLOCK(bigobj); bigobj = obj; } else VM_OBJECT_RUNLOCK(obj); } } if (tmpe->wired_count > 0) nothingwired = FALSE; tmpe = tmpe->next; } if (bigobj != NULL) { vm_pageout_object_deactivate_pages(map->pmap, bigobj, desired); VM_OBJECT_RUNLOCK(bigobj); } /* * Next, hunt around for other pages to deactivate. We actually * do this search sort of wrong -- .text first is not the best idea. */ tmpe = map->header.next; while (tmpe != &map->header) { if (pmap_resident_count(vm_map_pmap(map)) <= desired) break; if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { obj = tmpe->object.vm_object; if (obj != NULL) { VM_OBJECT_RLOCK(obj); vm_pageout_object_deactivate_pages(map->pmap, obj, desired); VM_OBJECT_RUNLOCK(obj); } } tmpe = tmpe->next; } /* * Remove all mappings if a process is swapped out, this will free page * table pages. */ if (desired == 0 && nothingwired) { pmap_remove(vm_map_pmap(map), vm_map_min(map), vm_map_max(map)); } vm_map_unlock(map); } #endif /* !defined(NO_SWAPPING) */ /* * Attempt to acquire all of the necessary locks to launder a page and * then call through the clustering layer to PUTPAGES. Wait a short * time for a vnode lock. * * Requires the page and object lock on entry, releases both before return. * Returns 0 on success and an errno otherwise. */ static int vm_pageout_clean(vm_page_t m) { struct vnode *vp; struct mount *mp; vm_object_t object; vm_pindex_t pindex; int error, lockmode; vm_page_assert_locked(m); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); error = 0; vp = NULL; mp = NULL; /* * The object is already known NOT to be dead. 
It * is possible for the vget() to block the whole * pageout daemon, but the new low-memory handling * code should prevent it. * * We can't wait forever for the vnode lock, we might * deadlock due to a vn_read() getting stuck in * vm_wait while holding this vnode. We skip the * vnode if we can't get it in a reasonable amount * of time. */ if (object->type == OBJT_VNODE) { vm_page_unlock(m); vp = object->handle; if (vp->v_type == VREG && vn_start_write(vp, &mp, V_NOWAIT) != 0) { mp = NULL; error = EDEADLK; goto unlock_all; } KASSERT(mp != NULL, ("vp %p with NULL v_mount", vp)); vm_object_reference_locked(object); pindex = m->pindex; VM_OBJECT_WUNLOCK(object); lockmode = MNT_SHARED_WRITES(vp->v_mount) ? LK_SHARED : LK_EXCLUSIVE; if (vget(vp, lockmode | LK_TIMELOCK, curthread)) { vp = NULL; error = EDEADLK; goto unlock_mp; } VM_OBJECT_WLOCK(object); vm_page_lock(m); /* * While the object and page were unlocked, the page * may have been: * (1) moved to a different queue, * (2) reallocated to a different object, * (3) reallocated to a different offset, or * (4) cleaned. */ if (m->queue != PQ_INACTIVE || m->object != object || m->pindex != pindex || m->dirty == 0) { vm_page_unlock(m); error = ENXIO; goto unlock_all; } /* * The page may have been busied or held while the object * and page locks were released. */ if (vm_page_busied(m) || m->hold_count != 0) { vm_page_unlock(m); error = EBUSY; goto unlock_all; } } /* * If a page is dirty, then it is either being washed * (but not yet cleaned) or it is still in the * laundry. If it is still in the laundry, then we * start the cleaning operation. */ if (vm_pageout_cluster(m) == 0) error = EIO; unlock_all: VM_OBJECT_WUNLOCK(object); unlock_mp: vm_page_lock_assert(m, MA_NOTOWNED); if (mp != NULL) { if (vp != NULL) vput(vp); vm_object_deallocate(object); vn_finished_write(mp); } return (error); } /* * vm_pageout_scan does the dirty work for the pageout daemon. * * pass 0 - Update active LRU/deactivate pages * pass 1 - Move inactive to cache or free * pass 2 - Launder dirty pages */ static void vm_pageout_scan(struct vm_domain *vmd, int pass) { vm_page_t m, next; struct vm_pagequeue *pq; vm_object_t object; long min_scan; int act_delta, addl_page_shortage, deficit, error, maxlaunder, maxscan; - int page_shortage, scan_tick, scanned, vnodes_skipped; + int page_shortage, scan_tick, scanned, starting_page_shortage; + int vnodes_skipped; boolean_t pageout_ok, queues_locked; /* * If we need to reclaim memory ask kernel caches to return * some. We rate limit to avoid thrashing. */ if (vmd == &vm_dom[0] && pass > 0 && (time_uptime - lowmem_uptime) >= lowmem_period) { /* * Decrease registered cache sizes. */ SDT_PROBE0(vm, , , vm__lowmem_scan); EVENTHANDLER_INVOKE(vm_lowmem, 0); /* * We do this explicitly after the caches have been * drained above. */ uma_reclaim(); lowmem_uptime = time_uptime; } /* * The addl_page_shortage is the number of temporarily * stuck pages in the inactive queue. In other words, the * number of pages from the inactive count that should be * discounted in setting the target for the active queue scan. */ addl_page_shortage = 0; /* * Calculate the number of pages we want to either free or move * to the cache. */ if (pass > 0) { deficit = atomic_readandclear_int(&vm_pageout_deficit); page_shortage = vm_paging_target() + deficit; } else page_shortage = deficit = 0; + starting_page_shortage = page_shortage; /* * maxlaunder limits the number of dirty pages we flush per scan. 
* For most systems a smaller value (16 or 32) is more robust under * extreme memory and disk pressure because any unnecessary writes * to disk can result in extreme performance degradation. However, * systems with excessive dirty pages (especially when MAP_NOSYNC is * used) will die horribly with limited laundering. If the pageout * daemon cannot clean enough pages in the first pass, we let it go * all out in succeeding passes. */ if ((maxlaunder = vm_max_launder) <= 1) maxlaunder = 1; if (pass > 1) maxlaunder = 10000; vnodes_skipped = 0; /* * Start scanning the inactive queue for pages we can move to the * cache or free. The scan will stop when the target is reached or * we have scanned the entire inactive queue. Note that m->act_count * is not used to form decisions for the inactive queue, only for the * active queue. */ pq = &vmd->vmd_pagequeues[PQ_INACTIVE]; maxscan = pq->pq_cnt; vm_pagequeue_lock(pq); queues_locked = TRUE; for (m = TAILQ_FIRST(&pq->pq_pl); m != NULL && maxscan-- > 0 && page_shortage > 0; m = next) { vm_pagequeue_assert_locked(pq); KASSERT(queues_locked, ("unlocked queues")); KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m)); PCPU_INC(cnt.v_pdpages); next = TAILQ_NEXT(m, plinks.q); /* * skip marker pages */ if (m->flags & PG_MARKER) continue; KASSERT((m->flags & PG_FICTITIOUS) == 0, ("Fictitious page %p cannot be in inactive queue", m)); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("Unmanaged page %p cannot be in inactive queue", m)); /* * The page or object lock acquisitions fail if the * page was removed from the queue or moved to a * different position within the queue. In either * case, addl_page_shortage should not be incremented. */ if (!vm_pageout_page_lock(m, &next)) goto unlock_page; else if (m->hold_count != 0) { /* * Held pages are essentially stuck in the * queue. So, they ought to be discounted * from the inactive count. See the * calculation of the page_shortage for the * loop over the active queue below. */ addl_page_shortage++; goto unlock_page; } object = m->object; if (!VM_OBJECT_TRYWLOCK(object)) { if (!vm_pageout_fallback_object_lock(m, &next)) goto unlock_object; else if (m->hold_count != 0) { addl_page_shortage++; goto unlock_object; } } if (vm_page_busied(m)) { /* * Don't mess with busy pages. Leave them at * the front of the queue. Most likely, they * are being paged out and will leave the * queue shortly after the scan finishes. So, * they ought to be discounted from the * inactive count. */ addl_page_shortage++; unlock_object: VM_OBJECT_WUNLOCK(object); unlock_page: vm_page_unlock(m); continue; } KASSERT(m->hold_count == 0, ("Held page %p", m)); /* * We unlock the inactive page queue, invalidating the * 'next' pointer. Use our marker to remember our * place. */ TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q); vm_pagequeue_unlock(pq); queues_locked = FALSE; /* * Invalid pages can be easily freed. They cannot be * mapped, vm_page_free() asserts this. */ if (m->valid == 0) goto free_page; /* * If the page has been referenced and the object is not dead, * reactivate or requeue the page depending on whether the * object is mapped.
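 *
 * (Editorial sketch, not part of this change: assuming only the
 * checks visible in this loop, the disposition matrix implemented
 * below is roughly
 *
 *	referenced	object mapped	object dead	disposition
 *	----------	-------------	-----------	------------------
 *	yes		yes		n/a		reactivate, bump
 *							act_count
 *	yes		no		no		requeue at tail
 *	yes		no		yes		reclaim path below
 *	no		n/a		n/a		reclaim path below
 *
 * where "referenced" means PGA_REFERENCED was set or
 * pmap_ts_referenced() reported hardware reference bits, and
 * "object mapped" stands for object->ref_count != 0.)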
*/ if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); act_delta = 1; } else act_delta = 0; if (object->ref_count != 0) { act_delta += pmap_ts_referenced(m); } else { KASSERT(!pmap_page_is_mapped(m), ("vm_pageout_scan: page %p is mapped", m)); } if (act_delta != 0) { if (object->ref_count != 0) { vm_page_activate(m); /* * Increase the activation count if the page * was referenced while in the inactive queue. * This makes it less likely that the page will * be returned prematurely to the inactive * queue. */ m->act_count += act_delta + ACT_ADVANCE; goto drop_page; } else if ((object->flags & OBJ_DEAD) == 0) goto requeue_page; } /* * If the page appears to be clean at the machine-independent * layer, then remove all of its mappings from the pmap in * anticipation of placing it onto the cache queue. If, * however, any of the page's mappings allow write access, * then the page may still be modified until the last of those * mappings are removed. */ if (object->ref_count != 0) { vm_page_test_dirty(m); if (m->dirty == 0) pmap_remove_all(m); } if (m->dirty == 0) { /* * Clean pages can be freed. */ free_page: vm_page_free(m); PCPU_INC(cnt.v_dfree); --page_shortage; } else if ((object->flags & OBJ_DEAD) != 0) { /* * Leave dirty pages from dead objects at the front of * the queue. They are being paged out and freed by * the thread that destroyed the object. They will * leave the queue shortly after the scan finishes, so * they should be discounted from the inactive count. */ addl_page_shortage++; } else if ((m->flags & PG_WINATCFLS) == 0 && pass < 2) { /* * Dirty pages need to be paged out, but flushing * a page is extremely expensive versus freeing * a clean page. Rather than artificially limiting * the number of pages we can flush, we instead give * dirty pages extra priority on the inactive queue * by forcing them to be cycled through the queue * twice before being flushed, after which the * (now clean) page will cycle through once more * before being freed. This significantly extends * the thrash point for a heavily loaded machine. */ m->flags |= PG_WINATCFLS; requeue_page: vm_pagequeue_lock(pq); queues_locked = TRUE; vm_page_requeue_locked(m); } else if (maxlaunder > 0) { /* * We always want to try to flush some dirty pages if * we encounter them, to keep the system stable. * Normally this number is small, but under extreme * pressure where there are insufficient clean pages * on the inactive queue, we may have to go all out. */ if (object->type != OBJT_SWAP && object->type != OBJT_DEFAULT) pageout_ok = TRUE; else if (disable_swap_pageouts) pageout_ok = FALSE; else if (defer_swap_pageouts) pageout_ok = vm_page_count_min(); else pageout_ok = TRUE; if (!pageout_ok) goto requeue_page; error = vm_pageout_clean(m); /* * Decrement page_shortage on success to account for * the (future) cleaned page. Otherwise we could wind * up laundering or cleaning too many pages. */ if (error == 0) { page_shortage--; maxlaunder--; } else if (error == EDEADLK) { pageout_lock_miss++; vnodes_skipped++; } else if (error == EBUSY) { addl_page_shortage++; } vm_page_lock_assert(m, MA_NOTOWNED); goto relock_queues; } drop_page: vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); relock_queues: if (!queues_locked) { vm_pagequeue_lock(pq); queues_locked = TRUE; } next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q); TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q); } vm_pagequeue_unlock(pq); #if !defined(NO_SWAPPING) /* * Wakeup the swapout daemon if we didn't cache or free the targeted * number of pages.
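 *
 * (Editorial sketch, not part of this change: the swap-backed
 * laundering policy applied above can be read as a standalone
 * predicate; the helper name is hypothetical, while the tunables
 * and types are the ones already used in this file:
 *
 *	static boolean_t
 *	swap_pageout_ok(vm_object_t object)
 *	{
 *		if (object->type != OBJT_SWAP &&
 *		    object->type != OBJT_DEFAULT)
 *			return (TRUE);	-- vnode etc.: launder freely
 *		if (disable_swap_pageouts)
 *			return (FALSE);	-- never push pages to swap
 *		if (defer_swap_pageouts)
 *			return (vm_page_count_min()); -- only when desperate
 *		return (TRUE);
 *	}
 *
 * and a page failing the predicate is requeued instead of laundered.)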
*/ if (vm_swap_enabled && page_shortage > 0) vm_req_vmdaemon(VM_SWAP_NORMAL); #endif /* * Wakeup the sync daemon if we skipped a vnode in a writeable object * and we didn't cache or free enough pages. */ if (vnodes_skipped > 0 && page_shortage > vm_cnt.v_free_target - vm_cnt.v_free_min) (void)speedup_syncer(); /* + * If the inactive queue scan fails repeatedly to meet its + * target, kill the largest process. + */ + vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage); + + /* * Compute the number of pages we want to try to move from the * active queue to the inactive queue. */ page_shortage = vm_cnt.v_inactive_target - vm_cnt.v_inactive_count + vm_paging_target() + deficit + addl_page_shortage; pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; vm_pagequeue_lock(pq); maxscan = pq->pq_cnt; /* * If we're just idle polling, attempt to visit every * active page within 'update_period' seconds. */ scan_tick = ticks; if (vm_pageout_update_period != 0) { min_scan = pq->pq_cnt; min_scan *= scan_tick - vmd->vmd_last_active_scan; min_scan /= hz * vm_pageout_update_period; } else min_scan = 0; if (min_scan > 0 || (page_shortage > 0 && maxscan > 0)) vmd->vmd_last_active_scan = scan_tick; /* * Scan the active queue for pages that can be deactivated. Update * the per-page activity counter and use it to identify deactivation * candidates. */ for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned < min_scan || (page_shortage > 0 && scanned < maxscan)); m = next, scanned++) { KASSERT(m->queue == PQ_ACTIVE, ("vm_pageout_scan: page %p isn't active", m)); next = TAILQ_NEXT(m, plinks.q); if ((m->flags & PG_MARKER) != 0) continue; KASSERT((m->flags & PG_FICTITIOUS) == 0, ("Fictitious page %p cannot be in active queue", m)); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("Unmanaged page %p cannot be in active queue", m)); if (!vm_pageout_page_lock(m, &next)) { vm_page_unlock(m); continue; } /* * The count for pagedaemon pages is done after checking the * page for eligibility... */ PCPU_INC(cnt.v_pdpages); /* * Check to see "how much" the page has been used. */ if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); act_delta = 1; } else act_delta = 0; /* * Unlocked object ref count check. Two races are possible. * 1) The ref was transitioning to zero and we saw non-zero, * the pmap bits will be checked unnecessarily. * 2) The ref was transitioning to one and we saw zero. * The page lock prevents a new reference to this page so * we need not check the reference bits. */ if (m->object->ref_count != 0) act_delta += pmap_ts_referenced(m); /* * Advance or decay the act_count based on recent usage. */ if (act_delta != 0) { m->act_count += ACT_ADVANCE + act_delta; if (m->act_count > ACT_MAX) m->act_count = ACT_MAX; } else m->act_count -= min(m->act_count, ACT_DECLINE); /* * Move this page to the tail of the active or inactive * queue depending on usage. */ if (m->act_count == 0) { /* Dequeue to avoid later lock recursion. */ vm_page_dequeue_locked(m); vm_page_deactivate(m); page_shortage--; } else vm_page_requeue_locked(m); vm_page_unlock(m); } vm_pagequeue_unlock(pq); #if !defined(NO_SWAPPING) /* * Idle process swapout -- run once per second. */ if (vm_swap_idle_enabled) { static long lsec; if (time_second != lsec) { vm_req_vmdaemon(VM_SWAP_IDLE); lsec = time_second; } } #endif - - /* - * If we are critically low on one of RAM or swap and low on - * the other, kill the largest process.
However, we avoid - doing this on the first pass in order to give ourselves a - chance to flush out dirty vnode-backed pages and to allow - active pages to be moved to the inactive queue and reclaimed. - */ - vm_pageout_mightbe_oom(vmd, pass); } static int vm_pageout_oom_vote; /* * The pagedaemon threads randomly select one to perform the * OOM. Trying to kill processes before all pagedaemons * have failed to reach the free page target is premature. */ static void -vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass) +vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, + int starting_page_shortage) { int old_vote; - if (pass <= 1 || !((swap_pager_avail < 64 && vm_page_count_min()) || - (swap_pager_full && vm_paging_target() > 0))) { + if (starting_page_shortage <= 0 || starting_page_shortage != + page_shortage) + vmd->vmd_oom_seq = 0; + else + vmd->vmd_oom_seq++; + if (vmd->vmd_oom_seq < vm_pageout_oom_seq) { if (vmd->vmd_oom) { vmd->vmd_oom = FALSE; atomic_subtract_int(&vm_pageout_oom_vote, 1); } return; } + /* + * Do not follow the call sequence until OOM condition is + * cleared. + */ + vmd->vmd_oom_seq = 0; + if (vmd->vmd_oom) return; vmd->vmd_oom = TRUE; old_vote = atomic_fetchadd_int(&vm_pageout_oom_vote, 1); if (old_vote != vm_ndomains - 1) return; /* * The current pagedaemon thread is the last in the quorum to * start OOM. Initiate the selection and signaling of the * victim. */ vm_pageout_oom(VM_OOM_MEM); /* * After one round of OOM terror, recall our vote. On the * next pass, the current pagedaemon would vote again if the low * memory condition is still there, due to vmd_oom being * false. */ vmd->vmd_oom = FALSE; atomic_subtract_int(&vm_pageout_oom_vote, 1); } +/* + * The OOM killer is the page daemon's action of last resort when + * memory allocation requests have been stalled for a prolonged period + * of time because it cannot reclaim memory. This function computes + * the approximate number of physical pages that could be reclaimed if + * the specified address space is destroyed. + * + * Private, anonymous memory owned by the address space is the + * principal resource that we expect to recover after an OOM kill. + * Since the physical pages mapped by the address space's COW entries + * are typically shared pages, they are unlikely to be released and so + * they are not counted. + * + * To get to the point where the page daemon runs the OOM killer, its + * efforts to write-back vnode-backed pages may have stalled. This + * could be caused by a memory allocation deadlock in the write path + * that might be resolved by an OOM kill. Therefore, physical pages + * belonging to vnode-backed objects are counted, because they might + * be freed without being written out first if the address space holds + * the last reference to an unlinked vnode. + * + * Similarly, physical pages belonging to OBJT_PHYS objects are + * counted because the address space might hold the last reference to + * the object.
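 *
 * (Editorial usage sketch, not part of this change: the sx_assert()
 * in the function body requires the map lock to be held, so a caller
 * must bracket the call the way vm_pageout_oom() below does:
 *
 *	if (vm_map_trylock_read(&vm->vm_map)) {
 *		size = vmspace_swap_count(vm);
 *		if (shortage == VM_OOM_MEM)
 *			size += vm_pageout_oom_pagecount(vm);
 *		vm_map_unlock_read(&vm->vm_map);
 *	}
 * )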
+ */ +static long +vm_pageout_oom_pagecount(struct vmspace *vmspace) +{ + vm_map_t map; + vm_map_entry_t entry; + vm_object_t obj; + long res; + + map = &vmspace->vm_map; + KASSERT(!map->system_map, ("system map")); + sx_assert(&map->lock, SA_LOCKED); + res = 0; + for (entry = map->header.next; entry != &map->header; + entry = entry->next) { + if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) + continue; + obj = entry->object.vm_object; + if (obj == NULL) + continue; + if ((entry->eflags & MAP_ENTRY_NEEDS_COPY) != 0 && + obj->ref_count != 1) + continue; + switch (obj->type) { + case OBJT_DEFAULT: + case OBJT_SWAP: + case OBJT_PHYS: + case OBJT_VNODE: + res += obj->resident_page_count; + break; + } + } + return (res); +} + void vm_pageout_oom(int shortage) { struct proc *p, *bigproc; vm_offset_t size, bigsize; struct thread *td; struct vmspace *vm; /* * We keep the process bigproc locked once we find it to keep anyone * from messing with it; however, there is a possibility of * deadlock if process B is bigproc and one of its child processes * attempts to propagate a signal to B while we are waiting for A's * lock while walking this list. To avoid this, we don't block on * the process lock but just skip a process if it is already locked. */ bigproc = NULL; bigsize = 0; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { int breakout; PROC_LOCK(p); /* * If this is a system, protected or killed process, skip it. */ if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC | P_PROTECTED | P_SYSTEM | P_WEXIT)) != 0 || p->p_pid == 1 || P_KILLED(p) || (p->p_pid < 48 && swap_pager_avail != 0)) { PROC_UNLOCK(p); continue; } /* * If the process is in a non-running type state, * don't touch it. Check all the threads individually. */ breakout = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (!TD_ON_RUNQ(td) && !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td) && - !TD_IS_SUSPENDED(td)) { + !TD_IS_SUSPENDED(td) && + !TD_IS_SWAPPED(td)) { thread_unlock(td); breakout = 1; break; } thread_unlock(td); } if (breakout) { PROC_UNLOCK(p); continue; } /* * get the process size */ vm = vmspace_acquire_ref(p); if (vm == NULL) { PROC_UNLOCK(p); continue; } _PHOLD(p); if (!vm_map_trylock_read(&vm->vm_map)) { _PRELE(p); PROC_UNLOCK(p); vmspace_free(vm); continue; } PROC_UNLOCK(p); size = vmspace_swap_count(vm); - vm_map_unlock_read(&vm->vm_map); if (shortage == VM_OOM_MEM) - size += vmspace_resident_count(vm); + size += vm_pageout_oom_pagecount(vm); + vm_map_unlock_read(&vm->vm_map); vmspace_free(vm); + /* - * if the this process is bigger than the biggest one + * If this process is bigger than the biggest one, * remember it. */ if (size > bigsize) { if (bigproc != NULL) PRELE(bigproc); bigproc = p; bigsize = size; } else { PRELE(p); } } sx_sunlock(&allproc_lock); if (bigproc != NULL) { if (vm_panic_on_oom != 0) panic("out of swap space"); PROC_LOCK(bigproc); killproc(bigproc, "out of swap space"); sched_nice(bigproc, PRIO_MIN); _PRELE(bigproc); PROC_UNLOCK(bigproc); wakeup(&vm_cnt.v_free_count); } } static void vm_pageout_worker(void *arg) { struct vm_domain *domain; int domidx; domidx = (uintptr_t)arg; domain = &vm_dom[domidx]; /* * XXXKIB It could be useful to bind pageout daemon threads to * the cores belonging to the domain, from which vm_page_array * is allocated.
*/ KASSERT(domain->vmd_segs != 0, ("domain without segments")); domain->vmd_last_active_scan = ticks; vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE); vm_pageout_init_marker(&domain->vmd_inacthead, PQ_INACTIVE); TAILQ_INSERT_HEAD(&domain->vmd_pagequeues[PQ_INACTIVE].pq_pl, &domain->vmd_inacthead, plinks.q); /* * The pageout daemon worker is never done, so loop forever. */ while (TRUE) { /* * If we have enough free memory, wakeup waiters. Do * not clear vm_pages_needed until we reach our target, * otherwise we may be woken up over and over again and * waste a lot of cpu. */ mtx_lock(&vm_page_queue_free_mtx); if (vm_pages_needed && !vm_page_count_min()) { if (!vm_paging_needed()) vm_pages_needed = 0; wakeup(&vm_cnt.v_free_count); } if (vm_pages_needed) { /* * We're still not done. Either vm_pages_needed was * set by another thread during the previous scan * (typically, this happens during a level 0 scan) or * vm_pages_needed was already set and the scan failed * to free enough pages. If we haven't yet performed * a level >= 2 scan (unlimited dirty cleaning), then * upgrade the level and scan again now. Otherwise, * sleep a bit and try again later. While sleeping, * vm_pages_needed can be cleared. */ if (domain->vmd_pass > 1) msleep(&vm_pages_needed, &vm_page_queue_free_mtx, PVM, "psleep", hz / 2); } else { /* * Good enough, sleep until required to refresh * stats. */ msleep(&vm_pages_needed, &vm_page_queue_free_mtx, PVM, "psleep", hz); } if (vm_pages_needed) { vm_cnt.v_pdwakeups++; domain->vmd_pass++; } else domain->vmd_pass = 0; mtx_unlock(&vm_page_queue_free_mtx); vm_pageout_scan(domain, domain->vmd_pass); } } /* * vm_pageout_init initialises basic pageout daemon settings. */ static void vm_pageout_init(void) { /* * Initialize some paging parameters. */ vm_cnt.v_interrupt_free_min = 2; if (vm_cnt.v_page_count < 2000) vm_pageout_page_count = 8; /* * v_free_reserved needs to include enough for the largest * swap pager structures plus enough for any pv_entry structs * when paging. */ if (vm_cnt.v_page_count > 1024) vm_cnt.v_free_min = 4 + (vm_cnt.v_page_count - 1024) / 200; else vm_cnt.v_free_min = 4; vm_cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE + vm_cnt.v_interrupt_free_min; vm_cnt.v_free_reserved = vm_pageout_page_count + vm_cnt.v_pageout_free_min + (vm_cnt.v_page_count / 768); vm_cnt.v_free_severe = vm_cnt.v_free_min / 2; vm_cnt.v_free_target = 4 * vm_cnt.v_free_min + vm_cnt.v_free_reserved; vm_cnt.v_free_min += vm_cnt.v_free_reserved; vm_cnt.v_free_severe += vm_cnt.v_free_reserved; vm_cnt.v_inactive_target = (3 * vm_cnt.v_free_target) / 2; if (vm_cnt.v_inactive_target > vm_cnt.v_free_count / 3) vm_cnt.v_inactive_target = vm_cnt.v_free_count / 3; /* * Set the default wakeup threshold to be 10% above the minimum * page limit. This keeps the steady state out of shortfall. */ vm_pageout_wakeup_thresh = (vm_cnt.v_free_min / 10) * 11; /* * Set interval in seconds for active scan. We want to visit each * page at least once every ten minutes. This is to prevent worst * case paging behaviors with stale active LRU. */ if (vm_pageout_update_period == 0) vm_pageout_update_period = 600; /* XXX does not really belong here */ if (vm_page_max_wired == 0) vm_page_max_wired = vm_cnt.v_free_count / 3; } /* * vm_pageout is the high level pageout daemon. 
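 *
 * (Editorial worked example, not part of this change: for
 * vm_cnt.v_page_count = 1048576, i.e. 4GB of 4KB pages, and assuming
 * build-time defaults of MAXBSIZE = 64KB and vm_pageout_page_count =
 * 32, vm_pageout_init() above computes approximately
 *
 *	v_free_min          = 4 + (1048576 - 1024) / 200 = 5241
 *	v_pageout_free_min  = (2 * 65536) / 4096 + 2     = 34
 *	v_free_reserved     = 32 + 34 + 1048576 / 768    = 1431
 *	v_free_target       = 4 * 5241 + 1431            = 22395
 *	v_free_min, final   = 5241 + 1431                = 6672
 *	wakeup_thresh       = (6672 / 10) * 11           = 7337
 *
 * so the daemon wakes when free memory drops near 29MB and reclaims
 * until roughly 87MB is free; exact figures vary with the constants.)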
*/ static void vm_pageout(void) { int error; #if MAXMEMDOM > 1 int i; #endif swap_pager_swap_init(); #if MAXMEMDOM > 1 for (i = 1; i < vm_ndomains; i++) { error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i, curproc, NULL, 0, 0, "dom%d", i); if (error != 0) { panic("starting pageout for domain %d, error %d\n", i, error); } } #endif error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL, 0, 0, "uma"); if (error != 0) panic("starting uma_reclaim helper, error %d\n", error); vm_pageout_worker((void *)(uintptr_t)0); } /* * Unless the free page queue lock is held by the caller, this function * should be regarded as advisory. Specifically, the caller should * not msleep() on &vm_cnt.v_free_count following this function unless * the free page queue lock is held until the msleep() is performed. */ void pagedaemon_wakeup(void) { if (!vm_pages_needed && curthread->td_proc != pageproc) { vm_pages_needed = 1; wakeup(&vm_pages_needed); } } #if !defined(NO_SWAPPING) static void vm_req_vmdaemon(int req) { static int lastrun = 0; mtx_lock(&vm_daemon_mtx); vm_pageout_req_swapout |= req; if ((ticks > (lastrun + hz)) || (ticks < lastrun)) { wakeup(&vm_daemon_needed); lastrun = ticks; } mtx_unlock(&vm_daemon_mtx); } static void vm_daemon(void) { struct rlimit rsslim; struct proc *p; struct thread *td; struct vmspace *vm; int breakout, swapout_flags, tryagain, attempts; #ifdef RACCT uint64_t rsize, ravailable; #endif while (TRUE) { mtx_lock(&vm_daemon_mtx); msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep", #ifdef RACCT racct_enable ? hz : 0 #else 0 #endif ); swapout_flags = vm_pageout_req_swapout; vm_pageout_req_swapout = 0; mtx_unlock(&vm_daemon_mtx); if (swapout_flags) swapout_procs(swapout_flags); /* * Scan the processes: deactivate the pages of any process * that exceeds its rlimits or is swapped out. */ tryagain = 0; attempts = 0; again: attempts++; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { vm_pindex_t limit, size; /* * if this is a system process or if we have already * looked at this process, skip it. */ PROC_LOCK(p); if (p->p_state != PRS_NORMAL || p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) { PROC_UNLOCK(p); continue; } /* * if the process is in a non-running type state, * don't touch it. */ breakout = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (!TD_ON_RUNQ(td) && !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td) && !TD_IS_SUSPENDED(td)) { thread_unlock(td); breakout = 1; break; } thread_unlock(td); } if (breakout) { PROC_UNLOCK(p); continue; } /* * get a limit */ lim_rlimit_proc(p, RLIMIT_RSS, &rsslim); limit = OFF_TO_IDX( qmin(rsslim.rlim_cur, rsslim.rlim_max)); /* * Let processes that are swapped out really be * swapped out: set the limit to nothing, which will * force a swap-out. */ if ((p->p_flag & P_INMEM) == 0) limit = 0; /* XXX */ vm = vmspace_acquire_ref(p); PROC_UNLOCK(p); if (vm == NULL) continue; size = vmspace_resident_count(vm); if (size >= limit) { vm_pageout_map_deactivate_pages( &vm->vm_map, limit); } #ifdef RACCT if (racct_enable) { rsize = IDX_TO_OFF(size); PROC_LOCK(p); racct_set(p, RACCT_RSS, rsize); ravailable = racct_get_available(p, RACCT_RSS); PROC_UNLOCK(p); if (rsize > ravailable) { /* * Don't be overly aggressive; this * might be an innocent process, * and the limit could've been exceeded * by some memory hog. Don't try * to deactivate more than 1/4th * of process' resident set size.
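 *
 * (Editorial worked example, not part of this change: with an RSS of
 * rsize = 400MB against ravailable = 100MB, the clamp below raises
 * the per-pass target to rsize - rsize / 4 = 300MB for the first
 * eight attempts, so at most a quarter of the resident set is
 * deactivated per pass; if the RSS still exceeds the RACCT limit
 * afterwards, tryagain schedules another pass, up to 10 attempts.)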
*/ if (attempts <= 8) { if (ravailable < rsize - (rsize / 4)) { ravailable = rsize - (rsize / 4); } } vm_pageout_map_deactivate_pages( &vm->vm_map, OFF_TO_IDX(ravailable)); /* Update RSS usage after paging out. */ size = vmspace_resident_count(vm); rsize = IDX_TO_OFF(size); PROC_LOCK(p); racct_set(p, RACCT_RSS, rsize); PROC_UNLOCK(p); if (rsize > ravailable) tryagain = 1; } } #endif vmspace_free(vm); } sx_sunlock(&allproc_lock); if (tryagain != 0 && attempts <= 10) goto again; } } #endif /* !defined(NO_SWAPPING) */ Index: projects/powernv =================================================================== --- projects/powernv (revision 290990) +++ projects/powernv (revision 290991) Property changes on: projects/powernv ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys:r290829-290990