diff --git a/sys/dev/beri/virtio/network/if_vtbe.c b/sys/dev/beri/virtio/network/if_vtbe.c index 03853435a9de..5975a4a1c7f3 100644 --- a/sys/dev/beri/virtio/network/if_vtbe.c +++ b/sys/dev/beri/virtio/network/if_vtbe.c @@ -1,651 +1,651 @@ /*- * Copyright (c) 2014 Ruslan Bukin * All rights reserved. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * BERI Virtio Networking Frontend */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pio_if.h" #define DPRINTF(fmt, args...) printf(fmt, ##args) #define READ4(_sc, _reg) \ bus_read_4((_sc)->res[0], _reg) #define WRITE4(_sc, _reg, _val) \ bus_write_4((_sc)->res[0], _reg, _val) #define VTBE_LOCK(sc) mtx_lock(&(sc)->mtx) #define VTBE_UNLOCK(sc) mtx_unlock(&(sc)->mtx) #define VTBE_ASSERT_LOCKED(sc) mtx_assert(&(sc)->mtx, MA_OWNED); #define VTBE_ASSERT_UNLOCKED(sc) mtx_assert(&(sc)->mtx, MA_NOTOWNED); /* * Driver data and defines. 
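 *
 * Illustrative sketch only (not part of this change): the READ4()/WRITE4()
 * and VTBE_LOCK()/VTBE_UNLOCK() helpers above are meant to be used
 * together, e.g. a hypothetical register poke would look like:
 *
 *      VTBE_LOCK(sc);
 *      pfn = be32toh(READ4(sc, VIRTIO_MMIO_QUEUE_PFN));
 *      WRITE4(sc, VIRTIO_MMIO_INTERRUPT_STATUS,
 *          htobe32(VIRTIO_MMIO_INT_VRING));
 *      VTBE_UNLOCK(sc);
 *
 * The register contents are kept big-endian, hence the htobe32()/be32toh()
 * conversions seen throughout the driver.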
*/ #define DESC_COUNT 256 struct vtbe_softc { struct resource *res[2]; bus_space_tag_t bst; bus_space_handle_t bsh; device_t dev; struct ifnet *ifp; int if_flags; struct mtx mtx; boolean_t is_attached; int beri_mem_offset; device_t pio_send; device_t pio_recv; int opened; struct vqueue_info vs_queues[2]; int vs_curq; int hdrsize; }; static struct resource_spec vtbe_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, { -1, 0 } }; static void vtbe_txfinish_locked(struct vtbe_softc *sc); static void vtbe_rxfinish_locked(struct vtbe_softc *sc); static void vtbe_stop_locked(struct vtbe_softc *sc); static int pio_enable_irq(struct vtbe_softc *sc, int enable); static void vtbe_txstart_locked(struct vtbe_softc *sc) { struct iovec iov[DESC_COUNT]; struct virtio_net_hdr *vnh; struct vqueue_info *vq; struct iovec *tiov; struct ifnet *ifp; struct mbuf *m; struct uio uio; int enqueued; int iolen; int error; int reg; int len; int n; VTBE_ASSERT_LOCKED(sc); /* RX queue */ vq = &sc->vs_queues[0]; if (!vq_has_descs(vq)) { return; } ifp = sc->ifp; if (ifp->if_drv_flags & IFF_DRV_OACTIVE) { return; } enqueued = 0; if (!vq_ring_ready(vq)) return; vq->vq_save_used = be16toh(vq->vq_used->idx); for (;;) { if (!vq_has_descs(vq)) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) { break; } n = vq_getchain(sc->beri_mem_offset, vq, iov, DESC_COUNT, NULL); KASSERT(n == 2, ("Unexpected amount of descriptors (%d)", n)); tiov = getcopy(iov, n); vnh = iov[0].iov_base; memset(vnh, 0, sc->hdrsize); len = iov[1].iov_len; uio.uio_resid = len; uio.uio_iov = &tiov[1]; uio.uio_segflg = UIO_SYSSPACE; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_rw = UIO_READ; error = m_mbuftouio(&uio, m, 0); if (error) panic("m_mbuftouio failed\n"); iolen = (len - uio.uio_resid + sc->hdrsize); free(tiov, M_DEVBUF); vq_relchain(vq, iov, n, iolen); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); BPF_MTAP(ifp, m); m_freem(m); ++enqueued; } if (enqueued != 0) { reg = htobe32(VIRTIO_MMIO_INT_VRING); WRITE4(sc, VIRTIO_MMIO_INTERRUPT_STATUS, reg); PIO_SET(sc->pio_send, Q_INTR, 1); } } static void vtbe_txstart(struct ifnet *ifp) { struct vtbe_softc *sc = ifp->if_softc; VTBE_LOCK(sc); vtbe_txstart_locked(sc); VTBE_UNLOCK(sc); } static void vtbe_stop_locked(struct vtbe_softc *sc) { struct ifnet *ifp; VTBE_ASSERT_LOCKED(sc); ifp = sc->ifp; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } static void vtbe_init_locked(struct vtbe_softc *sc) { struct ifnet *ifp = sc->ifp; VTBE_ASSERT_LOCKED(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; ifp->if_drv_flags |= IFF_DRV_RUNNING; } static void vtbe_init(void *if_softc) { struct vtbe_softc *sc = if_softc; VTBE_LOCK(sc); vtbe_init_locked(sc); VTBE_UNLOCK(sc); } static int vtbe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifmediareq *ifmr; struct vtbe_softc *sc; struct ifreq *ifr; int mask, error; sc = ifp->if_softc; ifr = (struct ifreq *)data; error = 0; switch (cmd) { case SIOCSIFFLAGS: VTBE_LOCK(sc); if (ifp->if_flags & IFF_UP) { pio_enable_irq(sc, 1); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { vtbe_init_locked(sc); } } else { pio_enable_irq(sc, 0); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { vtbe_stop_locked(sc); } } sc->if_flags = ifp->if_flags; VTBE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: ifmr = (struct ifmediareq *)data; ifmr->ifm_count = 1; ifmr->ifm_status = (IFM_AVALID | IFM_ACTIVE); ifmr->ifm_active = (IFM_ETHER | IFM_10G_T | IFM_FDX); ifmr->ifm_current = 
ifmr->ifm_active; break; case SIOCSIFCAP: mask = ifp->if_capenable ^ ifr->ifr_reqcap; if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; } break; case SIOCSIFADDR: pio_enable_irq(sc, 1); default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void vtbe_txfinish_locked(struct vtbe_softc *sc) { struct ifnet *ifp; VTBE_ASSERT_LOCKED(sc); ifp = sc->ifp; } static int vq_init(struct vtbe_softc *sc) { struct vqueue_info *vq; uint8_t *base; int size; int reg; int pfn; vq = &sc->vs_queues[sc->vs_curq]; vq->vq_qsize = DESC_COUNT; reg = READ4(sc, VIRTIO_MMIO_QUEUE_PFN); pfn = be32toh(reg); vq->vq_pfn = pfn; size = vring_size(vq->vq_qsize, VRING_ALIGN); base = paddr_map(sc->beri_mem_offset, (pfn << PAGE_SHIFT), size); /* First pages are descriptors */ vq->vq_desc = (struct vring_desc *)base; base += vq->vq_qsize * sizeof(struct vring_desc); /* Then avail ring */ vq->vq_avail = (struct vring_avail *)base; base += (2 + vq->vq_qsize + 1) * sizeof(uint16_t); /* Then it's rounded up to the next page */ base = (uint8_t *)roundup2((uintptr_t)base, VRING_ALIGN); /* And the last pages are the used ring */ vq->vq_used = (struct vring_used *)base; /* Mark queue as allocated, and start at 0 when we use it. */ vq->vq_flags = VQ_ALLOC; vq->vq_last_avail = 0; return (0); } static void vtbe_proc_rx(struct vtbe_softc *sc, struct vqueue_info *vq) { struct iovec iov[DESC_COUNT]; struct iovec *tiov; struct ifnet *ifp; struct uio uio; struct mbuf *m; int iolen; int i; int n; ifp = sc->ifp; n = vq_getchain(sc->beri_mem_offset, vq, iov, DESC_COUNT, NULL); KASSERT(n >= 1 && n <= DESC_COUNT, ("wrong n %d", n)); tiov = getcopy(iov, n); iolen = 0; for (i = 1; i < n; i++) { iolen += iov[i].iov_len; } uio.uio_resid = iolen; uio.uio_iov = &tiov[1]; uio.uio_segflg = UIO_SYSSPACE; uio.uio_iovcnt = (n - 1); uio.uio_rw = UIO_WRITE; if ((m = m_uiotombuf(&uio, M_NOWAIT, 0, ETHER_ALIGN, M_PKTHDR)) == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto done; } m->m_pkthdr.rcvif = ifp; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); CURVNET_SET(ifp->if_vnet); VTBE_UNLOCK(sc); (*ifp->if_input)(ifp, m); VTBE_LOCK(sc); CURVNET_RESTORE(); done: free(tiov, M_DEVBUF); vq_relchain(vq, iov, n, iolen + sc->hdrsize); } static void vtbe_rxfinish_locked(struct vtbe_softc *sc) { struct vqueue_info *vq; int reg; /* TX queue */ vq = &sc->vs_queues[1]; if (!vq_ring_ready(vq)) return; /* Process new descriptors */ vq->vq_save_used = be16toh(vq->vq_used->idx); while (vq_has_descs(vq)) { vtbe_proc_rx(sc, vq); } /* Interrupt the other side */ reg = htobe32(VIRTIO_MMIO_INT_VRING); WRITE4(sc, VIRTIO_MMIO_INTERRUPT_STATUS, reg); PIO_SET(sc->pio_send, Q_INTR, 1); } static void vtbe_intr(void *arg) { struct vtbe_softc *sc; int pending; uint32_t reg; sc = arg; VTBE_LOCK(sc); reg = PIO_READ(sc->pio_recv); /* Ack */ PIO_SET(sc->pio_recv, reg, 0); pending = htobe32(reg); if (pending & Q_SEL) { reg = READ4(sc, VIRTIO_MMIO_QUEUE_SEL); sc->vs_curq = be32toh(reg); } if (pending & Q_PFN) { vq_init(sc); } if (pending & Q_NOTIFY) { /* beri rx / arm tx notify */ vtbe_txfinish_locked(sc); } if (pending & Q_NOTIFY1) { vtbe_rxfinish_locked(sc); } VTBE_UNLOCK(sc); } static int vtbe_get_hwaddr(struct vtbe_softc *sc, uint8_t *hwaddr) { int rnd; /* * Generate MAC address, use 'bsd' + random 24 low-order bits. 
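 *
 * Worked example (sketch, not from this change): with rnd == 0x123456 the
 * address becomes 62:73:64:12:34:56, since 'b' == 0x62, 's' == 0x73 and
 * 'd' == 0x64, and the low 24 bits are taken from rnd:
 *
 *      hwaddr[3] = rnd >> 16;    0x12
 *      hwaddr[4] = rnd >> 8;     0x34
 *      hwaddr[5] = rnd >> 0;     0x56
 *
 * 'b' (0x62) has the multicast bit (0x01) clear and the locally
 * administered bit (0x02) set, so the result is a valid unicast,
 * locally administered address.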
*/ rnd = arc4random() & 0x00ffffff; hwaddr[0] = 'b'; hwaddr[1] = 's'; hwaddr[2] = 'd'; hwaddr[3] = rnd >> 16; hwaddr[4] = rnd >> 8; hwaddr[5] = rnd >> 0; return (0); } static int pio_enable_irq(struct vtbe_softc *sc, int enable) { /* * IRQ lines should be disabled while reprogram FPGA core. */ if (enable) { if (sc->opened == 0) { sc->opened = 1; PIO_SETUP_IRQ(sc->pio_recv, vtbe_intr, sc); } } else { if (sc->opened == 1) { PIO_TEARDOWN_IRQ(sc->pio_recv); sc->opened = 0; } } return (0); } static int vtbe_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "sri-cambridge,beri-vtnet")) return (ENXIO); device_set_desc(dev, "Virtio BERI Ethernet Controller"); return (BUS_PROBE_DEFAULT); } static int vtbe_attach(device_t dev) { uint8_t macaddr[ETHER_ADDR_LEN]; struct vtbe_softc *sc; struct ifnet *ifp; int reg; sc = device_get_softc(dev); sc->dev = dev; sc->hdrsize = sizeof(struct virtio_net_hdr); if (bus_alloc_resources(dev, vtbe_spec, sc->res)) { device_printf(dev, "could not allocate resources\n"); return (ENXIO); } /* Memory interface */ sc->bst = rman_get_bustag(sc->res[0]); sc->bsh = rman_get_bushandle(sc->res[0]); mtx_init(&sc->mtx, device_get_nameunit(sc->dev), MTX_NETWORK_LOCK, MTX_DEF); if (setup_offset(dev, &sc->beri_mem_offset) != 0) return (ENXIO); if (setup_pio(dev, "pio-send", &sc->pio_send) != 0) return (ENXIO); if (setup_pio(dev, "pio-recv", &sc->pio_recv) != 0) return (ENXIO); /* Setup MMIO */ /* Specify that we provide network device */ reg = htobe32(VIRTIO_ID_NETWORK); WRITE4(sc, VIRTIO_MMIO_DEVICE_ID, reg); /* The number of desc we support */ reg = htobe32(DESC_COUNT); WRITE4(sc, VIRTIO_MMIO_QUEUE_NUM_MAX, reg); /* Our features */ reg = htobe32(VIRTIO_NET_F_MAC | VIRTIO_F_NOTIFY_ON_EMPTY); WRITE4(sc, VIRTIO_MMIO_HOST_FEATURES, reg); /* Get MAC */ if (vtbe_get_hwaddr(sc, macaddr)) { device_printf(sc->dev, "can't get mac\n"); return (ENXIO); } /* Set up the ethernet interface. */ sc->ifp = ifp = if_alloc(IFT_ETHER); ifp->if_baudrate = IF_Gbps(10); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = (IFF_BROADCAST | IFF_SIMPLEX | - IFF_MULTICAST | IFF_PROMISC | IFF_NEEDSEPOCH); + IFF_MULTICAST | IFF_PROMISC); ifp->if_capabilities = IFCAP_VLAN_MTU; ifp->if_capenable = ifp->if_capabilities; ifp->if_start = vtbe_txstart; ifp->if_ioctl = vtbe_ioctl; ifp->if_init = vtbe_init; IFQ_SET_MAXLEN(&ifp->if_snd, DESC_COUNT - 1); ifp->if_snd.ifq_drv_maxlen = DESC_COUNT - 1; IFQ_SET_READY(&ifp->if_snd); ifp->if_hdrlen = sizeof(struct ether_vlan_header); /* All ready to run, attach the ethernet interface. */ ether_ifattach(ifp, macaddr); sc->is_attached = true; return (0); } static device_method_t vtbe_methods[] = { DEVMETHOD(device_probe, vtbe_probe), DEVMETHOD(device_attach, vtbe_attach), { 0, 0 } }; static driver_t vtbe_driver = { "vtbe", vtbe_methods, sizeof(struct vtbe_softc), }; static devclass_t vtbe_devclass; DRIVER_MODULE(vtbe, simplebus, vtbe_driver, vtbe_devclass, 0, 0); MODULE_DEPEND(vtbe, ether, 1, 1, 1); diff --git a/sys/dev/dpaa/if_dtsec.c b/sys/dev/dpaa/if_dtsec.c index 704aa22eda54..2c6291b07e34 100644 --- a/sys/dev/dpaa/if_dtsec.c +++ b/sys/dev/dpaa/if_dtsec.c @@ -1,856 +1,856 @@ /*- * Copyright (c) 2011-2012 Semihalf. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "miibus_if.h" #include #include #include #include #include "fman.h" #include "if_dtsec.h" #include "if_dtsec_im.h" #include "if_dtsec_rm.h" #define DTSEC_MIN_FRAME_SIZE 64 #define DTSEC_MAX_FRAME_SIZE 9600 #define DTSEC_REG_MAXFRM 0x110 /** * @group dTSEC private defines. * @{ */ /** * dTSEC FMan MAC exceptions info struct. */ struct dtsec_fm_mac_ex_str { const int num; const char *str; }; /** @} */ /** * @group FMan MAC routines. * @{ */ #define DTSEC_MAC_EXCEPTIONS_END (-1) /** * FMan MAC exceptions. 
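 *
 * The table below is a plain linear-search map terminated by the
 * DTSEC_MAC_EXCEPTIONS_END sentinel; dtsec_fm_mac_ex_to_str() walks it and
 * returns an empty string for unknown exception numbers, so callers can
 * print unconditionally, as the MDIO/MAC exception callbacks below do:
 *
 *      device_printf(sc->sc_dev, "MAC exception %i: %s.\n",
 *          exception, dtsec_fm_mac_ex_to_str(exception));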
*/ static const struct dtsec_fm_mac_ex_str dtsec_fm_mac_exceptions[] = { { e_FM_MAC_EX_10G_MDIO_SCAN_EVENTMDIO, "MDIO scan event" }, { e_FM_MAC_EX_10G_MDIO_CMD_CMPL, "MDIO command completion" }, { e_FM_MAC_EX_10G_REM_FAULT, "Remote fault" }, { e_FM_MAC_EX_10G_LOC_FAULT, "Local fault" }, { e_FM_MAC_EX_10G_1TX_ECC_ER, "Transmit frame ECC error" }, { e_FM_MAC_EX_10G_TX_FIFO_UNFL, "Transmit FIFO underflow" }, { e_FM_MAC_EX_10G_TX_FIFO_OVFL, "Receive FIFO overflow" }, { e_FM_MAC_EX_10G_TX_ER, "Transmit frame error" }, { e_FM_MAC_EX_10G_RX_FIFO_OVFL, "Receive FIFO overflow" }, { e_FM_MAC_EX_10G_RX_ECC_ER, "Receive frame ECC error" }, { e_FM_MAC_EX_10G_RX_JAB_FRM, "Receive jabber frame" }, { e_FM_MAC_EX_10G_RX_OVRSZ_FRM, "Receive oversized frame" }, { e_FM_MAC_EX_10G_RX_RUNT_FRM, "Receive runt frame" }, { e_FM_MAC_EX_10G_RX_FRAG_FRM, "Receive fragment frame" }, { e_FM_MAC_EX_10G_RX_LEN_ER, "Receive payload length error" }, { e_FM_MAC_EX_10G_RX_CRC_ER, "Receive CRC error" }, { e_FM_MAC_EX_10G_RX_ALIGN_ER, "Receive alignment error" }, { e_FM_MAC_EX_1G_BAB_RX, "Babbling receive error" }, { e_FM_MAC_EX_1G_RX_CTL, "Receive control (pause frame) interrupt" }, { e_FM_MAC_EX_1G_GRATEFUL_TX_STP_COMPLET, "Graceful transmit stop " "complete" }, { e_FM_MAC_EX_1G_BAB_TX, "Babbling transmit error" }, { e_FM_MAC_EX_1G_TX_CTL, "Transmit control (pause frame) interrupt" }, { e_FM_MAC_EX_1G_TX_ERR, "Transmit error" }, { e_FM_MAC_EX_1G_LATE_COL, "Late collision" }, { e_FM_MAC_EX_1G_COL_RET_LMT, "Collision retry limit" }, { e_FM_MAC_EX_1G_TX_FIFO_UNDRN, "Transmit FIFO underrun" }, { e_FM_MAC_EX_1G_MAG_PCKT, "Magic Packet detected when dTSEC is in " "Magic Packet detection mode" }, { e_FM_MAC_EX_1G_MII_MNG_RD_COMPLET, "MII management read completion" }, { e_FM_MAC_EX_1G_MII_MNG_WR_COMPLET, "MII management write completion" }, { e_FM_MAC_EX_1G_GRATEFUL_RX_STP_COMPLET, "Graceful receive stop " "complete" }, { e_FM_MAC_EX_1G_TX_DATA_ERR, "Internal data error on transmit" }, { e_FM_MAC_EX_1G_RX_DATA_ERR, "Internal data error on receive" }, { e_FM_MAC_EX_1G_1588_TS_RX_ERR, "Time-Stamp Receive Error" }, { e_FM_MAC_EX_1G_RX_MIB_CNT_OVFL, "MIB counter overflow" }, { DTSEC_MAC_EXCEPTIONS_END, "" } }; static const char * dtsec_fm_mac_ex_to_str(e_FmMacExceptions exception) { int i; for (i = 0; dtsec_fm_mac_exceptions[i].num != exception && dtsec_fm_mac_exceptions[i].num != DTSEC_MAC_EXCEPTIONS_END; ++i) ; if (dtsec_fm_mac_exceptions[i].num == DTSEC_MAC_EXCEPTIONS_END) return (""); return (dtsec_fm_mac_exceptions[i].str); } static void dtsec_fm_mac_mdio_event_callback(t_Handle h_App, e_FmMacExceptions exception) { struct dtsec_softc *sc; sc = h_App; device_printf(sc->sc_dev, "MDIO event %i: %s.\n", exception, dtsec_fm_mac_ex_to_str(exception)); } static void dtsec_fm_mac_exception_callback(t_Handle app, e_FmMacExceptions exception) { struct dtsec_softc *sc; sc = app; device_printf(sc->sc_dev, "MAC exception %i: %s.\n", exception, dtsec_fm_mac_ex_to_str(exception)); } static void dtsec_fm_mac_free(struct dtsec_softc *sc) { if (sc->sc_mach == NULL) return; FM_MAC_Disable(sc->sc_mach, e_COMM_MODE_RX_AND_TX); FM_MAC_Free(sc->sc_mach); sc->sc_mach = NULL; } static int dtsec_fm_mac_init(struct dtsec_softc *sc, uint8_t *mac) { t_FmMacParams params; t_Error error; memset(¶ms, 0, sizeof(params)); memcpy(¶ms.addr, mac, sizeof(params.addr)); params.baseAddr = rman_get_bushandle(sc->sc_mem); params.enetMode = sc->sc_mac_enet_mode; params.macId = sc->sc_eth_id; params.mdioIrq = sc->sc_mac_mdio_irq; params.f_Event = 
dtsec_fm_mac_mdio_event_callback; params.f_Exception = dtsec_fm_mac_exception_callback; params.h_App = sc; params.h_Fm = sc->sc_fmh; sc->sc_mach = FM_MAC_Config(¶ms); if (sc->sc_mach == NULL) { device_printf(sc->sc_dev, "couldn't configure FM_MAC module.\n" ); return (ENXIO); } error = FM_MAC_ConfigResetOnInit(sc->sc_mach, TRUE); if (error != E_OK) { device_printf(sc->sc_dev, "couldn't enable reset on init " "feature.\n"); dtsec_fm_mac_free(sc); return (ENXIO); } /* Do not inform about pause frames */ error = FM_MAC_ConfigException(sc->sc_mach, e_FM_MAC_EX_1G_RX_CTL, FALSE); if (error != E_OK) { device_printf(sc->sc_dev, "couldn't disable pause frames " "exception.\n"); dtsec_fm_mac_free(sc); return (ENXIO); } error = FM_MAC_Init(sc->sc_mach); if (error != E_OK) { device_printf(sc->sc_dev, "couldn't initialize FM_MAC module." "\n"); dtsec_fm_mac_free(sc); return (ENXIO); } return (0); } /** @} */ /** * @group FMan PORT routines. * @{ */ static const char * dtsec_fm_port_ex_to_str(e_FmPortExceptions exception) { switch (exception) { case e_FM_PORT_EXCEPTION_IM_BUSY: return ("IM: RX busy"); default: return (""); } } void dtsec_fm_port_rx_exception_callback(t_Handle app, e_FmPortExceptions exception) { struct dtsec_softc *sc; sc = app; device_printf(sc->sc_dev, "RX exception: %i: %s.\n", exception, dtsec_fm_port_ex_to_str(exception)); } void dtsec_fm_port_tx_exception_callback(t_Handle app, e_FmPortExceptions exception) { struct dtsec_softc *sc; sc = app; device_printf(sc->sc_dev, "TX exception: %i: %s.\n", exception, dtsec_fm_port_ex_to_str(exception)); } e_FmPortType dtsec_fm_port_rx_type(enum eth_dev_type type) { switch (type) { case ETH_DTSEC: return (e_FM_PORT_TYPE_RX); case ETH_10GSEC: return (e_FM_PORT_TYPE_RX_10G); default: return (e_FM_PORT_TYPE_DUMMY); } } e_FmPortType dtsec_fm_port_tx_type(enum eth_dev_type type) { switch (type) { case ETH_DTSEC: return (e_FM_PORT_TYPE_TX); case ETH_10GSEC: return (e_FM_PORT_TYPE_TX_10G); default: return (e_FM_PORT_TYPE_DUMMY); } } static void dtsec_fm_port_free_both(struct dtsec_softc *sc) { if (sc->sc_rxph) { FM_PORT_Free(sc->sc_rxph); sc->sc_rxph = NULL; } if (sc->sc_txph) { FM_PORT_Free(sc->sc_txph); sc->sc_txph = NULL; } } /** @} */ /** * @group IFnet routines. 
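 *
 * dtsec_set_mtu() below converts the requested MTU into a maximum frame
 * size before programming DTSEC_REG_MAXFRM.  Worked example: a 1500 byte
 * MTU becomes 1500 + 14 (Ethernet header) + 4 (VLAN tag) + 4 (CRC) = 1522
 * bytes, well inside the accepted 64..9600 byte range.  Sketch mirroring
 * the SIOCSIFMTU handler (values for illustration only):
 *
 *      DTSEC_LOCK(sc);
 *      if (dtsec_set_mtu(sc, ETHERMTU))
 *              ifp->if_mtu = ETHERMTU;
 *      DTSEC_UNLOCK(sc);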
* @{ */ static int dtsec_set_mtu(struct dtsec_softc *sc, unsigned int mtu) { mtu += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN; DTSEC_LOCK_ASSERT(sc); if (mtu >= DTSEC_MIN_FRAME_SIZE && mtu <= DTSEC_MAX_FRAME_SIZE) { bus_write_4(sc->sc_mem, DTSEC_REG_MAXFRM, mtu); return (mtu); } return (0); } static int dtsec_if_enable_locked(struct dtsec_softc *sc) { int error; DTSEC_LOCK_ASSERT(sc); error = FM_MAC_Enable(sc->sc_mach, e_COMM_MODE_RX_AND_TX); if (error != E_OK) return (EIO); error = FM_PORT_Enable(sc->sc_rxph); if (error != E_OK) return (EIO); error = FM_PORT_Enable(sc->sc_txph); if (error != E_OK) return (EIO); sc->sc_ifnet->if_drv_flags |= IFF_DRV_RUNNING; /* Refresh link state */ dtsec_miibus_statchg(sc->sc_dev); return (0); } static int dtsec_if_disable_locked(struct dtsec_softc *sc) { int error; DTSEC_LOCK_ASSERT(sc); error = FM_MAC_Disable(sc->sc_mach, e_COMM_MODE_RX_AND_TX); if (error != E_OK) return (EIO); error = FM_PORT_Disable(sc->sc_rxph); if (error != E_OK) return (EIO); error = FM_PORT_Disable(sc->sc_txph); if (error != E_OK) return (EIO); sc->sc_ifnet->if_drv_flags &= ~IFF_DRV_RUNNING; return (0); } static int dtsec_if_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct dtsec_softc *sc; struct ifreq *ifr; int error; sc = ifp->if_softc; ifr = (struct ifreq *)data; error = 0; /* Basic functionality to achieve media status reports */ switch (command) { case SIOCSIFMTU: DTSEC_LOCK(sc); if (dtsec_set_mtu(sc, ifr->ifr_mtu)) ifp->if_mtu = ifr->ifr_mtu; else error = EINVAL; DTSEC_UNLOCK(sc); break; case SIOCSIFFLAGS: DTSEC_LOCK(sc); if (sc->sc_ifnet->if_flags & IFF_UP) error = dtsec_if_enable_locked(sc); else error = dtsec_if_disable_locked(sc); DTSEC_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media, command); break; default: error = ether_ioctl(ifp, command, data); } return (error); } static void dtsec_if_tick(void *arg) { struct dtsec_softc *sc; sc = arg; /* TODO */ DTSEC_LOCK(sc); mii_tick(sc->sc_mii); callout_reset(&sc->sc_tick_callout, hz, dtsec_if_tick, sc); DTSEC_UNLOCK(sc); } static void dtsec_if_deinit_locked(struct dtsec_softc *sc) { DTSEC_LOCK_ASSERT(sc); DTSEC_UNLOCK(sc); callout_drain(&sc->sc_tick_callout); DTSEC_LOCK(sc); } static void dtsec_if_init_locked(struct dtsec_softc *sc) { int error; DTSEC_LOCK_ASSERT(sc); /* Set MAC address */ error = FM_MAC_ModifyMacAddr(sc->sc_mach, (t_EnetAddr *)IF_LLADDR(sc->sc_ifnet)); if (error != E_OK) { device_printf(sc->sc_dev, "couldn't set MAC address.\n"); goto err; } /* Start MII polling */ if (sc->sc_mii) callout_reset(&sc->sc_tick_callout, hz, dtsec_if_tick, sc); if (sc->sc_ifnet->if_flags & IFF_UP) { error = dtsec_if_enable_locked(sc); if (error != 0) goto err; } else { error = dtsec_if_disable_locked(sc); if (error != 0) goto err; } return; err: dtsec_if_deinit_locked(sc); device_printf(sc->sc_dev, "initialization error.\n"); return; } static void dtsec_if_init(void *data) { struct dtsec_softc *sc; sc = data; DTSEC_LOCK(sc); dtsec_if_init_locked(sc); DTSEC_UNLOCK(sc); } static void dtsec_if_start(struct ifnet *ifp) { struct dtsec_softc *sc; sc = ifp->if_softc; DTSEC_LOCK(sc); sc->sc_start_locked(sc); DTSEC_UNLOCK(sc); } static void dtsec_if_watchdog(struct ifnet *ifp) { /* TODO */ } /** @} */ /** * @group IFmedia routines. 
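 *
 * The two handlers below implement the usual ifmedia/mii split:
 * dtsec_ifmedia_upd() pushes a media change to the PHY via mii_mediachg(),
 * while dtsec_ifmedia_sts() refreshes and reports link state via
 * mii_pollstat().  Both take the softc lock, e.g. (sketch):
 *
 *      DTSEC_LOCK(sc);
 *      mii_pollstat(sc->sc_mii);
 *      ifmr->ifm_active = sc->sc_mii->mii_media_active;
 *      ifmr->ifm_status = sc->sc_mii->mii_media_status;
 *      DTSEC_UNLOCK(sc);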
* @{ */ static int dtsec_ifmedia_upd(struct ifnet *ifp) { struct dtsec_softc *sc = ifp->if_softc; DTSEC_LOCK(sc); mii_mediachg(sc->sc_mii); DTSEC_UNLOCK(sc); return (0); } static void dtsec_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct dtsec_softc *sc = ifp->if_softc; DTSEC_LOCK(sc); mii_pollstat(sc->sc_mii); ifmr->ifm_active = sc->sc_mii->mii_media_active; ifmr->ifm_status = sc->sc_mii->mii_media_status; DTSEC_UNLOCK(sc); } /** @} */ /** * @group dTSEC bus interface. * @{ */ static void dtsec_configure_mode(struct dtsec_softc *sc) { char tunable[64]; snprintf(tunable, sizeof(tunable), "%s.independent_mode", device_get_nameunit(sc->sc_dev)); sc->sc_mode = DTSEC_MODE_REGULAR; TUNABLE_INT_FETCH(tunable, &sc->sc_mode); if (sc->sc_mode == DTSEC_MODE_REGULAR) { sc->sc_port_rx_init = dtsec_rm_fm_port_rx_init; sc->sc_port_tx_init = dtsec_rm_fm_port_tx_init; sc->sc_start_locked = dtsec_rm_if_start_locked; } else { sc->sc_port_rx_init = dtsec_im_fm_port_rx_init; sc->sc_port_tx_init = dtsec_im_fm_port_tx_init; sc->sc_start_locked = dtsec_im_if_start_locked; } device_printf(sc->sc_dev, "Configured for %s mode.\n", (sc->sc_mode == DTSEC_MODE_REGULAR) ? "regular" : "independent"); } int dtsec_attach(device_t dev) { struct dtsec_softc *sc; device_t parent; int error; struct ifnet *ifp; sc = device_get_softc(dev); parent = device_get_parent(dev); sc->sc_dev = dev; sc->sc_mac_mdio_irq = NO_IRQ; /* Check if MallocSmart allocator is ready */ if (XX_MallocSmartInit() != E_OK) return (ENXIO); /* Init locks */ mtx_init(&sc->sc_lock, device_get_nameunit(dev), "DTSEC Global Lock", MTX_DEF); mtx_init(&sc->sc_mii_lock, device_get_nameunit(dev), "DTSEC MII Lock", MTX_DEF); /* Init callouts */ callout_init(&sc->sc_tick_callout, CALLOUT_MPSAFE); /* Read configuraton */ if ((error = fman_get_handle(parent, &sc->sc_fmh)) != 0) return (error); if ((error = fman_get_muram_handle(parent, &sc->sc_muramh)) != 0) return (error); if ((error = fman_get_bushandle(parent, &sc->sc_fm_base)) != 0) return (error); /* Configure working mode */ dtsec_configure_mode(sc); /* If we are working in regular mode configure BMAN and QMAN */ if (sc->sc_mode == DTSEC_MODE_REGULAR) { /* Create RX buffer pool */ error = dtsec_rm_pool_rx_init(sc); if (error != 0) return (EIO); /* Create RX frame queue range */ error = dtsec_rm_fqr_rx_init(sc); if (error != 0) return (EIO); /* Create frame info pool */ error = dtsec_rm_fi_pool_init(sc); if (error != 0) return (EIO); /* Create TX frame queue range */ error = dtsec_rm_fqr_tx_init(sc); if (error != 0) return (EIO); } /* Init FMan MAC module. 
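 *
 * From this point on every failed initialization step unwinds through
 * dtsec_detach() instead of returning directly, so partially constructed
 * state (MAC handle, ports, ifnet) is always released.  The pattern used
 * below is:
 *
 *      error = dtsec_fm_mac_init(sc, sc->sc_mac_addr);
 *      if (error != 0) {
 *              dtsec_detach(dev);
 *              return (ENXIO);
 *      }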
*/ error = dtsec_fm_mac_init(sc, sc->sc_mac_addr); if (error != 0) { dtsec_detach(dev); return (ENXIO); } /* Init FMan TX port */ error = sc->sc_port_tx_init(sc, device_get_unit(sc->sc_dev)); if (error != 0) { dtsec_detach(dev); return (ENXIO); } /* Init FMan RX port */ error = sc->sc_port_rx_init(sc, device_get_unit(sc->sc_dev)); if (error != 0) { dtsec_detach(dev); return (ENXIO); } /* Create network interface for upper layers */ ifp = sc->sc_ifnet = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(sc->sc_dev, "if_alloc() failed.\n"); dtsec_detach(dev); return (ENOMEM); } ifp->if_softc = sc; ifp->if_mtu = ETHERMTU; /* TODO: Configure */ - ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_NEEDSEPOCH; + ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST; ifp->if_init = dtsec_if_init; ifp->if_start = dtsec_if_start; ifp->if_ioctl = dtsec_if_ioctl; ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; if (sc->sc_phy_addr >= 0) if_initname(ifp, device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev)); else if_initname(ifp, "dtsec_phy", device_get_unit(sc->sc_dev)); /* TODO */ #if 0 IFQ_SET_MAXLEN(&ifp->if_snd, TSEC_TX_NUM_DESC - 1); ifp->if_snd.ifq_drv_maxlen = TSEC_TX_NUM_DESC - 1; IFQ_SET_READY(&ifp->if_snd); #endif ifp->if_capabilities = IFCAP_JUMBO_MTU; /* TODO: HWCSUM */ ifp->if_capenable = ifp->if_capabilities; /* Attach PHY(s) */ error = mii_attach(sc->sc_dev, &sc->sc_mii_dev, ifp, dtsec_ifmedia_upd, dtsec_ifmedia_sts, BMSR_DEFCAPMASK, sc->sc_phy_addr, MII_OFFSET_ANY, 0); if (error) { device_printf(sc->sc_dev, "attaching PHYs failed: %d\n", error); dtsec_detach(sc->sc_dev); return (error); } sc->sc_mii = device_get_softc(sc->sc_mii_dev); /* Attach to stack */ ether_ifattach(ifp, sc->sc_mac_addr); return (0); } int dtsec_detach(device_t dev) { struct dtsec_softc *sc; if_t ifp; sc = device_get_softc(dev); ifp = sc->sc_ifnet; if (device_is_attached(dev)) { ether_ifdetach(ifp); /* Shutdown interface */ DTSEC_LOCK(sc); dtsec_if_deinit_locked(sc); DTSEC_UNLOCK(sc); } if (sc->sc_ifnet) { if_free(sc->sc_ifnet); sc->sc_ifnet = NULL; } if (sc->sc_mode == DTSEC_MODE_REGULAR) { /* Free RX/TX FQRs */ dtsec_rm_fqr_rx_free(sc); dtsec_rm_fqr_tx_free(sc); /* Free frame info pool */ dtsec_rm_fi_pool_free(sc); /* Free RX buffer pool */ dtsec_rm_pool_rx_free(sc); } dtsec_fm_mac_free(sc); dtsec_fm_port_free_both(sc); /* Destroy lock */ mtx_destroy(&sc->sc_lock); return (0); } int dtsec_suspend(device_t dev) { return (0); } int dtsec_resume(device_t dev) { return (0); } int dtsec_shutdown(device_t dev) { return (0); } /** @} */ /** * @group MII bus interface. 
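 *
 * The MII accessors below simply forward to the attached MDIO device via
 * MIIBUS_READREG()/MIIBUS_WRITEREG(), while dtsec_miibus_statchg()
 * translates the negotiated media word into the Frame Manager's
 * e_EnetSpeed for FM_MAC_AdjustLink(): IFM_1000_T/IFM_1000_SX map to
 * e_ENET_SPEED_1000, IFM_100_TX to e_ENET_SPEED_100 and everything else
 * to e_ENET_SPEED_10; duplex comes from the IFM_FDX bit:
 *
 *      duplex = ((sc->sc_mii->mii_media_active & IFM_GMASK) == IFM_FDX);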
* @{ */ int dtsec_miibus_readreg(device_t dev, int phy, int reg) { struct dtsec_softc *sc; sc = device_get_softc(dev); return (MIIBUS_READREG(sc->sc_mdio, phy, reg)); } int dtsec_miibus_writereg(device_t dev, int phy, int reg, int value) { struct dtsec_softc *sc; sc = device_get_softc(dev); return (MIIBUS_WRITEREG(sc->sc_mdio, phy, reg, value)); } void dtsec_miibus_statchg(device_t dev) { struct dtsec_softc *sc; e_EnetSpeed speed; bool duplex; int error; sc = device_get_softc(dev); DTSEC_LOCK_ASSERT(sc); duplex = ((sc->sc_mii->mii_media_active & IFM_GMASK) == IFM_FDX); switch (IFM_SUBTYPE(sc->sc_mii->mii_media_active)) { case IFM_1000_T: case IFM_1000_SX: speed = e_ENET_SPEED_1000; break; case IFM_100_TX: speed = e_ENET_SPEED_100; break; case IFM_10_T: speed = e_ENET_SPEED_10; break; default: speed = e_ENET_SPEED_10; } error = FM_MAC_AdjustLink(sc->sc_mach, speed, duplex); if (error != E_OK) device_printf(sc->sc_dev, "error while adjusting MAC speed.\n"); } /** @} */ diff --git a/sys/dev/hyperv/netvsc/if_hn.c b/sys/dev/hyperv/netvsc/if_hn.c index bc96775ad553..e9d1b9439671 100644 --- a/sys/dev/hyperv/netvsc/if_hn.c +++ b/sys/dev/hyperv/netvsc/if_hn.c @@ -1,7575 +1,7574 @@ /*- * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_hn.h" #include "opt_inet6.h" #include "opt_inet.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmbus_if.h" #define HN_IFSTART_SUPPORT #define HN_RING_CNT_DEF_MAX 8 #define HN_VFMAP_SIZE_DEF 8 #define HN_XPNT_VF_ATTWAIT_MIN 2 /* seconds */ /* YYY should get it from the underlying channel */ #define HN_TX_DESC_CNT 512 #define HN_RNDIS_PKT_LEN \ (sizeof(struct rndis_packet_msg) + \ HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE #define HN_TX_DATA_BOUNDARY PAGE_SIZE #define HN_TX_DATA_MAXSIZE IP_MAXPACKET #define HN_TX_DATA_SEGSIZE PAGE_SIZE /* -1 for RNDIS packet message */ #define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) #define HN_DIRECT_TX_SIZE_DEF 128 #define HN_EARLY_TXEOF_THRESH 8 #define HN_PKTBUF_LEN_DEF (16 * 1024) #define HN_LROENT_CNT_DEF 128 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) /* YYY 2*MTU is a bit rough, but should be good enough. */ #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) #define HN_LRO_ACKCNT_DEF 1 #define HN_LOCK_INIT(sc) \ sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) #define HN_LOCK(sc) \ do { \ while (sx_try_xlock(&(sc)->hn_lock) == 0) \ DELAY(1000); \ } while (0) #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) #define HN_CSUM_IP_HWASSIST(sc) \ ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) #define HN_CSUM_IP6_HWASSIST(sc) \ ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) #define HN_PKTSIZE_MIN(align) \ roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \ HN_RNDIS_PKT_LEN, (align)) #define HN_PKTSIZE(m, align) \ roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align)) #ifdef RSS #define HN_RING_IDX2CPU(sc, idx) rss_getcpu((idx) % rss_getnumbuckets()) #else #define HN_RING_IDX2CPU(sc, idx) (((sc)->hn_cpu + (idx)) % mp_ncpus) #endif struct hn_txdesc { #ifndef HN_USE_TXDESC_BUFRING SLIST_ENTRY(hn_txdesc) link; #endif STAILQ_ENTRY(hn_txdesc) agg_link; /* Aggregated txdescs, in sending order. */ STAILQ_HEAD(, hn_txdesc) agg_list; /* The oldest packet, if transmission aggregation happens. 
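 *
 * Aggregation sketch (a reading of the fields below, not new code in this
 * change): the head txdesc of an aggregated batch owns agg_list, later
 * txdescs are linked in through agg_link in sending order and marked with
 * HN_TXD_FLAG_ONAGG, and only the head is handed to the channel.  A
 * hypothetical walk over a batch, with tmp_txd and agg_cnt being local
 * illustration-only variables:
 *
 *      struct hn_txdesc *tmp_txd;
 *      int agg_cnt = 0;
 *
 *      STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link)
 *              agg_cnt++;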
*/ struct mbuf *m; struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ struct hn_nvs_sendctx send_ctx; uint32_t chim_index; int chim_size; bus_dmamap_t data_dmap; bus_addr_t rndis_pkt_paddr; struct rndis_packet_msg *rndis_pkt; bus_dmamap_t rndis_pkt_dmap; }; #define HN_TXD_FLAG_ONLIST 0x0001 #define HN_TXD_FLAG_DMAMAP 0x0002 #define HN_TXD_FLAG_ONAGG 0x0004 struct hn_rxinfo { uint32_t vlan_info; uint32_t csum_info; uint32_t hash_info; uint32_t hash_value; }; struct hn_rxvf_setarg { struct hn_rx_ring *rxr; struct ifnet *vf_ifp; }; #define HN_RXINFO_VLAN 0x0001 #define HN_RXINFO_CSUM 0x0002 #define HN_RXINFO_HASHINF 0x0004 #define HN_RXINFO_HASHVAL 0x0008 #define HN_RXINFO_ALL \ (HN_RXINFO_VLAN | \ HN_RXINFO_CSUM | \ HN_RXINFO_HASHINF | \ HN_RXINFO_HASHVAL) #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff #define HN_NDIS_RXCSUM_INFO_INVALID 0 #define HN_NDIS_HASH_INFO_INVALID 0 static int hn_probe(device_t); static int hn_attach(device_t); static int hn_detach(device_t); static int hn_shutdown(device_t); static void hn_chan_callback(struct vmbus_channel *, void *); static void hn_init(void *); static int hn_ioctl(struct ifnet *, u_long, caddr_t); #ifdef HN_IFSTART_SUPPORT static void hn_start(struct ifnet *); #endif static int hn_transmit(struct ifnet *, struct mbuf *); static void hn_xmit_qflush(struct ifnet *); static int hn_ifmedia_upd(struct ifnet *); static void hn_ifmedia_sts(struct ifnet *, struct ifmediareq *); static void hn_ifnet_event(void *, struct ifnet *, int); static void hn_ifaddr_event(void *, struct ifnet *); static void hn_ifnet_attevent(void *, struct ifnet *); static void hn_ifnet_detevent(void *, struct ifnet *); static void hn_ifnet_lnkevent(void *, struct ifnet *, int); static bool hn_ismyvf(const struct hn_softc *, const struct ifnet *); static void hn_rxvf_change(struct hn_softc *, struct ifnet *, bool); static void hn_rxvf_set(struct hn_softc *, struct ifnet *); static void hn_rxvf_set_task(void *, int); static void hn_xpnt_vf_input(struct ifnet *, struct mbuf *); static int hn_xpnt_vf_iocsetflags(struct hn_softc *); static int hn_xpnt_vf_iocsetcaps(struct hn_softc *, struct ifreq *); static void hn_xpnt_vf_saveifflags(struct hn_softc *); static bool hn_xpnt_vf_isready(struct hn_softc *); static void hn_xpnt_vf_setready(struct hn_softc *); static void hn_xpnt_vf_init_taskfunc(void *, int); static void hn_xpnt_vf_init(struct hn_softc *); static void hn_xpnt_vf_setenable(struct hn_softc *); static void hn_xpnt_vf_setdisable(struct hn_softc *, bool); static void hn_vf_rss_fixup(struct hn_softc *, bool); static void hn_vf_rss_restore(struct hn_softc *); static int hn_rndis_rxinfo(const void *, int, struct hn_rxinfo *); static void hn_rndis_rx_data(struct hn_rx_ring *, const void *, int); static void hn_rndis_rx_status(struct hn_softc *, const void *, int); static void hn_rndis_init_fixat(struct hn_softc *, int); static void hn_nvs_handle_notify(struct hn_softc *, const struct vmbus_chanpkt_hdr *); static void hn_nvs_handle_comp(struct hn_softc *, struct vmbus_channel *, const struct vmbus_chanpkt_hdr *); static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, struct vmbus_channel *, const struct vmbus_chanpkt_hdr *); static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, struct vmbus_channel *, uint64_t); #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); #if 
__FreeBSD_version < 1100095 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); #else static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); #ifndef RSS static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS); static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS); static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS); static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS); static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS); static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS); static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS); static void hn_stop(struct hn_softc *, bool); static void hn_init_locked(struct hn_softc *); static int hn_chan_attach(struct hn_softc *, struct vmbus_channel *); static void hn_chan_detach(struct hn_softc *, struct vmbus_channel *); static int hn_attach_subchans(struct hn_softc *); static void hn_detach_allchans(struct hn_softc *); static void hn_chan_rollup(struct hn_rx_ring *, struct hn_tx_ring *); static void hn_set_ring_inuse(struct hn_softc *, int); static int hn_synth_attach(struct hn_softc *, int); static void hn_synth_detach(struct hn_softc *); static int hn_synth_alloc_subchans(struct hn_softc *, int *); static bool hn_synth_attachable(const struct hn_softc *); static void hn_suspend(struct hn_softc *); static void hn_suspend_data(struct hn_softc *); static void hn_suspend_mgmt(struct hn_softc *); static void hn_resume(struct hn_softc *); static void hn_resume_data(struct hn_softc *); static void hn_resume_mgmt(struct hn_softc *); static void hn_suspend_mgmt_taskfunc(void *, int); static void hn_chan_drain(struct hn_softc *, struct vmbus_channel *); static void hn_disable_rx(struct hn_softc *); static void hn_drain_rxtx(struct hn_softc *, int); static void hn_polling(struct hn_softc *, u_int); static void hn_chan_polling(struct vmbus_channel *, u_int); static void hn_mtu_change_fixup(struct hn_softc *); static void hn_update_link_status(struct hn_softc *); static void hn_change_network(struct hn_softc *); static void hn_link_taskfunc(void *, int); static void hn_netchg_init_taskfunc(void *, int); static void hn_netchg_status_taskfunc(void *, int); static void hn_link_status(struct hn_softc *); static int hn_create_rx_data(struct hn_softc *, int); static void hn_destroy_rx_data(struct hn_softc *); static int hn_check_iplen(const struct mbuf *, int); static void hn_rxpkt_proto(const struct mbuf *, int *, int *); static int hn_set_rxfilter(struct hn_softc *, uint32_t); static int hn_rxfilter_config(struct hn_softc *); static int hn_rss_reconfig(struct hn_softc *); static void hn_rss_ind_fixup(struct hn_softc *); static void hn_rss_mbuf_hash(struct hn_softc *, uint32_t); static int hn_rxpkt(struct hn_rx_ring *, const void *, 
int, const struct hn_rxinfo *); static uint32_t hn_rss_type_fromndis(uint32_t); static uint32_t hn_rss_type_tondis(uint32_t); static int hn_tx_ring_create(struct hn_softc *, int); static void hn_tx_ring_destroy(struct hn_tx_ring *); static int hn_create_tx_data(struct hn_softc *, int); static void hn_fixup_tx_data(struct hn_softc *); static void hn_fixup_rx_data(struct hn_softc *); static void hn_destroy_tx_data(struct hn_softc *); static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); static void hn_txdesc_gc(struct hn_tx_ring *, struct hn_txdesc *); static int hn_encap(struct ifnet *, struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **); static int hn_txpkt(struct ifnet *, struct hn_tx_ring *, struct hn_txdesc *); static void hn_set_chim_size(struct hn_softc *, int); static void hn_set_tso_maxsize(struct hn_softc *, int, int); static bool hn_tx_ring_pending(struct hn_tx_ring *); static void hn_tx_ring_qflush(struct hn_tx_ring *); static void hn_resume_tx(struct hn_softc *, int); static void hn_set_txagg(struct hn_softc *); static void *hn_try_txagg(struct ifnet *, struct hn_tx_ring *, struct hn_txdesc *, int); static int hn_get_txswq_depth(const struct hn_tx_ring *); static void hn_txpkt_done(struct hn_nvs_sendctx *, struct hn_softc *, struct vmbus_channel *, const void *, int); static int hn_txpkt_sglist(struct hn_tx_ring *, struct hn_txdesc *); static int hn_txpkt_chim(struct hn_tx_ring *, struct hn_txdesc *); static int hn_xmit(struct hn_tx_ring *, int); static void hn_xmit_taskfunc(void *, int); static void hn_xmit_txeof(struct hn_tx_ring *); static void hn_xmit_txeof_taskfunc(void *, int); #ifdef HN_IFSTART_SUPPORT static int hn_start_locked(struct hn_tx_ring *, int); static void hn_start_taskfunc(void *, int); static void hn_start_txeof(struct hn_tx_ring *); static void hn_start_txeof_taskfunc(void *, int); #endif SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Hyper-V network interface"); /* Trust tcp segements verification on host side. */ static int hn_trust_hosttcp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, &hn_trust_hosttcp, 0, "Trust tcp segement verification on host side, " "when csum info is missing (global setting)"); /* Trust udp datagrams verification on host side. */ static int hn_trust_hostudp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, &hn_trust_hostudp, 0, "Trust udp datagram verification on host side, " "when csum info is missing (global setting)"); /* Trust ip packets verification on host side. */ static int hn_trust_hostip = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, &hn_trust_hostip, 0, "Trust ip packet verification on host side, " "when csum info is missing (global setting)"); /* * Offload UDP/IPv4 checksum. */ static int hn_enable_udp4cs = 1; SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp4cs, CTLFLAG_RDTUN, &hn_enable_udp4cs, 0, "Offload UDP/IPv4 checksum"); /* * Offload UDP/IPv6 checksum. */ static int hn_enable_udp6cs = 1; SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp6cs, CTLFLAG_RDTUN, &hn_enable_udp6cs, 0, "Offload UDP/IPv6 checksum"); /* Stats. */ static counter_u64_t hn_udpcs_fixup; SYSCTL_COUNTER_U64(_hw_hn, OID_AUTO, udpcs_fixup, CTLFLAG_RW, &hn_udpcs_fixup, "# of UDP checksum fixup"); /* * See hn_set_hlen(). * * This value is for Azure. For Hyper-V, set this above * 65536 to disable UDP datagram checksum fixup. 
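 *
 * The fixup in hn_set_hlen() only triggers when CSUM_IP_UDP is requested,
 * the datagram is longer than hn_udpcs_fixup_mtu plus the L2 header, and
 * IP_DF is clear; the UDP checksum is then computed in software with
 * in_cksum_skip() and CSUM_IP_UDP is dropped from csum_flags.  Hedged
 * tuning example (loader.conf syntax, value chosen only for illustration):
 *
 *      hw.hn.udpcs_fixup_mtu="65536"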
*/ static int hn_udpcs_fixup_mtu = 1420; SYSCTL_INT(_hw_hn, OID_AUTO, udpcs_fixup_mtu, CTLFLAG_RWTUN, &hn_udpcs_fixup_mtu, 0, "UDP checksum fixup MTU threshold"); /* Limit TSO burst size */ static int hn_tso_maxlen = IP_MAXPACKET; SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, &hn_tso_maxlen, 0, "TSO burst limit"); /* Limit chimney send size */ static int hn_tx_chimney_size = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, &hn_tx_chimney_size, 0, "Chimney send packet size limit"); /* Limit the size of packet for direct transmission */ static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); /* # of LRO entries per RX ring */ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, &hn_lro_entry_count, 0, "LRO entry count"); #endif #endif static int hn_tx_taskq_cnt = 1; SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN, &hn_tx_taskq_cnt, 0, "# of TX taskqueues"); #define HN_TX_TASKQ_M_INDEP 0 #define HN_TX_TASKQ_M_GLOBAL 1 #define HN_TX_TASKQ_M_EVTTQ 2 static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP; SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN, &hn_tx_taskq_mode, 0, "TX taskqueue modes: " "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs"); #ifndef HN_USE_TXDESC_BUFRING static int hn_use_txdesc_bufring = 0; #else static int hn_use_txdesc_bufring = 1; #endif SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); #ifdef HN_IFSTART_SUPPORT /* Use ifnet.if_start instead of ifnet.if_transmit */ static int hn_use_if_start = 0; SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, &hn_use_if_start, 0, "Use if_start TX method"); #endif /* # of channels to use */ static int hn_chan_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, &hn_chan_cnt, 0, "# of channels to use; each channel has one RX ring and one TX ring"); /* # of transmit rings to use */ static int hn_tx_ring_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, &hn_tx_ring_cnt, 0, "# of TX rings to use"); /* Software TX ring deptch */ static int hn_tx_swq_depth = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); /* Enable sorted LRO, and the depth of the per-channel mbuf queue */ #if __FreeBSD_version >= 1100095 static u_int hn_lro_mbufq_depth = 0; SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); #endif /* Packet transmission aggregation size limit */ static int hn_tx_agg_size = -1; SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN, &hn_tx_agg_size, 0, "Packet transmission aggregation size limit"); /* Packet transmission aggregation count limit */ static int hn_tx_agg_pkts = -1; SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN, &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit"); /* VF list */ SYSCTL_PROC(_hw_hn, OID_AUTO, vflist, CTLFLAG_RD | CTLTYPE_STRING, 0, 0, hn_vflist_sysctl, "A", "VF list"); /* VF mapping */ SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, CTLFLAG_RD | CTLTYPE_STRING, 0, 0, hn_vfmap_sysctl, "A", "VF mapping"); /* Transparent VF */ static int hn_xpnt_vf = 1; SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN, &hn_xpnt_vf, 0, "Transparent VF mod"); /* 
Accurate BPF support for Transparent VF */ static int hn_xpnt_vf_accbpf = 0; SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN, &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF"); /* Extra wait for transparent VF attach routing; unit seconds. */ static int hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN, &hn_xpnt_vf_attwait, 0, "Extra wait for transparent VF attach routing; unit: seconds"); static u_int hn_cpu_index; /* next CPU for channel */ static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */ static struct rmlock hn_vfmap_lock; static int hn_vfmap_size; static struct ifnet **hn_vfmap; #ifndef RSS static const uint8_t hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; #endif /* !RSS */ static const struct hyperv_guid hn_guid = { .hv_guid = { 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, 0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e } }; static device_method_t hn_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hn_probe), DEVMETHOD(device_attach, hn_attach), DEVMETHOD(device_detach, hn_detach), DEVMETHOD(device_shutdown, hn_shutdown), DEVMETHOD_END }; static driver_t hn_driver = { "hn", hn_methods, sizeof(struct hn_softc) }; static devclass_t hn_devclass; DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0); MODULE_VERSION(hn, 1); MODULE_DEPEND(hn, vmbus, 1, 1, 1); #if __FreeBSD_version >= 1100099 static void hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) { int i; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; } #endif static int hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID && txd->chim_size == 0, ("invalid rndis sglist txd")); return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt)); } static int hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd) { struct hn_nvs_rndis rndis; KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID && txd->chim_size > 0, ("invalid rndis chim txd")); rndis.nvs_type = HN_NVS_TYPE_RNDIS; rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA; rndis.nvs_chim_idx = txd->chim_index; rndis.nvs_chim_sz = txd->chim_size; return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC, &rndis, sizeof(rndis), &txd->send_ctx)); } static __inline uint32_t hn_chim_alloc(struct hn_softc *sc) { int i, bmap_cnt = sc->hn_chim_bmap_cnt; u_long *bmap = sc->hn_chim_bmap; uint32_t ret = HN_NVS_CHIM_IDX_INVALID; for (i = 0; i < bmap_cnt; ++i) { int idx; idx = ffsl(~bmap[i]); if (idx == 0) continue; --idx; /* ffsl is 1-based */ KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, ("invalid i %d and idx %d", i, idx)); if (atomic_testandset_long(&bmap[i], idx)) continue; ret = i * LONG_BIT + idx; break; } return (ret); } static __inline void hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) { u_long mask; uint32_t idx; idx = chim_idx / LONG_BIT; KASSERT(idx < sc->hn_chim_bmap_cnt, ("invalid chimney index 0x%x", chim_idx)); mask = 1UL << (chim_idx % LONG_BIT); KASSERT(sc->hn_chim_bmap[idx] & mask, ("index bitmap 0x%lx, chimney index %u, " "bitmap idx %d, bitmask 0x%lx", sc->hn_chim_bmap[idx], chim_idx, idx, mask)); atomic_clear_long(&sc->hn_chim_bmap[idx], mask); } #if 
defined(INET6) || defined(INET) #define PULLUP_HDR(m, len) \ do { \ if (__predict_false((m)->m_len < (len))) { \ (m) = m_pullup((m), (len)); \ if ((m) == NULL) \ return (NULL); \ } \ } while (0) /* * NOTE: If this function failed, the m_head would be freed. */ static __inline struct mbuf * hn_tso_fixup(struct mbuf *m_head) { struct ether_vlan_header *evl; struct tcphdr *th; int ehlen; KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable")); PULLUP_HDR(m_head, sizeof(*evl)); evl = mtod(m_head, struct ether_vlan_header *); if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ehlen = ETHER_HDR_LEN; m_head->m_pkthdr.l2hlen = ehlen; #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { struct ip *ip; int iphlen; PULLUP_HDR(m_head, ehlen + sizeof(*ip)); ip = mtodo(m_head, ehlen); iphlen = ip->ip_hl << 2; m_head->m_pkthdr.l3hlen = iphlen; PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); th = mtodo(m_head, ehlen + iphlen); ip->ip_len = 0; ip->ip_sum = 0; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { struct ip6_hdr *ip6; PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); ip6 = mtodo(m_head, ehlen); if (ip6->ip6_nxt != IPPROTO_TCP) { m_freem(m_head); return (NULL); } m_head->m_pkthdr.l3hlen = sizeof(*ip6); PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th)); th = mtodo(m_head, ehlen + sizeof(*ip6)); ip6->ip6_plen = 0; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); } #endif return (m_head); } /* * NOTE: If this function failed, the m_head would be freed. */ static __inline struct mbuf * hn_set_hlen(struct mbuf *m_head) { const struct ether_vlan_header *evl; int ehlen; PULLUP_HDR(m_head, sizeof(*evl)); evl = mtod(m_head, const struct ether_vlan_header *); if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ehlen = ETHER_HDR_LEN; m_head->m_pkthdr.l2hlen = ehlen; #ifdef INET if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP_UDP)) { const struct ip *ip; int iphlen; PULLUP_HDR(m_head, ehlen + sizeof(*ip)); ip = mtodo(m_head, ehlen); iphlen = ip->ip_hl << 2; m_head->m_pkthdr.l3hlen = iphlen; /* * UDP checksum offload does not work in Azure, if the * following conditions meet: * - sizeof(IP hdr + UDP hdr + payload) > 1420. * - IP_DF is not set in the IP hdr. * * Fallback to software checksum for these UDP datagrams. */ if ((m_head->m_pkthdr.csum_flags & CSUM_IP_UDP) && m_head->m_pkthdr.len > hn_udpcs_fixup_mtu + ehlen && (ntohs(ip->ip_off) & IP_DF) == 0) { uint16_t off = ehlen + iphlen; counter_u64_add(hn_udpcs_fixup, 1); PULLUP_HDR(m_head, off + sizeof(struct udphdr)); *(uint16_t *)(m_head->m_data + off + m_head->m_pkthdr.csum_data) = in_cksum_skip( m_head, m_head->m_pkthdr.len, off); m_head->m_pkthdr.csum_flags &= ~CSUM_IP_UDP; } } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { const struct ip6_hdr *ip6; PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); ip6 = mtodo(m_head, ehlen); if (ip6->ip6_nxt != IPPROTO_TCP && ip6->ip6_nxt != IPPROTO_UDP) { m_freem(m_head); return (NULL); } m_head->m_pkthdr.l3hlen = sizeof(*ip6); } #endif return (m_head); } /* * NOTE: If this function failed, the m_head would be freed. 
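 *
 * hn_check_tcpsyn() below relies on l2hlen/l3hlen having been filled in by
 * hn_set_hlen() (or hn_tso_fixup()) on the same mbuf, so the TCP header
 * sits at offset l2hlen + l3hlen.  The PULLUP_HDR() macro above hides the
 * usual m_pullup() dance; an equivalent open-coded sketch:
 *
 *      if (m_head->m_len < ehlen + iphlen + sizeof(struct tcphdr)) {
 *              m_head = m_pullup(m_head,
 *                  ehlen + iphlen + sizeof(struct tcphdr));
 *              if (m_head == NULL)
 *                      return (NULL);    chain already freed by m_pullup()
 *      }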
*/ static __inline struct mbuf * hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn) { const struct tcphdr *th; int ehlen, iphlen; *tcpsyn = 0; ehlen = m_head->m_pkthdr.l2hlen; iphlen = m_head->m_pkthdr.l3hlen; PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); th = mtodo(m_head, ehlen + iphlen); if (th->th_flags & TH_SYN) *tcpsyn = 1; return (m_head); } #undef PULLUP_HDR #endif /* INET6 || INET */ static int hn_set_rxfilter(struct hn_softc *sc, uint32_t filter) { int error = 0; HN_LOCK_ASSERT(sc); if (sc->hn_rx_filter != filter) { error = hn_rndis_set_rxfilter(sc, filter); if (!error) sc->hn_rx_filter = filter; } return (error); } static int hn_rxfilter_config(struct hn_softc *sc) { struct ifnet *ifp = sc->hn_ifp; uint32_t filter; HN_LOCK_ASSERT(sc); /* * If the non-transparent mode VF is activated, we don't know how * its RX filter is configured, so stick the synthetic device in * the promiscous mode. */ if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) { filter = NDIS_PACKET_TYPE_PROMISCUOUS; } else { filter = NDIS_PACKET_TYPE_DIRECTED; if (ifp->if_flags & IFF_BROADCAST) filter |= NDIS_PACKET_TYPE_BROADCAST; /* TODO: support multicast list */ if ((ifp->if_flags & IFF_ALLMULTI) || !CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; } return (hn_set_rxfilter(sc, filter)); } static void hn_set_txagg(struct hn_softc *sc) { uint32_t size, pkts; int i; /* * Setup aggregation size. */ if (sc->hn_agg_size < 0) size = UINT32_MAX; else size = sc->hn_agg_size; if (sc->hn_rndis_agg_size < size) size = sc->hn_rndis_agg_size; /* NOTE: We only aggregate packets using chimney sending buffers. */ if (size > (uint32_t)sc->hn_chim_szmax) size = sc->hn_chim_szmax; if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) { /* Disable */ size = 0; pkts = 0; goto done; } /* NOTE: Type of the per TX ring setting is 'int'. */ if (size > INT_MAX) size = INT_MAX; /* * Setup aggregation packet count. */ if (sc->hn_agg_pkts < 0) pkts = UINT32_MAX; else pkts = sc->hn_agg_pkts; if (sc->hn_rndis_agg_pkts < pkts) pkts = sc->hn_rndis_agg_pkts; if (pkts <= 1) { /* Disable */ size = 0; pkts = 0; goto done; } /* NOTE: Type of the per TX ring setting is 'short'. */ if (pkts > SHRT_MAX) pkts = SHRT_MAX; done: /* NOTE: Type of the per TX ring setting is 'short'. */ if (sc->hn_rndis_agg_align > SHRT_MAX) { /* Disable */ size = 0; pkts = 0; } if (bootverbose) { if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n", size, pkts, sc->hn_rndis_agg_align); } for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; mtx_lock(&txr->hn_tx_lock); txr->hn_agg_szmax = size; txr->hn_agg_pktmax = pkts; txr->hn_agg_align = sc->hn_rndis_agg_align; mtx_unlock(&txr->hn_tx_lock); } } static int hn_get_txswq_depth(const struct hn_tx_ring *txr) { KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); if (hn_tx_swq_depth < txr->hn_txdesc_cnt) return txr->hn_txdesc_cnt; return hn_tx_swq_depth; } static int hn_rss_reconfig(struct hn_softc *sc) { int error; HN_LOCK_ASSERT(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) return (ENXIO); /* * Disable RSS first. * * NOTE: * Direct reconfiguration by setting the UNCHG flags does * _not_ work properly. */ if (bootverbose) if_printf(sc->hn_ifp, "disable RSS\n"); error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE); if (error) { if_printf(sc->hn_ifp, "RSS disable failed\n"); return (error); } /* * Reenable the RSS w/ the updated RSS key or indirect * table. 
*/ if (bootverbose) if_printf(sc->hn_ifp, "reconfig RSS\n"); error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); if (error) { if_printf(sc->hn_ifp, "RSS reconfig failed\n"); return (error); } return (0); } static void hn_rss_ind_fixup(struct hn_softc *sc) { struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; int i, nchan; nchan = sc->hn_rx_ring_inuse; KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); /* * Check indirect table to make sure that all channels in it * can be used. */ for (i = 0; i < NDIS_HASH_INDCNT; ++i) { if (rss->rss_ind[i] >= nchan) { if_printf(sc->hn_ifp, "RSS indirect table %d fixup: %u -> %d\n", i, rss->rss_ind[i], nchan - 1); rss->rss_ind[i] = nchan - 1; } } } static int hn_ifmedia_upd(struct ifnet *ifp __unused) { return EOPNOTSUPP; } static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct hn_softc *sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } static void hn_rxvf_set_task(void *xarg, int pending __unused) { struct hn_rxvf_setarg *arg = xarg; arg->rxr->hn_rxvf_ifp = arg->vf_ifp; } static void hn_rxvf_set(struct hn_softc *sc, struct ifnet *vf_ifp) { struct hn_rx_ring *rxr; struct hn_rxvf_setarg arg; struct task task; int i; HN_LOCK_ASSERT(sc); TASK_INIT(&task, 0, hn_rxvf_set_task, &arg); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; if (i < sc->hn_rx_ring_inuse) { arg.rxr = rxr; arg.vf_ifp = vf_ifp; vmbus_chan_run_task(rxr->hn_chan, &task); } else { rxr->hn_rxvf_ifp = vf_ifp; } } } static bool hn_ismyvf(const struct hn_softc *sc, const struct ifnet *ifp) { const struct ifnet *hn_ifp; hn_ifp = sc->hn_ifp; if (ifp == hn_ifp) return (false); if (ifp->if_alloctype != IFT_ETHER) return (false); /* Ignore lagg/vlan interfaces */ if (strcmp(ifp->if_dname, "lagg") == 0 || strcmp(ifp->if_dname, "vlan") == 0) return (false); /* * During detach events ifp->if_addr might be NULL. * Make sure the bcmp() below doesn't panic on that: */ if (ifp->if_addr == NULL || hn_ifp->if_addr == NULL) return (false); if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0) return (false); return (true); } static void hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp, bool rxvf) { struct ifnet *hn_ifp; HN_LOCK(sc); if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) goto out; if (!hn_ismyvf(sc, ifp)) goto out; hn_ifp = sc->hn_ifp; if (rxvf) { if (sc->hn_flags & HN_FLAG_RXVF) goto out; sc->hn_flags |= HN_FLAG_RXVF; hn_rxfilter_config(sc); } else { if (!(sc->hn_flags & HN_FLAG_RXVF)) goto out; sc->hn_flags &= ~HN_FLAG_RXVF; if (hn_ifp->if_drv_flags & IFF_DRV_RUNNING) hn_rxfilter_config(sc); else hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE); } hn_nvs_set_datapath(sc, rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH); hn_rxvf_set(sc, rxvf ? ifp : NULL); if (rxvf) { hn_vf_rss_fixup(sc, true); hn_suspend_mgmt(sc); sc->hn_link_flags &= ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG); if_link_state_change(hn_ifp, LINK_STATE_DOWN); } else { hn_vf_rss_restore(sc); hn_resume_mgmt(sc); } devctl_notify("HYPERV_NIC_VF", hn_ifp->if_xname, rxvf ? "VF_UP" : "VF_DOWN", NULL); if (bootverbose) { if_printf(hn_ifp, "datapath is switched %s %s\n", rxvf ? 
"to" : "from", ifp->if_xname); } out: HN_UNLOCK(sc); } static void hn_ifnet_event(void *arg, struct ifnet *ifp, int event) { if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN) return; hn_rxvf_change(arg, ifp, event == IFNET_EVENT_UP); } static void hn_ifaddr_event(void *arg, struct ifnet *ifp) { hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP); } static int hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr) { struct ifnet *ifp, *vf_ifp; uint64_t tmp; int error; HN_LOCK_ASSERT(sc); ifp = sc->hn_ifp; vf_ifp = sc->hn_vf_ifp; /* * Fix up requested capabilities w/ supported capabilities, * since the supported capabilities could have been changed. */ ifr->ifr_reqcap &= ifp->if_capabilities; /* Pass SIOCSIFCAP to VF. */ error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr); /* * NOTE: * The error will be propagated to the callers, however, it * is _not_ useful here. */ /* * Merge VF's enabled capabilities. */ ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities; tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc); if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= tmp; else ifp->if_hwassist &= ~tmp; tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc); if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= tmp; else ifp->if_hwassist &= ~tmp; tmp = vf_ifp->if_hwassist & CSUM_IP_TSO; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= tmp; else ifp->if_hwassist &= ~tmp; tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= tmp; else ifp->if_hwassist &= ~tmp; return (error); } static int hn_xpnt_vf_iocsetflags(struct hn_softc *sc) { struct ifnet *vf_ifp; struct ifreq ifr; HN_LOCK_ASSERT(sc); vf_ifp = sc->hn_vf_ifp; memset(&ifr, 0, sizeof(ifr)); strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); ifr.ifr_flags = vf_ifp->if_flags & 0xffff; ifr.ifr_flagshigh = vf_ifp->if_flags >> 16; return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr)); } static void hn_xpnt_vf_saveifflags(struct hn_softc *sc) { struct ifnet *ifp = sc->hn_ifp; int allmulti = 0; HN_LOCK_ASSERT(sc); /* XXX vlan(4) style mcast addr maintenance */ if (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) allmulti = IFF_ALLMULTI; /* Always set the VF's if_flags */ sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti; } static void hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m) { struct rm_priotracker pt; struct ifnet *hn_ifp = NULL; struct mbuf *mn; /* * XXX racy, if hn(4) ever detached. */ rm_rlock(&hn_vfmap_lock, &pt); if (vf_ifp->if_index < hn_vfmap_size) hn_ifp = hn_vfmap[vf_ifp->if_index]; rm_runlock(&hn_vfmap_lock, &pt); if (hn_ifp != NULL) { for (mn = m; mn != NULL; mn = mn->m_nextpkt) { /* * Allow tapping on the VF. */ ETHER_BPF_MTAP(vf_ifp, mn); /* * Update VF stats. */ if ((vf_ifp->if_capenable & IFCAP_HWSTATS) == 0) { if_inc_counter(vf_ifp, IFCOUNTER_IBYTES, mn->m_pkthdr.len); } /* * XXX IFCOUNTER_IMCAST * This stat updating is kinda invasive, since it * requires two checks on the mbuf: the length check * and the ethernet header check. As of this write, * all multicast packets go directly to hn(4), which * makes imcast stat updating in the VF a try in vian. */ /* * Fix up rcvif and increase hn(4)'s ipackets. */ mn->m_pkthdr.rcvif = hn_ifp; if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); } /* * Go through hn(4)'s if_input. */ hn_ifp->if_input(hn_ifp, m); } else { /* * In the middle of the transition; free this * mbuf chain. 
*/ while (m != NULL) { mn = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); m = mn; } } } static void hn_mtu_change_fixup(struct hn_softc *sc) { struct ifnet *ifp; HN_LOCK_ASSERT(sc); ifp = sc->hn_ifp; hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); #endif } static uint32_t hn_rss_type_fromndis(uint32_t rss_hash) { uint32_t types = 0; if (rss_hash & NDIS_HASH_IPV4) types |= RSS_TYPE_IPV4; if (rss_hash & NDIS_HASH_TCP_IPV4) types |= RSS_TYPE_TCP_IPV4; if (rss_hash & NDIS_HASH_IPV6) types |= RSS_TYPE_IPV6; if (rss_hash & NDIS_HASH_IPV6_EX) types |= RSS_TYPE_IPV6_EX; if (rss_hash & NDIS_HASH_TCP_IPV6) types |= RSS_TYPE_TCP_IPV6; if (rss_hash & NDIS_HASH_TCP_IPV6_EX) types |= RSS_TYPE_TCP_IPV6_EX; if (rss_hash & NDIS_HASH_UDP_IPV4_X) types |= RSS_TYPE_UDP_IPV4; return (types); } static uint32_t hn_rss_type_tondis(uint32_t types) { uint32_t rss_hash = 0; KASSERT((types & (RSS_TYPE_UDP_IPV6 | RSS_TYPE_UDP_IPV6_EX)) == 0, ("UDP6 and UDP6EX are not supported")); if (types & RSS_TYPE_IPV4) rss_hash |= NDIS_HASH_IPV4; if (types & RSS_TYPE_TCP_IPV4) rss_hash |= NDIS_HASH_TCP_IPV4; if (types & RSS_TYPE_IPV6) rss_hash |= NDIS_HASH_IPV6; if (types & RSS_TYPE_IPV6_EX) rss_hash |= NDIS_HASH_IPV6_EX; if (types & RSS_TYPE_TCP_IPV6) rss_hash |= NDIS_HASH_TCP_IPV6; if (types & RSS_TYPE_TCP_IPV6_EX) rss_hash |= NDIS_HASH_TCP_IPV6_EX; if (types & RSS_TYPE_UDP_IPV4) rss_hash |= NDIS_HASH_UDP_IPV4_X; return (rss_hash); } static void hn_rss_mbuf_hash(struct hn_softc *sc, uint32_t mbuf_hash) { int i; HN_LOCK_ASSERT(sc); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_mbuf_hash = mbuf_hash; } static void hn_vf_rss_fixup(struct hn_softc *sc, bool reconf) { struct ifnet *ifp, *vf_ifp; struct ifrsshash ifrh; struct ifrsskey ifrk; int error; uint32_t my_types, diff_types, mbuf_types = 0; HN_LOCK_ASSERT(sc); KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); if (sc->hn_rx_ring_inuse == 1) { /* No RSS on synthetic parts; done. */ return; } if ((sc->hn_rss_hcap & NDIS_HASH_FUNCTION_TOEPLITZ) == 0) { /* Synthetic parts do not support Toeplitz; done. */ return; } ifp = sc->hn_ifp; vf_ifp = sc->hn_vf_ifp; /* * Extract VF's RSS key. Only 40 bytes key for Toeplitz is * supported. */ memset(&ifrk, 0, sizeof(ifrk)); strlcpy(ifrk.ifrk_name, vf_ifp->if_xname, sizeof(ifrk.ifrk_name)); error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSKEY, (caddr_t)&ifrk); if (error) { if_printf(ifp, "%s SIOCGIFRSSKEY failed: %d\n", vf_ifp->if_xname, error); goto done; } if (ifrk.ifrk_func != RSS_FUNC_TOEPLITZ) { if_printf(ifp, "%s RSS function %u is not Toeplitz\n", vf_ifp->if_xname, ifrk.ifrk_func); goto done; } if (ifrk.ifrk_keylen != NDIS_HASH_KEYSIZE_TOEPLITZ) { if_printf(ifp, "%s invalid RSS Toeplitz key length %d\n", vf_ifp->if_xname, ifrk.ifrk_keylen); goto done; } /* * Extract VF's RSS hash. Only Toeplitz is supported. 
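     * The query goes through the VF's if_ioctl directly; the hash types
     * it reports are then intersected with what the synthetic device
     * advertises (hn_rss_hcap), and any conflicting type/value
     * combinations are masked out of mbuf hash delivery below.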
     */
    memset(&ifrh, 0, sizeof(ifrh));
    strlcpy(ifrh.ifrh_name, vf_ifp->if_xname, sizeof(ifrh.ifrh_name));
    error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSHASH, (caddr_t)&ifrh);
    if (error) {
        if_printf(ifp, "%s SIOCGIFRSSHASH failed: %d\n",
            vf_ifp->if_xname, error);
        goto done;
    }
    if (ifrh.ifrh_func != RSS_FUNC_TOEPLITZ) {
        if_printf(ifp, "%s RSS function %u is not Toeplitz\n",
            vf_ifp->if_xname, ifrh.ifrh_func);
        goto done;
    }

    my_types = hn_rss_type_fromndis(sc->hn_rss_hcap);
    if ((ifrh.ifrh_types & my_types) == 0) {
        /* This disables RSS; ignore it then */
        if_printf(ifp, "%s intersection of RSS types failed. "
            "VF %#x, mine %#x\n", vf_ifp->if_xname,
            ifrh.ifrh_types, my_types);
        goto done;
    }

    diff_types = my_types ^ ifrh.ifrh_types;
    my_types &= ifrh.ifrh_types;
    mbuf_types = my_types;

    /*
     * Detect RSS hash value/type conflicts.
     *
     * NOTE:
     * We don't disable the hash type, but stop delivering the hash
     * value/type through mbufs on the RX path.
     *
     * XXX If HN_CAP_UDPHASH is set in hn_caps, then UDP 4-tuple
     * hash is delivered with type of TCP_IPV4. This means if
     * UDP_IPV4 is enabled, then TCP_IPV4 should be forced, at
     * least to hn_mbuf_hash. However, given that _all_ of the
     * NICs implement TCP_IPV4, this will _not_ impose any issues
     * here.
     */
    if ((my_types & RSS_TYPE_IPV4) &&
        (diff_types & ifrh.ifrh_types &
         (RSS_TYPE_TCP_IPV4 | RSS_TYPE_UDP_IPV4))) {
        /* Conflict; disable IPV4 hash type/value delivery. */
        if_printf(ifp, "disable IPV4 mbuf hash delivery\n");
        mbuf_types &= ~RSS_TYPE_IPV4;
    }
    if ((my_types & RSS_TYPE_IPV6) &&
        (diff_types & ifrh.ifrh_types &
         (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 |
          RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX |
          RSS_TYPE_IPV6_EX))) {
        /* Conflict; disable IPV6 hash type/value delivery. */
        if_printf(ifp, "disable IPV6 mbuf hash delivery\n");
        mbuf_types &= ~RSS_TYPE_IPV6;
    }
    if ((my_types & RSS_TYPE_IPV6_EX) &&
        (diff_types & ifrh.ifrh_types &
         (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 |
          RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX |
          RSS_TYPE_IPV6))) {
        /* Conflict; disable IPV6_EX hash type/value delivery. */
        if_printf(ifp, "disable IPV6_EX mbuf hash delivery\n");
        mbuf_types &= ~RSS_TYPE_IPV6_EX;
    }
    if ((my_types & RSS_TYPE_TCP_IPV6) &&
        (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6_EX)) {
        /* Conflict; disable TCP_IPV6 hash type/value delivery. */
        if_printf(ifp, "disable TCP_IPV6 mbuf hash delivery\n");
        mbuf_types &= ~RSS_TYPE_TCP_IPV6;
    }
    if ((my_types & RSS_TYPE_TCP_IPV6_EX) &&
        (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6)) {
        /* Conflict; disable TCP_IPV6_EX hash type/value delivery. */
        if_printf(ifp, "disable TCP_IPV6_EX mbuf hash delivery\n");
        mbuf_types &= ~RSS_TYPE_TCP_IPV6_EX;
    }
    if ((my_types & RSS_TYPE_UDP_IPV6) &&
        (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6_EX)) {
        /* Conflict; disable UDP_IPV6 hash type/value delivery. */
        if_printf(ifp, "disable UDP_IPV6 mbuf hash delivery\n");
        mbuf_types &= ~RSS_TYPE_UDP_IPV6;
    }
    if ((my_types & RSS_TYPE_UDP_IPV6_EX) &&
        (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6)) {
        /* Conflict; disable UDP_IPV6_EX hash type/value delivery. */
        if_printf(ifp, "disable UDP_IPV6_EX mbuf hash delivery\n");
        mbuf_types &= ~RSS_TYPE_UDP_IPV6_EX;
    }

    /*
     * Indirect table does not matter.
     */
    sc->hn_rss_hash = (sc->hn_rss_hcap & NDIS_HASH_FUNCTION_MASK) |
        hn_rss_type_tondis(my_types);
    memcpy(sc->hn_rss.rss_key, ifrk.ifrk_key, sizeof(sc->hn_rss.rss_key));
    sc->hn_flags |= HN_FLAG_HAS_RSSKEY;

    if (reconf) {
        error = hn_rss_reconfig(sc);
        if (error) {
            /* XXX roll-back? */
            if_printf(ifp, "hn_rss_reconfig failed: %d\n", error);
            /* XXX keep going.
*/ } } done: /* Hash deliverability for mbufs. */ hn_rss_mbuf_hash(sc, hn_rss_type_tondis(mbuf_types)); } static void hn_vf_rss_restore(struct hn_softc *sc) { HN_LOCK_ASSERT(sc); KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); if (sc->hn_rx_ring_inuse == 1) goto done; /* * Restore hash types. Key does _not_ matter. */ if (sc->hn_rss_hash != sc->hn_rss_hcap) { int error; sc->hn_rss_hash = sc->hn_rss_hcap; error = hn_rss_reconfig(sc); if (error) { if_printf(sc->hn_ifp, "hn_rss_reconfig failed: %d\n", error); /* XXX keep going. */ } } done: /* Hash deliverability for mbufs. */ hn_rss_mbuf_hash(sc, NDIS_HASH_ALL); } static void hn_xpnt_vf_setready(struct hn_softc *sc) { struct ifnet *ifp, *vf_ifp; struct ifreq ifr; HN_LOCK_ASSERT(sc); ifp = sc->hn_ifp; vf_ifp = sc->hn_vf_ifp; /* * Mark the VF ready. */ sc->hn_vf_rdytick = 0; /* * Save information for restoration. */ sc->hn_saved_caps = ifp->if_capabilities; sc->hn_saved_tsomax = ifp->if_hw_tsomax; sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount; sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize; /* * Intersect supported/enabled capabilities. * * NOTE: * if_hwassist is not changed here. */ ifp->if_capabilities &= vf_ifp->if_capabilities; ifp->if_capenable &= ifp->if_capabilities; /* * Fix TSO settings. */ if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax) ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax; if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount) ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount; if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize) ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize; /* * Change VF's enabled capabilities. */ memset(&ifr, 0, sizeof(ifr)); strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); ifr.ifr_reqcap = ifp->if_capenable; hn_xpnt_vf_iocsetcaps(sc, &ifr); if (ifp->if_mtu != ETHERMTU) { int error; /* * Change VF's MTU. */ memset(&ifr, 0, sizeof(ifr)); strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); ifr.ifr_mtu = ifp->if_mtu; error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr); if (error) { if_printf(ifp, "%s SIOCSIFMTU %u failed\n", vf_ifp->if_xname, ifp->if_mtu); if (ifp->if_mtu > ETHERMTU) { if_printf(ifp, "change MTU to %d\n", ETHERMTU); /* * XXX * No need to adjust the synthetic parts' MTU; * failure of the adjustment will cause us * infinite headache. */ ifp->if_mtu = ETHERMTU; hn_mtu_change_fixup(sc); } } } } static bool hn_xpnt_vf_isready(struct hn_softc *sc) { HN_LOCK_ASSERT(sc); if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL) return (false); if (sc->hn_vf_rdytick == 0) return (true); if (sc->hn_vf_rdytick > ticks) return (false); /* Mark VF as ready. 
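     * Readiness is tracked by hn_vf_rdytick: hn_ifnet_attevent() arms it
     * with "ticks + attach wait", and clearing it to 0 in
     * hn_xpnt_vf_setready() is what hn_xpnt_vf_isready() treats as
     * "ready".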
*/ hn_xpnt_vf_setready(sc); return (true); } static void hn_xpnt_vf_setenable(struct hn_softc *sc) { int i; HN_LOCK_ASSERT(sc); /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ rm_wlock(&sc->hn_vf_lock); sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED; rm_wunlock(&sc->hn_vf_lock); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF; } static void hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf) { int i; HN_LOCK_ASSERT(sc); /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ rm_wlock(&sc->hn_vf_lock); sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED; if (clear_vf) sc->hn_vf_ifp = NULL; rm_wunlock(&sc->hn_vf_lock); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF; } static void hn_xpnt_vf_init(struct hn_softc *sc) { int error; HN_LOCK_ASSERT(sc); KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); if (bootverbose) { if_printf(sc->hn_ifp, "try bringing up %s\n", sc->hn_vf_ifp->if_xname); } /* * Bring the VF up. */ hn_xpnt_vf_saveifflags(sc); sc->hn_vf_ifp->if_flags |= IFF_UP; error = hn_xpnt_vf_iocsetflags(sc); if (error) { if_printf(sc->hn_ifp, "bringing up %s failed: %d\n", sc->hn_vf_ifp->if_xname, error); return; } /* * NOTE: * Datapath setting must happen _after_ bringing the VF up. */ hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); /* * NOTE: * Fixup RSS related bits _after_ the VF is brought up, since * many VFs generate RSS key during it's initialization. */ hn_vf_rss_fixup(sc, true); /* Mark transparent mode VF as enabled. */ hn_xpnt_vf_setenable(sc); } static void hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused) { struct hn_softc *sc = xsc; HN_LOCK(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) goto done; if (sc->hn_vf_ifp == NULL) goto done; if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) goto done; if (sc->hn_vf_rdytick != 0) { /* Mark VF as ready. */ hn_xpnt_vf_setready(sc); } if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) { /* * Delayed VF initialization. */ if (bootverbose) { if_printf(sc->hn_ifp, "delayed initialize %s\n", sc->hn_vf_ifp->if_xname); } hn_xpnt_vf_init(sc); } done: HN_UNLOCK(sc); } static void hn_ifnet_attevent(void *xsc, struct ifnet *ifp) { struct hn_softc *sc = xsc; HN_LOCK(sc); if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) goto done; if (!hn_ismyvf(sc, ifp)) goto done; if (sc->hn_vf_ifp != NULL) { if_printf(sc->hn_ifp, "%s was attached as VF\n", sc->hn_vf_ifp->if_xname); goto done; } if (hn_xpnt_vf && ifp->if_start != NULL) { /* * ifnet.if_start is _not_ supported by transparent * mode VF; mainly due to the IFF_DRV_OACTIVE flag. 
*/ if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported " "in transparent VF mode.\n", ifp->if_xname); goto done; } rm_wlock(&hn_vfmap_lock); if (ifp->if_index >= hn_vfmap_size) { struct ifnet **newmap; int newsize; newsize = ifp->if_index + HN_VFMAP_SIZE_DEF; newmap = malloc(sizeof(struct ifnet *) * newsize, M_DEVBUF, M_WAITOK | M_ZERO); memcpy(newmap, hn_vfmap, sizeof(struct ifnet *) * hn_vfmap_size); free(hn_vfmap, M_DEVBUF); hn_vfmap = newmap; hn_vfmap_size = newsize; } KASSERT(hn_vfmap[ifp->if_index] == NULL, ("%s: ifindex %d was mapped to %s", ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname)); hn_vfmap[ifp->if_index] = sc->hn_ifp; rm_wunlock(&hn_vfmap_lock); /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ rm_wlock(&sc->hn_vf_lock); KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); sc->hn_vf_ifp = ifp; rm_wunlock(&sc->hn_vf_lock); if (hn_xpnt_vf) { int wait_ticks; /* * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp. * Save vf_ifp's current if_input for later restoration. */ sc->hn_vf_input = ifp->if_input; ifp->if_input = hn_xpnt_vf_input; /* * Stop link status management; use the VF's. */ hn_suspend_mgmt(sc); /* * Give VF sometime to complete its attach routing. */ wait_ticks = hn_xpnt_vf_attwait * hz; sc->hn_vf_rdytick = ticks + wait_ticks; taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init, wait_ticks); } done: HN_UNLOCK(sc); } static void hn_ifnet_detevent(void *xsc, struct ifnet *ifp) { struct hn_softc *sc = xsc; HN_LOCK(sc); if (sc->hn_vf_ifp == NULL) goto done; if (!hn_ismyvf(sc, ifp)) goto done; if (hn_xpnt_vf) { /* * Make sure that the delayed initialization is not running. * * NOTE: * - This lock _must_ be released, since the hn_vf_init task * will try holding this lock. * - It is safe to release this lock here, since the * hn_ifnet_attevent() is interlocked by the hn_vf_ifp. * * XXX racy, if hn(4) ever detached. */ HN_UNLOCK(sc); taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init); HN_LOCK(sc); KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved", sc->hn_ifp->if_xname)); ifp->if_input = sc->hn_vf_input; sc->hn_vf_input = NULL; if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) && (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); if (sc->hn_vf_rdytick == 0) { /* * The VF was ready; restore some settings. */ sc->hn_ifp->if_capabilities = sc->hn_saved_caps; /* * NOTE: * There is _no_ need to fixup if_capenable and * if_hwassist, since the if_capabilities before * restoration was an intersection of the VF's * if_capabilites and the synthetic device's * if_capabilites. */ sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax; sc->hn_ifp->if_hw_tsomaxsegcount = sc->hn_saved_tsosegcnt; sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz; } if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { /* * Restore RSS settings. */ hn_vf_rss_restore(sc); /* * Resume link status management, which was suspended * by hn_ifnet_attevent(). */ hn_resume_mgmt(sc); } } /* Mark transparent mode VF as disabled. 
*/ hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */); rm_wlock(&hn_vfmap_lock); KASSERT(ifp->if_index < hn_vfmap_size, ("ifindex %d, vfmapsize %d", ifp->if_index, hn_vfmap_size)); if (hn_vfmap[ifp->if_index] != NULL) { KASSERT(hn_vfmap[ifp->if_index] == sc->hn_ifp, ("%s: ifindex %d was mapped to %s", ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname)); hn_vfmap[ifp->if_index] = NULL; } rm_wunlock(&hn_vfmap_lock); done: HN_UNLOCK(sc); } static void hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state) { struct hn_softc *sc = xsc; if (sc->hn_vf_ifp == ifp) if_link_state_change(sc->hn_ifp, link_state); } static int hn_probe(device_t dev) { if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) == 0) { device_set_desc(dev, "Hyper-V Network Interface"); return BUS_PROBE_DEFAULT; } return ENXIO; } static int hn_attach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; uint8_t eaddr[ETHER_ADDR_LEN]; struct ifnet *ifp = NULL; int error, ring_cnt, tx_ring_cnt; uint32_t mtu; sc->hn_dev = dev; sc->hn_prichan = vmbus_get_channel(dev); HN_LOCK_INIT(sc); rm_init(&sc->hn_vf_lock, "hnvf"); if (hn_xpnt_vf && hn_xpnt_vf_accbpf) sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; /* * Initialize these tunables once. */ sc->hn_agg_size = hn_tx_agg_size; sc->hn_agg_pkts = hn_tx_agg_pkts; /* * Setup taskqueue for transmission. */ if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) { int i; sc->hn_tx_taskqs = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), M_DEVBUF, M_WAITOK); for (i = 0; i < hn_tx_taskq_cnt; ++i) { sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_tx_taskqs[i]); taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET, "%s tx%d", device_get_nameunit(dev), i); } } else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) { sc->hn_tx_taskqs = hn_tx_taskque; } /* * Setup taskqueue for mangement tasks, e.g. link status. */ sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0); taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt", device_get_nameunit(dev)); TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc); TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc); TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, hn_netchg_status_taskfunc, sc); if (hn_xpnt_vf) { /* * Setup taskqueue for VF tasks, e.g. delayed VF bringing up. */ sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_vf_taskq); taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf", device_get_nameunit(dev)); TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0, hn_xpnt_vf_init_taskfunc, sc); } /* * Allocate ifnet and setup its name earlier, so that if_printf * can be used by functions, which will be called after * ether_ifattach(). */ ifp = sc->hn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); /* * Initialize ifmedia earlier so that it can be unconditionally * destroyed, if error happened later on. */ ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); /* * Figure out the # of RX rings (ring_cnt) and the # of TX rings * to use (tx_ring_cnt). * * NOTE: * The # of RX rings to use is same as the # of channels to use. 
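     * For example, with hn_chan_cnt <= 0 (the default path below) on an
     * 8-CPU guest, ring_cnt becomes min(8, HN_RING_CNT_DEF_MAX), further
     * clamped to the RSS bucket count when RSS is compiled in;
     * tx_ring_cnt then follows ring_cnt unless hn_tx_ring_cnt narrows it,
     * or ifnet.if_start is used, which forces a single TX ring.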
*/ ring_cnt = hn_chan_cnt; if (ring_cnt <= 0) { /* Default */ ring_cnt = mp_ncpus; if (ring_cnt > HN_RING_CNT_DEF_MAX) ring_cnt = HN_RING_CNT_DEF_MAX; } else if (ring_cnt > mp_ncpus) { ring_cnt = mp_ncpus; } #ifdef RSS if (ring_cnt > rss_getnumbuckets()) ring_cnt = rss_getnumbuckets(); #endif tx_ring_cnt = hn_tx_ring_cnt; if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) tx_ring_cnt = ring_cnt; #ifdef HN_IFSTART_SUPPORT if (hn_use_if_start) { /* ifnet.if_start only needs one TX ring. */ tx_ring_cnt = 1; } #endif /* * Set the leader CPU for channels. */ sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; /* * Create enough TX/RX rings, even if only limited number of * channels can be allocated. */ error = hn_create_tx_data(sc, tx_ring_cnt); if (error) goto failed; error = hn_create_rx_data(sc, ring_cnt); if (error) goto failed; /* * Create transaction context for NVS and RNDIS transactions. */ sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); if (sc->hn_xact == NULL) { error = ENXIO; goto failed; } /* * Install orphan handler for the revocation of this device's * primary channel. * * NOTE: * The processing order is critical here: * Install the orphan handler, _before_ testing whether this * device's primary channel has been revoked or not. */ vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact); if (vmbus_chan_is_revoked(sc->hn_prichan)) { error = ENXIO; goto failed; } /* * Attach the synthetic parts, i.e. NVS and RNDIS. */ error = hn_synth_attach(sc, ETHERMTU); if (error) goto failed; error = hn_rndis_get_eaddr(sc, eaddr); if (error) goto failed; error = hn_rndis_get_mtu(sc, &mtu); if (error) mtu = ETHERMTU; else if (bootverbose) device_printf(dev, "RNDIS mtu %u\n", mtu); #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring_inuse > 1) { /* * Reduce TCP segment aggregation limit for multiple * RX rings to increase ACK timeliness. */ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); } #endif /* * Fixup TX/RX stuffs after synthetic parts are attached. 
*/ hn_fixup_tx_data(sc); hn_fixup_rx_data(sc); ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, &sc->hn_nvs_ver, 0, "NVS version"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_ndis_version_sysctl, "A", "NDIS version"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_caps_sysctl, "A", "capabilities"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_hwassist_sysctl, "A", "hwassist"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max", CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt", CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0, "max # of TSO segments"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz", CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0, "max size of TSO segment"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rxfilter_sysctl, "A", "rxfilter"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rss_hash_sysctl, "A", "RSS hash"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hashcap", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rss_hcap_sysctl, "A", "RSS hash capabilities"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "mbuf_hash", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rss_mbuf_sysctl, "A", "RSS hash for mbufs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); #ifndef RSS /* * Don't allow RSS key/indirect table changes, if RSS is defined. 
*/ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_rss_key_sysctl, "IU", "RSS key"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_rss_ind_sysctl, "IU", "RSS indirect table"); #endif SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size", CTLFLAG_RD, &sc->hn_rndis_agg_size, 0, "RNDIS offered packet transmission aggregation size limit"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts", CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0, "RNDIS offered packet transmission aggregation count limit"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align", CTLFLAG_RD, &sc->hn_rndis_agg_align, 0, "RNDIS packet transmission aggregation alignment"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_txagg_size_sysctl, "I", "Packet transmission aggregation size, 0 -- disable, -1 -- auto"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_txagg_pkts_sysctl, "I", "Packet transmission aggregation packets, " "0 -- disable, -1 -- auto"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_polling_sysctl, "I", "Polling frequency: [100,1000000], 0 disable polling"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_vf_sysctl, "A", "Virtual Function's name"); if (!hn_xpnt_vf) { SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rxvf_sysctl, "A", "activated Virtual Function's name"); } else { SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_xpnt_vf_enabled_sysctl, "I", "Transparent VF enabled"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_xpnt_vf_accbpf_sysctl, "I", "Accurate BPF for transparent VF"); } /* * Setup the ifmedia, which has been initialized earlier. */ ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); /* XXX ifmedia_set really should do this for us */ sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; /* * Setup the ifnet for this interface. */ ifp->if_baudrate = IF_Gbps(10); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | - IFF_NEEDSEPOCH; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = hn_ioctl; ifp->if_init = hn_init; #ifdef HN_IFSTART_SUPPORT if (hn_use_if_start) { int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); ifp->if_start = hn_start; IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); ifp->if_snd.ifq_drv_maxlen = qdepth - 1; IFQ_SET_READY(&ifp->if_snd); } else #endif { ifp->if_transmit = hn_transmit; ifp->if_qflush = hn_xmit_qflush; } ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE; #ifdef foo /* We can't diff IPv6 packets from IPv4 packets on RX path. */ ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; #endif if (sc->hn_caps & HN_CAP_VLAN) { /* XXX not sure about VLAN_MTU. 
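     * IFCAP_VLAN_HWTAGGING itself is safe here: VLAN tags are carried
     * out-of-band as NDIS_PKTINFO_TYPE_VLAN pktinfo on TX (see hn_encap())
     * and as info->vlan_info on RX, not inside the frame payload.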
*/ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; } ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; if (ifp->if_hwassist & HN_CSUM_IP_MASK) ifp->if_capabilities |= IFCAP_TXCSUM; if (ifp->if_hwassist & HN_CSUM_IP6_MASK) ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; if (sc->hn_caps & HN_CAP_TSO4) { ifp->if_capabilities |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_IP_TSO; } if (sc->hn_caps & HN_CAP_TSO6) { ifp->if_capabilities |= IFCAP_TSO6; ifp->if_hwassist |= CSUM_IP6_TSO; } /* Enable all available capabilities by default. */ ifp->if_capenable = ifp->if_capabilities; /* * Disable IPv6 TSO and TXCSUM by default, they still can * be enabled through SIOCSIFCAP. */ ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO); if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { /* * Lock hn_set_tso_maxsize() to simplify its * internal logic. */ HN_LOCK(sc); hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); HN_UNLOCK(sc); ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; } ether_ifattach(ifp, eaddr); if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { if_printf(ifp, "TSO segcnt %u segsz %u\n", ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); } if (mtu < ETHERMTU) { if_printf(ifp, "fixup mtu %u -> %u\n", ifp->if_mtu, mtu); ifp->if_mtu = mtu; } /* Inform the upper layer about the long frame support. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); /* * Kick off link status check. */ sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; hn_update_link_status(sc); if (!hn_xpnt_vf) { sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event, hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY); sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event, hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY); } else { sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event, hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY); } /* * NOTE: * Subscribe ether_ifattach event, instead of ifnet_arrival event, * since interface's LLADDR is needed; interface LLADDR is not * available when ifnet_arrival event is triggered. */ sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event, hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY); sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event, hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY); return (0); failed: if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) hn_synth_detach(sc); hn_detach(dev); return (error); } static int hn_detach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); struct ifnet *ifp = sc->hn_ifp, *vf_ifp; if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { /* * In case that the vmbus missed the orphan handler * installation. 
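         * Orphaning the transaction context here should wake up any
         * thread still blocked on an NVS/RNDIS exchange over the revoked
         * primary channel, so detach cannot get stuck waiting for a
         * reply that will never arrive (best-effort reading of the
         * vmbus_xact API).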
         */
        vmbus_xact_ctx_orphan(sc->hn_xact);
    }

    if (sc->hn_ifaddr_evthand != NULL)
        EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand);
    if (sc->hn_ifnet_evthand != NULL)
        EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand);
    if (sc->hn_ifnet_atthand != NULL) {
        EVENTHANDLER_DEREGISTER(ether_ifattach_event,
            sc->hn_ifnet_atthand);
    }
    if (sc->hn_ifnet_dethand != NULL) {
        EVENTHANDLER_DEREGISTER(ifnet_departure_event,
            sc->hn_ifnet_dethand);
    }
    if (sc->hn_ifnet_lnkhand != NULL)
        EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand);

    vf_ifp = sc->hn_vf_ifp;
    __compiler_membar();
    if (vf_ifp != NULL)
        hn_ifnet_detevent(sc, vf_ifp);

    if (device_is_attached(dev)) {
        HN_LOCK(sc);
        if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
            if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                hn_stop(sc, true);
            /*
             * NOTE:
             * hn_stop() only suspends data, so management
             * tasks have to be suspended manually here.
             */
            hn_suspend_mgmt(sc);
            hn_synth_detach(sc);
        }
        HN_UNLOCK(sc);
        ether_ifdetach(ifp);
    }

    ifmedia_removeall(&sc->hn_media);
    hn_destroy_rx_data(sc);
    hn_destroy_tx_data(sc);

    if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) {
        int i;

        for (i = 0; i < hn_tx_taskq_cnt; ++i)
            taskqueue_free(sc->hn_tx_taskqs[i]);
        free(sc->hn_tx_taskqs, M_DEVBUF);
    }
    taskqueue_free(sc->hn_mgmt_taskq0);
    if (sc->hn_vf_taskq != NULL)
        taskqueue_free(sc->hn_vf_taskq);

    if (sc->hn_xact != NULL) {
        /*
         * Uninstall the orphan handler _before_ the xact is
         * destructed.
         */
        vmbus_chan_unset_orphan(sc->hn_prichan);
        vmbus_xact_ctx_destroy(sc->hn_xact);
    }

    if_free(ifp);

    HN_LOCK_DESTROY(sc);
    rm_destroy(&sc->hn_vf_lock);
    return (0);
}

static int
hn_shutdown(device_t dev)
{

    return (0);
}

static void
hn_link_status(struct hn_softc *sc)
{
    uint32_t link_status;
    int error;

    error = hn_rndis_get_linkstatus(sc, &link_status);
    if (error) {
        /* XXX what to do? */
        return;
    }

    if (link_status == NDIS_MEDIA_STATE_CONNECTED)
        sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
    else
        sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
    if_link_state_change(sc->hn_ifp,
        (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ?
        LINK_STATE_UP : LINK_STATE_DOWN);
}

static void
hn_link_taskfunc(void *xsc, int pending __unused)
{
    struct hn_softc *sc = xsc;

    if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
        return;
    hn_link_status(sc);
}

static void
hn_netchg_init_taskfunc(void *xsc, int pending __unused)
{
    struct hn_softc *sc = xsc;

    /* Prevent any link status checks from running. */
    sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;

    /*
     * Fake up a [link down --> link up] state change; a 5 second
     * delay is used, which closely simulates miibus reaction upon
     * a link down event.
     */
    sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
    if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
    taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0,
        &sc->hn_netchg_status, 5 * hz);
}

static void
hn_netchg_status_taskfunc(void *xsc, int pending __unused)
{
    struct hn_softc *sc = xsc;

    /* Re-allow link status checks. */
    sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG;
    hn_link_status(sc);
}

static void
hn_update_link_status(struct hn_softc *sc)
{

    if (sc->hn_mgmt_taskq != NULL)
        taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);
}

static void
hn_change_network(struct hn_softc *sc)
{

    if (sc->hn_mgmt_taskq != NULL)
        taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);
}

static __inline int
hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
{
    struct mbuf *m = *m_head;
    int error;

    KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));

    error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
        m, segs, nsegs, BUS_DMA_NOWAIT);
    if (error == EFBIG) {
        struct mbuf *m_new;

        m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
        if (m_new == NULL)
            return ENOBUFS;
        else
            *m_head = m = m_new;
        txr->hn_tx_collapsed++;

        error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
            txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
    }
    if (!error) {
        bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
            BUS_DMASYNC_PREWRITE);
        txd->flags |= HN_TXD_FLAG_DMAMAP;
    }
    return error;
}

static __inline int
hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

    KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
        ("put an onlist txd %#x", txd->flags));
    KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
        ("put an onagg txd %#x", txd->flags));

    KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
    if (atomic_fetchadd_int(&txd->refs, -1) != 1)
        return 0;

    if (!STAILQ_EMPTY(&txd->agg_list)) {
        struct hn_txdesc *tmp_txd;

        while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) {
            int freed;

            KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list),
                ("recursive aggregation on aggregated txdesc"));
            KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG),
                ("not aggregated txdesc"));
            KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
                ("aggregated txdesc uses dmamap"));
            KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
                ("aggregated txdesc consumes "
                 "chimney sending buffer"));
            KASSERT(tmp_txd->chim_size == 0,
                ("aggregated txdesc has non-zero "
                 "chimney sending size"));

            STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link);
            tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG;
            freed = hn_txdesc_put(txr, tmp_txd);
            KASSERT(freed, ("failed to free aggregated txdesc"));
        }
    }

    if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
        KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
            ("chim txd uses dmamap"));
        hn_chim_free(txr->hn_sc, txd->chim_index);
        txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
        txd->chim_size = 0;
    } else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
        bus_dmamap_sync(txr->hn_tx_data_dtag,
            txd->data_dmap, BUS_DMASYNC_POSTWRITE);
        bus_dmamap_unload(txr->hn_tx_data_dtag,
            txd->data_dmap);
        txd->flags &= ~HN_TXD_FLAG_DMAMAP;
    }

    if (txd->m != NULL) {
        m_freem(txd->m);
        txd->m = NULL;
    }

    txd->flags |= HN_TXD_FLAG_ONLIST;
#ifndef HN_USE_TXDESC_BUFRING
    mtx_lock_spin(&txr->hn_txlist_spin);
    KASSERT(txr->hn_txdesc_avail >= 0 &&
        txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
        ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
    txr->hn_txdesc_avail++;
    SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
    mtx_unlock_spin(&txr->hn_txlist_spin);
#else   /* HN_USE_TXDESC_BUFRING */
#ifdef HN_DEBUG
    atomic_add_int(&txr->hn_txdesc_avail, 1);
#endif
    buf_ring_enqueue(txr->hn_txdesc_br, txd);
#endif  /* !HN_USE_TXDESC_BUFRING */

    return 1;
}

static __inline struct hn_txdesc *
hn_txdesc_get(struct hn_tx_ring *txr)
{
    struct hn_txdesc *txd;

#ifndef HN_USE_TXDESC_BUFRING
    mtx_lock_spin(&txr->hn_txlist_spin);
    txd = SLIST_FIRST(&txr->hn_txlist);
    if
(txd != NULL) { KASSERT(txr->hn_txdesc_avail > 0, ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail--; SLIST_REMOVE_HEAD(&txr->hn_txlist, link); } mtx_unlock_spin(&txr->hn_txlist_spin); #else txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); #endif if (txd != NULL) { #ifdef HN_USE_TXDESC_BUFRING #ifdef HN_DEBUG atomic_subtract_int(&txr->hn_txdesc_avail, 1); #endif #endif /* HN_USE_TXDESC_BUFRING */ KASSERT(txd->m == NULL && txd->refs == 0 && STAILQ_EMPTY(&txd->agg_list) && txd->chim_index == HN_NVS_CHIM_IDX_INVALID && txd->chim_size == 0 && (txd->flags & HN_TXD_FLAG_ONLIST) && (txd->flags & HN_TXD_FLAG_ONAGG) == 0 && (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); txd->flags &= ~HN_TXD_FLAG_ONLIST; txd->refs = 1; } return txd; } static __inline void hn_txdesc_hold(struct hn_txdesc *txd) { /* 0->1 transition will never work */ KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); atomic_add_int(&txd->refs, 1); } static __inline void hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd) { KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0, ("recursive aggregation on aggregating txdesc")); KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, ("already aggregated")); KASSERT(STAILQ_EMPTY(&txd->agg_list), ("recursive aggregation on to-be-aggregated txdesc")); txd->flags |= HN_TXD_FLAG_ONAGG; STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link); } static bool hn_tx_ring_pending(struct hn_tx_ring *txr) { bool pending = false; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) pending = true; mtx_unlock_spin(&txr->hn_txlist_spin); #else if (!buf_ring_full(txr->hn_txdesc_br)) pending = true; #endif return (pending); } static __inline void hn_txeof(struct hn_tx_ring *txr) { txr->hn_has_txeof = 0; txr->hn_txeof(txr); } static void hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan, const void *data __unused, int dlen __unused) { struct hn_txdesc *txd = sndc->hn_cbarg; struct hn_tx_ring *txr; txr = txd->txr; KASSERT(txr->hn_chan == chan, ("channel mismatch, on chan%u, should be chan%u", vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan))); txr->hn_has_txeof = 1; hn_txdesc_put(txr, txd); ++txr->hn_txdone_cnt; if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { txr->hn_txdone_cnt = 0; if (txr->hn_oactive) hn_txeof(txr); } } static void hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) { #if defined(INET) || defined(INET6) tcp_lro_flush_all(&rxr->hn_lro); #endif /* * NOTE: * 'txr' could be NULL, if multiple channels and * ifnet.if_start method are enabled. */ if (txr == NULL || !txr->hn_has_txeof) return; txr->hn_txdone_cnt = 0; hn_txeof(txr); } static __inline uint32_t hn_rndis_pktmsg_offset(uint32_t ofs) { KASSERT(ofs >= sizeof(struct rndis_packet_msg), ("invalid RNDIS packet msg offset %u", ofs)); return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); } static __inline void * hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, size_t pi_dlen, uint32_t pi_type) { const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); struct rndis_pktinfo *pi; KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); /* * Per-packet-info does not move; it only grows. * * NOTE: * rm_pktinfooffset in this phase counts from the beginning * of rndis_packet_msg. 
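     * hn_encap() later converts it (and rm_dataoffset) with
     * hn_rndis_pktmsg_offset(), which rebases the offsets so they are
     * relative to the rm_dataoffset field, which appears to be how the
     * host interprets them.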
*/ KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, ("%u pktinfo overflows RNDIS packet msg", pi_type)); pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + pkt->rm_pktinfolen); pkt->rm_pktinfolen += pi_size; pi->rm_size = pi_size; pi->rm_type = pi_type; pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; return (pi->rm_data); } static __inline int hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) { struct hn_txdesc *txd; struct mbuf *m; int error, pkts; txd = txr->hn_agg_txd; KASSERT(txd != NULL, ("no aggregate txdesc")); /* * Since hn_txpkt() will reset this temporary stat, save * it now, so that oerrors can be updated properly, if * hn_txpkt() ever fails. */ pkts = txr->hn_stat_pkts; /* * Since txd's mbuf will _not_ be freed upon hn_txpkt() * failure, save it for later freeing, if hn_txpkt() ever * fails. */ m = txd->m; error = hn_txpkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m is not. */ m_freem(m); txr->hn_flush_failed++; if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); } /* Reset all aggregation states. */ txr->hn_agg_txd = NULL; txr->hn_agg_szleft = 0; txr->hn_agg_pktleft = 0; txr->hn_agg_prevpkt = NULL; return (error); } static void * hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, int pktsize) { void *chim; if (txr->hn_agg_txd != NULL) { if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { struct hn_txdesc *agg_txd = txr->hn_agg_txd; struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; int olen; /* * Update the previous RNDIS packet's total length, * it can be increased due to the mandatory alignment * padding for this RNDIS packet. And update the * aggregating txdesc's chimney sending buffer size * accordingly. * * XXX * Zero-out the padding, as required by the RNDIS spec. */ olen = pkt->rm_len; pkt->rm_len = roundup2(olen, txr->hn_agg_align); agg_txd->chim_size += pkt->rm_len - olen; /* Link this txdesc to the parent. */ hn_txdesc_agg(agg_txd, txd); chim = (uint8_t *)pkt + pkt->rm_len; /* Save the current packet for later fixup. */ txr->hn_agg_prevpkt = chim; txr->hn_agg_pktleft--; txr->hn_agg_szleft -= pktsize; if (txr->hn_agg_szleft <= HN_PKTSIZE_MIN(txr->hn_agg_align)) { /* * Probably can't aggregate more packets, * flush this aggregating txdesc proactively. */ txr->hn_agg_pktleft = 0; } /* Done! */ return (chim); } hn_flush_txagg(ifp, txr); } KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); txr->hn_tx_chimney_tried++; txd->chim_index = hn_chim_alloc(txr->hn_sc); if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) return (NULL); txr->hn_tx_chimney++; chim = txr->hn_sc->hn_chim + (txd->chim_index * txr->hn_sc->hn_chim_szmax); if (txr->hn_agg_pktmax > 1 && txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { txr->hn_agg_txd = txd; txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; txr->hn_agg_prevpkt = chim; } return (chim); } /* * NOTE: * If this function fails, then both txd and m_head0 will be freed. 
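 * This is the opposite contract from hn_txpkt() below, which frees the
 * txd but leaves the mbuf alone; that asymmetry is why hn_flush_txagg()
 * saves txd->m before calling hn_txpkt().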
*/ static int hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) { bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; int error, nsegs, i; struct mbuf *m_head = *m_head0; struct rndis_packet_msg *pkt; uint32_t *pi_data; void *chim = NULL; int pkt_hlen, pkt_size; pkt = txd->rndis_pkt; pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); if (pkt_size < txr->hn_chim_size) { chim = hn_try_txagg(ifp, txr, txd, pkt_size); if (chim != NULL) pkt = chim; } else { if (txr->hn_agg_txd != NULL) hn_flush_txagg(ifp, txr); } pkt->rm_type = REMOTE_NDIS_PACKET_MSG; pkt->rm_len = m_head->m_pkthdr.len; pkt->rm_dataoffset = 0; pkt->rm_datalen = m_head->m_pkthdr.len; pkt->rm_oobdataoffset = 0; pkt->rm_oobdatalen = 0; pkt->rm_oobdataelements = 0; pkt->rm_pktinfooffset = sizeof(*pkt); pkt->rm_pktinfolen = 0; pkt->rm_vchandle = 0; pkt->rm_reserved = 0; if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { /* * Set the hash value for this packet, so that the host could * dispatch the TX done event for this packet back to this TX * ring's channel. */ pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); *pi_data = txr->hn_tx_idx; } if (m_head->m_flags & M_VLANTAG) { pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); *pi_data = NDIS_VLAN_INFO_MAKE( EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { #if defined(INET6) || defined(INET) pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { *pi_data = NDIS_LSO2_INFO_MAKEIPV4( m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, m_head->m_pkthdr.tso_segsz); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { *pi_data = NDIS_LSO2_INFO_MAKEIPV6( m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, m_head->m_pkthdr.tso_segsz); } #endif #endif /* INET6 || INET */ } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); if (m_head->m_pkthdr.csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP)) { *pi_data = NDIS_TXCSUM_INFO_IPV6; } else { *pi_data = NDIS_TXCSUM_INFO_IPV4; if (m_head->m_pkthdr.csum_flags & CSUM_IP) *pi_data |= NDIS_TXCSUM_INFO_IPCS; } if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) { *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS( m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); } else if (m_head->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) { *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS( m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); } } pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; /* Fixup RNDIS packet message total length */ pkt->rm_len += pkt_hlen; /* Convert RNDIS packet message offsets */ pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen); pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); /* * Fast path: Chimney sending. 
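     * The RNDIS header was built directly in the pre-posted chimney
     * buffer (pkt == chim) and the payload is copied in below, so no
     * gather list or DMA map load is needed; only the chimney
     * index/size is handed to the host.  The slow path afterwards
     * builds a vmbus_gpa gather list instead.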
*/ if (chim != NULL) { struct hn_txdesc *tgt_txd = txd; if (txr->hn_agg_txd != NULL) { tgt_txd = txr->hn_agg_txd; #ifdef INVARIANTS *m_head0 = NULL; #endif } KASSERT(pkt == chim, ("RNDIS pkt not in chimney sending buffer")); KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, ("chimney sending buffer is not used")); tgt_txd->chim_size += pkt->rm_len; m_copydata(m_head, 0, m_head->m_pkthdr.len, ((uint8_t *)chim) + pkt_hlen); txr->hn_gpa_cnt = 0; txr->hn_sendpkt = hn_txpkt_chim; goto done; } KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("chimney buffer is used")); KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); if (__predict_false(error)) { int freed; /* * This mbuf is not linked w/ the txd yet, so free it now. */ m_freem(m_head); *m_head0 = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon txdma error")); txr->hn_txdma_failed++; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return error; } *m_head0 = m_head; /* +1 RNDIS packet message */ txr->hn_gpa_cnt = nsegs + 1; /* send packet with page buffer */ txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; txr->hn_gpa[0].gpa_len = pkt_hlen; /* * Fill the page buffers with mbuf info after the page * buffer for RNDIS packet message. */ for (i = 0; i < nsegs; ++i) { struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; gpa->gpa_page = atop(segs[i].ds_addr); gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; gpa->gpa_len = segs[i].ds_len; } txd->chim_index = HN_NVS_CHIM_IDX_INVALID; txd->chim_size = 0; txr->hn_sendpkt = hn_txpkt_sglist; done: txd->m = m_head; /* Set the completion routine */ hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); /* Update temporary stats for later use. */ txr->hn_stat_pkts++; txr->hn_stat_size += m_head->m_pkthdr.len; if (m_head->m_flags & M_MCAST) txr->hn_stat_mcasts++; return 0; } /* * NOTE: * If this function fails, then txd will be freed, but the mbuf * associated w/ the txd will _not_ be freed. */ static int hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) { int error, send_failed = 0, has_bpf; again: has_bpf = bpf_peers_present(ifp->if_bpf); if (has_bpf) { /* * Make sure that this txd and any aggregated txds are not * freed before ETHER_BPF_MTAP. */ hn_txdesc_hold(txd); } error = txr->hn_sendpkt(txr, txd); if (!error) { if (has_bpf) { const struct hn_txdesc *tmp_txd; ETHER_BPF_MTAP(ifp, txd->m); STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) ETHER_BPF_MTAP(ifp, tmp_txd->m); } if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); #ifdef HN_IFSTART_SUPPORT if (!hn_use_if_start) #endif { if_inc_counter(ifp, IFCOUNTER_OBYTES, txr->hn_stat_size); if (txr->hn_stat_mcasts != 0) { if_inc_counter(ifp, IFCOUNTER_OMCASTS, txr->hn_stat_mcasts); } } txr->hn_pkts += txr->hn_stat_pkts; txr->hn_sends++; } if (has_bpf) hn_txdesc_put(txr, txd); if (__predict_false(error)) { int freed; /* * This should "really rarely" happen. * * XXX Too many RX to be acked or too many sideband * commands to run? Ask netvsc_channel_rollup() * to kick start later. */ txr->hn_has_txeof = 1; if (!send_failed) { txr->hn_send_failed++; send_failed = 1; /* * Try sending again after set hn_has_txeof; * in case that we missed the last * netvsc_channel_rollup(). 
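             * Only a single retry is attempted: send_failed is set
             * before the goto, so a second failure falls through to
             * the error path below.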
*/ goto again; } if_printf(ifp, "send failed\n"); /* * Caller will perform further processing on the * associated mbuf, so don't free it in hn_txdesc_put(); * only unload it from the DMA map in hn_txdesc_put(), * if it was loaded. */ txd->m = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon send error")); txr->hn_send_failed++; } /* Reset temporary stats, after this sending is done. */ txr->hn_stat_size = 0; txr->hn_stat_pkts = 0; txr->hn_stat_mcasts = 0; return (error); } /* * Append the specified data to the indicated mbuf chain, * Extend the mbuf chain if the new data does not fit in * existing space. * * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. * There should be an equivalent in the kernel mbuf code, * but there does not appear to be one yet. * * Differs from m_append() in that additional mbufs are * allocated with cluster size MJUMPAGESIZE, and filled * accordingly. * * Return 1 if able to complete the job; otherwise 0. */ static int hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; int remainder, space; for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; space = M_TRAILINGSPACE(m); if (space > 0) { /* * Copy into available space. */ if (space > remainder) space = remainder; bcopy(cp, mtod(m, caddr_t) + m->m_len, space); m->m_len += space; cp += space; remainder -= space; } while (remainder > 0) { /* * Allocate a new mbuf; could check space * and allocate a cluster instead. */ n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); if (n == NULL) break; n->m_len = min(MJUMPAGESIZE, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len; remainder -= n->m_len; m->m_next = n; m = n; } if (m0->m_flags & M_PKTHDR) m0->m_pkthdr.len += len - remainder; return (remainder == 0); } #if defined(INET) || defined(INET6) static __inline int hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) { #if __FreeBSD_version >= 1100095 if (hn_lro_mbufq_depth) { tcp_lro_queue_mbuf(lc, m); return 0; } #endif return tcp_lro_rx(lc, m, 0); } #endif static int hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, const struct hn_rxinfo *info) { struct ifnet *ifp, *hn_ifp = rxr->hn_ifp; struct mbuf *m_new; int size, do_lro = 0, do_csum = 1, is_vf = 0; int hash_type = M_HASHTYPE_NONE; int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE; ifp = hn_ifp; if (rxr->hn_rxvf_ifp != NULL) { /* * Non-transparent mode VF; pretend this packet is from * the VF. */ ifp = rxr->hn_rxvf_ifp; is_vf = 1; } else if (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF) { /* Transparent mode VF. */ is_vf = 1; } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { /* * NOTE: * See the NOTE of hn_rndis_init_fixat(). This * function can be reached, immediately after the * RNDIS is initialized but before the ifnet is * setup on the hn_attach() path; drop the unexpected * packets. */ return (0); } if (__predict_false(dlen < ETHER_HDR_LEN)) { if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1); return (0); } if (dlen <= MHLEN) { m_new = m_gethdr(M_NOWAIT, MT_DATA); if (m_new == NULL) { if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); return (0); } memcpy(mtod(m_new, void *), data, dlen); m_new->m_pkthdr.len = m_new->m_len = dlen; rxr->hn_small_pkts++; } else { /* * Get an mbuf with a cluster. For packets 2K or less, * get a standard 2K cluster. For anything larger, get a * 4K cluster. Any buffers larger than 4K can cause problems * if looped around to the Hyper-V TX channel, so avoid them. 
*/ size = MCLBYTES; if (dlen > MCLBYTES) { /* 4096 */ size = MJUMPAGESIZE; } m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); if (m_new == NULL) { if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); return (0); } hv_m_append(m_new, dlen, data); } m_new->m_pkthdr.rcvif = ifp; if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0)) do_csum = 0; /* receive side checksum offload */ if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { /* IP csum offload */ if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); rxr->hn_csum_ip++; } /* TCP/UDP csum offload */ if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) rxr->hn_csum_tcp++; else rxr->hn_csum_udp++; } /* * XXX * As of this write (Oct 28th, 2016), host side will turn * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so * the do_lro setting here is actually _not_ accurate. We * depend on the RSS hash type check to reset do_lro. */ if ((info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) do_lro = 1; } else { hn_rxpkt_proto(m_new, &l3proto, &l4proto); if (l3proto == ETHERTYPE_IP) { if (l4proto == IPPROTO_TCP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_TCP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } do_lro = 1; } else if (l4proto == IPPROTO_UDP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_UDP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } } else if (l4proto != IPPROTO_DONE && do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); } } } if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( NDIS_VLAN_INFO_ID(info->vlan_info), NDIS_VLAN_INFO_PRI(info->vlan_info), NDIS_VLAN_INFO_CFI(info->vlan_info)); m_new->m_flags |= M_VLANTAG; } /* * If VF is activated (tranparent/non-transparent mode does not * matter here). * * - Disable LRO * * hn(4) will only receive broadcast packets, multicast packets, * TCP SYN and SYN|ACK (in Azure), LRO is useless for these * packet types. * * For non-transparent, we definitely _cannot_ enable LRO at * all, since the LRO flush will use hn(4) as the receiving * interface; i.e. hn_ifp->if_input(hn_ifp, m). */ if (is_vf) do_lro = 0; /* * If VF is activated (tranparent/non-transparent mode does not * matter here), do _not_ mess with unsupported hash types or * functions. */ if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { rxr->hn_rss_pkts++; m_new->m_pkthdr.flowid = info->hash_value; if (!is_vf) hash_type = M_HASHTYPE_OPAQUE_HASH; if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == NDIS_HASH_FUNCTION_TOEPLITZ) { uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK & rxr->hn_mbuf_hash); /* * NOTE: * do_lro is resetted, if the hash types are not TCP * related. See the comment in the above csum_flags * setup section. 
*/ switch (type) { case NDIS_HASH_IPV4: hash_type = M_HASHTYPE_RSS_IPV4; do_lro = 0; break; case NDIS_HASH_TCP_IPV4: hash_type = M_HASHTYPE_RSS_TCP_IPV4; if (rxr->hn_rx_flags & HN_RX_FLAG_UDP_HASH) { int def_htype = M_HASHTYPE_OPAQUE_HASH; if (is_vf) def_htype = M_HASHTYPE_NONE; /* * UDP 4-tuple hash is delivered as * TCP 4-tuple hash. */ if (l3proto == ETHERTYPE_MAX) { hn_rxpkt_proto(m_new, &l3proto, &l4proto); } if (l3proto == ETHERTYPE_IP) { if (l4proto == IPPROTO_UDP && (rxr->hn_mbuf_hash & NDIS_HASH_UDP_IPV4_X)) { hash_type = M_HASHTYPE_RSS_UDP_IPV4; do_lro = 0; } else if (l4proto != IPPROTO_TCP) { hash_type = def_htype; do_lro = 0; } } else { hash_type = def_htype; do_lro = 0; } } break; case NDIS_HASH_IPV6: hash_type = M_HASHTYPE_RSS_IPV6; do_lro = 0; break; case NDIS_HASH_IPV6_EX: hash_type = M_HASHTYPE_RSS_IPV6_EX; do_lro = 0; break; case NDIS_HASH_TCP_IPV6: hash_type = M_HASHTYPE_RSS_TCP_IPV6; break; case NDIS_HASH_TCP_IPV6_EX: hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; break; } } } else if (!is_vf) { m_new->m_pkthdr.flowid = rxr->hn_rx_idx; hash_type = M_HASHTYPE_OPAQUE; } M_HASHTYPE_SET(m_new, hash_type); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if (hn_ifp != ifp) { const struct ether_header *eh; /* * Non-transparent mode VF is activated. */ /* * Allow tapping on hn(4). */ ETHER_BPF_MTAP(hn_ifp, m_new); /* * Update hn(4)'s stats. */ if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len); /* Checked at the beginning of this function. */ KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame")); eh = mtod(m_new, struct ether_header *); if (ETHER_IS_MULTICAST(eh->ether_dhost)) if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1); } rxr->hn_pkts++; if ((hn_ifp->if_capenable & IFCAP_LRO) && do_lro) { #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxr->hn_lro; if (lro->lro_cnt) { rxr->hn_lro_tried++; if (hn_lro_rx(lro, m_new) == 0) { /* DONE! */ return 0; } } #endif } ifp->if_input(ifp, m_new); return (0); } static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct hn_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data, ifr_vf; struct ifnet *vf_ifp; int mask, error = 0; struct ifrsskey *ifrk; struct ifrsshash *ifrh; uint32_t mtu; switch (cmd) { case SIOCSIFMTU: if (ifr->ifr_mtu > HN_MTU_MAX) { error = EINVAL; break; } HN_LOCK(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { HN_UNLOCK(sc); break; } if ((sc->hn_caps & HN_CAP_MTU) == 0) { /* Can't change MTU */ HN_UNLOCK(sc); error = EOPNOTSUPP; break; } if (ifp->if_mtu == ifr->ifr_mtu) { HN_UNLOCK(sc); break; } if (hn_xpnt_vf_isready(sc)) { vf_ifp = sc->hn_vf_ifp; ifr_vf = *ifr; strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname, sizeof(ifr_vf.ifr_name)); error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr_vf); if (error) { HN_UNLOCK(sc); if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n", vf_ifp->if_xname, ifr->ifr_mtu, error); break; } } /* * Suspend this interface before the synthetic parts * are ripped. */ hn_suspend(sc); /* * Detach the synthetics parts, i.e. NVS and RNDIS. */ hn_synth_detach(sc); /* * Reattach the synthetic parts, i.e. NVS and RNDIS, * with the new MTU setting. */ error = hn_synth_attach(sc, ifr->ifr_mtu); if (error) { HN_UNLOCK(sc); break; } error = hn_rndis_get_mtu(sc, &mtu); if (error) mtu = ifr->ifr_mtu; else if (bootverbose) if_printf(ifp, "RNDIS mtu %u\n", mtu); /* * Commit the requested MTU, after the synthetic parts * have been successfully attached. 
*/ if (mtu >= ifr->ifr_mtu) { mtu = ifr->ifr_mtu; } else { if_printf(ifp, "fixup mtu %d -> %u\n", ifr->ifr_mtu, mtu); } ifp->if_mtu = mtu; /* * Synthetic parts' reattach may change the chimney * sending size; update it. */ if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) hn_set_chim_size(sc, sc->hn_chim_szmax); /* * Make sure that various parameters based on MTU are * still valid, after the MTU change. */ hn_mtu_change_fixup(sc); /* * All done! Resume the interface now. */ hn_resume(sc); if ((sc->hn_flags & HN_FLAG_RXVF) || (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { /* * Since we have reattached the NVS part, * change the datapath to VF again; in case * that it is lost, after the NVS was detached. */ hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); } HN_UNLOCK(sc); break; case SIOCSIFFLAGS: HN_LOCK(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { HN_UNLOCK(sc); break; } if (hn_xpnt_vf_isready(sc)) hn_xpnt_vf_saveifflags(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* * Caller meight hold mutex, e.g. * bpf; use busy-wait for the RNDIS * reply. */ HN_NO_SLEEPING(sc); hn_rxfilter_config(sc); HN_SLEEPING_OK(sc); if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) error = hn_xpnt_vf_iocsetflags(sc); } else { hn_init_locked(sc); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) hn_stop(sc, false); } sc->hn_if_flags = ifp->if_flags; HN_UNLOCK(sc); break; case SIOCSIFCAP: HN_LOCK(sc); if (hn_xpnt_vf_isready(sc)) { ifr_vf = *ifr; strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname, sizeof(ifr_vf.ifr_name)); error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf); HN_UNLOCK(sc); break; } /* * Fix up requested capabilities w/ supported capabilities, * since the supported capabilities could have been changed. */ mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); else ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); else ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); } /* TODO: flip RNDIS offload parameters for RXCSUM. */ if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; #ifdef foo /* We can't diff IPv6 packets from IPv4 packets on RX path. */ if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; #endif if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; else ifp->if_hwassist &= ~CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; else ifp->if_hwassist &= ~CSUM_IP6_TSO; } HN_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: HN_LOCK(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { HN_UNLOCK(sc); break; } if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* * Multicast uses mutex; use busy-wait for * the RNDIS reply. 
*/ HN_NO_SLEEPING(sc); hn_rxfilter_config(sc); HN_SLEEPING_OK(sc); } /* XXX vlan(4) style mcast addr maintenance */ if (hn_xpnt_vf_isready(sc)) { int old_if_flags; old_if_flags = sc->hn_vf_ifp->if_flags; hn_xpnt_vf_saveifflags(sc); if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) && ((old_if_flags ^ sc->hn_vf_ifp->if_flags) & IFF_ALLMULTI)) error = hn_xpnt_vf_iocsetflags(sc); } HN_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: HN_LOCK(sc); if (hn_xpnt_vf_isready(sc)) { /* * SIOCGIFMEDIA expects ifmediareq, so don't * create and pass ifr_vf to the VF here; just * replace the ifr_name. */ vf_ifp = sc->hn_vf_ifp; strlcpy(ifr->ifr_name, vf_ifp->if_xname, sizeof(ifr->ifr_name)); error = vf_ifp->if_ioctl(vf_ifp, cmd, data); /* Restore the ifr_name. */ strlcpy(ifr->ifr_name, ifp->if_xname, sizeof(ifr->ifr_name)); HN_UNLOCK(sc); break; } HN_UNLOCK(sc); error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); break; case SIOCGIFRSSHASH: ifrh = (struct ifrsshash *)data; HN_LOCK(sc); if (sc->hn_rx_ring_inuse == 1) { HN_UNLOCK(sc); ifrh->ifrh_func = RSS_FUNC_NONE; ifrh->ifrh_types = 0; break; } if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; else ifrh->ifrh_func = RSS_FUNC_PRIVATE; ifrh->ifrh_types = hn_rss_type_fromndis(sc->hn_rss_hash); HN_UNLOCK(sc); break; case SIOCGIFRSSKEY: ifrk = (struct ifrsskey *)data; HN_LOCK(sc); if (sc->hn_rx_ring_inuse == 1) { HN_UNLOCK(sc); ifrk->ifrk_func = RSS_FUNC_NONE; ifrk->ifrk_keylen = 0; break; } if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; else ifrk->ifrk_func = RSS_FUNC_PRIVATE; ifrk->ifrk_keylen = NDIS_HASH_KEYSIZE_TOEPLITZ; memcpy(ifrk->ifrk_key, sc->hn_rss.rss_key, NDIS_HASH_KEYSIZE_TOEPLITZ); HN_UNLOCK(sc); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void hn_stop(struct hn_softc *sc, bool detaching) { struct ifnet *ifp = sc->hn_ifp; int i; HN_LOCK_ASSERT(sc); KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, ("synthetic parts were not attached")); /* Clear RUNNING bit ASAP. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); /* Disable polling. */ hn_polling(sc, 0); if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { KASSERT(sc->hn_vf_ifp != NULL, ("%s: VF is not attached", ifp->if_xname)); /* Mark transparent mode VF as disabled. */ hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */); /* * NOTE: * Datapath setting must happen _before_ bringing * the VF down. */ hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); /* * Bring the VF down. */ hn_xpnt_vf_saveifflags(sc); sc->hn_vf_ifp->if_flags &= ~IFF_UP; hn_xpnt_vf_iocsetflags(sc); } /* Suspend data transfers. */ hn_suspend_data(sc); /* Clear OACTIVE bit. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; /* * If the non-transparent mode VF is active, make sure * that the RX filter still allows packet reception. */ if (!detaching && (sc->hn_flags & HN_FLAG_RXVF)) hn_rxfilter_config(sc); } static void hn_init_locked(struct hn_softc *sc) { struct ifnet *ifp = sc->hn_ifp; int i; HN_LOCK_ASSERT(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) return; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; /* Configure RX filter */ hn_rxfilter_config(sc); /* Clear OACTIVE bit. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; /* Clear TX 'suspended' bit. 
*/ hn_resume_tx(sc, sc->hn_tx_ring_inuse); if (hn_xpnt_vf_isready(sc)) { /* Initialize transparent VF. */ hn_xpnt_vf_init(sc); } /* Everything is ready; unleash! */ atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); /* Re-enable polling if requested. */ if (sc->hn_pollhz > 0) hn_polling(sc, sc->hn_pollhz); } static void hn_init(void *xsc) { struct hn_softc *sc = xsc; HN_LOCK(sc); hn_init_locked(sc); HN_UNLOCK(sc); } #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; unsigned int lenlim; int error; lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; error = sysctl_handle_int(oidp, &lenlim, 0, req); if (error || req->newptr == NULL) return error; HN_LOCK(sc); if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || lenlim > TCP_LRO_LENGTH_MAX) { HN_UNLOCK(sc); return EINVAL; } hn_set_lro_lenlim(sc, lenlim); HN_UNLOCK(sc); return 0; } static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ackcnt, error, i; /* * lro_ackcnt_lim is append count limit, * +1 to turn it into aggregation limit. */ ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; error = sysctl_handle_int(oidp, &ackcnt, 0, req); if (error || req->newptr == NULL) return error; if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) return EINVAL; /* * Convert aggregation limit back to append * count limit. */ --ackcnt; HN_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; HN_UNLOCK(sc); return 0; } #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int hcsum = arg2; int on, error, i; on = 0; if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) on = 1; error = sysctl_handle_int(oidp, &on, 0, req); if (error || req->newptr == NULL) return error; HN_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (on) rxr->hn_trust_hcsum |= hcsum; else rxr->hn_trust_hcsum &= ~hcsum; } HN_UNLOCK(sc); return 0; } static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int chim_size, error; chim_size = sc->hn_tx_ring[0].hn_chim_size; error = sysctl_handle_int(oidp, &chim_size, 0, req); if (error || req->newptr == NULL) return error; if (chim_size > sc->hn_chim_szmax || chim_size <= 0) return EINVAL; HN_LOCK(sc); hn_set_chim_size(sc, chim_size); HN_UNLOCK(sc); return 0; } #if __FreeBSD_version < 1100095 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((int *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; *((int *)((uint8_t *)rxr + ofs)) = 0; } return 0; } #else static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((uint64_t *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. 
*/ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; } return 0; } #endif static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; u_long stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((u_long *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; *((u_long *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_tx_ring *txr; u_long stat; stat = 0; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { txr = &sc->hn_tx_ring[i]; stat += *((u_long *)((uint8_t *)txr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { txr = &sc->hn_tx_ring[i]; *((u_long *)((uint8_t *)txr + ofs)) = 0; } return 0; } static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error, conf; struct hn_tx_ring *txr; txr = &sc->hn_tx_ring[0]; conf = *((int *)((uint8_t *)txr + ofs)); error = sysctl_handle_int(oidp, &conf, 0, req); if (error || req->newptr == NULL) return error; HN_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { txr = &sc->hn_tx_ring[i]; *((int *)((uint8_t *)txr + ofs)) = conf; } HN_UNLOCK(sc); return 0; } static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error, size; size = sc->hn_agg_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || req->newptr == NULL) return (error); HN_LOCK(sc); sc->hn_agg_size = size; hn_set_txagg(sc); HN_UNLOCK(sc); return (0); } static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error, pkts; pkts = sc->hn_agg_pkts; error = sysctl_handle_int(oidp, &pkts, 0, req); if (error || req->newptr == NULL) return (error); HN_LOCK(sc); sc->hn_agg_pkts = pkts; hn_set_txagg(sc); HN_UNLOCK(sc); return (0); } static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int pkts; pkts = sc->hn_tx_ring[0].hn_agg_pktmax; return (sysctl_handle_int(oidp, &pkts, 0, req)); } static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int align; align = sc->hn_tx_ring[0].hn_agg_align; return (sysctl_handle_int(oidp, &align, 0, req)); } static void hn_chan_polling(struct vmbus_channel *chan, u_int pollhz) { if (pollhz == 0) vmbus_chan_poll_disable(chan); else vmbus_chan_poll_enable(chan, pollhz); } static void hn_polling(struct hn_softc *sc, u_int pollhz) { int nsubch = sc->hn_rx_ring_inuse - 1; HN_LOCK_ASSERT(sc); if (nsubch > 0) { struct vmbus_channel **subch; int i; subch = vmbus_subchan_get(sc->hn_prichan, nsubch); for (i = 0; i < nsubch; ++i) hn_chan_polling(subch[i], pollhz); vmbus_subchan_rel(subch, nsubch); } hn_chan_polling(sc->hn_prichan, pollhz); } static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int pollhz, error; pollhz = sc->hn_pollhz; error = sysctl_handle_int(oidp, &pollhz, 0, req); if (error || req->newptr == NULL) return (error); if (pollhz != 0 && (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX)) return (EINVAL); HN_LOCK(sc); if (sc->hn_pollhz != pollhz) { sc->hn_pollhz = pollhz; if 
((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) hn_polling(sc, sc->hn_pollhz); } HN_UNLOCK(sc); return (0); } static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char verstr[16]; snprintf(verstr, sizeof(verstr), "%u.%u", HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); } static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char caps_str[128]; uint32_t caps; HN_LOCK(sc); caps = sc->hn_caps; HN_UNLOCK(sc); snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); } static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char assist_str[128]; uint32_t hwassist; HN_LOCK(sc); hwassist = sc->hn_ifp->if_hwassist; HN_UNLOCK(sc); snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); } static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char filter_str[128]; uint32_t filter; HN_LOCK(sc); filter = sc->hn_rx_filter; HN_UNLOCK(sc); snprintf(filter_str, sizeof(filter_str), "%b", filter, NDIS_PACKET_TYPES); return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); } #ifndef RSS static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error; HN_LOCK(sc); error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); if (error || req->newptr == NULL) goto back; if ((sc->hn_flags & HN_FLAG_RXVF) || (hn_xpnt_vf && sc->hn_vf_ifp != NULL)) { /* * RSS key is synchronized w/ VF's, don't allow users * to change it. */ error = EBUSY; goto back; } error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); if (error) goto back; sc->hn_flags |= HN_FLAG_HAS_RSSKEY; if (sc->hn_rx_ring_inuse > 1) { error = hn_rss_reconfig(sc); } else { /* Not RSS capable, at least for now; just save the RSS key. */ error = 0; } back: HN_UNLOCK(sc); return (error); } static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error; HN_LOCK(sc); error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); if (error || req->newptr == NULL) goto back; /* * Don't allow RSS indirect table change, if this interface is not * RSS capable currently. 
*/ if (sc->hn_rx_ring_inuse == 1) { error = EOPNOTSUPP; goto back; } error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); if (error) goto back; sc->hn_flags |= HN_FLAG_HAS_RSSIND; hn_rss_ind_fixup(sc); error = hn_rss_reconfig(sc); back: HN_UNLOCK(sc); return (error); } #endif /* !RSS */ static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char hash_str[128]; uint32_t hash; HN_LOCK(sc); hash = sc->hn_rss_hash; HN_UNLOCK(sc); snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); } static int hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char hash_str[128]; uint32_t hash; HN_LOCK(sc); hash = sc->hn_rss_hcap; HN_UNLOCK(sc); snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); } static int hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char hash_str[128]; uint32_t hash; HN_LOCK(sc); hash = sc->hn_rx_ring[0].hn_mbuf_hash; HN_UNLOCK(sc); snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); } static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char vf_name[IFNAMSIZ + 1]; struct ifnet *vf_ifp; HN_LOCK(sc); vf_name[0] = '\0'; vf_ifp = sc->hn_vf_ifp; if (vf_ifp != NULL) snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); HN_UNLOCK(sc); return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); } static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char vf_name[IFNAMSIZ + 1]; struct ifnet *vf_ifp; HN_LOCK(sc); vf_name[0] = '\0'; vf_ifp = sc->hn_rx_ring[0].hn_rxvf_ifp; if (vf_ifp != NULL) snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); HN_UNLOCK(sc); return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); } static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS) { struct rm_priotracker pt; struct sbuf *sb; int error, i; bool first; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); rm_rlock(&hn_vfmap_lock, &pt); first = true; for (i = 0; i < hn_vfmap_size; ++i) { struct ifnet *ifp; if (hn_vfmap[i] == NULL) continue; ifp = ifnet_byindex(i); if (ifp != NULL) { if (first) sbuf_printf(sb, "%s", ifp->if_xname); else sbuf_printf(sb, " %s", ifp->if_xname); first = false; } } rm_runlock(&hn_vfmap_lock, &pt); error = sbuf_finish(sb); sbuf_delete(sb); return (error); } static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS) { struct rm_priotracker pt; struct sbuf *sb; int error, i; bool first; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); rm_rlock(&hn_vfmap_lock, &pt); first = true; for (i = 0; i < hn_vfmap_size; ++i) { struct ifnet *ifp, *hn_ifp; hn_ifp = hn_vfmap[i]; if (hn_ifp == NULL) continue; ifp = ifnet_byindex(i); if (ifp != NULL) { if (first) { sbuf_printf(sb, "%s:%s", ifp->if_xname, hn_ifp->if_xname); } else { sbuf_printf(sb, " %s:%s", ifp->if_xname, hn_ifp->if_xname); } first = false; } } rm_runlock(&hn_vfmap_lock, &pt); error = sbuf_finish(sb); sbuf_delete(sb); return (error); } static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error, onoff = 0; if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) onoff = 1; error = sysctl_handle_int(oidp, &onoff, 0, req); if (error || 
req->newptr == NULL) return (error); HN_LOCK(sc); /* NOTE: hn_vf_lock for hn_transmit() */ rm_wlock(&sc->hn_vf_lock); if (onoff) sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; else sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF; rm_wunlock(&sc->hn_vf_lock); HN_UNLOCK(sc); return (0); } static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int enabled = 0; if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) enabled = 1; return (sysctl_handle_int(oidp, &enabled, 0, req)); } static int hn_check_iplen(const struct mbuf *m, int hoff) { const struct ip *ip; int len, iphlen, iplen; const struct tcphdr *th; int thoff; /* TCP data offset */ len = hoff + sizeof(struct ip); /* The packet must be at least the size of an IP header. */ if (m->m_pkthdr.len < len) return IPPROTO_DONE; /* The fixed IP header must reside completely in the first mbuf. */ if (m->m_len < len) return IPPROTO_DONE; ip = mtodo(m, hoff); /* Bound check the packet's stated IP header length. */ iphlen = ip->ip_hl << 2; if (iphlen < sizeof(struct ip)) /* minimum header length */ return IPPROTO_DONE; /* The full IP header must reside completely in the one mbuf. */ if (m->m_len < hoff + iphlen) return IPPROTO_DONE; iplen = ntohs(ip->ip_len); /* * Check that the amount of data in the buffers is as * at least much as the IP header would have us expect. */ if (m->m_pkthdr.len < hoff + iplen) return IPPROTO_DONE; /* * Ignore IP fragments. */ if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) return IPPROTO_DONE; /* * The TCP/IP or UDP/IP header must be entirely contained within * the first fragment of a packet. */ switch (ip->ip_p) { case IPPROTO_TCP: if (iplen < iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); thoff = th->th_off << 2; if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + thoff) return IPPROTO_DONE; break; case IPPROTO_UDP: if (iplen < iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; break; default: if (iplen < iphlen) return IPPROTO_DONE; break; } return ip->ip_p; } static void hn_rxpkt_proto(const struct mbuf *m_new, int *l3proto, int *l4proto) { const struct ether_header *eh; uint16_t etype; int hoff; hoff = sizeof(*eh); /* Checked at the beginning of this function. */ KASSERT(m_new->m_len >= hoff, ("not ethernet frame")); eh = mtod(m_new, const struct ether_header *); etype = ntohs(eh->ether_type); if (etype == ETHERTYPE_VLAN) { const struct ether_vlan_header *evl; hoff = sizeof(*evl); if (m_new->m_len < hoff) return; evl = mtod(m_new, const struct ether_vlan_header *); etype = ntohs(evl->evl_proto); } *l3proto = etype; if (etype == ETHERTYPE_IP) *l4proto = hn_check_iplen(m_new, hoff); else *l4proto = IPPROTO_DONE; } static int hn_create_rx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; device_t dev = sc->hn_dev; #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 int lroent_cnt; #endif #endif int i; /* * Create RXBUF for reception. * * NOTE: * - It is shared by all channels. * - A large enough buffer is allocated, certain version of NVSes * may further limit the usable space. 
*/ sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->hn_rxbuf == NULL) { device_printf(sc->hn_dev, "allocate rxbuf failed\n"); return (ENOMEM); } sc->hn_rx_ring_cnt = ring_cnt; sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, M_DEVBUF, M_WAITOK | M_ZERO); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 lroent_cnt = hn_lro_entry_count; if (lroent_cnt < TCP_LRO_ENTRIES) lroent_cnt = TCP_LRO_ENTRIES; if (bootverbose) device_printf(dev, "LRO: entry count %d\n", lroent_cnt); #endif #endif /* INET || INET6 */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Create dev.hn.UNIT.rx sysctl tree */ sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, &rxr->hn_br_dma, BUS_DMA_WAITOK); if (rxr->hn_br == NULL) { device_printf(dev, "allocate bufring failed\n"); return (ENOMEM); } if (hn_trust_hosttcp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; if (hn_trust_hostudp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; if (hn_trust_hostip) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; rxr->hn_mbuf_hash = NDIS_HASH_ALL; rxr->hn_ifp = sc->hn_ifp; if (i < sc->hn_tx_ring_cnt) rxr->hn_txr = &sc->hn_tx_ring[i]; rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); rxr->hn_rx_idx = i; rxr->hn_rxbuf = sc->hn_rxbuf; /* * Initialize LRO. */ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, hn_lro_mbufq_depth); #else tcp_lro_init(&rxr->hn_lro); rxr->hn_lro.ifp = sc->hn_ifp; #endif #if __FreeBSD_version >= 1100099 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; #endif #endif /* INET || INET6 */ if (sc->hn_rx_sysctl_tree != NULL) { char name[16]; /* * Create per RX ring sysctl tree: * dev.hn.UNIT.rx.RINGID */ snprintf(name, sizeof(name), "%d", i); rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (rxr->hn_rx_sysctl_tree != NULL) { SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "packets", CTLFLAG_RW, &rxr->hn_pkts, "# of packets received"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "rss_pkts", CTLFLAG_RW, &rxr->hn_rss_pkts, "# of packets w/ RSS info received"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "pktbuf_len", CTLFLAG_RD, &rxr->hn_pktbuf_len, 0, "Temporary channel packet buffer length"); } } } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_queued), #if __FreeBSD_version < 1100095 hn_rx_stat_int_sysctl, #else hn_rx_stat_u64_sysctl, #endif "LU", "LRO queued"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), #if __FreeBSD_version < 1100095 hn_rx_stat_int_sysctl, #else hn_rx_stat_u64_sysctl, #endif "LU", "LRO flushed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct 
hn_rx_ring, hn_lro_tried), hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); #if __FreeBSD_version >= 1100099 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_lenlim_sysctl, "IU", "Max # of data bytes to be aggregated by LRO"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_ackcnt_sysctl, "I", "Max # of ACKs to be aggregated by LRO"); #endif SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, hn_trust_hcsum_sysctl, "I", "Trust tcp segement verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, hn_trust_hcsum_sysctl, "I", "Trust udp datagram verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, hn_trust_hcsum_sysctl, "I", "Trust ip packet verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_ip), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_tcp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_udp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_trusted), hn_rx_stat_ulong_sysctl, "LU", "# of packets that we trust host's csum verification"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_small_pkts), hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_ack_failed), hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); return (0); } static void hn_destroy_rx_data(struct hn_softc *sc) { int i; if (sc->hn_rxbuf != NULL) { if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); else device_printf(sc->hn_dev, "RXBUF is referenced\n"); sc->hn_rxbuf = NULL; } if (sc->hn_rx_ring_cnt == 0) return; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (rxr->hn_br == NULL) continue; if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); } else { device_printf(sc->hn_dev, "%dth channel bufring is referenced", i); } rxr->hn_br = NULL; #if defined(INET) || defined(INET6) tcp_lro_free(&rxr->hn_lro); #endif free(rxr->hn_pktbuf, M_DEVBUF); } free(sc->hn_rx_ring, M_DEVBUF); sc->hn_rx_ring = NULL; sc->hn_rx_ring_cnt = 0; sc->hn_rx_ring_inuse = 0; } static int hn_tx_ring_create(struct hn_softc *sc, int id) { struct hn_tx_ring *txr = 
&sc->hn_tx_ring[id]; device_t dev = sc->hn_dev; bus_dma_tag_t parent_dtag; int error, i; txr->hn_sc = sc; txr->hn_tx_idx = id; #ifndef HN_USE_TXDESC_BUFRING mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); #endif mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); txr->hn_txdesc_cnt = HN_TX_DESC_CNT; txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, M_DEVBUF, M_WAITOK | M_ZERO); #ifndef HN_USE_TXDESC_BUFRING SLIST_INIT(&txr->hn_txlist); #else txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, M_WAITOK, &txr->hn_tx_lock); #endif if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) { txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ( device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id)); } else { txr->hn_tx_taskq = sc->hn_tx_taskqs[id % hn_tx_taskq_cnt]; } #ifdef HN_IFSTART_SUPPORT if (hn_use_if_start) { txr->hn_txeof = hn_start_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); } else #endif { int br_depth; txr->hn_txeof = hn_xmit_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); br_depth = hn_get_txswq_depth(txr); txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, M_WAITOK, &txr->hn_tx_lock); } txr->hn_direct_tx_size = hn_direct_tx_size; /* * Always schedule transmission instead of trying to do direct * transmission. This one gives the best performance so far. */ txr->hn_sched_tx = 1; parent_dtag = bus_get_dma_tag(dev); /* DMA tag for RNDIS packet messages. */ error = bus_dma_tag_create(parent_dtag, /* parent */ HN_RNDIS_PKT_ALIGN, /* alignment */ HN_RNDIS_PKT_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_RNDIS_PKT_LEN, /* maxsize */ 1, /* nsegments */ HN_RNDIS_PKT_LEN, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_rndis_dtag); if (error) { device_printf(dev, "failed to create rndis dmatag\n"); return error; } /* DMA tag for data. */ error = bus_dma_tag_create(parent_dtag, /* parent */ 1, /* alignment */ HN_TX_DATA_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_TX_DATA_MAXSIZE, /* maxsize */ HN_TX_DATA_SEGCNT_MAX, /* nsegments */ HN_TX_DATA_SEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_data_dtag); if (error) { device_printf(dev, "failed to create data dmatag\n"); return error; } for (i = 0; i < txr->hn_txdesc_cnt; ++i) { struct hn_txdesc *txd = &txr->hn_txdesc[i]; txd->txr = txr; txd->chim_index = HN_NVS_CHIM_IDX_INVALID; STAILQ_INIT(&txd->agg_list); /* * Allocate and load RNDIS packet message. */ error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, (void **)&txd->rndis_pkt, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &txd->rndis_pkt_dmap); if (error) { device_printf(dev, "failed to allocate rndis_packet_msg, %d\n", i); return error; } error = bus_dmamap_load(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap, txd->rndis_pkt, HN_RNDIS_PKT_LEN, hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "failed to load rndis_packet_msg, %d\n", i); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); return error; } /* DMA map for TX data. 
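 *
 * Each txdesc ends up with two DMA resources: the coherent, preloaded
 * RNDIS packet message allocated above (its bus address becomes
 * hn_gpa[0] in hn_encap()) and the data map created below, used to
 * load the mbuf chain into the remaining gather entries.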
*/ error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, &txd->data_dmap); if (error) { device_printf(dev, "failed to allocate tx data dmamap\n"); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); return error; } /* All set, put it to list */ txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); #else buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif } txr->hn_txdesc_avail = txr->hn_txdesc_cnt; if (sc->hn_tx_sysctl_tree != NULL) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; char name[16]; /* * Create per TX ring sysctl tree: * dev.hn.UNIT.tx.RINGID */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); snprintf(name, sizeof(name), "%d", id); txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (txr->hn_tx_sysctl_tree != NULL) { child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); #ifdef HN_DEBUG SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", CTLFLAG_RD, &txr->hn_txdesc_avail, 0, "# of available TX descs"); #endif #ifdef HN_IFSTART_SUPPORT if (!hn_use_if_start) #endif { SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", CTLFLAG_RD, &txr->hn_oactive, 0, "over active"); } SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", CTLFLAG_RW, &txr->hn_pkts, "# of packets transmitted"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", CTLFLAG_RW, &txr->hn_sends, "# of sends"); } } return 0; } static void hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) { struct hn_tx_ring *txr = txd->txr; KASSERT(txd->m == NULL, ("still has mbuf installed")); KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); } static void hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT(txd->refs == 0 || txd->refs == 1, ("invalid txd refs %d", txd->refs)); /* Aggregated txds will be freed by their aggregating txd. */ if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { int freed; freed = hn_txdesc_put(txr, txd); KASSERT(freed, ("can't free txdesc")); } } static void hn_tx_ring_destroy(struct hn_tx_ring *txr) { int i; if (txr->hn_txdesc == NULL) return; /* * NOTE: * Because the freeing of aggregated txds will be deferred * to the aggregating txd, two passes are used here: * - The first pass GCes any pending txds. This GC is necessary, * since if the channels are revoked, hypervisor will not * deliver send-done for all pending txds. * - The second pass frees the busdma stuffs, i.e. after all txds * were freed. 
*/ for (i = 0; i < txr->hn_txdesc_cnt; ++i) hn_txdesc_gc(txr, &txr->hn_txdesc[i]); for (i = 0; i < txr->hn_txdesc_cnt; ++i) hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); if (txr->hn_tx_data_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_data_dtag); if (txr->hn_tx_rndis_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); #ifdef HN_USE_TXDESC_BUFRING buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); #endif free(txr->hn_txdesc, M_DEVBUF); txr->hn_txdesc = NULL; if (txr->hn_mbuf_br != NULL) buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); #ifndef HN_USE_TXDESC_BUFRING mtx_destroy(&txr->hn_txlist_spin); #endif mtx_destroy(&txr->hn_tx_lock); } static int hn_create_tx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int i; /* * Create TXBUF for chimney sending. * * NOTE: It is shared by all channels. */ sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->hn_chim == NULL) { device_printf(sc->hn_dev, "allocate txbuf failed\n"); return (ENOMEM); } sc->hn_tx_ring_cnt = ring_cnt; sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, M_DEVBUF, M_WAITOK | M_ZERO); ctx = device_get_sysctl_ctx(sc->hn_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); /* Create dev.hn.UNIT.tx sysctl tree */ sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { int error; error = hn_tx_ring_create(sc, i); if (error) return error; } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_no_txdescs), hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_send_failed), hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_txdma_failed), hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_flush_failed), hn_tx_stat_ulong_sysctl, "LU", "# of packet transmission aggregation flush failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_collapsed), hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, "# of total TX descs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", CTLFLAG_RD, &sc->hn_chim_szmax, 0, "Chimney send packet size upper boundary"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_direct_tx_size), hn_tx_conf_int_sysctl, "I", "Size of the packet for direct transmission"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_sched_tx), hn_tx_conf_int_sysctl, "I", "Always schedule transmission " "instead of doing direct transmission"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, "Applied packet transmission aggregation size"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_txagg_pktmax_sysctl, "I", "Applied packet transmission aggregation packets"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_txagg_align_sysctl, "I", "Applied packet transmission aggregation alignment"); return 0; } static void hn_set_chim_size(struct hn_softc *sc, int chim_size) { int i; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) sc->hn_tx_ring[i].hn_chim_size = chim_size; } static void hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) { struct ifnet *ifp = sc->hn_ifp; u_int hw_tsomax; int tso_minlen; HN_LOCK_ASSERT(sc); if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) return; KASSERT(sc->hn_ndis_tso_sgmin >= 2, ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); tso_minlen = sc->hn_ndis_tso_sgmin * mtu; KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && sc->hn_ndis_tso_szmax <= IP_MAXPACKET, ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); if (tso_maxlen < tso_minlen) tso_maxlen = tso_minlen; else if (tso_maxlen > IP_MAXPACKET) tso_maxlen = IP_MAXPACKET; if (tso_maxlen > sc->hn_ndis_tso_szmax) tso_maxlen = sc->hn_ndis_tso_szmax; hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); if (hn_xpnt_vf_isready(sc)) { if (hw_tsomax > sc->hn_vf_ifp->if_hw_tsomax) hw_tsomax = sc->hn_vf_ifp->if_hw_tsomax; } ifp->if_hw_tsomax = hw_tsomax; if (bootverbose) if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); } static void hn_fixup_tx_data(struct hn_softc *sc) { uint64_t csum_assist; int i; hn_set_chim_size(sc, sc->hn_chim_szmax); if (hn_tx_chimney_size > 0 && hn_tx_chimney_size < sc->hn_chim_szmax) hn_set_chim_size(sc, hn_tx_chimney_size); csum_assist = 0; if (sc->hn_caps & HN_CAP_IPCS) csum_assist |= CSUM_IP; if (sc->hn_caps & HN_CAP_TCP4CS) csum_assist |= CSUM_IP_TCP; if ((sc->hn_caps & HN_CAP_UDP4CS) && hn_enable_udp4cs) csum_assist |= CSUM_IP_UDP; if (sc->hn_caps & HN_CAP_TCP6CS) csum_assist |= CSUM_IP6_TCP; if ((sc->hn_caps & HN_CAP_UDP6CS) && hn_enable_udp6cs) csum_assist |= CSUM_IP6_UDP; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) sc->hn_tx_ring[i].hn_csum_assist = csum_assist; if (sc->hn_caps & HN_CAP_HASHVAL) { /* * Support HASHVAL pktinfo on TX path. 
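 *
 * With HN_TX_FLAG_HASHVAL set, hn_encap() appends a HASHVAL pktinfo
 * carrying the TX ring index, so the host delivers the send-done
 * event on this ring's channel (see the pktinfo setup in hn_encap()).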
*/ if (bootverbose) if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; } } static void hn_fixup_rx_data(struct hn_softc *sc) { if (sc->hn_caps & HN_CAP_UDPHASH) { int i; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_UDP_HASH; } } static void hn_destroy_tx_data(struct hn_softc *sc) { int i; if (sc->hn_chim != NULL) { if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); } else { device_printf(sc->hn_dev, "chimney sending buffer is referenced"); } sc->hn_chim = NULL; } if (sc->hn_tx_ring_cnt == 0) return; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) hn_tx_ring_destroy(&sc->hn_tx_ring[i]); free(sc->hn_tx_ring, M_DEVBUF); sc->hn_tx_ring = NULL; sc->hn_tx_ring_cnt = 0; sc->hn_tx_ring_inuse = 0; } #ifdef HN_IFSTART_SUPPORT static void hn_start_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static int hn_start_locked(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; int sched = 0; KASSERT(hn_use_if_start, ("hn_start_locked is called, when if_start is disabled")); KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); mtx_assert(&txr->hn_tx_lock, MA_OWNED); KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); if (__predict_false(txr->hn_suspended)) return (0); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (0); while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { struct hn_txdesc *txd; struct mbuf *m_head; int error; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. 
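 *
 * Packets larger than the 'len' threshold (hn_direct_tx_size when
 * called from hn_start()) are prepended back onto if_snd and sched
 * is returned non-zero, so the caller enqueues hn_tx_task and the
 * rest of the queue drains from the taskqueue.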
*/ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); sched = 1; break; } #if defined(INET6) || defined(INET) if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { m_head = hn_tso_fixup(m_head); if (__predict_false(m_head == NULL)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); continue; } } else if (m_head->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { m_head = hn_set_hlen(m_head); if (__predict_false(m_head == NULL)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); continue; } } #endif txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } error = hn_encap(ifp, txr, txd, &m_head); if (error) { /* Both txd and m_head are freed */ KASSERT(txr->hn_agg_txd == NULL, ("encap failed w/ pending aggregating txdesc")); continue; } if (txr->hn_agg_pktleft == 0) { if (txr->hn_agg_txd != NULL) { KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); error = hn_flush_txagg(ifp, txr); if (__predict_false(error)) { atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } else { KASSERT(m_head != NULL, ("mbuf was freed")); error = hn_txpkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } } #ifdef INVARIANTS else { KASSERT(txr->hn_agg_txd != NULL, ("no aggregating txdesc")); KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); } #endif } /* Flush pending aggerated transmission. */ if (txr->hn_agg_txd != NULL) hn_flush_txagg(ifp, txr); return (sched); } static void hn_start(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } static void hn_start_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_start_txeof(struct hn_tx_ring *txr) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the OACTIVE earlier, with the hope, that * others could catch up. The task will clear the * flag again with the hn_tx_lock to avoid possible * races. 
*/ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } #endif /* HN_IFSTART_SUPPORT */ static int hn_xmit(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; struct mbuf *m_head; int sched = 0; mtx_assert(&txr->hn_tx_lock, MA_OWNED); #ifdef HN_IFSTART_SUPPORT KASSERT(hn_use_if_start == 0, ("hn_xmit is called, when if_start is enabled")); #endif KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); if (__predict_false(txr->hn_suspended)) return (0); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) return (0); while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { struct hn_txdesc *txd; int error; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); sched = 1; break; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } error = hn_encap(ifp, txr, txd, &m_head); if (error) { /* Both txd and m_head are freed; discard */ KASSERT(txr->hn_agg_txd == NULL, ("encap failed w/ pending aggregating txdesc")); drbr_advance(ifp, txr->hn_mbuf_br); continue; } if (txr->hn_agg_pktleft == 0) { if (txr->hn_agg_txd != NULL) { KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); error = hn_flush_txagg(ifp, txr); if (__predict_false(error)) { txr->hn_oactive = 1; break; } } else { KASSERT(m_head != NULL, ("mbuf was freed")); error = hn_txpkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } } } #ifdef INVARIANTS else { KASSERT(txr->hn_agg_txd != NULL, ("no aggregating txdesc")); KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); } #endif /* Sent */ drbr_advance(ifp, txr->hn_mbuf_br); } /* Flush pending aggerated transmission. */ if (txr->hn_agg_txd != NULL) hn_flush_txagg(ifp, txr); return (sched); } static int hn_transmit(struct ifnet *ifp, struct mbuf *m) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr; int error, idx = 0; if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { struct rm_priotracker pt; rm_rlock(&sc->hn_vf_lock, &pt); if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { struct mbuf *m_bpf = NULL; int obytes, omcast; obytes = m->m_pkthdr.len; omcast = (m->m_flags & M_MCAST) != 0; if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) { if (bpf_peers_present(ifp->if_bpf)) { m_bpf = m_copypacket(m, M_NOWAIT); if (m_bpf == NULL) { /* * Failed to grab a shallow * copy; tap now. */ ETHER_BPF_MTAP(ifp, m); } } } else { ETHER_BPF_MTAP(ifp, m); } error = sc->hn_vf_ifp->if_transmit(sc->hn_vf_ifp, m); rm_runlock(&sc->hn_vf_lock, &pt); if (m_bpf != NULL) { if (!error) ETHER_BPF_MTAP(ifp, m_bpf); m_freem(m_bpf); } if (error == ENOBUFS) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); } else if (error) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } else { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes); if (omcast) { if_inc_counter(ifp, IFCOUNTER_OMCASTS, omcast); } } return (error); } rm_runlock(&sc->hn_vf_lock, &pt); } #if defined(INET6) || defined(INET) /* * Perform TSO packet header fixup or get l2/l3 header length now, * since packet headers should be cache-hot. 
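 * (The if_start path in hn_start_locked() performs the same fixup;
 * when the fixup fails the packet is dropped and counted as an
 * output error.)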
*/ if (m->m_pkthdr.csum_flags & CSUM_TSO) { m = hn_tso_fixup(m); if (__predict_false(m == NULL)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return EIO; } } else if (m->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { m = hn_set_hlen(m); if (__predict_false(m == NULL)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return EIO; } } #endif /* * Select the TX ring based on flowid */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { #ifdef RSS uint32_t bid; if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), &bid) == 0) idx = bid % sc->hn_tx_ring_inuse; else #endif { #if defined(INET6) || defined(INET) int tcpsyn = 0; if (m->m_pkthdr.len < 128 && (m->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) && (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { m = hn_check_tcpsyn(m, &tcpsyn); if (__predict_false(m == NULL)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (EIO); } } #else const int tcpsyn = 0; #endif if (tcpsyn) idx = 0; else idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; } } txr = &sc->hn_tx_ring[idx]; error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); if (error) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); return error; } if (txr->hn_oactive) return 0; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return 0; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); return 0; } static void hn_tx_ring_qflush(struct hn_tx_ring *txr) { struct mbuf *m; mtx_lock(&txr->hn_tx_lock); while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) m_freem(m); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_qflush(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; struct rm_priotracker pt; int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) hn_tx_ring_qflush(&sc->hn_tx_ring[i]); if_qflush(ifp); rm_rlock(&sc->hn_vf_lock, &pt); if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) sc->hn_vf_ifp->if_qflush(sc->hn_vf_ifp); rm_runlock(&sc->hn_vf_lock, &pt); } static void hn_xmit_txeof(struct hn_tx_ring *txr) { if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; txr->hn_oactive = 0; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the oactive earlier, with the hope, that * others could catch up. The task will clear the * oactive again with the hn_tx_lock to avoid possible * races. */ txr->hn_oactive = 0; taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_xmit_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); txr->hn_oactive = 0; hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static int hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) { struct vmbus_chan_br cbr; struct hn_rx_ring *rxr; struct hn_tx_ring *txr = NULL; int idx, error; idx = vmbus_chan_subidx(chan); /* * Link this channel to RX/TX ring. 
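 * The sub-channel index selects the RX ring directly; a TX ring is
 * linked only when the index is within hn_tx_ring_inuse.  The channel
 * is then bound to the CPU serving that ring.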
*/ KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should > 0 && < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("RX ring %d already attached", idx)); rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; rxr->hn_chan = chan; if (bootverbose) { if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n", idx, vmbus_chan_id(chan)); } if (idx < sc->hn_tx_ring_inuse) { txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("TX ring %d already attached", idx)); txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; txr->hn_chan = chan; if (bootverbose) { if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n", idx, vmbus_chan_id(chan)); } } /* Bind this channel to a proper CPU. */ vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx)); /* * Open this channel */ cbr.cbr = rxr->hn_br; cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr; cbr.cbr_txsz = HN_TXBR_SIZE; cbr.cbr_rxsz = HN_RXBR_SIZE; error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr); if (error) { if (error == EISCONN) { if_printf(sc->hn_ifp, "bufring is connected after " "chan%u open failure\n", vmbus_chan_id(chan)); rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; } else { if_printf(sc->hn_ifp, "open chan%u failed: %d\n", vmbus_chan_id(chan), error); } } return (error); } static void hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan) { struct hn_rx_ring *rxr; int idx, error; idx = vmbus_chan_subidx(chan); /* * Link this channel to RX/TX ring. */ KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should > 0 && < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED), ("RX ring %d is not attached", idx)); rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; if (idx < sc->hn_tx_ring_inuse) { struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED), ("TX ring %d is not attached attached", idx)); txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; } /* * Close this channel. * * NOTE: * Channel closing does _not_ destroy the target channel. */ error = vmbus_chan_close_direct(chan); if (error == EISCONN) { if_printf(sc->hn_ifp, "chan%u bufring is connected " "after being closed\n", vmbus_chan_id(chan)); rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; } else if (error) { if_printf(sc->hn_ifp, "chan%u close failed: %d\n", vmbus_chan_id(chan), error); } } static int hn_attach_subchans(struct hn_softc *sc) { struct vmbus_channel **subchans; int subchan_cnt = sc->hn_rx_ring_inuse - 1; int i, error = 0; KASSERT(subchan_cnt > 0, ("no sub-channels")); /* Attach the sub-channels. */ subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) { int error1; error1 = hn_chan_attach(sc, subchans[i]); if (error1) { error = error1; /* Move on; all channels will be detached later. */ } } vmbus_subchan_rel(subchans, subchan_cnt); if (error) { if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error); } else { if (bootverbose) { if_printf(sc->hn_ifp, "%d sub-channels attached\n", subchan_cnt); } } return (error); } static void hn_detach_allchans(struct hn_softc *sc) { struct vmbus_channel **subchans; int subchan_cnt = sc->hn_rx_ring_inuse - 1; int i; if (subchan_cnt == 0) goto back; /* Detach the sub-channels. 
*/ subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) hn_chan_detach(sc, subchans[i]); vmbus_subchan_rel(subchans, subchan_cnt); back: /* * Detach the primary channel, _after_ all sub-channels * are detached. */ hn_chan_detach(sc, sc->hn_prichan); /* Wait for sub-channels to be destroyed, if any. */ vmbus_subchan_drain(sc->hn_prichan); #ifdef INVARIANTS for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { KASSERT((sc->hn_rx_ring[i].hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("%dth RX ring is still attached", i)); } for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { KASSERT((sc->hn_tx_ring[i].hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("%dth TX ring is still attached", i)); } #endif } static int hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch) { struct vmbus_channel **subchans; int nchan, rxr_cnt, error; nchan = *nsubch + 1; if (nchan == 1) { /* * Multiple RX/TX rings are not requested. */ *nsubch = 0; return (0); } /* * Query RSS capabilities, e.g. # of RX rings, and # of indirect * table entries. */ error = hn_rndis_query_rsscaps(sc, &rxr_cnt); if (error) { /* No RSS; this is benign. */ *nsubch = 0; return (0); } if (bootverbose) { if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", rxr_cnt, nchan); } if (nchan > rxr_cnt) nchan = rxr_cnt; if (nchan == 1) { if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); *nsubch = 0; return (0); } /* * Allocate sub-channels from NVS. */ *nsubch = nchan - 1; error = hn_nvs_alloc_subchans(sc, nsubch); if (error || *nsubch == 0) { /* Failed to allocate sub-channels. */ *nsubch = 0; return (0); } /* * Wait for all sub-channels to become ready before moving on. */ subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); vmbus_subchan_rel(subchans, *nsubch); return (0); } static bool hn_synth_attachable(const struct hn_softc *sc) { int i; if (sc->hn_flags & HN_FLAG_ERRORS) return (false); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) return (false); } return (true); } /* * Make sure that the RX filter is zero after the successful * RNDIS initialization. * * NOTE: * Under certain conditions on certain versions of Hyper-V, * the RNDIS rxfilter is _not_ zero on the hypervisor side * after the successful RNDIS initialization, which breaks * the assumption of any following code (well, it breaks the * RNDIS API contract actually). Clear the RNDIS rxfilter * explicitly, drain packets sneaking through, and drain the * interrupt taskqueues scheduled due to the stealth packets. */ static void hn_rndis_init_fixat(struct hn_softc *sc, int nchan) { hn_disable_rx(sc); hn_drain_rxtx(sc, nchan); } static int hn_synth_attach(struct hn_softc *sc, int mtu) { #define ATTACHED_NVS 0x0002 #define ATTACHED_RNDIS 0x0004 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; int error, nsubch, nchan = 1, i, rndis_inited; uint32_t old_caps, attached = 0; KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, ("synthetic parts were attached")); if (!hn_synth_attachable(sc)) return (ENXIO); /* Save capabilities for later verification. */ old_caps = sc->hn_caps; sc->hn_caps = 0; /* Clear RSS stuffs. */ sc->hn_rss_ind_size = 0; sc->hn_rss_hash = 0; sc->hn_rss_hcap = 0; /* * Attach the primary channel _before_ attaching NVS and RNDIS. */ error = hn_chan_attach(sc, sc->hn_prichan); if (error) goto failed; /* * Attach NVS. */ error = hn_nvs_attach(sc, mtu); if (error) goto failed; attached |= ATTACHED_NVS; /* * Attach RNDIS _after_ NVS is attached. 
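 * hn_rndis_attach() reports through rndis_inited whether RNDIS was
 * initialized even when it fails, so the error path below knows
 * whether an RNDIS detach is required.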
*/ error = hn_rndis_attach(sc, mtu, &rndis_inited); if (rndis_inited) attached |= ATTACHED_RNDIS; if (error) goto failed; /* * Make sure capabilities are not changed. */ if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) { if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n", old_caps, sc->hn_caps); error = ENXIO; goto failed; } /* * Allocate sub-channels for multi-TX/RX rings. * * NOTE: * The # of RX rings that can be used is equivalent to the # of * channels to be requested. */ nsubch = sc->hn_rx_ring_cnt - 1; error = hn_synth_alloc_subchans(sc, &nsubch); if (error) goto failed; /* NOTE: _Full_ synthetic parts detach is required now. */ sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED; /* * Set the # of TX/RX rings that could be used according to * the # of channels that NVS offered. */ nchan = nsubch + 1; hn_set_ring_inuse(sc, nchan); if (nchan == 1) { /* Only the primary channel can be used; done */ goto back; } /* * Attach the sub-channels. * * NOTE: hn_set_ring_inuse() _must_ have been called. */ error = hn_attach_subchans(sc); if (error) goto failed; /* * Configure RSS key and indirect table _after_ all sub-channels * are attached. */ if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) { /* * RSS key is not set yet; set it to the default RSS key. */ if (bootverbose) if_printf(sc->hn_ifp, "setup default RSS key\n"); #ifdef RSS rss_getkey(rss->rss_key); #else memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key)); #endif sc->hn_flags |= HN_FLAG_HAS_RSSKEY; } if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) { /* * RSS indirect table is not set yet; set it up in round- * robin fashion. */ if (bootverbose) { if_printf(sc->hn_ifp, "setup default RSS indirect " "table\n"); } for (i = 0; i < NDIS_HASH_INDCNT; ++i) { uint32_t subidx; #ifdef RSS subidx = rss_get_indirection_to_bucket(i); #else subidx = i; #endif rss->rss_ind[i] = subidx % nchan; } sc->hn_flags |= HN_FLAG_HAS_RSSIND; } else { /* * # of usable channels may be changed, so we have to * make sure that all entries in RSS indirect table * are valid. * * NOTE: hn_set_ring_inuse() _must_ have been called. */ hn_rss_ind_fixup(sc); } sc->hn_rss_hash = sc->hn_rss_hcap; if ((sc->hn_flags & HN_FLAG_RXVF) || (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { /* NOTE: Don't reconfigure RSS; will do immediately. */ hn_vf_rss_fixup(sc, false); } error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); if (error) goto failed; back: /* * Fixup transmission aggregation setup. */ hn_set_txagg(sc); hn_rndis_init_fixat(sc, nchan); return (0); failed: if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { hn_rndis_init_fixat(sc, nchan); hn_synth_detach(sc); } else { if (attached & ATTACHED_RNDIS) { hn_rndis_init_fixat(sc, nchan); hn_rndis_detach(sc); } if (attached & ATTACHED_NVS) hn_nvs_detach(sc); hn_chan_detach(sc, sc->hn_prichan); /* Restore old capabilities. */ sc->hn_caps = old_caps; } return (error); #undef ATTACHED_RNDIS #undef ATTACHED_NVS } /* * NOTE: * The interface must have been suspended though hn_suspend(), before * this function get called. */ static void hn_synth_detach(struct hn_softc *sc) { KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, ("synthetic parts were not attached")); /* Detach the RNDIS first. */ hn_rndis_detach(sc); /* Detach NVS. */ hn_nvs_detach(sc); /* Detach all of the channels. */ hn_detach_allchans(sc); if (vmbus_current_version >= VMBUS_VERSION_WIN10 && sc->hn_rxbuf_gpadl != 0) { /* * Host is post-Win2016, disconnect RXBUF from primary channel here. 
*/ int error; error = vmbus_chan_gpadl_disconnect(sc->hn_prichan, sc->hn_rxbuf_gpadl); if (error) { if_printf(sc->hn_ifp, "rxbuf gpadl disconn failed: %d\n", error); sc->hn_flags |= HN_FLAG_RXBUF_REF; } sc->hn_rxbuf_gpadl = 0; } if (vmbus_current_version >= VMBUS_VERSION_WIN10 && sc->hn_chim_gpadl != 0) { /* * Host is post-Win2016, disconnect chimney sending buffer from * primary channel here. */ int error; error = vmbus_chan_gpadl_disconnect(sc->hn_prichan, sc->hn_chim_gpadl); if (error) { if_printf(sc->hn_ifp, "chim gpadl disconn failed: %d\n", error); sc->hn_flags |= HN_FLAG_CHIM_REF; } sc->hn_chim_gpadl = 0; } sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED; } static void hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt) { KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt, ("invalid ring count %d", ring_cnt)); if (sc->hn_tx_ring_cnt > ring_cnt) sc->hn_tx_ring_inuse = ring_cnt; else sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_rx_ring_inuse = ring_cnt; #ifdef RSS if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) { if_printf(sc->hn_ifp, "# of RX rings (%d) does not match " "# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse, rss_getnumbuckets()); } #endif if (bootverbose) { if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n", sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); } } static void hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan) { /* * NOTE: * The TX bufring will not be drained by the hypervisor, * if the primary channel is revoked. */ while (!vmbus_chan_rx_empty(chan) || (!vmbus_chan_is_revoked(sc->hn_prichan) && !vmbus_chan_tx_empty(chan))) pause("waitch", 1); vmbus_chan_intr_drain(chan); } static void hn_disable_rx(struct hn_softc *sc) { /* * Disable RX by clearing RX filter forcefully. */ sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE; hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */ /* * Give RNDIS enough time to flush all pending data packets. */ pause("waitrx", (200 * hz) / 1000); } /* * NOTE: * RX/TX _must_ have been suspended/disabled, before this function * is called. */ static void hn_drain_rxtx(struct hn_softc *sc, int nchan) { struct vmbus_channel **subch = NULL; int nsubch; /* * Drain RX/TX bufrings and interrupts. */ nsubch = nchan - 1; if (nsubch > 0) subch = vmbus_subchan_get(sc->hn_prichan, nsubch); if (subch != NULL) { int i; for (i = 0; i < nsubch; ++i) hn_chan_drain(sc, subch[i]); } hn_chan_drain(sc, sc->hn_prichan); if (subch != NULL) vmbus_subchan_rel(subch, nsubch); } static void hn_suspend_data(struct hn_softc *sc) { struct hn_tx_ring *txr; int i; HN_LOCK_ASSERT(sc); /* * Suspend TX. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; mtx_lock(&txr->hn_tx_lock); txr->hn_suspended = 1; mtx_unlock(&txr->hn_tx_lock); /* No one is able send more packets now. */ /* * Wait for all pending sends to finish. * * NOTE: * We will _not_ receive all pending send-done, if the * primary channel is revoked. */ while (hn_tx_ring_pending(txr) && !vmbus_chan_is_revoked(sc->hn_prichan)) pause("hnwtx", 1 /* 1 tick */); } /* * Disable RX. */ hn_disable_rx(sc); /* * Drain RX/TX. */ hn_drain_rxtx(sc, sc->hn_rx_ring_inuse); /* * Drain any pending TX tasks. * * NOTE: * The above hn_drain_rxtx() can dispatch TX tasks, so the TX * tasks will have to be drained _after_ the above hn_drain_rxtx(). 
*/ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused) { ((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL; } static void hn_suspend_mgmt(struct hn_softc *sc) { struct task task; HN_LOCK_ASSERT(sc); /* * Make sure that hn_mgmt_taskq0 can nolonger be accessed * through hn_mgmt_taskq. */ TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc); vmbus_chan_run_task(sc->hn_prichan, &task); /* * Make sure that all pending management tasks are completed. */ taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init); taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status); taskqueue_drain_all(sc->hn_mgmt_taskq0); } static void hn_suspend(struct hn_softc *sc) { /* Disable polling. */ hn_polling(sc, 0); /* * If the non-transparent mode VF is activated, the synthetic * device is receiving packets, so the data path of the * synthetic device must be suspended. */ if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || (sc->hn_flags & HN_FLAG_RXVF)) hn_suspend_data(sc); hn_suspend_mgmt(sc); } static void hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt) { int i; KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt, ("invalid TX ring count %d", tx_ring_cnt)); for (i = 0; i < tx_ring_cnt; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; mtx_lock(&txr->hn_tx_lock); txr->hn_suspended = 0; mtx_unlock(&txr->hn_tx_lock); } } static void hn_resume_data(struct hn_softc *sc) { int i; HN_LOCK_ASSERT(sc); /* * Re-enable RX. */ hn_rxfilter_config(sc); /* * Make sure to clear suspend status on "all" TX rings, * since hn_tx_ring_inuse can be changed after * hn_suspend_data(). */ hn_resume_tx(sc, sc->hn_tx_ring_cnt); #ifdef HN_IFSTART_SUPPORT if (!hn_use_if_start) #endif { /* * Flush unused drbrs, since hn_tx_ring_inuse may be * reduced. */ for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i) hn_tx_ring_qflush(&sc->hn_tx_ring[i]); } /* * Kick start TX. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; /* * Use txeof task, so that any pending oactive can be * cleared properly. */ taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_resume_mgmt(struct hn_softc *sc) { sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; /* * Kick off network change detection, if it was pending. * If no network change was pending, start link status * checks, which is more lightweight than network change * detection. */ if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) hn_change_network(sc); else hn_update_link_status(sc); } static void hn_resume(struct hn_softc *sc) { /* * If the non-transparent mode VF is activated, the synthetic * device have to receive packets, so the data path of the * synthetic device must be resumed. */ if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || (sc->hn_flags & HN_FLAG_RXVF)) hn_resume_data(sc); /* * Don't resume link status change if VF is attached/activated. * - In the non-transparent VF mode, the synthetic device marks * link down until the VF is deactivated; i.e. VF is down. * - In transparent VF mode, VF's media status is used until * the VF is detached. */ if ((sc->hn_flags & HN_FLAG_RXVF) == 0 && !(hn_xpnt_vf && sc->hn_vf_ifp != NULL)) hn_resume_mgmt(sc); /* * Re-enable polling if this interface is running and * the polling is requested. 
*/ if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0) hn_polling(sc, sc->hn_pollhz); } static void hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen) { const struct rndis_status_msg *msg; int ofs; if (dlen < sizeof(*msg)) { if_printf(sc->hn_ifp, "invalid RNDIS status\n"); return; } msg = data; switch (msg->rm_status) { case RNDIS_STATUS_MEDIA_CONNECT: case RNDIS_STATUS_MEDIA_DISCONNECT: hn_update_link_status(sc); break; case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG: case RNDIS_STATUS_LINK_SPEED_CHANGE: /* Not really useful; ignore. */ break; case RNDIS_STATUS_NETWORK_CHANGE: ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset); if (dlen < ofs + msg->rm_stbuflen || msg->rm_stbuflen < sizeof(uint32_t)) { if_printf(sc->hn_ifp, "network changed\n"); } else { uint32_t change; memcpy(&change, ((const uint8_t *)msg) + ofs, sizeof(change)); if_printf(sc->hn_ifp, "network changed, change %u\n", change); } hn_change_network(sc); break; default: if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n", msg->rm_status); break; } } static int hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info) { const struct rndis_pktinfo *pi = info_data; uint32_t mask = 0; while (info_dlen != 0) { const void *data; uint32_t dlen; if (__predict_false(info_dlen < sizeof(*pi))) return (EINVAL); if (__predict_false(info_dlen < pi->rm_size)) return (EINVAL); info_dlen -= pi->rm_size; if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) return (EINVAL); if (__predict_false(pi->rm_size < pi->rm_pktinfooffset)) return (EINVAL); dlen = pi->rm_size - pi->rm_pktinfooffset; data = pi->rm_data; switch (pi->rm_type) { case NDIS_PKTINFO_TYPE_VLAN: if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) return (EINVAL); info->vlan_info = *((const uint32_t *)data); mask |= HN_RXINFO_VLAN; break; case NDIS_PKTINFO_TYPE_CSUM: if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) return (EINVAL); info->csum_info = *((const uint32_t *)data); mask |= HN_RXINFO_CSUM; break; case HN_NDIS_PKTINFO_TYPE_HASHVAL: if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) return (EINVAL); info->hash_value = *((const uint32_t *)data); mask |= HN_RXINFO_HASHVAL; break; case HN_NDIS_PKTINFO_TYPE_HASHINF: if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) return (EINVAL); info->hash_info = *((const uint32_t *)data); mask |= HN_RXINFO_HASHINF; break; default: goto next; } if (mask == HN_RXINFO_ALL) { /* All found; done */ break; } next: pi = (const struct rndis_pktinfo *) ((const uint8_t *)pi + pi->rm_size); } /* * Final fixup. * - If there is no hash value, invalidate the hash info. */ if ((mask & HN_RXINFO_HASHVAL) == 0) info->hash_info = HN_NDIS_HASH_INFO_INVALID; return (0); } static __inline bool hn_rndis_check_overlap(int off, int len, int check_off, int check_len) { if (off < check_off) { if (__predict_true(off + len <= check_off)) return (false); } else if (off > check_off) { if (__predict_true(check_off + check_len <= off)) return (false); } return (true); } static void hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) { const struct rndis_packet_msg *pkt; struct hn_rxinfo info; int data_off, pktinfo_off, data_len, pktinfo_len; /* * Check length. 
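 * The message must cover the fixed packet header, the advertised
 * data/oob/pktinfo regions must fit within rm_len, and a zero-length
 * data region is rejected.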
*/ if (__predict_false(dlen < sizeof(*pkt))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n"); return; } pkt = data; if (__predict_false(dlen < pkt->rm_len)) { if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, " "dlen %d, msglen %u\n", dlen, pkt->rm_len); return; } if (__predict_false(pkt->rm_len < pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, " "msglen %u, data %u, oob %u, pktinfo %u\n", pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen, pkt->rm_pktinfolen); return; } if (__predict_false(pkt->rm_datalen == 0)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n"); return; } /* * Check offests. */ #define IS_OFFSET_INVALID(ofs) \ ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \ ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)) /* XXX Hyper-V does not meet data offset alignment requirement */ if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "data offset %u\n", pkt->rm_dataoffset); return; } if (__predict_false(pkt->rm_oobdataoffset > 0 && IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob offset %u\n", pkt->rm_oobdataoffset); return; } if (__predict_true(pkt->rm_pktinfooffset > 0) && __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo offset %u\n", pkt->rm_pktinfooffset); return; } #undef IS_OFFSET_INVALID data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset); data_len = pkt->rm_datalen; pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset); pktinfo_len = pkt->rm_pktinfolen; /* * Check OOB coverage. */ if (__predict_false(pkt->rm_oobdatalen != 0)) { int oob_off, oob_len; if_printf(rxr->hn_ifp, "got oobdata\n"); oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset); oob_len = pkt->rm_oobdatalen; if (__predict_false(oob_off + oob_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overflow, msglen %u, oob abs %d len %d\n", pkt->rm_len, oob_off, oob_len); return; } /* * Check against data. */ if (hn_rndis_check_overlap(oob_off, oob_len, data_off, data_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overlaps data, oob abs %d len %d, " "data abs %d len %d\n", oob_off, oob_len, data_off, data_len); return; } /* * Check against pktinfo. */ if (pktinfo_len != 0 && hn_rndis_check_overlap(oob_off, oob_len, pktinfo_off, pktinfo_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overlaps pktinfo, oob abs %d len %d, " "pktinfo abs %d len %d\n", oob_off, oob_len, pktinfo_off, pktinfo_len); return; } } /* * Check per-packet-info coverage and find useful per-packet-info. */ info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; info.hash_info = HN_NDIS_HASH_INFO_INVALID; if (__predict_true(pktinfo_len != 0)) { bool overlap; int error; if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo overflow, msglen %u, " "pktinfo abs %d len %d\n", pkt->rm_len, pktinfo_off, pktinfo_len); return; } /* * Check packet info coverage. */ overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, data_off, data_len); if (__predict_false(overlap)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo overlap data, pktinfo abs %d len %d, " "data abs %d len %d\n", pktinfo_off, pktinfo_len, data_off, data_len); return; } /* * Find useful per-packet-info. 
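 * hn_rndis_rxinfo() walks the pktinfo records and extracts the VLAN,
 * checksum and hash fields; malformed records are rejected.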
*/ error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, pktinfo_len, &info); if (__predict_false(error)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " "pktinfo\n"); return; } } if (__predict_false(data_off + data_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "data overflow, msglen %u, data abs %d len %d\n", pkt->rm_len, data_off, data_len); return; } hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); } static __inline void hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen) { const struct rndis_msghdr *hdr; if (__predict_false(dlen < sizeof(*hdr))) { if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); return; } hdr = data; if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) { /* Hot data path. */ hn_rndis_rx_data(rxr, data, dlen); /* Done! */ return; } if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG) hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen); else hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen); } static void hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt) { const struct hn_nvs_hdr *hdr; if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) { if_printf(sc->hn_ifp, "invalid nvs notify\n"); return; } hdr = VMBUS_CHANPKT_CONST_DATA(pkt); if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) { /* Useless; ignore */ return; } if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type); } static void hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkt) { struct hn_nvs_sendctx *sndc; sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid; sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt), VMBUS_CHANPKT_DATALEN(pkt)); /* * NOTE: * 'sndc' CAN NOT be accessed anymore, since it can be freed by * its callback. */ } static void hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkthdr) { const struct vmbus_chanpkt_rxbuf *pkt; const struct hn_nvs_hdr *nvs_hdr; int count, i, hlen; if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) { if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n"); return; } nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr); /* Make sure that this is a RNDIS message. */ if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) { if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n", nvs_hdr->nvs_type); return; } hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen); if (__predict_false(hlen < sizeof(*pkt))) { if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n"); return; } pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr; if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) { if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n", pkt->cp_rxbuf_id); return; } count = pkt->cp_rxbuf_cnt; if (__predict_false(hlen < __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) { if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count); return; } /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ for (i = 0; i < count; ++i) { int ofs, len; ofs = pkt->cp_rxbuf[i].rb_ofs; len = pkt->cp_rxbuf[i].rb_len; if (__predict_false(ofs + len > HN_RXBUF_SIZE)) { if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, " "ofs %d, len %d\n", i, ofs, len); continue; } hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len); } /* * Ack the consumed RXBUF associated w/ this channel packet, * so that this RXBUF can be recycled by the hypervisor. 
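 * The ack is a completion packet carrying the original transaction id;
 * hn_nvs_ack_rxbuf() retries a bounded number of times on EAGAIN and
 * reports a leaked RXBUF if the ack still cannot be sent.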
*/ hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid); } static void hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, uint64_t tid) { struct hn_nvs_rndis_ack ack; int retries, error; ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK; ack.nvs_status = HN_NVS_STATUS_OK; retries = 0; again: error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid); if (__predict_false(error == EAGAIN)) { /* * NOTE: * This should _not_ happen in real world, since the * consumption of the TX bufring from the TX path is * controlled. */ if (rxr->hn_ack_failed == 0) if_printf(rxr->hn_ifp, "RXBUF ack retry\n"); rxr->hn_ack_failed++; retries++; if (retries < 10) { DELAY(100); goto again; } /* RXBUF leaks! */ if_printf(rxr->hn_ifp, "RXBUF ack failed\n"); } } static void hn_chan_callback(struct vmbus_channel *chan, void *xrxr) { struct hn_rx_ring *rxr = xrxr; struct hn_softc *sc = rxr->hn_ifp->if_softc; for (;;) { struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf; int error, pktlen; pktlen = rxr->hn_pktbuf_len; error = vmbus_chan_recv_pkt(chan, pkt, &pktlen); if (__predict_false(error == ENOBUFS)) { void *nbuf; int nlen; /* * Expand channel packet buffer. * * XXX * Use M_WAITOK here, since allocation failure * is fatal. */ nlen = rxr->hn_pktbuf_len * 2; while (nlen < pktlen) nlen *= 2; nbuf = malloc(nlen, M_DEVBUF, M_WAITOK); if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n", rxr->hn_pktbuf_len, nlen); free(rxr->hn_pktbuf, M_DEVBUF); rxr->hn_pktbuf = nbuf; rxr->hn_pktbuf_len = nlen; /* Retry! */ continue; } else if (__predict_false(error == EAGAIN)) { /* No more channel packets; done! */ break; } KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error)); switch (pkt->cph_type) { case VMBUS_CHANPKT_TYPE_COMP: hn_nvs_handle_comp(sc, chan, pkt); break; case VMBUS_CHANPKT_TYPE_RXBUF: hn_nvs_handle_rxbuf(rxr, chan, pkt); break; case VMBUS_CHANPKT_TYPE_INBAND: hn_nvs_handle_notify(sc, pkt); break; default: if_printf(rxr->hn_ifp, "unknown chan pkt %u\n", pkt->cph_type); break; } } hn_chan_rollup(rxr, rxr->hn_txr); } static void hn_sysinit(void *arg __unused) { int i; hn_udpcs_fixup = counter_u64_alloc(M_WAITOK); #ifdef HN_IFSTART_SUPPORT /* * Don't use ifnet.if_start if transparent VF mode is requested; * mainly due to the IFF_DRV_OACTIVE flag. */ if (hn_xpnt_vf && hn_use_if_start) { hn_use_if_start = 0; printf("hn: tranparent VF mode, if_transmit will be used, " "instead of if_start\n"); } #endif if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) { printf("hn: invalid transparent VF attach routing " "wait timeout %d, reset to %d\n", hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN); hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; } /* * Initialize VF map. */ rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE); hn_vfmap_size = HN_VFMAP_SIZE_DEF; hn_vfmap = malloc(sizeof(struct ifnet *) * hn_vfmap_size, M_DEVBUF, M_WAITOK | M_ZERO); /* * Fix the # of TX taskqueues. */ if (hn_tx_taskq_cnt <= 0) hn_tx_taskq_cnt = 1; else if (hn_tx_taskq_cnt > mp_ncpus) hn_tx_taskq_cnt = mp_ncpus; /* * Fix the TX taskqueue mode. 
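 * Unknown modes fall back to HN_TX_TASKQ_M_INDEP; the shared TX
 * taskqueues below are only created for HN_TX_TASKQ_M_GLOBAL when
 * running on Hyper-V.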
*/ switch (hn_tx_taskq_mode) { case HN_TX_TASKQ_M_INDEP: case HN_TX_TASKQ_M_GLOBAL: case HN_TX_TASKQ_M_EVTTQ: break; default: hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP; break; } if (vm_guest != VM_GUEST_HV) return; if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL) return; hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), M_DEVBUF, M_WAITOK); for (i = 0; i < hn_tx_taskq_cnt; ++i) { hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &hn_tx_taskque[i]); taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET, "hn tx%d", i); } } SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL); static void hn_sysuninit(void *arg __unused) { if (hn_tx_taskque != NULL) { int i; for (i = 0; i < hn_tx_taskq_cnt; ++i) taskqueue_free(hn_tx_taskque[i]); free(hn_tx_taskque, M_DEVBUF); } if (hn_vfmap != NULL) free(hn_vfmap, M_DEVBUF); rm_destroy(&hn_vfmap_lock); counter_u64_free(hn_udpcs_fixup); } SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL); diff --git a/sys/dev/if_ndis/if_ndis.c b/sys/dev/if_ndis/if_ndis.c index 37cf0e6bf703..1a5e8eeed4be 100644 --- a/sys/dev/if_ndis/if_ndis.c +++ b/sys/dev/if_ndis/if_ndis.c @@ -1,3424 +1,3423 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * WPA support originally contributed by Arvind Srinivasan * then hacked upon mercilessly by my. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NDIS_DEBUG #ifdef NDIS_DEBUG #define DPRINTF(x) do { if (ndis_debug > 0) printf x; } while (0) int ndis_debug = 0; SYSCTL_INT(_debug, OID_AUTO, ndis, CTLFLAG_RW, &ndis_debug, 0, "if_ndis debug level"); #else #define DPRINTF(x) #endif SYSCTL_DECL(_hw_ndisusb); int ndisusb_halt = 1; SYSCTL_INT(_hw_ndisusb, OID_AUTO, halt, CTLFLAG_RW, &ndisusb_halt, 0, "Halt NDIS USB driver when it's attached"); /* 0 - 30 dBm to mW conversion table */ static const uint16_t dBm2mW[] = { 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 25, 28, 32, 35, 40, 45, 50, 56, 63, 71, 79, 89, 100, 112, 126, 141, 158, 178, 200, 224, 251, 282, 316, 355, 398, 447, 501, 562, 631, 708, 794, 891, 1000 }; MODULE_DEPEND(ndis, ether, 1, 1, 1); MODULE_DEPEND(ndis, wlan, 1, 1, 1); MODULE_DEPEND(ndis, ndisapi, 1, 1, 1); MODULE_VERSION(ndis, 1); int ndis_attach (device_t); int ndis_detach (device_t); int ndis_suspend (device_t); int ndis_resume (device_t); void ndis_shutdown (device_t); int ndisdrv_modevent (module_t, int, void *); static void ndis_txeof (ndis_handle, ndis_packet *, ndis_status); static void ndis_rxeof (ndis_handle, ndis_packet **, uint32_t); static void ndis_rxeof_eth (ndis_handle, ndis_handle, char *, void *, uint32_t, void *, uint32_t, uint32_t); static void ndis_rxeof_done (ndis_handle); static void ndis_rxeof_xfr (kdpc *, ndis_handle, void *, void *); static void ndis_rxeof_xfr_done (ndis_handle, ndis_packet *, uint32_t, uint32_t); static void ndis_linksts (ndis_handle, ndis_status, void *, uint32_t); static void ndis_linksts_done (ndis_handle); /* We need to wrap these functions for amd64. 
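 * The wrappers are created in ndisdrv_modevent() via windrv_wrap(),
 * which is given each callback's argument count and the
 * WINDRV_WRAP_STDCALL convention.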
*/ static funcptr ndis_txeof_wrap; static funcptr ndis_rxeof_wrap; static funcptr ndis_rxeof_eth_wrap; static funcptr ndis_rxeof_done_wrap; static funcptr ndis_rxeof_xfr_wrap; static funcptr ndis_rxeof_xfr_done_wrap; static funcptr ndis_linksts_wrap; static funcptr ndis_linksts_done_wrap; static funcptr ndis_ticktask_wrap; static funcptr ndis_ifstarttask_wrap; static funcptr ndis_resettask_wrap; static funcptr ndis_inputtask_wrap; static struct ieee80211vap *ndis_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void ndis_vap_delete (struct ieee80211vap *); static void ndis_tick (void *); static void ndis_ticktask (device_object *, void *); static int ndis_raw_xmit (struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static void ndis_update_mcast (struct ieee80211com *); static void ndis_update_promisc (struct ieee80211com *); static void ndis_ifstart (struct ifnet *); static void ndis_ifstarttask (device_object *, void *); static void ndis_resettask (device_object *, void *); static void ndis_inputtask (device_object *, void *); static int ndis_ifioctl (struct ifnet *, u_long, caddr_t); static int ndis_newstate (struct ieee80211vap *, enum ieee80211_state, int); static int ndis_nettype_chan (uint32_t); static int ndis_nettype_mode (uint32_t); static void ndis_scan (void *); static void ndis_scan_results (struct ndis_softc *); static void ndis_scan_start (struct ieee80211com *); static void ndis_scan_end (struct ieee80211com *); static void ndis_set_channel (struct ieee80211com *); static void ndis_scan_curchan (struct ieee80211_scan_state *, unsigned long); static void ndis_scan_mindwell (struct ieee80211_scan_state *); static void ndis_init (void *); static void ndis_stop (struct ndis_softc *); static int ndis_ifmedia_upd (struct ifnet *); static void ndis_ifmedia_sts (struct ifnet *, struct ifmediareq *); static int ndis_get_bssid_list (struct ndis_softc *, ndis_80211_bssid_list_ex **); static int ndis_get_assoc (struct ndis_softc *, ndis_wlan_bssid_ex **); static int ndis_probe_offload (struct ndis_softc *); static int ndis_set_offload (struct ndis_softc *); static void ndis_getstate_80211 (struct ndis_softc *); static void ndis_setstate_80211 (struct ndis_softc *); static void ndis_auth_and_assoc (struct ndis_softc *, struct ieee80211vap *); static void ndis_media_status (struct ifnet *, struct ifmediareq *); static int ndis_set_cipher (struct ndis_softc *, int); static int ndis_set_wpa (struct ndis_softc *, void *, int); static int ndis_add_key (struct ieee80211vap *, const struct ieee80211_key *); static int ndis_del_key (struct ieee80211vap *, const struct ieee80211_key *); static void ndis_setmulti (struct ndis_softc *); static void ndis_map_sclist (void *, bus_dma_segment_t *, int, bus_size_t, int); static int ndis_ifattach(struct ndis_softc *); static int ndis_80211attach(struct ndis_softc *); static int ndis_80211ioctl(struct ieee80211com *, u_long , void *); static int ndis_80211transmit(struct ieee80211com *, struct mbuf *); static void ndis_80211parent(struct ieee80211com *); static int ndisdrv_loaded = 0; /* * This routine should call windrv_load() once for each driver * image. This will do the relocation and dynalinking for the * image, and create a Windows driver object which will be * saved in our driver database. 
*/ int ndisdrv_modevent(mod, cmd, arg) module_t mod; int cmd; void *arg; { int error = 0; switch (cmd) { case MOD_LOAD: ndisdrv_loaded++; if (ndisdrv_loaded > 1) break; windrv_wrap((funcptr)ndis_rxeof, &ndis_rxeof_wrap, 3, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_eth, &ndis_rxeof_eth_wrap, 8, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_done, &ndis_rxeof_done_wrap, 1, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_xfr, &ndis_rxeof_xfr_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_xfr_done, &ndis_rxeof_xfr_done_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_txeof, &ndis_txeof_wrap, 3, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_linksts, &ndis_linksts_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_linksts_done, &ndis_linksts_done_wrap, 1, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_ticktask, &ndis_ticktask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_ifstarttask, &ndis_ifstarttask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_resettask, &ndis_resettask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_inputtask, &ndis_inputtask_wrap, 2, WINDRV_WRAP_STDCALL); break; case MOD_UNLOAD: ndisdrv_loaded--; if (ndisdrv_loaded > 0) break; /* fallthrough */ case MOD_SHUTDOWN: windrv_unwrap(ndis_rxeof_wrap); windrv_unwrap(ndis_rxeof_eth_wrap); windrv_unwrap(ndis_rxeof_done_wrap); windrv_unwrap(ndis_rxeof_xfr_wrap); windrv_unwrap(ndis_rxeof_xfr_done_wrap); windrv_unwrap(ndis_txeof_wrap); windrv_unwrap(ndis_linksts_wrap); windrv_unwrap(ndis_linksts_done_wrap); windrv_unwrap(ndis_ticktask_wrap); windrv_unwrap(ndis_ifstarttask_wrap); windrv_unwrap(ndis_resettask_wrap); windrv_unwrap(ndis_inputtask_wrap); break; default: error = EINVAL; break; } return (error); } struct mclist_ctx { uint8_t *mclist; int mclistsz; }; static u_int ndis_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { struct mclist_ctx *ctx = arg; if (cnt < ctx->mclistsz) bcopy(LLADDR(sdl), ctx->mclist + (ETHER_ADDR_LEN * cnt), ETHER_ADDR_LEN); return (1); } /* * Program the 64-bit multicast hash filter. 
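 * (Strictly speaking, ndis_setmulti() below hands the miniport an
 * explicit address list via OID_802_3_MULTICAST_LIST and adjusts the
 * packet filter, rather than programming a hash filter.)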
*/ static void ndis_setmulti(sc) struct ndis_softc *sc; { struct ifnet *ifp; struct mclist_ctx ctx; int len, error; if (!NDIS_INITIALIZED(sc)) return; if (sc->ndis_80211) return; ifp = sc->ifp; if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) { sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set allmulti failed: %d\n", error); return; } if (if_llmaddr_count(ifp) == 0) return; len = sizeof(ctx.mclistsz); ndis_get_info(sc, OID_802_3_MAXIMUM_LIST_SIZE, &ctx.mclistsz, &len); ctx.mclist = malloc(ETHER_ADDR_LEN * ctx.mclistsz, M_TEMP, M_NOWAIT | M_ZERO); if (ctx.mclist == NULL) { sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; goto out; } sc->ndis_filter |= NDIS_PACKET_TYPE_MULTICAST; len = if_foreach_llmaddr(ifp, ndis_copy_maddr, &ctx); if (len > ctx.mclistsz) { sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; sc->ndis_filter &= ~NDIS_PACKET_TYPE_MULTICAST; goto out; } len = len * ETHER_ADDR_LEN; error = ndis_set_info(sc, OID_802_3_MULTICAST_LIST, ctx.mclist, &len); if (error) { device_printf(sc->ndis_dev, "set mclist failed: %d\n", error); sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; sc->ndis_filter &= ~NDIS_PACKET_TYPE_MULTICAST; } out: free(ctx.mclist, M_TEMP); len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set multi failed: %d\n", error); } static int ndis_set_offload(sc) struct ndis_softc *sc; { ndis_task_offload *nto; ndis_task_offload_hdr *ntoh; ndis_task_tcpip_csum *nttc; struct ifnet *ifp; int len, error; if (!NDIS_INITIALIZED(sc)) return (EINVAL); if (sc->ndis_80211) return (EINVAL); /* See if there's anything to set. 
*/ ifp = sc->ifp; error = ndis_probe_offload(sc); if (error) return (error); if (sc->ndis_hwassist == 0 && ifp->if_capabilities == 0) return (0); len = sizeof(ndis_task_offload_hdr) + sizeof(ndis_task_offload) + sizeof(ndis_task_tcpip_csum); ntoh = malloc(len, M_TEMP, M_NOWAIT|M_ZERO); if (ntoh == NULL) return (ENOMEM); ntoh->ntoh_vers = NDIS_TASK_OFFLOAD_VERSION; ntoh->ntoh_len = sizeof(ndis_task_offload_hdr); ntoh->ntoh_offset_firsttask = sizeof(ndis_task_offload_hdr); ntoh->ntoh_encapfmt.nef_encaphdrlen = sizeof(struct ether_header); ntoh->ntoh_encapfmt.nef_encap = NDIS_ENCAP_IEEE802_3; ntoh->ntoh_encapfmt.nef_flags = NDIS_ENCAPFLAG_FIXEDHDRLEN; nto = (ndis_task_offload *)((char *)ntoh + ntoh->ntoh_offset_firsttask); nto->nto_vers = NDIS_TASK_OFFLOAD_VERSION; nto->nto_len = sizeof(ndis_task_offload); nto->nto_task = NDIS_TASK_TCPIP_CSUM; nto->nto_offset_nexttask = 0; nto->nto_taskbuflen = sizeof(ndis_task_tcpip_csum); nttc = (ndis_task_tcpip_csum *)nto->nto_taskbuf; if (ifp->if_capenable & IFCAP_TXCSUM) nttc->nttc_v4tx = sc->ndis_v4tx; if (ifp->if_capenable & IFCAP_RXCSUM) nttc->nttc_v4rx = sc->ndis_v4rx; error = ndis_set_info(sc, OID_TCP_TASK_OFFLOAD, ntoh, &len); free(ntoh, M_TEMP); return (error); } static int ndis_probe_offload(sc) struct ndis_softc *sc; { ndis_task_offload *nto; ndis_task_offload_hdr *ntoh; ndis_task_tcpip_csum *nttc = NULL; struct ifnet *ifp; int len, error, dummy; ifp = sc->ifp; len = sizeof(dummy); error = ndis_get_info(sc, OID_TCP_TASK_OFFLOAD, &dummy, &len); if (error != ENOSPC) return (error); ntoh = malloc(len, M_TEMP, M_NOWAIT|M_ZERO); if (ntoh == NULL) return (ENOMEM); ntoh->ntoh_vers = NDIS_TASK_OFFLOAD_VERSION; ntoh->ntoh_len = sizeof(ndis_task_offload_hdr); ntoh->ntoh_encapfmt.nef_encaphdrlen = sizeof(struct ether_header); ntoh->ntoh_encapfmt.nef_encap = NDIS_ENCAP_IEEE802_3; ntoh->ntoh_encapfmt.nef_flags = NDIS_ENCAPFLAG_FIXEDHDRLEN; error = ndis_get_info(sc, OID_TCP_TASK_OFFLOAD, ntoh, &len); if (error) { free(ntoh, M_TEMP); return (error); } if (ntoh->ntoh_vers != NDIS_TASK_OFFLOAD_VERSION) { free(ntoh, M_TEMP); return (EINVAL); } nto = (ndis_task_offload *)((char *)ntoh + ntoh->ntoh_offset_firsttask); while (1) { switch (nto->nto_task) { case NDIS_TASK_TCPIP_CSUM: nttc = (ndis_task_tcpip_csum *)nto->nto_taskbuf; break; /* Don't handle these yet. 
*/ case NDIS_TASK_IPSEC: case NDIS_TASK_TCP_LARGESEND: default: break; } if (nto->nto_offset_nexttask == 0) break; nto = (ndis_task_offload *)((char *)nto + nto->nto_offset_nexttask); } if (nttc == NULL) { free(ntoh, M_TEMP); return (ENOENT); } sc->ndis_v4tx = nttc->nttc_v4tx; sc->ndis_v4rx = nttc->nttc_v4rx; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_IP_CSUM) sc->ndis_hwassist |= CSUM_IP; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_TCP_CSUM) sc->ndis_hwassist |= CSUM_TCP; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_UDP_CSUM) sc->ndis_hwassist |= CSUM_UDP; if (sc->ndis_hwassist) ifp->if_capabilities |= IFCAP_TXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_IP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_TCP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_UDP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; free(ntoh, M_TEMP); return (0); } static int ndis_nettype_chan(uint32_t type) { switch (type) { case NDIS_80211_NETTYPE_11FH: return (IEEE80211_CHAN_FHSS); case NDIS_80211_NETTYPE_11DS: return (IEEE80211_CHAN_B); case NDIS_80211_NETTYPE_11OFDM5: return (IEEE80211_CHAN_A); case NDIS_80211_NETTYPE_11OFDM24: return (IEEE80211_CHAN_G); } DPRINTF(("unknown channel nettype %d\n", type)); return (IEEE80211_CHAN_B); /* Default to 11B chan */ } static int ndis_nettype_mode(uint32_t type) { switch (type) { case NDIS_80211_NETTYPE_11FH: return (IEEE80211_MODE_FH); case NDIS_80211_NETTYPE_11DS: return (IEEE80211_MODE_11B); case NDIS_80211_NETTYPE_11OFDM5: return (IEEE80211_MODE_11A); case NDIS_80211_NETTYPE_11OFDM24: return (IEEE80211_MODE_11G); } DPRINTF(("unknown mode nettype %d\n", type)); return (IEEE80211_MODE_AUTO); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ int ndis_attach(device_t dev) { struct ndis_softc *sc; driver_object *pdrv; device_object *pdo; int error = 0, len; int i; sc = device_get_softc(dev); mtx_init(&sc->ndis_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); KeInitializeSpinLock(&sc->ndis_rxlock); KeInitializeSpinLock(&sc->ndisusb_tasklock); KeInitializeSpinLock(&sc->ndisusb_xferdonelock); InitializeListHead(&sc->ndis_shlist); InitializeListHead(&sc->ndisusb_tasklist); InitializeListHead(&sc->ndisusb_xferdonelist); callout_init(&sc->ndis_stat_callout, 1); mbufq_init(&sc->ndis_rxqueue, INT_MAX); /* XXXGL: sane maximum */ /* Create sysctl registry nodes */ ndis_create_sysctls(sc); /* Find the PDO for this device instance. */ if (sc->ndis_iftype == PCIBus) pdrv = windrv_lookup(0, "PCI Bus"); else if (sc->ndis_iftype == PCMCIABus) pdrv = windrv_lookup(0, "PCCARD Bus"); else pdrv = windrv_lookup(0, "USB Bus"); pdo = windrv_find_pdo(pdrv, dev); /* * Create a new functional device object for this * device. This is what creates the miniport block * for this device instance. */ if (NdisAddDevice(sc->ndis_dobj, pdo) != STATUS_SUCCESS) { device_printf(dev, "failed to create FDO!\n"); error = ENXIO; goto fail; } /* Tell the user what version of the API the driver is using. */ device_printf(dev, "NDIS API version: %d.%d\n", sc->ndis_chars->nmc_version_major, sc->ndis_chars->nmc_version_minor); /* Do resource conversion. */ if (sc->ndis_iftype == PCMCIABus || sc->ndis_iftype == PCIBus) ndis_convert_res(sc); else sc->ndis_block->nmb_rlist = NULL; /* Install our RX and TX interrupt handlers. 
*/ sc->ndis_block->nmb_senddone_func = ndis_txeof_wrap; sc->ndis_block->nmb_pktind_func = ndis_rxeof_wrap; sc->ndis_block->nmb_ethrxindicate_func = ndis_rxeof_eth_wrap; sc->ndis_block->nmb_ethrxdone_func = ndis_rxeof_done_wrap; sc->ndis_block->nmb_tdcond_func = ndis_rxeof_xfr_done_wrap; /* Override the status handler so we can detect link changes. */ sc->ndis_block->nmb_status_func = ndis_linksts_wrap; sc->ndis_block->nmb_statusdone_func = ndis_linksts_done_wrap; /* Set up work item handlers. */ sc->ndis_tickitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_startitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_resetitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_inputitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndisusb_xferdoneitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndisusb_taskitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); KeInitializeDpc(&sc->ndis_rxdpc, ndis_rxeof_xfr_wrap, sc->ndis_block); /* Call driver's init routine. */ if (ndis_init_nic(sc)) { device_printf(dev, "init handler failed\n"); error = ENXIO; goto fail; } /* * Figure out how big to make the TX buffer pool. */ len = sizeof(sc->ndis_maxpkts); if (ndis_get_info(sc, OID_GEN_MAXIMUM_SEND_PACKETS, &sc->ndis_maxpkts, &len)) { device_printf(dev, "failed to get max TX packets\n"); error = ENXIO; goto fail; } /* * If this is a deserialized miniport, we don't have * to honor the OID_GEN_MAXIMUM_SEND_PACKETS result. */ if (!NDIS_SERIALIZED(sc->ndis_block)) sc->ndis_maxpkts = NDIS_TXPKTS; /* Enforce some sanity, just in case. */ if (sc->ndis_maxpkts == 0) sc->ndis_maxpkts = 10; sc->ndis_txarray = malloc(sizeof(ndis_packet *) * sc->ndis_maxpkts, M_DEVBUF, M_NOWAIT|M_ZERO); /* Allocate a pool of ndis_packets for TX encapsulation. */ NdisAllocatePacketPool(&i, &sc->ndis_txpool, sc->ndis_maxpkts, PROTOCOL_RESERVED_SIZE_IN_PACKET); if (i != NDIS_STATUS_SUCCESS) { sc->ndis_txpool = NULL; device_printf(dev, "failed to allocate TX packet pool"); error = ENOMEM; goto fail; } sc->ndis_txpending = sc->ndis_maxpkts; sc->ndis_oidcnt = 0; /* Get supported oid list. */ ndis_get_supported_oids(sc, &sc->ndis_oids, &sc->ndis_oidcnt); /* If the NDIS module requested scatter/gather, init maps. */ if (sc->ndis_sc) ndis_init_dma(sc); /* * See if the OID_802_11_CONFIGURATION OID is * supported by this driver. If it is, then this an 802.11 * wireless driver, and we should set up media for wireless. */ for (i = 0; i < sc->ndis_oidcnt; i++) if (sc->ndis_oids[i] == OID_802_11_CONFIGURATION) { sc->ndis_80211 = 1; break; } if (sc->ndis_80211) error = ndis_80211attach(sc); else error = ndis_ifattach(sc); fail: if (error) { ndis_detach(dev); return (error); } if (sc->ndis_iftype == PNPBus && ndisusb_halt == 0) return (error); DPRINTF(("attach done.\n")); /* We're done talking to the NIC for now; halt it. 
*/ ndis_halt_nic(sc); DPRINTF(("halting done.\n")); return (error); } static int ndis_80211attach(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; ndis_80211_rates_ex rates; struct ndis_80211_nettype_list *ntl; uint32_t arg; int mode, i, r, len, nonettypes = 1; uint8_t bands[IEEE80211_MODE_BYTES] = { 0 }; callout_init(&sc->ndis_scan_callout, 1); ic->ic_softc = sc; ic->ic_ioctl = ndis_80211ioctl; ic->ic_name = device_get_nameunit(sc->ndis_dev); ic->ic_opmode = IEEE80211_M_STA; ic->ic_phytype = IEEE80211_T_DS; ic->ic_caps = IEEE80211_C_8023ENCAP | IEEE80211_C_STA | IEEE80211_C_IBSS; setbit(ic->ic_modecaps, IEEE80211_MODE_AUTO); len = 0; r = ndis_get_info(sc, OID_802_11_NETWORK_TYPES_SUPPORTED, NULL, &len); if (r != ENOSPC) goto nonettypes; ntl = malloc(len, M_DEVBUF, M_WAITOK | M_ZERO); r = ndis_get_info(sc, OID_802_11_NETWORK_TYPES_SUPPORTED, ntl, &len); if (r != 0) { free(ntl, M_DEVBUF); goto nonettypes; } for (i = 0; i < ntl->ntl_items; i++) { mode = ndis_nettype_mode(ntl->ntl_type[i]); if (mode) { nonettypes = 0; setbit(ic->ic_modecaps, mode); setbit(bands, mode); } else device_printf(sc->ndis_dev, "Unknown nettype %d\n", ntl->ntl_type[i]); } free(ntl, M_DEVBUF); nonettypes: /* Default to 11b channels if the card did not supply any */ if (nonettypes) { setbit(ic->ic_modecaps, IEEE80211_MODE_11B); setbit(bands, IEEE80211_MODE_11B); } len = sizeof(rates); bzero((char *)&rates, len); r = ndis_get_info(sc, OID_802_11_SUPPORTED_RATES, (void *)rates, &len); if (r != 0) device_printf(sc->ndis_dev, "get rates failed: 0x%x\n", r); /* * Since the supported rates only up to 8 can be supported, * if this is not 802.11b we're just going to be faking it * all up to heck. */ #define TESTSETRATE(x, y) \ do { \ int i; \ for (i = 0; i < ic->ic_sup_rates[x].rs_nrates; i++) { \ if (ic->ic_sup_rates[x].rs_rates[i] == (y)) \ break; \ } \ if (i == ic->ic_sup_rates[x].rs_nrates) { \ ic->ic_sup_rates[x].rs_rates[i] = (y); \ ic->ic_sup_rates[x].rs_nrates++; \ } \ } while (0) #define SETRATE(x, y) \ ic->ic_sup_rates[x].rs_rates[ic->ic_sup_rates[x].rs_nrates] = (y) #define INCRATE(x) \ ic->ic_sup_rates[x].rs_nrates++ ic->ic_curmode = IEEE80211_MODE_AUTO; if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) ic->ic_sup_rates[IEEE80211_MODE_11A].rs_nrates = 0; if (isset(ic->ic_modecaps, IEEE80211_MODE_11B)) ic->ic_sup_rates[IEEE80211_MODE_11B].rs_nrates = 0; if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) ic->ic_sup_rates[IEEE80211_MODE_11G].rs_nrates = 0; for (i = 0; i < len; i++) { switch (rates[i] & IEEE80211_RATE_VAL) { case 2: case 4: case 11: case 10: case 22: if (isclr(ic->ic_modecaps, IEEE80211_MODE_11B)) { /* Lazy-init 802.11b. */ setbit(ic->ic_modecaps, IEEE80211_MODE_11B); ic->ic_sup_rates[IEEE80211_MODE_11B]. rs_nrates = 0; } SETRATE(IEEE80211_MODE_11B, rates[i]); INCRATE(IEEE80211_MODE_11B); break; default: if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) { SETRATE(IEEE80211_MODE_11A, rates[i]); INCRATE(IEEE80211_MODE_11A); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) { SETRATE(IEEE80211_MODE_11G, rates[i]); INCRATE(IEEE80211_MODE_11G); } break; } } /* * If the hardware supports 802.11g, it most * likely supports 802.11b and all of the * 802.11b and 802.11g speeds, so maybe we can * just cheat here. Just how in the heck do * we detect turbo modes, though? 
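*/

/*
 * Editor's note: illustrative sketch only.  The TESTSETRATE() macro above
 * appends a rate to a mode's rate set only when it is not already present
 * and there is room left.  The function below expresses the same
 * test-then-append idea on a plain array; the names are stand-ins, not
 * net80211 structures.
 */
#include <stdbool.h>
#include <stdint.h>

#define RATESET_MAX	15

struct rateset {
	uint8_t	nrates;
	uint8_t	rates[RATESET_MAX];
};

static bool
rateset_add_unique(struct rateset *rs, uint8_t rate)
{
	int i;

	for (i = 0; i < rs->nrates; i++)
		if (rs->rates[i] == rate)
			return (true);	/* already present */
	if (rs->nrates >= RATESET_MAX)
		return (false);		/* no room left */
	rs->rates[rs->nrates++] = rate;
	return (true);
}

/*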
*/ if (isset(ic->ic_modecaps, IEEE80211_MODE_11B)) { TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|2); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|4); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|11); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|22); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) { TESTSETRATE(IEEE80211_MODE_11G, 48); TESTSETRATE(IEEE80211_MODE_11G, 72); TESTSETRATE(IEEE80211_MODE_11G, 96); TESTSETRATE(IEEE80211_MODE_11G, 108); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) { TESTSETRATE(IEEE80211_MODE_11A, 48); TESTSETRATE(IEEE80211_MODE_11A, 72); TESTSETRATE(IEEE80211_MODE_11A, 96); TESTSETRATE(IEEE80211_MODE_11A, 108); } #undef SETRATE #undef INCRATE #undef TESTSETRATE ieee80211_init_channels(ic, NULL, bands); /* * To test for WPA support, we need to see if we can * set AUTHENTICATION_MODE to WPA and read it back * successfully. */ i = sizeof(arg); arg = NDIS_80211_AUTHMODE_WPA; r = ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); if (r == 0) { r = ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); if (r == 0 && arg == NDIS_80211_AUTHMODE_WPA) ic->ic_caps |= IEEE80211_C_WPA; } /* * To test for supported ciphers, we set each * available encryption type in descending order. * If ENC3 works, then we have WEP, TKIP and AES. * If only ENC2 works, then we have WEP and TKIP. * If only ENC1 works, then we have just WEP. */ i = sizeof(arg); arg = NDIS_80211_WEPSTAT_ENC3ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) { ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP | IEEE80211_CRYPTO_TKIP | IEEE80211_CRYPTO_AES_CCM; goto got_crypto; } arg = NDIS_80211_WEPSTAT_ENC2ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) { ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP | IEEE80211_CRYPTO_TKIP; goto got_crypto; } arg = NDIS_80211_WEPSTAT_ENC1ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP; got_crypto: i = sizeof(arg); r = ndis_get_info(sc, OID_802_11_POWER_MODE, &arg, &i); if (r == 0) ic->ic_caps |= IEEE80211_C_PMGT; r = ndis_get_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &i); if (r == 0) ic->ic_caps |= IEEE80211_C_TXPMGT; /* * Get station address from the driver. */ len = sizeof(ic->ic_macaddr); ndis_get_info(sc, OID_802_3_CURRENT_ADDRESS, &ic->ic_macaddr, &len); ieee80211_ifattach(ic); ic->ic_raw_xmit = ndis_raw_xmit; ic->ic_scan_start = ndis_scan_start; ic->ic_scan_end = ndis_scan_end; ic->ic_set_channel = ndis_set_channel; ic->ic_scan_curchan = ndis_scan_curchan; ic->ic_scan_mindwell = ndis_scan_mindwell; ic->ic_bsschan = IEEE80211_CHAN_ANYC; ic->ic_vap_create = ndis_vap_create; ic->ic_vap_delete = ndis_vap_delete; ic->ic_update_mcast = ndis_update_mcast; ic->ic_update_promisc = ndis_update_promisc; ic->ic_transmit = ndis_80211transmit; ic->ic_parent = ndis_80211parent; if (bootverbose) ieee80211_announce(ic); return (0); } static int ndis_ifattach(struct ndis_softc *sc) { struct ifnet *ifp; u_char eaddr[ETHER_ADDR_LEN]; int len; ifp = if_alloc(IFT_ETHER); if (ifp == NULL) return (ENOSPC); sc->ifp = ifp; ifp->if_softc = sc; /* Check for task offload support. */ ndis_probe_offload(sc); /* * Get station address from the driver. 
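*/

/*
 * Editor's note: illustrative sketch only.  ndis_80211attach() above probes
 * WPA and cipher support by writing a setting and checking that it can be
 * read back, trying the strongest option first.  The helper below captures
 * that set-then-read-back pattern with caller-supplied callbacks; everything
 * here is a stand-in, not the NDIS OID interface.
 */
#include <stdint.h>

typedef int (*setter_t)(void *dev, uint32_t val);
typedef int (*getter_t)(void *dev, uint32_t *val);

/*
 * Try each candidate value in the order given (strongest first) and return
 * the index of the first one the device accepts and reports back, or -1 if
 * none of them stick.
 */
static int
probe_best(void *dev, setter_t set, getter_t get,
    const uint32_t *cand, int ncand)
{
	uint32_t rd;
	int i;

	for (i = 0; i < ncand; i++) {
		if (set(dev, cand[i]) != 0)
			continue;
		if (get(dev, &rd) == 0 && rd == cand[i])
			return (i);
	}
	return (-1);
}

/*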
*/ len = sizeof(eaddr); ndis_get_info(sc, OID_802_3_CURRENT_ADDRESS, eaddr, &len); if_initname(ifp, device_get_name(sc->ndis_dev), device_get_unit(sc->ndis_dev)); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | - IFF_NEEDSEPOCH; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ndis_ifioctl; ifp->if_start = ndis_ifstart; ifp->if_init = ndis_init; ifp->if_baudrate = 10000000; IFQ_SET_MAXLEN(&ifp->if_snd, 50); ifp->if_snd.ifq_drv_maxlen = 25; IFQ_SET_READY(&ifp->if_snd); ifp->if_capenable = ifp->if_capabilities; ifp->if_hwassist = sc->ndis_hwassist; ifmedia_init(&sc->ifmedia, IFM_IMASK, ndis_ifmedia_upd, ndis_ifmedia_sts); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL); ifmedia_set(&sc->ifmedia, IFM_ETHER|IFM_AUTO); ether_ifattach(ifp, eaddr); return (0); } static struct ieee80211vap * ndis_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct ndis_vap *nvp; struct ieee80211vap *vap; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return NULL; nvp = malloc(sizeof(struct ndis_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &nvp->vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); /* override with driver methods */ nvp->newstate = vap->iv_newstate; vap->iv_newstate = ndis_newstate; /* complete setup */ ieee80211_vap_attach(vap, ieee80211_media_change, ndis_media_status, mac); ic->ic_opmode = opmode; /* install key handing routines */ vap->iv_key_set = ndis_add_key; vap->iv_key_delete = ndis_del_key; return vap; } static void ndis_vap_delete(struct ieee80211vap *vap) { struct ndis_vap *nvp = NDIS_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct ndis_softc *sc = ic->ic_softc; ndis_stop(sc); callout_drain(&sc->ndis_scan_callout); ieee80211_vap_detach(vap); free(nvp, M_80211_VAP); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. 
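*/

/*
 * Editor's note: illustrative sketch only.  ndis_vap_create() above saves
 * net80211's original iv_newstate handler in the containing ndis_vap and
 * installs its own, chaining to the saved pointer for states it does not
 * handle itself.  The fragment below shows that save-and-chain pattern in
 * isolation; the types and names are hypothetical.
 */
struct obj;
typedef int (*newstate_fn)(struct obj *, int, int);

struct obj {
	newstate_fn	newstate;	/* currently installed handler */
};

struct wrapper {
	struct obj	base;		/* must stay the first member */
	newstate_fn	saved;		/* handler present before the override */
};

static int
wrapped_newstate(struct obj *o, int state, int arg)
{
	struct wrapper *w = (struct wrapper *)o;	/* base is first */

	/* Driver-specific handling for selected states would go here. */
	return (w->saved(o, state, arg));		/* chain to the original */
}

/* Assumes base.newstate was already filled in by earlier setup. */
static void
install_wrapper(struct wrapper *w)
{
	w->saved = w->base.newstate;
	w->base.newstate = wrapped_newstate;
}

/*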
*/ int ndis_detach(device_t dev) { struct ifnet *ifp; struct ndis_softc *sc; driver_object *drv; sc = device_get_softc(dev); NDIS_LOCK(sc); if (!sc->ndis_80211) ifp = sc->ifp; else ifp = NULL; if (ifp != NULL) ifp->if_flags &= ~IFF_UP; if (device_is_attached(dev)) { NDIS_UNLOCK(sc); ndis_stop(sc); if (sc->ndis_80211) ieee80211_ifdetach(&sc->ndis_ic); else if (ifp != NULL) ether_ifdetach(ifp); } else NDIS_UNLOCK(sc); if (sc->ndis_tickitem != NULL) IoFreeWorkItem(sc->ndis_tickitem); if (sc->ndis_startitem != NULL) IoFreeWorkItem(sc->ndis_startitem); if (sc->ndis_resetitem != NULL) IoFreeWorkItem(sc->ndis_resetitem); if (sc->ndis_inputitem != NULL) IoFreeWorkItem(sc->ndis_inputitem); if (sc->ndisusb_xferdoneitem != NULL) IoFreeWorkItem(sc->ndisusb_xferdoneitem); if (sc->ndisusb_taskitem != NULL) IoFreeWorkItem(sc->ndisusb_taskitem); bus_generic_detach(dev); ndis_unload_driver(sc); if (sc->ndis_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->ndis_irq); if (sc->ndis_res_io) bus_release_resource(dev, SYS_RES_IOPORT, sc->ndis_io_rid, sc->ndis_res_io); if (sc->ndis_res_mem) bus_release_resource(dev, SYS_RES_MEMORY, sc->ndis_mem_rid, sc->ndis_res_mem); if (sc->ndis_res_altmem) bus_release_resource(dev, SYS_RES_MEMORY, sc->ndis_altmem_rid, sc->ndis_res_altmem); if (ifp != NULL) if_free(ifp); if (sc->ndis_sc) ndis_destroy_dma(sc); if (sc->ndis_txarray) free(sc->ndis_txarray, M_DEVBUF); if (!sc->ndis_80211) ifmedia_removeall(&sc->ifmedia); if (sc->ndis_txpool != NULL) NdisFreePacketPool(sc->ndis_txpool); /* Destroy the PDO for this device. */ if (sc->ndis_iftype == PCIBus) drv = windrv_lookup(0, "PCI Bus"); else if (sc->ndis_iftype == PCMCIABus) drv = windrv_lookup(0, "PCCARD Bus"); else drv = windrv_lookup(0, "USB Bus"); if (drv == NULL) panic("couldn't find driver object"); windrv_destroy_pdo(drv, dev); if (sc->ndis_iftype == PCIBus) bus_dma_tag_destroy(sc->ndis_parent_tag); return (0); } int ndis_suspend(dev) device_t dev; { struct ndis_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; #ifdef notdef if (NDIS_INITIALIZED(sc)) ndis_stop(sc); #endif return (0); } int ndis_resume(dev) device_t dev; { struct ndis_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; if (NDIS_INITIALIZED(sc)) ndis_init(sc); return (0); } /* * The following bunch of routines are here to support drivers that * use the NdisMEthIndicateReceive()/MiniportTransferData() mechanism. * The NdisMEthIndicateReceive() handler runs at DISPATCH_LEVEL for * serialized miniports, or IRQL <= DISPATCH_LEVEL for deserialized * miniports. */ static void ndis_rxeof_eth(adapter, ctx, addr, hdr, hdrlen, lookahead, lookaheadlen, pktlen) ndis_handle adapter; ndis_handle ctx; char *addr; void *hdr; uint32_t hdrlen; void *lookahead; uint32_t lookaheadlen; uint32_t pktlen; { ndis_miniport_block *block; uint8_t irql = 0; uint32_t status; ndis_buffer *b; ndis_packet *p; struct mbuf *m; ndis_ethpriv *priv; block = adapter; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return; /* Save the data provided to us so far. 
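*/

/*
 * Editor's note: illustrative sketch only.  As the comment before
 * ndis_detach() above explains, the detach path may also run from a failed
 * attach, so every resource is released only if it was actually allocated.
 * The helper below shows that defensive shape with plain malloc/free; the
 * member names are made up.
 */
#include <stdlib.h>

struct res {
	void	*buf_a;
	void	*buf_b;
};

static void
res_teardown(struct res *r)
{
	if (r->buf_a != NULL) {
		free(r->buf_a);
		r->buf_a = NULL;	/* safe to call teardown again */
	}
	if (r->buf_b != NULL) {
		free(r->buf_b);
		r->buf_b = NULL;
	}
}

/*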
*/ m->m_len = lookaheadlen + hdrlen; m->m_pkthdr.len = pktlen + hdrlen; m->m_next = NULL; m_copyback(m, 0, hdrlen, hdr); m_copyback(m, hdrlen, lookaheadlen, lookahead); /* Now create a fake NDIS_PACKET to hold the data */ NdisAllocatePacket(&status, &p, block->nmb_rxpool); if (status != NDIS_STATUS_SUCCESS) { m_freem(m); return; } p->np_m0 = m; b = IoAllocateMdl(m->m_data, m->m_pkthdr.len, FALSE, FALSE, NULL); if (b == NULL) { NdisFreePacket(p); m_freem(m); return; } p->np_private.npp_head = p->np_private.npp_tail = b; p->np_private.npp_totlen = m->m_pkthdr.len; /* Save the packet RX context somewhere. */ priv = (ndis_ethpriv *)&p->np_protocolreserved; priv->nep_ctx = ctx; if (!NDIS_SERIALIZED(block)) KeAcquireSpinLock(&block->nmb_lock, &irql); InsertTailList((&block->nmb_packetlist), (&p->np_list)); if (!NDIS_SERIALIZED(block)) KeReleaseSpinLock(&block->nmb_lock, irql); } /* * NdisMEthIndicateReceiveComplete() handler, runs at DISPATCH_LEVEL * for serialized miniports, or IRQL <= DISPATCH_LEVEL for deserialized * miniports. */ static void ndis_rxeof_done(adapter) ndis_handle adapter; { struct ndis_softc *sc; ndis_miniport_block *block; block = adapter; /* Schedule transfer/RX of queued packets. */ sc = device_get_softc(block->nmb_physdeviceobj->do_devext); KeInsertQueueDpc(&sc->ndis_rxdpc, NULL, NULL); } /* * MiniportTransferData() handler, runs at DISPATCH_LEVEL. */ static void ndis_rxeof_xfr(dpc, adapter, sysarg1, sysarg2) kdpc *dpc; ndis_handle adapter; void *sysarg1; void *sysarg2; { ndis_miniport_block *block; struct ndis_softc *sc; ndis_packet *p; list_entry *l; uint32_t status; ndis_ethpriv *priv; struct ifnet *ifp; struct mbuf *m; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; KeAcquireSpinLockAtDpcLevel(&block->nmb_lock); l = block->nmb_packetlist.nle_flink; while(!IsListEmpty(&block->nmb_packetlist)) { l = RemoveHeadList((&block->nmb_packetlist)); p = CONTAINING_RECORD(l, ndis_packet, np_list); InitializeListHead((&p->np_list)); priv = (ndis_ethpriv *)&p->np_protocolreserved; m = p->np_m0; p->np_softc = sc; p->np_m0 = NULL; KeReleaseSpinLockFromDpcLevel(&block->nmb_lock); status = MSCALL6(sc->ndis_chars->nmc_transferdata_func, p, &p->np_private.npp_totlen, block, priv->nep_ctx, m->m_len, m->m_pkthdr.len - m->m_len); KeAcquireSpinLockAtDpcLevel(&block->nmb_lock); /* * If status is NDIS_STATUS_PENDING, do nothing and * wait for a callback to the ndis_rxeof_xfr_done() * handler. */ m->m_len = m->m_pkthdr.len; m->m_pkthdr.rcvif = ifp; if (status == NDIS_STATUS_SUCCESS) { IoFreeMdl(p->np_private.npp_head); NdisFreePacket(p); KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock); mbufq_enqueue(&sc->ndis_rxqueue, m); KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock); IoQueueWorkItem(sc->ndis_inputitem, (io_workitem_func)ndis_inputtask_wrap, WORKQUEUE_CRITICAL, sc); } if (status == NDIS_STATUS_FAILURE) m_freem(m); /* Advance to next packet */ l = block->nmb_packetlist.nle_flink; } KeReleaseSpinLockFromDpcLevel(&block->nmb_lock); } /* * NdisMTransferDataComplete() handler, runs at DISPATCH_LEVEL. 
*/ static void ndis_rxeof_xfr_done(adapter, packet, status, len) ndis_handle adapter; ndis_packet *packet; uint32_t status; uint32_t len; { ndis_miniport_block *block; struct ndis_softc *sc; struct ifnet *ifp; struct mbuf *m; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; m = packet->np_m0; IoFreeMdl(packet->np_private.npp_head); NdisFreePacket(packet); if (status != NDIS_STATUS_SUCCESS) { m_freem(m); return; } m->m_len = m->m_pkthdr.len; m->m_pkthdr.rcvif = ifp; KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock); mbufq_enqueue(&sc->ndis_rxqueue, m); KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock); IoQueueWorkItem(sc->ndis_inputitem, (io_workitem_func)ndis_inputtask_wrap, WORKQUEUE_CRITICAL, sc); } /* * A frame has been uploaded: pass the resulting mbuf chain up to * the higher level protocols. * * When handling received NDIS packets, the 'status' field in the * out-of-band portion of the ndis_packet has special meaning. In the * most common case, the underlying NDIS driver will set this field * to NDIS_STATUS_SUCCESS, which indicates that it's ok for us to * take possession of it. We then change the status field to * NDIS_STATUS_PENDING to tell the driver that we now own the packet, * and that we will return it at some point in the future via the * return packet handler. * * If the driver hands us a packet with a status of NDIS_STATUS_RESOURCES, * this means the driver is running out of packet/buffer resources and * wants to maintain ownership of the packet. In this case, we have to * copy the packet data into local storage and let the driver keep the * packet. */ static void ndis_rxeof(adapter, packets, pktcnt) ndis_handle adapter; ndis_packet **packets; uint32_t pktcnt; { struct ndis_softc *sc; ndis_miniport_block *block; ndis_packet *p; uint32_t s; ndis_tcpip_csum *csum; struct ifnet *ifp; struct mbuf *m0, *m; int i; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; /* * There's a slim chance the driver may indicate some packets * before we're completely ready to handle them. If we detect this, * we need to return them to the miniport and ignore them. */ if (!sc->ndis_running) { for (i = 0; i < pktcnt; i++) { p = packets[i]; if (p->np_oob.npo_status == NDIS_STATUS_SUCCESS) { p->np_refcnt++; ndis_return_packet(p); } } return; } for (i = 0; i < pktcnt; i++) { p = packets[i]; /* Stash the softc here so ptom can use it. */ p->np_softc = sc; if (ndis_ptom(&m0, p)) { device_printf(sc->ndis_dev, "ptom failed\n"); if (p->np_oob.npo_status == NDIS_STATUS_SUCCESS) ndis_return_packet(p); } else { #ifdef notdef if (p->np_oob.npo_status == NDIS_STATUS_RESOURCES) { m = m_dup(m0, M_NOWAIT); /* * NOTE: we want to destroy the mbuf here, but * we don't actually want to return it to the * driver via the return packet handler. By * bumping np_refcnt, we can prevent the * ndis_return_packet() routine from actually * doing anything. */ p->np_refcnt++; m_freem(m0); if (m == NULL) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); else m0 = m; } else p->np_oob.npo_status = NDIS_STATUS_PENDING; #endif m = m_dup(m0, M_NOWAIT); if (p->np_oob.npo_status == NDIS_STATUS_RESOURCES) p->np_refcnt++; else p->np_oob.npo_status = NDIS_STATUS_PENDING; m_freem(m0); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); continue; } m0 = m; m0->m_pkthdr.rcvif = ifp; /* Deal with checksum offload. 
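*/

/*
 * Editor's note: illustrative sketch only.  The comment above ndis_rxeof()
 * describes an ownership handshake: on NDIS_STATUS_SUCCESS the host may keep
 * the packet (marking it pending and returning it to the driver later),
 * while on NDIS_STATUS_RESOURCES the driver keeps it and the host must copy
 * the data out first.  The fragment below restates that copy-and-mark
 * decision with stand-in types.
 */
#include <stdlib.h>
#include <string.h>

enum pkt_status { PKT_SUCCESS, PKT_RESOURCES, PKT_PENDING };

struct pkt {
	enum pkt_status	status;
	size_t		len;
	unsigned char	data[1518];
};

/* Returns a buffer the caller owns, or NULL on allocation failure. */
static unsigned char *
rx_take(struct pkt *p, size_t *lenp)
{
	unsigned char *copy;

	copy = malloc(p->len);
	if (copy == NULL)
		return (NULL);
	memcpy(copy, p->data, p->len);
	*lenp = p->len;
	if (p->status == PKT_SUCCESS)
		p->status = PKT_PENDING;	/* we now owe the driver a return */
	/* On PKT_RESOURCES the driver keeps the packet; nothing to mark. */
	return (copy);
}

/*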
*/ if (ifp->if_capenable & IFCAP_RXCSUM && p->np_ext.npe_info[ndis_tcpipcsum_info] != NULL) { s = (uintptr_t) p->np_ext.npe_info[ndis_tcpipcsum_info]; csum = (ndis_tcpip_csum *)&s; if (csum->u.ntc_rxflags & NDIS_RXCSUM_IP_PASSED) m0->m_pkthdr.csum_flags |= CSUM_IP_CHECKED|CSUM_IP_VALID; if (csum->u.ntc_rxflags & (NDIS_RXCSUM_TCP_PASSED | NDIS_RXCSUM_UDP_PASSED)) { m0->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m0->m_pkthdr.csum_data = 0xFFFF; } } KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock); mbufq_enqueue(&sc->ndis_rxqueue, m0); KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock); IoQueueWorkItem(sc->ndis_inputitem, (io_workitem_func)ndis_inputtask_wrap, WORKQUEUE_CRITICAL, sc); } } } /* * This routine is run at PASSIVE_LEVEL. We use this routine to pass * packets into the stack in order to avoid calling (*ifp->if_input)() * with any locks held (at DISPATCH_LEVEL, we'll be holding the * 'dispatch level' per-cpu sleep lock). */ static void ndis_inputtask(device_object *dobj, void *arg) { ndis_miniport_block *block; struct ndis_softc *sc = arg; struct mbuf *m; uint8_t irql; block = dobj->do_devext; KeAcquireSpinLock(&sc->ndis_rxlock, &irql); while ((m = mbufq_dequeue(&sc->ndis_rxqueue)) != NULL) { KeReleaseSpinLock(&sc->ndis_rxlock, irql); if ((sc->ndis_80211 != 0)) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) vap->iv_deliver_data(vap, vap->iv_bss, m); } else { struct ifnet *ifp = sc->ifp; (*ifp->if_input)(ifp, m); } KeAcquireSpinLock(&sc->ndis_rxlock, &irql); } KeReleaseSpinLock(&sc->ndis_rxlock, irql); } /* * A frame was downloaded to the chip. It's safe for us to clean up * the list buffers. */ static void ndis_txeof(adapter, packet, status) ndis_handle adapter; ndis_packet *packet; ndis_status status; { struct ndis_softc *sc; ndis_miniport_block *block; struct ifnet *ifp; int idx; struct mbuf *m; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; m = packet->np_m0; idx = packet->np_txidx; if (sc->ndis_sc) bus_dmamap_unload(sc->ndis_ttag, sc->ndis_tmaps[idx]); ndis_free_packet(packet); m_freem(m); NDIS_LOCK(sc); sc->ndis_txarray[idx] = NULL; sc->ndis_txpending++; if (!sc->ndis_80211) { struct ifnet *ifp = sc->ifp; if (status == NDIS_STATUS_SUCCESS) if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->ndis_tx_timer = 0; NDIS_UNLOCK(sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); DPRINTF(("%s: ndis_ifstarttask_wrap sc=%p\n", __func__, sc)); } static void ndis_linksts(adapter, status, sbuf, slen) ndis_handle adapter; ndis_status status; void *sbuf; uint32_t slen; { ndis_miniport_block *block; struct ndis_softc *sc; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); sc->ndis_sts = status; /* Event list is all full up, drop this one. */ NDIS_LOCK(sc); if (sc->ndis_evt[sc->ndis_evtpidx].ne_sts) { NDIS_UNLOCK(sc); return; } /* Cache the event. 
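*/

/*
 * Editor's note: illustrative sketch only.  ndis_inputtask() above dequeues
 * one packet at a time and drops the receive lock while it hands the packet
 * up, so the input path never runs with the lock held.  The loop below shows
 * the same drop-deliver-reacquire shape using pthreads; the queue layout and
 * deliver() callback are stand-ins.
 */
#include <pthread.h>
#include <stddef.h>

struct node {
	struct node	*next;
	void		*payload;
};

struct queue {
	pthread_mutex_t	lock;
	struct node	*head;
};

static void
queue_drain(struct queue *q, void (*deliver)(struct node *))
{
	struct node *n;

	pthread_mutex_lock(&q->lock);
	while ((n = q->head) != NULL) {
		q->head = n->next;
		pthread_mutex_unlock(&q->lock);	/* never deliver locked */
		deliver(n);			/* callback takes ownership */
		pthread_mutex_lock(&q->lock);
	}
	pthread_mutex_unlock(&q->lock);
}

/*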
*/ if (slen) { sc->ndis_evt[sc->ndis_evtpidx].ne_buf = malloc(slen, M_TEMP, M_NOWAIT); if (sc->ndis_evt[sc->ndis_evtpidx].ne_buf == NULL) { NDIS_UNLOCK(sc); return; } bcopy((char *)sbuf, sc->ndis_evt[sc->ndis_evtpidx].ne_buf, slen); } sc->ndis_evt[sc->ndis_evtpidx].ne_sts = status; sc->ndis_evt[sc->ndis_evtpidx].ne_len = slen; NDIS_EVTINC(sc->ndis_evtpidx); NDIS_UNLOCK(sc); } static void ndis_linksts_done(adapter) ndis_handle adapter; { ndis_miniport_block *block; struct ndis_softc *sc; struct ifnet *ifp; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; if (!NDIS_INITIALIZED(sc)) return; switch (sc->ndis_sts) { case NDIS_STATUS_MEDIA_CONNECT: IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); break; case NDIS_STATUS_MEDIA_DISCONNECT: if (sc->ndis_link) IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); break; default: break; } } static void ndis_tick(xsc) void *xsc; { struct ndis_softc *sc; sc = xsc; if (sc->ndis_hang_timer && --sc->ndis_hang_timer == 0) { IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); sc->ndis_hang_timer = sc->ndis_block->nmb_checkforhangsecs; } if (sc->ndis_tx_timer && --sc->ndis_tx_timer == 0) { if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); device_printf(sc->ndis_dev, "watchdog timeout\n"); IoQueueWorkItem(sc->ndis_resetitem, (io_workitem_func)ndis_resettask_wrap, WORKQUEUE_CRITICAL, sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); } callout_reset(&sc->ndis_stat_callout, hz, ndis_tick, sc); } static void ndis_ticktask(device_object *d, void *xsc) { struct ndis_softc *sc = xsc; ndis_checkforhang_handler hangfunc; uint8_t rval; NDIS_LOCK(sc); if (!NDIS_INITIALIZED(sc)) { NDIS_UNLOCK(sc); return; } NDIS_UNLOCK(sc); hangfunc = sc->ndis_chars->nmc_checkhang_func; if (hangfunc != NULL) { rval = MSCALL1(hangfunc, sc->ndis_block->nmb_miniportadapterctx); if (rval == TRUE) { ndis_reset_nic(sc); return; } } NDIS_LOCK(sc); if (sc->ndis_link == 0 && sc->ndis_sts == NDIS_STATUS_MEDIA_CONNECT) { sc->ndis_link = 1; if (sc->ndis_80211 != 0) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) { NDIS_UNLOCK(sc); ndis_getstate_80211(sc); ieee80211_new_state(vap, IEEE80211_S_RUN, -1); NDIS_LOCK(sc); if_link_state_change(vap->iv_ifp, LINK_STATE_UP); } } else if_link_state_change(sc->ifp, LINK_STATE_UP); } if (sc->ndis_link == 1 && sc->ndis_sts == NDIS_STATUS_MEDIA_DISCONNECT) { sc->ndis_link = 0; if (sc->ndis_80211 != 0) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) { NDIS_UNLOCK(sc); ieee80211_new_state(vap, IEEE80211_S_SCAN, 0); NDIS_LOCK(sc); if_link_state_change(vap->iv_ifp, LINK_STATE_DOWN); } } else if_link_state_change(sc->ifp, LINK_STATE_DOWN); } NDIS_UNLOCK(sc); } static void ndis_map_sclist(arg, segs, nseg, mapsize, error) void *arg; bus_dma_segment_t *segs; int nseg; bus_size_t mapsize; int error; { struct ndis_sc_list *sclist; int i; if (error || arg == NULL) return; sclist = arg; sclist->nsl_frags = nseg; for (i = 0; i < nseg; i++) { sclist->nsl_elements[i].nse_addr.np_quad = segs[i].ds_addr; sclist->nsl_elements[i].nse_len = segs[i].ds_len; } } static int 
ndis_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { /* no support; just discard */ m_freem(m); ieee80211_free_node(ni); return (0); } static void ndis_update_mcast(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; ndis_setmulti(sc); } static void ndis_update_promisc(struct ieee80211com *ic) { /* not supported */ } static void ndis_ifstarttask(device_object *d, void *arg) { struct ndis_softc *sc = arg; DPRINTF(("%s: sc=%p, ifp=%p\n", __func__, sc, sc->ifp)); if (sc->ndis_80211) return; struct ifnet *ifp = sc->ifp; if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ndis_ifstart(ifp); } /* * Main transmit routine. To make NDIS drivers happy, we need to * transform mbuf chains into NDIS packets and feed them to the * send packet routines. Most drivers allow you to send several * packets at once (up to the maxpkts limit). Unfortunately, rather * that accepting them in the form of a linked list, they expect * a contiguous array of pointers to packets. * * For those drivers which use the NDIS scatter/gather DMA mechanism, * we need to perform busdma work here. Those that use map registers * will do the mapping themselves on a buffer by buffer basis. */ static void ndis_ifstart(struct ifnet *ifp) { struct ndis_softc *sc; struct mbuf *m = NULL; ndis_packet **p0 = NULL, *p = NULL; ndis_tcpip_csum *csum; int pcnt = 0, status; sc = ifp->if_softc; NDIS_LOCK(sc); if (!sc->ndis_link || ifp->if_drv_flags & IFF_DRV_OACTIVE) { NDIS_UNLOCK(sc); return; } p0 = &sc->ndis_txarray[sc->ndis_txidx]; while(sc->ndis_txpending) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; NdisAllocatePacket(&status, &sc->ndis_txarray[sc->ndis_txidx], sc->ndis_txpool); if (status != NDIS_STATUS_SUCCESS) break; if (ndis_mtop(m, &sc->ndis_txarray[sc->ndis_txidx])) { IFQ_DRV_PREPEND(&ifp->if_snd, m); NDIS_UNLOCK(sc); return; } /* * Save pointer to original mbuf * so we can free it later. */ p = sc->ndis_txarray[sc->ndis_txidx]; p->np_txidx = sc->ndis_txidx; p->np_m0 = m; p->np_oob.npo_status = NDIS_STATUS_PENDING; /* * Do scatter/gather processing, if driver requested it. */ if (sc->ndis_sc) { bus_dmamap_load_mbuf(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], m, ndis_map_sclist, &p->np_sclist, BUS_DMA_NOWAIT); bus_dmamap_sync(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], BUS_DMASYNC_PREREAD); p->np_ext.npe_info[ndis_sclist_info] = &p->np_sclist; } /* Handle checksum offload. */ if (ifp->if_capenable & IFCAP_TXCSUM && m->m_pkthdr.csum_flags) { csum = (ndis_tcpip_csum *) &p->np_ext.npe_info[ndis_tcpipcsum_info]; csum->u.ntc_txflags = NDIS_TXCSUM_DO_IPV4; if (m->m_pkthdr.csum_flags & CSUM_IP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_IP; if (m->m_pkthdr.csum_flags & CSUM_TCP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_TCP; if (m->m_pkthdr.csum_flags & CSUM_UDP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_UDP; p->np_private.npp_flags = NDIS_PROTOCOL_ID_TCP_IP; } NDIS_INC(sc); sc->ndis_txpending--; pcnt++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ if (!sc->ndis_80211) /* XXX handle 80211 */ BPF_MTAP(ifp, m); /* * The array that p0 points to must appear contiguous, * so we must not wrap past the end of sc->ndis_txarray[]. * If it looks like we're about to wrap, break out here * so the this batch of packets can be transmitted, then * wait for txeof to ask us to send the rest. 
*/ if (sc->ndis_txidx == 0) break; } if (pcnt == 0) { NDIS_UNLOCK(sc); return; } if (sc->ndis_txpending == 0) ifp->if_drv_flags |= IFF_DRV_OACTIVE; /* * Set a timeout in case the chip goes out to lunch. */ sc->ndis_tx_timer = 5; NDIS_UNLOCK(sc); /* * According to NDIS documentation, if a driver exports * a MiniportSendPackets() routine, we prefer that over * a MiniportSend() routine (which sends just a single * packet). */ if (sc->ndis_chars->nmc_sendmulti_func != NULL) ndis_send_packets(sc, p0, pcnt); else ndis_send_packet(sc, p); return; } static int ndis_80211transmit(struct ieee80211com *ic, struct mbuf *m) { struct ndis_softc *sc = ic->ic_softc; ndis_packet **p0 = NULL, *p = NULL; int status; NDIS_LOCK(sc); if (!sc->ndis_link || !sc->ndis_running) { NDIS_UNLOCK(sc); return (ENXIO); } if (sc->ndis_txpending == 0) { NDIS_UNLOCK(sc); return (ENOBUFS); } p0 = &sc->ndis_txarray[sc->ndis_txidx]; NdisAllocatePacket(&status, &sc->ndis_txarray[sc->ndis_txidx], sc->ndis_txpool); if (status != NDIS_STATUS_SUCCESS) { NDIS_UNLOCK(sc); return (ENOBUFS); } if (ndis_mtop(m, &sc->ndis_txarray[sc->ndis_txidx])) { NDIS_UNLOCK(sc); return (ENOBUFS); } /* * Save pointer to original mbuf * so we can free it later. */ p = sc->ndis_txarray[sc->ndis_txidx]; p->np_txidx = sc->ndis_txidx; p->np_m0 = m; p->np_oob.npo_status = NDIS_STATUS_PENDING; /* * Do scatter/gather processing, if driver requested it. */ if (sc->ndis_sc) { bus_dmamap_load_mbuf(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], m, ndis_map_sclist, &p->np_sclist, BUS_DMA_NOWAIT); bus_dmamap_sync(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], BUS_DMASYNC_PREREAD); p->np_ext.npe_info[ndis_sclist_info] = &p->np_sclist; } NDIS_INC(sc); sc->ndis_txpending--; /* * Set a timeout in case the chip goes out to lunch. */ sc->ndis_tx_timer = 5; NDIS_UNLOCK(sc); /* * According to NDIS documentation, if a driver exports * a MiniportSendPackets() routine, we prefer that over * a MiniportSend() routine (which sends just a single * packet). */ if (sc->ndis_chars->nmc_sendmulti_func != NULL) ndis_send_packets(sc, p0, 1); else ndis_send_packet(sc, p); return (0); } static void ndis_80211parent(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; /*NDIS_LOCK(sc);*/ if (ic->ic_nrunning > 0) { if (!sc->ndis_running) ndis_init(sc); } else if (sc->ndis_running) ndis_stop(sc); /*NDIS_UNLOCK(sc);*/ } static void ndis_init(void *xsc) { struct ndis_softc *sc = xsc; int i, len, error; /* * Avoid reintializing the link unnecessarily. * This should be dealt with in a better way by * fixing the upper layer modules so they don't * call ifp->if_init() quite as often. */ if (sc->ndis_link) return; /* * Cancel pending I/O and free all RX/TX buffers. */ ndis_stop(sc); if (!(sc->ndis_iftype == PNPBus && ndisusb_halt == 0)) { error = ndis_init_nic(sc); if (error != 0) { device_printf(sc->ndis_dev, "failed to initialize the device: %d\n", error); return; } } /* Program the packet filter */ sc->ndis_filter = NDIS_PACKET_TYPE_DIRECTED | NDIS_PACKET_TYPE_BROADCAST; if (sc->ndis_80211) { struct ieee80211com *ic = &sc->ndis_ic; if (ic->ic_promisc > 0) sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; } else { struct ifnet *ifp = sc->ifp; if (ifp->if_flags & IFF_PROMISC) sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; } len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set filter failed: %d\n", error); /* * Set lookahead. 
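*/

/*
 * Editor's note: illustrative sketch only.  ndis_ifstart() above builds each
 * transmit batch from a fixed array used as a ring and deliberately stops at
 * the wrap point, because the device expects one contiguous array of packet
 * pointers.  The helper below shows the same "advance until wrap"
 * bookkeeping; the names are stand-ins.
 */
#include <stddef.h>

#define SLOTS	64

struct txring {
	void	*slot[SLOTS];
	size_t	idx;	/* next free slot */
	size_t	avail;	/* free slots remaining */
};

/*
 * Claim up to 'want' contiguous slots starting at the current index.  The
 * returned count never crosses the end of the array; the caller submits the
 * batch and retries for the remainder once completions free more slots.
 */
static size_t
txring_claim(struct txring *r, size_t want, size_t *start)
{
	size_t n = want;

	if (n > r->avail)
		n = r->avail;
	if (n > SLOTS - r->idx)
		n = SLOTS - r->idx;	/* stop at the wrap point */
	*start = r->idx;
	r->idx = (r->idx + n) % SLOTS;
	r->avail -= n;
	return (n);
}

/*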
*/ if (sc->ndis_80211) i = ETHERMTU; else i = sc->ifp->if_mtu; len = sizeof(i); ndis_set_info(sc, OID_GEN_CURRENT_LOOKAHEAD, &i, &len); /* * Program the multicast filter, if necessary. */ ndis_setmulti(sc); /* Setup task offload. */ ndis_set_offload(sc); NDIS_LOCK(sc); sc->ndis_txidx = 0; sc->ndis_txpending = sc->ndis_maxpkts; sc->ndis_link = 0; if (!sc->ndis_80211) { if_link_state_change(sc->ifp, LINK_STATE_UNKNOWN); sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->ndis_tx_timer = 0; /* * Some drivers don't set this value. The NDIS spec says * the default checkforhang timeout is "approximately 2 * seconds." We use 3 seconds, because it seems for some * drivers, exactly 2 seconds is too fast. */ if (sc->ndis_block->nmb_checkforhangsecs == 0) sc->ndis_block->nmb_checkforhangsecs = 3; sc->ndis_hang_timer = sc->ndis_block->nmb_checkforhangsecs; callout_reset(&sc->ndis_stat_callout, hz, ndis_tick, sc); sc->ndis_running = 1; NDIS_UNLOCK(sc); /* XXX force handling */ if (sc->ndis_80211) ieee80211_start_all(&sc->ndis_ic); /* start all vap's */ } /* * Set media options. */ static int ndis_ifmedia_upd(ifp) struct ifnet *ifp; { struct ndis_softc *sc; sc = ifp->if_softc; if (NDIS_INITIALIZED(sc)) ndis_init(sc); return (0); } /* * Report current media status. */ static void ndis_ifmedia_sts(ifp, ifmr) struct ifnet *ifp; struct ifmediareq *ifmr; { struct ndis_softc *sc; uint32_t media_info; ndis_media_state linkstate; int len; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; sc = ifp->if_softc; if (!NDIS_INITIALIZED(sc)) return; len = sizeof(linkstate); ndis_get_info(sc, OID_GEN_MEDIA_CONNECT_STATUS, (void *)&linkstate, &len); len = sizeof(media_info); ndis_get_info(sc, OID_GEN_LINK_SPEED, (void *)&media_info, &len); if (linkstate == nmc_connected) ifmr->ifm_status |= IFM_ACTIVE; switch (media_info) { case 100000: ifmr->ifm_active |= IFM_10_T; break; case 1000000: ifmr->ifm_active |= IFM_100_TX; break; case 10000000: ifmr->ifm_active |= IFM_1000_T; break; default: device_printf(sc->ndis_dev, "unknown speed: %d\n", media_info); break; } } static int ndis_set_cipher(struct ndis_softc *sc, int cipher) { struct ieee80211com *ic = &sc->ndis_ic; int rval = 0, len; uint32_t arg, save; len = sizeof(arg); if (cipher == WPA_CSE_WEP40 || cipher == WPA_CSE_WEP104) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_WEP)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC1ENABLED; } if (cipher == WPA_CSE_TKIP) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_TKIP)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC2ENABLED; } if (cipher == WPA_CSE_CCMP) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_AES_CCM)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC3ENABLED; } DPRINTF(("Setting cipher to %d\n", arg)); save = arg; rval = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); if (rval) return (rval); /* Check that the cipher was set correctly. */ len = sizeof(save); rval = ndis_get_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); if (rval != 0 || arg != save) return (ENODEV); return (0); } /* * WPA is hairy to set up. Do the work in a separate routine * so we don't clutter the setstate function too much. * Important yet undocumented fact: first we have to set the * authentication mode, _then_ we enable the ciphers. If one * of the WPA authentication modes isn't enabled, the driver * might not permit the TKIP or AES ciphers to be selected. 
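*/

/*
 * Editor's note: illustrative sketch only.  ndis_ifmedia_sts() above compares
 * the raw OID_GEN_LINK_SPEED value against 100000/1000000/10000000; the OID
 * reports speed in units of 100 bit/s, so those cases correspond to 10, 100
 * and 1000 Mbit/s.  The helper below just makes the unit conversion explicit;
 * the name is a stand-in.
 */
#include <stdint.h>

/* Convert an NDIS-style link speed (units of 100 bit/s) to Mbit/s. */
static uint32_t
linkspeed_to_mbps(uint32_t units_100bps)
{
	return (units_100bps / 10000);	/* 10000 * 100 bit/s = 1 Mbit/s */
}

/*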
*/ static int ndis_set_wpa(sc, ie, ielen) struct ndis_softc *sc; void *ie; int ielen; { struct ieee80211_ie_wpa *w; struct ndis_ie *n; char *pos; uint32_t arg; int i; /* * Apparently, the only way for us to know what ciphers * and key management/authentication mode to use is for * us to inspect the optional information element (IE) * stored in the 802.11 state machine. This IE should be * supplied by the WPA supplicant. */ w = (struct ieee80211_ie_wpa *)ie; /* Check for the right kind of IE. */ if (w->wpa_id != IEEE80211_ELEMID_VENDOR) { DPRINTF(("Incorrect IE type %d\n", w->wpa_id)); return (EINVAL); } /* Skip over the ucast cipher OIDs. */ pos = (char *)&w->wpa_uciphers[0]; pos += w->wpa_uciphercnt * sizeof(struct ndis_ie); /* Skip over the authmode count. */ pos += sizeof(u_int16_t); /* * Check for the authentication modes. I'm * pretty sure there's only supposed to be one. */ n = (struct ndis_ie *)pos; if (n->ni_val == WPA_ASE_NONE) arg = NDIS_80211_AUTHMODE_WPANONE; if (n->ni_val == WPA_ASE_8021X_UNSPEC) arg = NDIS_80211_AUTHMODE_WPA; if (n->ni_val == WPA_ASE_8021X_PSK) arg = NDIS_80211_AUTHMODE_WPAPSK; DPRINTF(("Setting WPA auth mode to %d\n", arg)); i = sizeof(arg); if (ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i)) return (ENOTSUP); i = sizeof(arg); ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); /* Now configure the desired ciphers. */ /* First, set up the multicast group cipher. */ n = (struct ndis_ie *)&w->wpa_mcipher[0]; if (ndis_set_cipher(sc, n->ni_val)) return (ENOTSUP); /* Now start looking around for the unicast ciphers. */ pos = (char *)&w->wpa_uciphers[0]; n = (struct ndis_ie *)pos; for (i = 0; i < w->wpa_uciphercnt; i++) { if (ndis_set_cipher(sc, n->ni_val)) return (ENOTSUP); n++; } return (0); } static void ndis_media_status(struct ifnet *ifp, struct ifmediareq *imr) { struct ieee80211vap *vap = ifp->if_softc; struct ndis_softc *sc = vap->iv_ic->ic_softc; uint32_t txrate; int len; if (!NDIS_INITIALIZED(sc)) return; len = sizeof(txrate); if (ndis_get_info(sc, OID_GEN_LINK_SPEED, &txrate, &len) == 0) vap->iv_bss->ni_txrate = txrate / 5000; ieee80211_media_status(ifp, imr); } static void ndis_setstate_80211(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); ndis_80211_macaddr bssid; ndis_80211_config config; int rval = 0, len; uint32_t arg; if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: NDIS not initialized\n", __func__)); return; } /* Disassociate and turn off radio. */ len = sizeof(arg); arg = 1; ndis_set_info(sc, OID_802_11_DISASSOCIATE, &arg, &len); /* Set network infrastructure mode. */ len = sizeof(arg); if (ic->ic_opmode == IEEE80211_M_IBSS) arg = NDIS_80211_NET_INFRA_IBSS; else arg = NDIS_80211_NET_INFRA_BSS; rval = ndis_set_info(sc, OID_802_11_INFRASTRUCTURE_MODE, &arg, &len); if (rval) device_printf (sc->ndis_dev, "set infra failed: %d\n", rval); /* Set power management */ len = sizeof(arg); if (vap->iv_flags & IEEE80211_F_PMGTON) arg = NDIS_80211_POWERMODE_FAST_PSP; else arg = NDIS_80211_POWERMODE_CAM; ndis_set_info(sc, OID_802_11_POWER_MODE, &arg, &len); /* Set TX power */ if ((ic->ic_caps & IEEE80211_C_TXPMGT) && ic->ic_txpowlimit < nitems(dBm2mW)) { arg = dBm2mW[ic->ic_txpowlimit]; len = sizeof(arg); ndis_set_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &len); } /* * Default encryption mode to off, authentication * to open and privacy to 'accept everything.' 
*/ len = sizeof(arg); arg = NDIS_80211_WEPSTAT_DISABLED; ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); len = sizeof(arg); arg = NDIS_80211_AUTHMODE_OPEN; ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); /* * Note that OID_802_11_PRIVACY_FILTER is optional: * not all drivers implement it. */ len = sizeof(arg); arg = NDIS_80211_PRIVFILT_8021XWEP; ndis_set_info(sc, OID_802_11_PRIVACY_FILTER, &arg, &len); len = sizeof(config); bzero((char *)&config, len); config.nc_length = len; config.nc_fhconfig.ncf_length = sizeof(ndis_80211_config_fh); rval = ndis_get_info(sc, OID_802_11_CONFIGURATION, &config, &len); /* * Some drivers expect us to initialize these values, so * provide some defaults. */ if (config.nc_beaconperiod == 0) config.nc_beaconperiod = 100; if (config.nc_atimwin == 0) config.nc_atimwin = 100; if (config.nc_fhconfig.ncf_dwelltime == 0) config.nc_fhconfig.ncf_dwelltime = 200; if (rval == 0 && ic->ic_bsschan != IEEE80211_CHAN_ANYC) { int chan, chanflag; chan = ieee80211_chan2ieee(ic, ic->ic_bsschan); chanflag = config.nc_dsconfig > 2500000 ? IEEE80211_CHAN_2GHZ : IEEE80211_CHAN_5GHZ; if (chan != ieee80211_mhz2ieee(config.nc_dsconfig / 1000, 0)) { config.nc_dsconfig = ic->ic_bsschan->ic_freq * 1000; len = sizeof(config); config.nc_length = len; config.nc_fhconfig.ncf_length = sizeof(ndis_80211_config_fh); DPRINTF(("Setting channel to %ukHz\n", config.nc_dsconfig)); rval = ndis_set_info(sc, OID_802_11_CONFIGURATION, &config, &len); if (rval) device_printf(sc->ndis_dev, "couldn't change " "DS config to %ukHz: %d\n", config.nc_dsconfig, rval); } } else if (rval) device_printf(sc->ndis_dev, "couldn't retrieve " "channel info: %d\n", rval); /* Set the BSSID to our value so the driver doesn't associate */ len = IEEE80211_ADDR_LEN; bcopy(vap->iv_myaddr, bssid, len); DPRINTF(("Setting BSSID to %6D\n", (uint8_t *)&bssid, ":")); rval = ndis_set_info(sc, OID_802_11_BSSID, &bssid, &len); if (rval) device_printf(sc->ndis_dev, "setting BSSID failed: %d\n", rval); } static void ndis_auth_and_assoc(struct ndis_softc *sc, struct ieee80211vap *vap) { struct ieee80211_node *ni = vap->iv_bss; ndis_80211_ssid ssid; ndis_80211_macaddr bssid; ndis_80211_wep wep; int i, rval = 0, len, error; uint32_t arg; if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: NDIS not initialized\n", __func__)); return; } /* Initial setup */ ndis_setstate_80211(sc); /* Set network infrastructure mode. 
*/ len = sizeof(arg); if (vap->iv_opmode == IEEE80211_M_IBSS) arg = NDIS_80211_NET_INFRA_IBSS; else arg = NDIS_80211_NET_INFRA_BSS; rval = ndis_set_info(sc, OID_802_11_INFRASTRUCTURE_MODE, &arg, &len); if (rval) device_printf (sc->ndis_dev, "set infra failed: %d\n", rval); /* Set RTS threshold */ len = sizeof(arg); arg = vap->iv_rtsthreshold; ndis_set_info(sc, OID_802_11_RTS_THRESHOLD, &arg, &len); /* Set fragmentation threshold */ len = sizeof(arg); arg = vap->iv_fragthreshold; ndis_set_info(sc, OID_802_11_FRAGMENTATION_THRESHOLD, &arg, &len); /* Set WEP */ if (vap->iv_flags & IEEE80211_F_PRIVACY && !(vap->iv_flags & IEEE80211_F_WPA)) { int keys_set = 0; if (ni->ni_authmode == IEEE80211_AUTH_SHARED) { len = sizeof(arg); arg = NDIS_80211_AUTHMODE_SHARED; DPRINTF(("Setting shared auth\n")); ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); } for (i = 0; i < IEEE80211_WEP_NKID; i++) { if (vap->iv_nw_keys[i].wk_keylen) { if (vap->iv_nw_keys[i].wk_cipher->ic_cipher != IEEE80211_CIPHER_WEP) continue; bzero((char *)&wep, sizeof(wep)); wep.nw_keylen = vap->iv_nw_keys[i].wk_keylen; /* * 5, 13 and 16 are the only valid * key lengths. Anything in between * will be zero padded out to the * next highest boundary. */ if (vap->iv_nw_keys[i].wk_keylen < 5) wep.nw_keylen = 5; else if (vap->iv_nw_keys[i].wk_keylen > 5 && vap->iv_nw_keys[i].wk_keylen < 13) wep.nw_keylen = 13; else if (vap->iv_nw_keys[i].wk_keylen > 13 && vap->iv_nw_keys[i].wk_keylen < 16) wep.nw_keylen = 16; wep.nw_keyidx = i; wep.nw_length = (sizeof(uint32_t) * 3) + wep.nw_keylen; if (i == vap->iv_def_txkey) wep.nw_keyidx |= NDIS_80211_WEPKEY_TX; bcopy(vap->iv_nw_keys[i].wk_key, wep.nw_keydata, wep.nw_length); len = sizeof(wep); DPRINTF(("Setting WEP key %d\n", i)); rval = ndis_set_info(sc, OID_802_11_ADD_WEP, &wep, &len); if (rval) device_printf(sc->ndis_dev, "set wepkey failed: %d\n", rval); keys_set++; } } if (keys_set) { DPRINTF(("Setting WEP on\n")); arg = NDIS_80211_WEPSTAT_ENABLED; len = sizeof(arg); rval = ndis_set_info(sc, OID_802_11_WEP_STATUS, &arg, &len); if (rval) device_printf(sc->ndis_dev, "enable WEP failed: %d\n", rval); if (vap->iv_flags & IEEE80211_F_DROPUNENC) arg = NDIS_80211_PRIVFILT_8021XWEP; else arg = NDIS_80211_PRIVFILT_ACCEPTALL; len = sizeof(arg); ndis_set_info(sc, OID_802_11_PRIVACY_FILTER, &arg, &len); } } /* Set up WPA. */ if ((vap->iv_flags & IEEE80211_F_WPA) && vap->iv_appie_assocreq != NULL) { struct ieee80211_appie *ie = vap->iv_appie_assocreq; error = ndis_set_wpa(sc, ie->ie_data, ie->ie_len); if (error != 0) device_printf(sc->ndis_dev, "WPA setup failed\n"); } #ifdef notyet /* Set network type. */ arg = 0; switch (vap->iv_curmode) { case IEEE80211_MODE_11A: arg = NDIS_80211_NETTYPE_11OFDM5; break; case IEEE80211_MODE_11B: arg = NDIS_80211_NETTYPE_11DS; break; case IEEE80211_MODE_11G: arg = NDIS_80211_NETTYPE_11OFDM24; break; default: device_printf(sc->ndis_dev, "unknown mode: %d\n", vap->iv_curmode); } if (arg) { DPRINTF(("Setting network type to %d\n", arg)); len = sizeof(arg); rval = ndis_set_info(sc, OID_802_11_NETWORK_TYPE_IN_USE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "set nettype failed: %d\n", rval); } #endif /* * If the user selected a specific BSSID, try * to use that one. This is useful in the case where * there are several APs in range with the same network * name. To delete the BSSID, we use the broadcast * address as the BSSID. 
* Note that some drivers seem to allow setting a BSSID * in ad-hoc mode, which has the effect of forcing the * NIC to create an ad-hoc cell with a specific BSSID, * instead of a randomly chosen one. However, the net80211 * code makes the assumtion that the BSSID setting is invalid * when you're in ad-hoc mode, so we don't allow that here. */ len = IEEE80211_ADDR_LEN; if (vap->iv_flags & IEEE80211_F_DESBSSID && vap->iv_opmode != IEEE80211_M_IBSS) bcopy(ni->ni_bssid, bssid, len); else bcopy(ieee80211broadcastaddr, bssid, len); DPRINTF(("Setting BSSID to %6D\n", (uint8_t *)&bssid, ":")); rval = ndis_set_info(sc, OID_802_11_BSSID, &bssid, &len); if (rval) device_printf(sc->ndis_dev, "setting BSSID failed: %d\n", rval); /* Set SSID -- always do this last. */ #ifdef NDIS_DEBUG if (ndis_debug > 0) { printf("Setting ESSID to "); ieee80211_print_essid(ni->ni_essid, ni->ni_esslen); printf("\n"); } #endif len = sizeof(ssid); bzero((char *)&ssid, len); ssid.ns_ssidlen = ni->ni_esslen; if (ssid.ns_ssidlen == 0) { ssid.ns_ssidlen = 1; } else bcopy(ni->ni_essid, ssid.ns_ssid, ssid.ns_ssidlen); rval = ndis_set_info(sc, OID_802_11_SSID, &ssid, &len); if (rval) device_printf (sc->ndis_dev, "set ssid failed: %d\n", rval); return; } static int ndis_get_bssid_list(sc, bl) struct ndis_softc *sc; ndis_80211_bssid_list_ex **bl; { int len, error; len = sizeof(uint32_t) + (sizeof(ndis_wlan_bssid_ex) * 16); *bl = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO); if (*bl == NULL) return (ENOMEM); error = ndis_get_info(sc, OID_802_11_BSSID_LIST, *bl, &len); if (error == ENOSPC) { free(*bl, M_DEVBUF); *bl = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO); if (*bl == NULL) return (ENOMEM); error = ndis_get_info(sc, OID_802_11_BSSID_LIST, *bl, &len); } if (error) { DPRINTF(("%s: failed to read\n", __func__)); free(*bl, M_DEVBUF); return (error); } return (0); } static int ndis_get_assoc(struct ndis_softc *sc, ndis_wlan_bssid_ex **assoc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap; struct ieee80211_node *ni; ndis_80211_bssid_list_ex *bl; ndis_wlan_bssid_ex *bs; ndis_80211_macaddr bssid; int i, len, error; if (!sc->ndis_link) return (ENOENT); len = sizeof(bssid); error = ndis_get_info(sc, OID_802_11_BSSID, &bssid, &len); if (error) { device_printf(sc->ndis_dev, "failed to get bssid\n"); return (ENOENT); } vap = TAILQ_FIRST(&ic->ic_vaps); ni = vap->iv_bss; error = ndis_get_bssid_list(sc, &bl); if (error) return (error); bs = (ndis_wlan_bssid_ex *)&bl->nblx_bssid[0]; for (i = 0; i < bl->nblx_items; i++) { if (bcmp(bs->nwbx_macaddr, bssid, sizeof(bssid)) == 0) { *assoc = malloc(bs->nwbx_len, M_TEMP, M_NOWAIT); if (*assoc == NULL) { free(bl, M_TEMP); return (ENOMEM); } bcopy((char *)bs, (char *)*assoc, bs->nwbx_len); free(bl, M_TEMP); if (ic->ic_opmode == IEEE80211_M_STA) ni->ni_associd = 1 | 0xc000; /* fake associd */ return (0); } bs = (ndis_wlan_bssid_ex *)((char *)bs + bs->nwbx_len); } free(bl, M_TEMP); return (ENOENT); } static void ndis_getstate_80211(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_node *ni = vap->iv_bss; ndis_wlan_bssid_ex *bs; int rval, len, i = 0; int chanflag; uint32_t arg; if (!NDIS_INITIALIZED(sc)) return; if ((rval = ndis_get_assoc(sc, &bs)) != 0) return; /* We're associated, retrieve info on the current bssid. 
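*/

/*
 * Editor's note: illustrative sketch only.  ndis_get_bssid_list() above
 * issues the query with a guessed buffer, and when the driver answers "too
 * small" (ENOSPC) it reallocates using the length the driver wrote back and
 * retries once.  The helper below shows that grow-and-retry shape with a
 * stand-in query callback.
 */
#include <errno.h>
#include <stddef.h>
#include <stdlib.h>

typedef int (*query_fn)(void *dev, void *buf, size_t *len);

static int
query_grow(void *dev, query_fn query, void **bufp, size_t *lenp)
{
	void *buf, *nbuf;
	int error;

	buf = malloc(*lenp);
	if (buf == NULL)
		return (ENOMEM);
	error = query(dev, buf, lenp);
	if (error == ENOSPC) {
		nbuf = realloc(buf, *lenp);	/* *lenp updated by query() */
		if (nbuf == NULL) {
			free(buf);
			return (ENOMEM);
		}
		buf = nbuf;
		error = query(dev, buf, lenp);
	}
	if (error != 0) {
		free(buf);
		return (error);
	}
	*bufp = buf;
	return (0);
}

/*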
*/ ic->ic_curmode = ndis_nettype_mode(bs->nwbx_nettype); chanflag = ndis_nettype_chan(bs->nwbx_nettype); IEEE80211_ADDR_COPY(ni->ni_bssid, bs->nwbx_macaddr); /* Get SSID from current association info. */ bcopy(bs->nwbx_ssid.ns_ssid, ni->ni_essid, bs->nwbx_ssid.ns_ssidlen); ni->ni_esslen = bs->nwbx_ssid.ns_ssidlen; if (ic->ic_caps & IEEE80211_C_PMGT) { len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_POWER_MODE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get power mode failed: %d\n", rval); if (arg == NDIS_80211_POWERMODE_CAM) vap->iv_flags &= ~IEEE80211_F_PMGTON; else vap->iv_flags |= IEEE80211_F_PMGTON; } /* Get TX power */ if (ic->ic_caps & IEEE80211_C_TXPMGT) { len = sizeof(arg); ndis_get_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &len); for (i = 0; i < nitems(dBm2mW); i++) if (dBm2mW[i] >= arg) break; ic->ic_txpowlimit = i; } /* * Use the current association information to reflect * what channel we're on. */ ic->ic_curchan = ieee80211_find_channel(ic, bs->nwbx_config.nc_dsconfig / 1000, chanflag); if (ic->ic_curchan == NULL) ic->ic_curchan = &ic->ic_channels[0]; ni->ni_chan = ic->ic_curchan; ic->ic_bsschan = ic->ic_curchan; free(bs, M_TEMP); /* * Determine current authentication mode. */ len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get authmode status failed: %d\n", rval); else { vap->iv_flags &= ~IEEE80211_F_WPA; switch (arg) { case NDIS_80211_AUTHMODE_OPEN: ni->ni_authmode = IEEE80211_AUTH_OPEN; break; case NDIS_80211_AUTHMODE_SHARED: ni->ni_authmode = IEEE80211_AUTH_SHARED; break; case NDIS_80211_AUTHMODE_AUTO: ni->ni_authmode = IEEE80211_AUTH_AUTO; break; case NDIS_80211_AUTHMODE_WPA: case NDIS_80211_AUTHMODE_WPAPSK: case NDIS_80211_AUTHMODE_WPANONE: ni->ni_authmode = IEEE80211_AUTH_WPA; vap->iv_flags |= IEEE80211_F_WPA1; break; case NDIS_80211_AUTHMODE_WPA2: case NDIS_80211_AUTHMODE_WPA2PSK: ni->ni_authmode = IEEE80211_AUTH_WPA; vap->iv_flags |= IEEE80211_F_WPA2; break; default: ni->ni_authmode = IEEE80211_AUTH_NONE; break; } } len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_WEP_STATUS, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get wep status failed: %d\n", rval); if (arg == NDIS_80211_WEPSTAT_ENABLED) vap->iv_flags |= IEEE80211_F_PRIVACY|IEEE80211_F_DROPUNENC; else vap->iv_flags &= ~(IEEE80211_F_PRIVACY|IEEE80211_F_DROPUNENC); } static int ndis_ifioctl(ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct ndis_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; int i, error = 0; /*NDIS_LOCK(sc);*/ switch (command) { case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { if (sc->ndis_running && ifp->if_flags & IFF_PROMISC && !(sc->ndis_if_flags & IFF_PROMISC)) { sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; i = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &i); } else if (sc->ndis_running && !(ifp->if_flags & IFF_PROMISC) && sc->ndis_if_flags & IFF_PROMISC) { sc->ndis_filter &= ~NDIS_PACKET_TYPE_PROMISCUOUS; i = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &i); } else ndis_init(sc); } else { if (sc->ndis_running) ndis_stop(sc); } sc->ndis_if_flags = ifp->if_flags; error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: ndis_setmulti(sc); error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); break; case SIOCSIFCAP: ifp->if_capenable = ifr->ifr_reqcap; if 
(ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist = sc->ndis_hwassist; else ifp->if_hwassist = 0; ndis_set_offload(sc); break; default: error = ether_ioctl(ifp, command, data); break; } /*NDIS_UNLOCK(sc);*/ return(error); } static int ndis_80211ioctl(struct ieee80211com *ic, u_long cmd, void *data) { struct ndis_softc *sc = ic->ic_softc; struct ifreq *ifr = data; struct ndis_oid_data oid; struct ndis_evt evt; void *oidbuf = NULL; int error = 0; if ((error = priv_check(curthread, PRIV_DRIVER)) != 0) return (error); switch (cmd) { case SIOCGDRVSPEC: case SIOCSDRVSPEC: error = copyin(ifr_data_get_ptr(ifr), &oid, sizeof(oid)); if (error) break; oidbuf = malloc(oid.len, M_TEMP, M_WAITOK | M_ZERO); error = copyin((caddr_t)ifr_data_get_ptr(ifr) + sizeof(oid), oidbuf, oid.len); } if (error) { free(oidbuf, M_TEMP); return (error); } switch (cmd) { case SIOCGDRVSPEC: error = ndis_get_info(sc, oid.oid, oidbuf, &oid.len); break; case SIOCSDRVSPEC: error = ndis_set_info(sc, oid.oid, oidbuf, &oid.len); break; case SIOCGPRIVATE_0: NDIS_LOCK(sc); if (sc->ndis_evt[sc->ndis_evtcidx].ne_sts == 0) { error = ENOENT; NDIS_UNLOCK(sc); break; } error = copyin(ifr_data_get_ptr(ifr), &evt, sizeof(evt)); if (error) { NDIS_UNLOCK(sc); break; } if (evt.ne_len < sc->ndis_evt[sc->ndis_evtcidx].ne_len) { error = ENOSPC; NDIS_UNLOCK(sc); break; } error = copyout(&sc->ndis_evt[sc->ndis_evtcidx], ifr_data_get_ptr(ifr), sizeof(uint32_t) * 2); if (error) { NDIS_UNLOCK(sc); break; } if (sc->ndis_evt[sc->ndis_evtcidx].ne_len) { error = copyout(sc->ndis_evt[sc->ndis_evtcidx].ne_buf, (caddr_t)ifr_data_get_ptr(ifr) + (sizeof(uint32_t) * 2), sc->ndis_evt[sc->ndis_evtcidx].ne_len); if (error) { NDIS_UNLOCK(sc); break; } free(sc->ndis_evt[sc->ndis_evtcidx].ne_buf, M_TEMP); sc->ndis_evt[sc->ndis_evtcidx].ne_buf = NULL; } sc->ndis_evt[sc->ndis_evtcidx].ne_len = 0; sc->ndis_evt[sc->ndis_evtcidx].ne_sts = 0; NDIS_EVTINC(sc->ndis_evtcidx); NDIS_UNLOCK(sc); break; default: error = ENOTTY; break; } switch (cmd) { case SIOCGDRVSPEC: case SIOCSDRVSPEC: error = copyout(&oid, ifr_data_get_ptr(ifr), sizeof(oid)); if (error) break; error = copyout(oidbuf, (caddr_t)ifr_data_get_ptr(ifr) + sizeof(oid), oid.len); } free(oidbuf, M_TEMP); return (error); } int ndis_del_key(struct ieee80211vap *vap, const struct ieee80211_key *key) { struct ndis_softc *sc = vap->iv_ic->ic_softc; ndis_80211_key rkey; int len, error = 0; bzero((char *)&rkey, sizeof(rkey)); len = sizeof(rkey); rkey.nk_len = len; rkey.nk_keyidx = key->wk_keyix; bcopy(vap->iv_ifp->if_broadcastaddr, rkey.nk_bssid, IEEE80211_ADDR_LEN); error = ndis_set_info(sc, OID_802_11_REMOVE_KEY, &rkey, &len); if (error) return (0); return (1); } /* * In theory this could be called for any key, but we'll * only use it for WPA TKIP or AES keys. These need to be * set after initial authentication with the AP. 
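*/

/*
 * Editor's note: illustrative sketch only.  The SIOCGPRIVATE_0 handler above
 * consumes entries from the same fixed event array that ndis_linksts() fills:
 * a producer index and a consumer index both advance modulo the array size,
 * and a slot whose status is still set means the ring is full.  The structure
 * below shows that scheme in isolation; the names are stand-ins.
 */
#include <stdbool.h>
#include <stdint.h>

#define EVT_SLOTS	4

struct evt {
	uint32_t	sts;	/* zero means the slot is free */
	uint32_t	len;
};

struct evtring {
	struct evt	slot[EVT_SLOTS];
	unsigned	prod, cons;
};

static bool
evt_post(struct evtring *r, uint32_t sts, uint32_t len)
{
	struct evt *e = &r->slot[r->prod];

	if (e->sts != 0)
		return (false);		/* ring full, drop the event */
	e->sts = sts;
	e->len = len;
	r->prod = (r->prod + 1) % EVT_SLOTS;
	return (true);
}

static bool
evt_fetch(struct evtring *r, uint32_t *sts, uint32_t *len)
{
	struct evt *e = &r->slot[r->cons];

	if (e->sts == 0)
		return (false);		/* nothing pending */
	*sts = e->sts;
	*len = e->len;
	e->sts = 0;
	e->len = 0;
	r->cons = (r->cons + 1) % EVT_SLOTS;
	return (true);
}

/*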
*/ static int ndis_add_key(struct ieee80211vap *vap, const struct ieee80211_key *key) { struct ndis_softc *sc = vap->iv_ic->ic_softc; ndis_80211_key rkey; int len, error = 0; switch (key->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_TKIP: len = sizeof(ndis_80211_key); bzero((char *)&rkey, sizeof(rkey)); rkey.nk_len = len; rkey.nk_keylen = key->wk_keylen; if (key->wk_flags & IEEE80211_KEY_SWMIC) rkey.nk_keylen += 16; /* key index - gets weird in NDIS */ if (key->wk_keyix != IEEE80211_KEYIX_NONE) rkey.nk_keyidx = key->wk_keyix; else rkey.nk_keyidx = 0; if (key->wk_flags & IEEE80211_KEY_XMIT) rkey.nk_keyidx |= 1 << 31; if (key->wk_flags & IEEE80211_KEY_GROUP) { bcopy(ieee80211broadcastaddr, rkey.nk_bssid, IEEE80211_ADDR_LEN); } else { bcopy(vap->iv_bss->ni_bssid, rkey.nk_bssid, IEEE80211_ADDR_LEN); /* pairwise key */ rkey.nk_keyidx |= 1 << 30; } /* need to set bit 29 based on keyrsc */ rkey.nk_keyrsc = key->wk_keyrsc[0]; /* XXX need tid */ if (rkey.nk_keyrsc) rkey.nk_keyidx |= 1 << 29; if (key->wk_flags & IEEE80211_KEY_SWMIC) { bcopy(key->wk_key, rkey.nk_keydata, 16); bcopy(key->wk_key + 24, rkey.nk_keydata + 16, 8); bcopy(key->wk_key + 16, rkey.nk_keydata + 24, 8); } else bcopy(key->wk_key, rkey.nk_keydata, key->wk_keylen); error = ndis_set_info(sc, OID_802_11_ADD_KEY, &rkey, &len); break; case IEEE80211_CIPHER_WEP: error = 0; break; /* * I don't know how to set up keys for the AES * cipher yet. Is it the same as TKIP? */ case IEEE80211_CIPHER_AES_CCM: default: error = ENOTTY; break; } /* We need to return 1 for success, 0 for failure. */ if (error) return (0); return (1); } static void ndis_resettask(d, arg) device_object *d; void *arg; { struct ndis_softc *sc; sc = arg; ndis_reset_nic(sc); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void ndis_stop(struct ndis_softc *sc) { int i; callout_drain(&sc->ndis_stat_callout); NDIS_LOCK(sc); sc->ndis_tx_timer = 0; sc->ndis_link = 0; if (!sc->ndis_80211) sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->ndis_running = 0; NDIS_UNLOCK(sc); if (sc->ndis_iftype != PNPBus || (sc->ndis_iftype == PNPBus && !(sc->ndisusb_status & NDISUSB_STATUS_DETACH) && ndisusb_halt != 0)) ndis_halt_nic(sc); NDIS_LOCK(sc); for (i = 0; i < NDIS_EVENTS; i++) { if (sc->ndis_evt[i].ne_sts && sc->ndis_evt[i].ne_buf != NULL) { free(sc->ndis_evt[i].ne_buf, M_TEMP); sc->ndis_evt[i].ne_buf = NULL; } sc->ndis_evt[i].ne_sts = 0; sc->ndis_evt[i].ne_len = 0; } sc->ndis_evtcidx = 0; sc->ndis_evtpidx = 0; NDIS_UNLOCK(sc); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. 
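*/

/*
 * Editor's note: illustrative sketch only.  ndis_add_key() above folds key
 * attributes into the high bits of the 32-bit key index: bit 31 marks the
 * transmit key, bit 30 a pairwise (as opposed to group) key, and bit 29 says
 * a receive sequence counter accompanies the key.  The helper below just
 * packs those bits; the names and the low-byte slot mask are stand-ins.
 */
#include <stdbool.h>
#include <stdint.h>

static uint32_t
pack_keyidx(uint32_t idx, bool txkey, bool pairwise, bool have_rsc)
{
	uint32_t v = idx & 0xff;	/* low bits carry the key slot */

	if (txkey)
		v |= 1U << 31;
	if (pairwise)
		v |= 1U << 30;
	if (have_rsc)
		v |= 1U << 29;
	return (v);
}

/*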
*/ void ndis_shutdown(dev) device_t dev; { struct ndis_softc *sc; sc = device_get_softc(dev); ndis_stop(sc); } static int ndis_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct ndis_vap *nvp = NDIS_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct ndis_softc *sc = ic->ic_softc; enum ieee80211_state ostate; DPRINTF(("%s: %s -> %s\n", __func__, ieee80211_state_name[vap->iv_state], ieee80211_state_name[nstate])); ostate = vap->iv_state; vap->iv_state = nstate; switch (nstate) { /* pass on to net80211 */ case IEEE80211_S_INIT: case IEEE80211_S_SCAN: return nvp->newstate(vap, nstate, arg); case IEEE80211_S_ASSOC: if (ostate != IEEE80211_S_AUTH) { IEEE80211_UNLOCK(ic); ndis_auth_and_assoc(sc, vap); IEEE80211_LOCK(ic); } break; case IEEE80211_S_AUTH: IEEE80211_UNLOCK(ic); ndis_auth_and_assoc(sc, vap); if (vap->iv_state == IEEE80211_S_AUTH) /* XXX */ ieee80211_new_state(vap, IEEE80211_S_ASSOC, 0); IEEE80211_LOCK(ic); break; default: break; } return (0); } static void ndis_scan(void *arg) { struct ieee80211vap *vap = arg; ieee80211_scan_done(vap); } static void ndis_scan_results(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); ndis_80211_bssid_list_ex *bl; ndis_wlan_bssid_ex *wb; struct ieee80211_scanparams sp; struct ieee80211_frame wh; struct ieee80211_channel *saved_chan; int i, j; int rssi, noise, freq, chanflag; uint8_t ssid[2+IEEE80211_NWID_LEN]; uint8_t rates[2+IEEE80211_RATE_MAXSIZE]; uint8_t *frm, *efrm; saved_chan = ic->ic_curchan; noise = -96; if (ndis_get_bssid_list(sc, &bl)) return; DPRINTF(("%s: %d results\n", __func__, bl->nblx_items)); wb = &bl->nblx_bssid[0]; for (i = 0; i < bl->nblx_items; i++) { memset(&sp, 0, sizeof(sp)); memcpy(wh.i_addr2, wb->nwbx_macaddr, sizeof(wh.i_addr2)); memcpy(wh.i_addr3, wb->nwbx_macaddr, sizeof(wh.i_addr3)); rssi = 100 * (wb->nwbx_rssi - noise) / (-32 - noise); rssi = max(0, min(rssi, 100)); /* limit 0 <= rssi <= 100 */ if (wb->nwbx_privacy) sp.capinfo |= IEEE80211_CAPINFO_PRIVACY; sp.bintval = wb->nwbx_config.nc_beaconperiod; switch (wb->nwbx_netinfra) { case NDIS_80211_NET_INFRA_IBSS: sp.capinfo |= IEEE80211_CAPINFO_IBSS; break; case NDIS_80211_NET_INFRA_BSS: sp.capinfo |= IEEE80211_CAPINFO_ESS; break; } sp.rates = &rates[0]; for (j = 0; j < IEEE80211_RATE_MAXSIZE; j++) { /* XXX - check units */ if (wb->nwbx_supportedrates[j] == 0) break; rates[2 + j] = wb->nwbx_supportedrates[j] & 0x7f; } rates[1] = j; sp.ssid = (uint8_t *)&ssid[0]; memcpy(sp.ssid + 2, &wb->nwbx_ssid.ns_ssid, wb->nwbx_ssid.ns_ssidlen); sp.ssid[1] = wb->nwbx_ssid.ns_ssidlen; chanflag = ndis_nettype_chan(wb->nwbx_nettype); freq = wb->nwbx_config.nc_dsconfig / 1000; sp.chan = sp.bchan = ieee80211_mhz2ieee(freq, chanflag); /* Hack ic->ic_curchan to be in sync with the scan result */ ic->ic_curchan = ieee80211_find_channel(ic, freq, chanflag); if (ic->ic_curchan == NULL) ic->ic_curchan = &ic->ic_channels[0]; /* Process extended info from AP */ if (wb->nwbx_len > sizeof(ndis_wlan_bssid)) { frm = (uint8_t *)&wb->nwbx_ies; efrm = frm + wb->nwbx_ielen; if (efrm - frm < 12) goto done; sp.tstamp = frm; frm += 8; sp.bintval = le16toh(*(uint16_t *)frm); frm += 2; sp.capinfo = le16toh(*(uint16_t *)frm); frm += 2; sp.ies = frm; sp.ies_len = efrm - frm; } done: DPRINTF(("scan: bssid %s chan %dMHz (%d/%d) rssi %d\n", ether_sprintf(wb->nwbx_macaddr), freq, sp.bchan, chanflag, rssi)); ieee80211_add_scan(vap, ic->ic_curchan, &sp, &wh, 0, rssi, noise); wb = (ndis_wlan_bssid_ex *)((char *)wb + 
wb->nwbx_len); } free(bl, M_DEVBUF); /* Restore the channel after messing with it */ ic->ic_curchan = saved_chan; } static void ndis_scan_start(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; struct ieee80211vap *vap; struct ieee80211_scan_state *ss; ndis_80211_ssid ssid; int error, len; ss = ic->ic_scan; vap = TAILQ_FIRST(&ic->ic_vaps); if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: scan aborted\n", __func__)); ieee80211_cancel_scan(vap); return; } len = sizeof(ssid); bzero((char *)&ssid, len); if (ss->ss_nssid == 0) ssid.ns_ssidlen = 1; else { /* Perform a directed scan */ ssid.ns_ssidlen = ss->ss_ssid[0].len; bcopy(ss->ss_ssid[0].ssid, ssid.ns_ssid, ssid.ns_ssidlen); } error = ndis_set_info(sc, OID_802_11_SSID, &ssid, &len); if (error) DPRINTF(("%s: set ESSID failed\n", __func__)); len = 0; error = ndis_set_info(sc, OID_802_11_BSSID_LIST_SCAN, NULL, &len); if (error) { DPRINTF(("%s: scan command failed\n", __func__)); ieee80211_cancel_scan(vap); return; } /* Set a timer to collect the results */ callout_reset(&sc->ndis_scan_callout, hz * 3, ndis_scan, vap); } static void ndis_set_channel(struct ieee80211com *ic) { /* ignore */ } static void ndis_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { /* ignore */ } static void ndis_scan_mindwell(struct ieee80211_scan_state *ss) { /* NB: don't try to abort scan; wait for firmware to finish */ } static void ndis_scan_end(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; ndis_scan_results(sc); } diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c index 98f06af5230c..45aa824eae9b 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -1,4709 +1,4710 @@ /*- * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include "opt_kern_tls.h" #include "en.h" #include #include #include #include #ifndef ETH_DRIVER_VERSION #define ETH_DRIVER_VERSION "3.5.2" #endif #define DRIVER_RELDATE "September 2019" static const char mlx5e_version[] = "mlx5en: Mellanox Ethernet driver " ETH_DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs); struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; }; struct media { u32 subtype; u64 baudrate; }; static const struct media mlx5e_mode_table[MLX5E_LINK_SPEEDS_NUMBER][MLX5E_LINK_MODES_NUMBER] = { [MLX5E_1000BASE_CX_SGMII][MLX5E_SGMII] = { .subtype = IFM_1000_CX_SGMII, .baudrate = IF_Mbps(1000ULL), }, [MLX5E_1000BASE_KX][MLX5E_KX] = { .subtype = IFM_1000_KX, .baudrate = IF_Mbps(1000ULL), }, [MLX5E_10GBASE_CX4][MLX5E_CX4] = { .subtype = IFM_10G_CX4, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_KX4][MLX5E_KX4] = { .subtype = IFM_10G_KX4, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_KR][MLX5E_KR] = { .subtype = IFM_10G_KR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_20GBASE_KR2][MLX5E_KR2] = { .subtype = IFM_20G_KR2, .baudrate = IF_Gbps(20ULL), }, [MLX5E_40GBASE_CR4][MLX5E_CR4] = { .subtype = IFM_40G_CR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_KR4][MLX5E_KR4] = { .subtype = IFM_40G_KR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_56GBASE_R4][MLX5E_R] = { .subtype = IFM_56G_R4, .baudrate = IF_Gbps(56ULL), }, [MLX5E_10GBASE_CR][MLX5E_CR1] = { .subtype = IFM_10G_CR1, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_SR][MLX5E_SR] = { .subtype = IFM_10G_SR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_ER_LR][MLX5E_ER] = { .subtype = IFM_10G_ER, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_ER_LR][MLX5E_LR] = { .subtype = IFM_10G_LR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_40GBASE_SR4][MLX5E_SR4] = { .subtype = IFM_40G_SR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_LR4_ER4][MLX5E_LR4] = { .subtype = IFM_40G_LR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_LR4_ER4][MLX5E_ER4] = { .subtype = IFM_40G_ER4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_100GBASE_CR4][MLX5E_CR4] = { .subtype = IFM_100G_CR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GBASE_SR4][MLX5E_SR4] = { .subtype = IFM_100G_SR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GBASE_KR4][MLX5E_KR4] = { .subtype = IFM_100G_KR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GBASE_LR4][MLX5E_LR4] = { .subtype = IFM_100G_LR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100BASE_TX][MLX5E_TX] = { .subtype = IFM_100_TX, .baudrate = IF_Mbps(100ULL), }, [MLX5E_1000BASE_T][MLX5E_T] = { .subtype = IFM_1000_T, .baudrate = IF_Mbps(1000ULL), }, [MLX5E_10GBASE_T][MLX5E_T] = { .subtype = IFM_10G_T, .baudrate = IF_Gbps(10ULL), }, [MLX5E_25GBASE_CR][MLX5E_CR] = { .subtype = IFM_25G_CR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GBASE_KR][MLX5E_KR] = { .subtype = IFM_25G_KR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GBASE_SR][MLX5E_SR] = { .subtype = IFM_25G_SR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_50GBASE_CR2][MLX5E_CR2] = { .subtype = IFM_50G_CR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GBASE_KR2][MLX5E_KR2] = { .subtype = IFM_50G_KR2, .baudrate = IF_Gbps(50ULL), }, }; static const struct media mlx5e_ext_mode_table[MLX5E_EXT_LINK_SPEEDS_NUMBER][MLX5E_LINK_MODES_NUMBER] = { [MLX5E_SGMII_100M][MLX5E_SGMII] = { .subtype = IFM_100_SGMII, .baudrate = IF_Mbps(100), }, [MLX5E_1000BASE_X_SGMII][MLX5E_KX] = { .subtype = IFM_1000_KX, .baudrate = IF_Mbps(1000), }, [MLX5E_1000BASE_X_SGMII][MLX5E_CX_SGMII] = { 
.subtype = IFM_1000_CX_SGMII, .baudrate = IF_Mbps(1000), }, [MLX5E_1000BASE_X_SGMII][MLX5E_CX] = { .subtype = IFM_1000_CX, .baudrate = IF_Mbps(1000), }, [MLX5E_1000BASE_X_SGMII][MLX5E_LX] = { .subtype = IFM_1000_LX, .baudrate = IF_Mbps(1000), }, [MLX5E_1000BASE_X_SGMII][MLX5E_SX] = { .subtype = IFM_1000_SX, .baudrate = IF_Mbps(1000), }, [MLX5E_1000BASE_X_SGMII][MLX5E_T] = { .subtype = IFM_1000_T, .baudrate = IF_Mbps(1000), }, [MLX5E_5GBASE_R][MLX5E_T] = { .subtype = IFM_5000_T, .baudrate = IF_Mbps(5000), }, [MLX5E_5GBASE_R][MLX5E_KR] = { .subtype = IFM_5000_KR, .baudrate = IF_Mbps(5000), }, [MLX5E_5GBASE_R][MLX5E_KR1] = { .subtype = IFM_5000_KR1, .baudrate = IF_Mbps(5000), }, [MLX5E_5GBASE_R][MLX5E_KR_S] = { .subtype = IFM_5000_KR_S, .baudrate = IF_Mbps(5000), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_ER] = { .subtype = IFM_10G_ER, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_KR] = { .subtype = IFM_10G_KR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_LR] = { .subtype = IFM_10G_LR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_SR] = { .subtype = IFM_10G_SR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_T] = { .subtype = IFM_10G_T, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_AOC] = { .subtype = IFM_10G_AOC, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_CR1] = { .subtype = IFM_10G_CR1, .baudrate = IF_Gbps(10ULL), }, [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_CR4] = { .subtype = IFM_40G_CR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_KR4] = { .subtype = IFM_40G_KR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_LR4] = { .subtype = IFM_40G_LR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_SR4] = { .subtype = IFM_40G_SR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_ER4] = { .subtype = IFM_40G_ER4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CR] = { .subtype = IFM_25G_CR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_KR] = { .subtype = IFM_25G_KR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_SR] = { .subtype = IFM_25G_SR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_ACC] = { .subtype = IFM_25G_ACC, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_AOC] = { .subtype = IFM_25G_AOC, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CR1] = { .subtype = IFM_25G_CR1, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CR_S] = { .subtype = IFM_25G_CR_S, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_KR1] = { .subtype = IFM_5000_KR1, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_KR_S] = { .subtype = IFM_25G_KR_S, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_LR] = { .subtype = IFM_25G_LR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_T] = { .subtype = IFM_25G_T, .baudrate = IF_Gbps(25ULL), }, [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_CR2] = { .subtype = IFM_50G_CR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_KR2] = { .subtype = IFM_50G_KR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_SR2] = { .subtype = IFM_50G_SR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_LR2] = { .subtype = IFM_50G_LR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_LR] = { 
.subtype = IFM_50G_LR, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_SR] = { .subtype = IFM_50G_SR, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_CP] = { .subtype = IFM_50G_CP, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_FR] = { .subtype = IFM_50G_FR, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_KR_PAM4] = { .subtype = IFM_50G_KR_PAM4, .baudrate = IF_Gbps(50ULL), }, [MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_CR4] = { .subtype = IFM_100G_CR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_KR4] = { .subtype = IFM_100G_KR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_LR4] = { .subtype = IFM_100G_LR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_SR4] = { .subtype = IFM_100G_SR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_SR2] = { .subtype = IFM_100G_SR2, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_CP2] = { .subtype = IFM_100G_CP2, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_KR2_PAM4] = { .subtype = IFM_100G_KR2_PAM4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_DR4] = { .subtype = IFM_200G_DR4, .baudrate = IF_Gbps(200ULL), }, [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_LR4] = { .subtype = IFM_200G_LR4, .baudrate = IF_Gbps(200ULL), }, [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_SR4] = { .subtype = IFM_200G_SR4, .baudrate = IF_Gbps(200ULL), }, [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_FR4] = { .subtype = IFM_200G_FR4, .baudrate = IF_Gbps(200ULL), }, [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_CR4_PAM4] = { .subtype = IFM_200G_CR4_PAM4, .baudrate = IF_Gbps(200ULL), }, [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_KR4_PAM4] = { .subtype = IFM_200G_KR4_PAM4, .baudrate = IF_Gbps(200ULL), }, }; DEBUGNET_DEFINE(mlx5_en); MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet"); static void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(ptys_reg)]; u32 eth_proto_oper; int error; u8 port_state; u8 is_er_type; u8 i, j; bool ext; struct media media_entry = {}; port_state = mlx5_query_vport_state(mdev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); if (port_state == VPORT_STATE_UP) { priv->media_status_last |= IFM_ACTIVE; } else { priv->media_status_last &= ~IFM_ACTIVE; priv->media_active_last = IFM_ETHER; if_link_state_change(priv->ifp, LINK_STATE_DOWN); return; } error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); if (error) { priv->media_active_last = IFM_ETHER; priv->ifp->if_baudrate = 1; mlx5_en_err(priv->ifp, "query port ptys failed: 0x%x\n", error); return; } ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); i = ilog2(eth_proto_oper); for (j = 0; j != MLX5E_LINK_MODES_NUMBER; j++) { media_entry = ext ? 
mlx5e_ext_mode_table[i][j] : mlx5e_mode_table[i][j]; if (media_entry.baudrate != 0) break; } if (media_entry.subtype == 0) { mlx5_en_err(priv->ifp, "Could not find operational media subtype\n"); return; } switch (media_entry.subtype) { case IFM_10G_ER: error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type); if (error != 0) { mlx5_en_err(priv->ifp, "query port pddr failed: %d\n", error); } if (error != 0 || is_er_type == 0) media_entry.subtype = IFM_10G_LR; break; case IFM_40G_LR4: error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type); if (error != 0) { mlx5_en_err(priv->ifp, "query port pddr failed: %d\n", error); } if (error == 0 && is_er_type != 0) media_entry.subtype = IFM_40G_ER4; break; } priv->media_active_last = media_entry.subtype | IFM_ETHER | IFM_FDX; priv->ifp->if_baudrate = media_entry.baudrate; if_link_state_change(priv->ifp, LINK_STATE_UP); } static void mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr) { struct mlx5e_priv *priv = dev->if_softc; ifmr->ifm_status = priv->media_status_last; ifmr->ifm_active = priv->media_active_last | (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) | (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0); } static u32 mlx5e_find_link_mode(u32 subtype, bool ext) { u32 i; u32 j; u32 link_mode = 0; u32 speeds_num = 0; struct media media_entry = {}; switch (subtype) { case IFM_10G_LR: subtype = IFM_10G_ER; break; case IFM_40G_ER4: subtype = IFM_40G_LR4; break; } speeds_num = ext ? MLX5E_EXT_LINK_SPEEDS_NUMBER : MLX5E_LINK_SPEEDS_NUMBER; for (i = 0; i != speeds_num; i++) { for (j = 0; j < MLX5E_LINK_MODES_NUMBER ; ++j) { media_entry = ext ? mlx5e_ext_mode_table[i][j] : mlx5e_mode_table[i][j]; if (media_entry.baudrate == 0) continue; if (media_entry.subtype == subtype) { link_mode |= MLX5E_PROT_MASK(i); } } } return (link_mode); } static int mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv) { return (mlx5_set_port_pause_and_pfc(priv->mdev, 1, priv->params.rx_pauseframe_control, priv->params.tx_pauseframe_control, priv->params.rx_priority_flow_control, priv->params.tx_priority_flow_control)); } static int mlx5e_set_port_pfc(struct mlx5e_priv *priv) { int error; if (priv->gone != 0) { error = -ENXIO; } else if (priv->params.rx_pauseframe_control || priv->params.tx_pauseframe_control) { mlx5_en_err(priv->ifp, "Global pauseframes must be disabled before enabling PFC.\n"); error = -EINVAL; } else { error = mlx5e_set_port_pause_and_pfc(priv); } return (error); } static int mlx5e_media_change(struct ifnet *dev) { struct mlx5e_priv *priv = dev->if_softc; struct mlx5_core_dev *mdev = priv->mdev; u32 eth_proto_cap; u32 link_mode; u32 out[MLX5_ST_SZ_DW(ptys_reg)]; int was_opened; int locked; int error; bool ext; locked = PRIV_LOCKED(priv); if (!locked) PRIV_LOCK(priv); if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) { error = EINVAL; goto done; } error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); if (error != 0) { mlx5_en_err(dev, "Query port media capability failed\n"); goto done; } ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media), ext); /* query supported capabilities */ eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_capability); /* check for autoselect */ if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) { link_mode = eth_proto_cap; if (link_mode == 0) { mlx5_en_err(dev, "Port media capability is zero\n"); error = EINVAL; goto done; } } else { link_mode = link_mode & eth_proto_cap; if (link_mode == 0) { 
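/* the requested fixed media is not advertised by the port */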
mlx5_en_err(dev, "Not supported link mode requested\n"); error = EINVAL; goto done; } } if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) { /* check if PFC is enabled */ if (priv->params.rx_priority_flow_control || priv->params.tx_priority_flow_control) { mlx5_en_err(dev, "PFC must be disabled before enabling global pauseframes.\n"); error = EINVAL; goto done; } } /* update pauseframe control bits */ priv->params.rx_pauseframe_control = (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0; priv->params.tx_pauseframe_control = (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0; /* check if device is opened */ was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); /* reconfigure the hardware */ mlx5_set_port_status(mdev, MLX5_PORT_DOWN); mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN, ext); error = -mlx5e_set_port_pause_and_pfc(priv); if (was_opened) mlx5_set_port_status(mdev, MLX5_PORT_UP); done: if (!locked) PRIV_UNLOCK(priv); return (error); } static void mlx5e_update_carrier_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, update_carrier_work); PRIV_LOCK(priv); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_update_carrier(priv); PRIV_UNLOCK(priv); } #define MLX5E_PCIE_PERF_GET_64(a,b,c,d,e,f) \ s_debug->c = MLX5_GET64(mpcnt_reg, out, counter_set.f.c); #define MLX5E_PCIE_PERF_GET_32(a,b,c,d,e,f) \ s_debug->c = MLX5_GET(mpcnt_reg, out, counter_set.f.c); static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug; const unsigned sz = MLX5_ST_SZ_BYTES(mpcnt_reg); void *out; void *in; int err; /* allocate firmware request structures */ in = mlx5_vzalloc(sz); out = mlx5_vzalloc(sz); if (in == NULL || out == NULL) goto free_out; MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); if (err != 0) goto free_out; MLX5E_PCIE_PERFORMANCE_COUNTERS_64(MLX5E_PCIE_PERF_GET_64) MLX5E_PCIE_PERFORMANCE_COUNTERS_32(MLX5E_PCIE_PERF_GET_32) MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP); err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); if (err != 0) goto free_out; MLX5E_PCIE_TIMERS_AND_STATES_COUNTERS_32(MLX5E_PCIE_PERF_GET_32) MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_LANE_COUNTERS_GROUP); err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); if (err != 0) goto free_out; MLX5E_PCIE_LANE_COUNTERS_32(MLX5E_PCIE_PERF_GET_32) free_out: /* free firmware request structures */ kvfree(in); kvfree(out); } /* * This function reads the physical port counters from the firmware * using a pre-defined layout defined by various MLX5E_PPORT_XXX() * macros. The output is converted from big-endian 64-bit values into * host endian ones and stored in the "priv->stats.pport" structure. 
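 * The same request/response buffers are reused for every counter group;
 * only the "grp" field (and "prio_tc" for the per-priority group) in the
 * request is changed between the mlx5_core_access_reg() calls.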
*/ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_pport_stats *s = &priv->stats.pport; struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug; u32 *in; u32 *out; const u64 *ptr; unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg); unsigned x; unsigned y; unsigned z; /* allocate firmware request structures */ in = mlx5_vzalloc(sz); out = mlx5_vzalloc(sz); if (in == NULL || out == NULL) goto free_out; /* * Get pointer to the 64-bit counter set which is located at a * fixed offset in the output firmware request structure: */ ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set); MLX5_SET(ppcnt_reg, in, local_port, 1); /* read IEEE802_3 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++) s->arg[y] = be64toh(ptr[x]); /* read RFC2819 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++) s->arg[y] = be64toh(ptr[x]); for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM + MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read RFC2863 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read physical layer stats counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read Extended Ethernet counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read Extended Statistical Group */ if (MLX5_CAP_GEN(mdev, pcam_reg) && MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) && MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) { /* read Extended Statistical counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_STATISTICAL_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); } /* read PCIE counters */ mlx5e_update_pcie_counters(priv); /* read per-priority counters */ MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); /* iterate all the priorities */ for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) { MLX5_SET(ppcnt_reg, in, prio_tc, z); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); /* read per priority stats counter group using predefined counter layout */ for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM / MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++) s->arg[y] = be64toh(ptr[x]); } free_out: /* free firmware request structures */ kvfree(in); kvfree(out); } static void 
mlx5e_grp_vnic_env_update_stats(struct mlx5e_priv *priv) { u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {}; u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) return; MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); MLX5_SET(query_vnic_env_in, in, op_mod, 0); MLX5_SET(query_vnic_env_in, in, other_vport, 0); if (mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)) != 0) return; priv->stats.vport.rx_steer_missed_packets = MLX5_GET64(query_vnic_env_out, out, vport_env.nic_receive_steering_discard); } /* * This function is called regularly to collect all statistics * counters from the firmware. The values can be viewed through the * sysctl interface. Execution is serialized using the priv's global * configuration lock. */ static void mlx5e_update_stats_locked(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_vport_stats *s = &priv->stats.vport; struct mlx5e_sq_stats *sq_stats; struct buf_ring *sq_br; #if (__FreeBSD_version < 1100000) struct ifnet *ifp = priv->ifp; #endif u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); u64 tso_packets = 0; u64 tso_bytes = 0; u64 tx_queue_dropped = 0; u64 tx_defragged = 0; u64 tx_offload_none = 0; u64 lro_packets = 0; u64 lro_bytes = 0; u64 sw_lro_queued = 0; u64 sw_lro_flushed = 0; u64 rx_csum_none = 0; u64 rx_wqe_err = 0; u64 rx_packets = 0; u64 rx_bytes = 0; u32 rx_out_of_buffer = 0; int error; int i; int j; out = mlx5_vzalloc(outlen); if (out == NULL) goto free_out; /* Collect firts the SW counters and then HW for consistency */ for (i = 0; i < priv->params.num_channels; i++) { struct mlx5e_channel *pch = priv->channel + i; struct mlx5e_rq *rq = &pch->rq; struct mlx5e_rq_stats *rq_stats = &pch->rq.stats; /* collect stats from LRO */ rq_stats->sw_lro_queued = rq->lro.lro_queued; rq_stats->sw_lro_flushed = rq->lro.lro_flushed; sw_lro_queued += rq_stats->sw_lro_queued; sw_lro_flushed += rq_stats->sw_lro_flushed; lro_packets += rq_stats->lro_packets; lro_bytes += rq_stats->lro_bytes; rx_csum_none += rq_stats->csum_none; rx_wqe_err += rq_stats->wqe_err; rx_packets += rq_stats->packets; rx_bytes += rq_stats->bytes; for (j = 0; j < priv->num_tc; j++) { sq_stats = &pch->sq[j].stats; sq_br = pch->sq[j].br; tso_packets += sq_stats->tso_packets; tso_bytes += sq_stats->tso_bytes; tx_queue_dropped += sq_stats->dropped; if (sq_br != NULL) tx_queue_dropped += sq_br->br_drops; tx_defragged += sq_stats->defragged; tx_offload_none += sq_stats->csum_offload_none; } } /* update counters */ s->tso_packets = tso_packets; s->tso_bytes = tso_bytes; s->tx_queue_dropped = tx_queue_dropped; s->tx_defragged = tx_defragged; s->lro_packets = lro_packets; s->lro_bytes = lro_bytes; s->sw_lro_queued = sw_lro_queued; s->sw_lro_flushed = sw_lro_flushed; s->rx_csum_none = rx_csum_none; s->rx_wqe_err = rx_wqe_err; s->rx_packets = rx_packets; s->rx_bytes = rx_bytes; mlx5e_grp_vnic_env_update_stats(priv); /* HW counters */ memset(in, 0, sizeof(in)); MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, other_vport, 0); memset(out, 0, outlen); /* get number of out-of-buffer drops first */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0 && mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id, &rx_out_of_buffer) == 0) { s->rx_out_of_buffer = rx_out_of_buffer; } /* get port statistics */ if 
(mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen) == 0) { #define MLX5_GET_CTR(out, x) \ MLX5_GET64(query_vport_counter_out, out, x) s->rx_error_packets = MLX5_GET_CTR(out, received_errors.packets); s->rx_error_bytes = MLX5_GET_CTR(out, received_errors.octets); s->tx_error_packets = MLX5_GET_CTR(out, transmit_errors.packets); s->tx_error_bytes = MLX5_GET_CTR(out, transmit_errors.octets); s->rx_unicast_packets = MLX5_GET_CTR(out, received_eth_unicast.packets); s->rx_unicast_bytes = MLX5_GET_CTR(out, received_eth_unicast.octets); s->tx_unicast_packets = MLX5_GET_CTR(out, transmitted_eth_unicast.packets); s->tx_unicast_bytes = MLX5_GET_CTR(out, transmitted_eth_unicast.octets); s->rx_multicast_packets = MLX5_GET_CTR(out, received_eth_multicast.packets); s->rx_multicast_bytes = MLX5_GET_CTR(out, received_eth_multicast.octets); s->tx_multicast_packets = MLX5_GET_CTR(out, transmitted_eth_multicast.packets); s->tx_multicast_bytes = MLX5_GET_CTR(out, transmitted_eth_multicast.octets); s->rx_broadcast_packets = MLX5_GET_CTR(out, received_eth_broadcast.packets); s->rx_broadcast_bytes = MLX5_GET_CTR(out, received_eth_broadcast.octets); s->tx_broadcast_packets = MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); s->tx_broadcast_bytes = MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); s->tx_packets = s->tx_unicast_packets + s->tx_multicast_packets + s->tx_broadcast_packets; s->tx_bytes = s->tx_unicast_bytes + s->tx_multicast_bytes + s->tx_broadcast_bytes; /* Update calculated offload counters */ s->tx_csum_offload = s->tx_packets - tx_offload_none; s->rx_csum_good = s->rx_packets - s->rx_csum_none; } /* Get physical port counters */ mlx5e_update_pport_counters(priv); s->tx_jumbo_packets = priv->stats.port_stats_debug.tx_stat_p1519to2047octets + priv->stats.port_stats_debug.tx_stat_p2048to4095octets + priv->stats.port_stats_debug.tx_stat_p4096to8191octets + priv->stats.port_stats_debug.tx_stat_p8192to10239octets; #if (__FreeBSD_version < 1100000) /* no get_counters interface in fbsd 10 */ ifp->if_ipackets = s->rx_packets; ifp->if_ierrors = priv->stats.pport.in_range_len_errors + priv->stats.pport.out_of_range_len + priv->stats.pport.too_long_errors + priv->stats.pport.check_seq_err + priv->stats.pport.alignment_err; ifp->if_iqdrops = s->rx_out_of_buffer; ifp->if_opackets = s->tx_packets; ifp->if_oerrors = priv->stats.port_stats_debug.out_discards; ifp->if_snd.ifq_drops = s->tx_queue_dropped; ifp->if_ibytes = s->rx_bytes; ifp->if_obytes = s->tx_bytes; ifp->if_collisions = priv->stats.pport.collisions; #endif free_out: kvfree(out); /* Update diagnostics, if any */ if (priv->params_ethtool.diag_pci_enable || priv->params_ethtool.diag_general_enable) { error = mlx5_core_get_diagnostics_full(mdev, priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL, priv->params_ethtool.diag_general_enable ? 
&priv->params_general : NULL);
		if (error != 0)
			mlx5_en_err(priv->ifp,
			    "Failed reading diagnostics: %d\n", error);
	}

	/* Update FEC, if any */
	error = mlx5e_fec_update(priv);
	if (error != 0 && error != EOPNOTSUPP) {
		mlx5_en_err(priv->ifp,
		    "Updating FEC failed: %d\n", error);
	}
}

static void
mlx5e_update_stats_work(struct work_struct *work)
{
	struct mlx5e_priv *priv;

	priv = container_of(work, struct mlx5e_priv, update_stats_work);
	PRIV_LOCK(priv);
	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0 &&
	    !test_bit(MLX5_INTERFACE_STATE_TEARDOWN, &priv->mdev->intf_state))
		mlx5e_update_stats_locked(priv);
	PRIV_UNLOCK(priv);
}

static void
mlx5e_update_stats(void *arg)
{
	struct mlx5e_priv *priv = arg;

	queue_work(priv->wq, &priv->update_stats_work);

	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
}

static void
mlx5e_async_event_sub(struct mlx5e_priv *priv,
    enum mlx5_dev_event event)
{
	switch (event) {
	case MLX5_DEV_EVENT_PORT_UP:
	case MLX5_DEV_EVENT_PORT_DOWN:
		queue_work(priv->wq, &priv->update_carrier_work);
		break;

	default:
		break;
	}
}

static void
mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
    enum mlx5_dev_event event, unsigned long param)
{
	struct mlx5e_priv *priv = vpriv;

	mtx_lock(&priv->async_events_mtx);
	if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
		mlx5e_async_event_sub(priv, event);
	mtx_unlock(&priv->async_events_mtx);
}

static void
mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
}

static void
mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
	mtx_lock(&priv->async_events_mtx);
	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
	mtx_unlock(&priv->async_events_mtx);
}

static void mlx5e_calibration_callout(void *arg);
static int mlx5e_calibration_duration = 20;
static int mlx5e_fast_calibration = 1;
static int mlx5e_normal_calibration = 30;

static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW, 0,
    "MLX5 timestamp calibration parameters");

SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN,
    &mlx5e_calibration_duration, 0,
    "Duration of initial calibration");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN,
    &mlx5e_fast_calibration, 0,
    "Recalibration interval during initial calibration");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN,
    &mlx5e_normal_calibration, 0,
    "Recalibration interval during normal operations");

/*
 * Ignites the calibration process.
 */
static void
mlx5e_reset_calibration_callout(struct mlx5e_priv *priv)
{
	if (priv->clbr_done == 0)
		mlx5e_calibration_callout(priv);
	else
		callout_reset_curcpu(&priv->tstmp_clbr, (priv->clbr_done <
		    mlx5e_calibration_duration ? mlx5e_fast_calibration :
		    mlx5e_normal_calibration) * hz, mlx5e_calibration_callout,
		    priv);
}

static uint64_t
mlx5e_timespec2usec(const struct timespec *ts)
{
	return ((uint64_t)ts->tv_sec * 1000000000 + ts->tv_nsec);
}

static uint64_t
mlx5e_hw_clock(struct mlx5e_priv *priv)
{
	struct mlx5_init_seg *iseg;
	uint32_t hw_h, hw_h1, hw_l;

	iseg = priv->mdev->iseg;
	do {
		hw_h = ioread32be(&iseg->internal_timer_h);
		hw_l = ioread32be(&iseg->internal_timer_l);
		hw_h1 = ioread32be(&iseg->internal_timer_h);
	} while (hw_h1 != hw_h);
	return (((uint64_t)hw_h << 32) | hw_l);
}

/*
 * The calibration callout runs either in the context of the thread
 * which enables calibration, or from the callout itself.  It takes a
 * snapshot of the system and adapter clocks, then advances the current
 * calibration point so that the rx path can read consistent data
 * locklessly.
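 * Each calibration point carries a generation counter (clbr_gen): it is
 * cleared while a point is being rewritten and set to a fresh non-zero
 * value once the new data has been published, so the rx path is expected
 * to re-check the generation and retry instead of taking a lock.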
*/ static void mlx5e_calibration_callout(void *arg) { struct mlx5e_priv *priv; struct mlx5e_clbr_point *next, *curr; struct timespec ts; int clbr_curr_next; priv = arg; curr = &priv->clbr_points[priv->clbr_curr]; clbr_curr_next = priv->clbr_curr + 1; if (clbr_curr_next >= nitems(priv->clbr_points)) clbr_curr_next = 0; next = &priv->clbr_points[clbr_curr_next]; next->base_prev = curr->base_curr; next->clbr_hw_prev = curr->clbr_hw_curr; next->clbr_hw_curr = mlx5e_hw_clock(priv); if (((next->clbr_hw_curr - curr->clbr_hw_curr) >> MLX5E_TSTMP_PREC) == 0) { if (priv->clbr_done != 0) { mlx5_en_err(priv->ifp, "HW failed tstmp frozen %#jx %#jx, disabling\n", next->clbr_hw_curr, curr->clbr_hw_prev); priv->clbr_done = 0; } atomic_store_rel_int(&curr->clbr_gen, 0); return; } nanouptime(&ts); next->base_curr = mlx5e_timespec2usec(&ts); curr->clbr_gen = 0; atomic_thread_fence_rel(); priv->clbr_curr = clbr_curr_next; atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen)); if (priv->clbr_done < mlx5e_calibration_duration) priv->clbr_done++; mlx5e_reset_calibration_callout(priv); } static const char *mlx5e_rq_stats_desc[] = { MLX5E_RQ_STATS(MLX5E_STATS_DESC) }; static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; char buffer[16]; void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); int wq_sz; int err; int i; u32 nsegs, wqe_sz; err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs); if (err != 0) goto done; /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ nsegs * MLX5E_MAX_RX_BYTES, /* maxsize */ nsegs, /* nsegments */ nsegs * MLX5E_MAX_RX_BYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &rq->dma_tag))) goto done; err = mlx5_wq_ll_create(mdev, ¶m->wq, rqc_wq, &rq->wq, &rq->wq_ctrl); if (err) goto err_free_dma_tag; rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs); if (err != 0) goto err_rq_wq_destroy; wq_sz = mlx5_wq_ll_get_size(&rq->wq); err = -tcp_lro_init_args(&rq->lro, priv->ifp, TCP_LRO_ENTRIES, wq_sz); if (err) goto err_rq_wq_destroy; rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO); for (i = 0; i != wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); int j; err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map); if (err != 0) { while (i--) bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map); goto err_rq_mbuf_free; } /* set value for constant fields */ for (j = 0; j < rq->nsegs; j++) wqe->data[j].lkey = cpu_to_be32(priv->mr.key); } INIT_WORK(&rq->dim.work, mlx5e_dim_work); if (priv->params.rx_cq_moderation_mode < 2) { rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED; } else { void *cqc = container_of(param, struct mlx5e_channel_param, rq)->rx_cq.cqc; switch (MLX5_GET(cqc, cqc, cq_period_mode)) { case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; break; case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE; break; default: rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED; break; } } rq->ifp = priv->ifp; rq->channel = c; rq->ix = c->ix; snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix); mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), 
buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM, rq->stats.arg); return (0); err_rq_mbuf_free: free(rq->mbuf, M_MLX5EN); tcp_lro_free(&rq->lro); err_rq_wq_destroy: mlx5_wq_destroy(&rq->wq_ctrl); err_free_dma_tag: bus_dma_tag_destroy(rq->dma_tag); done: return (err); } static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { int wq_sz; int i; /* destroy all sysctl nodes */ sysctl_ctx_free(&rq->stats.ctx); /* free leftover LRO packets, if any */ tcp_lro_free(&rq->lro); wq_sz = mlx5_wq_ll_get_size(&rq->wq); for (i = 0; i != wq_sz; i++) { if (rq->mbuf[i].mbuf != NULL) { bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map); m_freem(rq->mbuf[i].mbuf); } bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map); } free(rq->mbuf, M_MLX5EN); mlx5_wq_destroy(&rq->wq_ctrl); bus_dma_tag_destroy(rq->dma_tag); } static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; void *in; void *rqc; void *wq; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); wq = MLX5_ADDR_OF(rqc, rqc, wq); memcpy(rqc, param->rqc, sizeof(param->rqc)); MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, flush_in_error_en, 1); if (priv->counter_set_id >= 0) MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); mlx5_fill_page_array(&rq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(wq, wq, pas)); err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn); kvfree(in); return (err); } static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; void *in; void *rqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_rq_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(modify_rq_in, in, rqn, rq->rqn); MLX5_SET(modify_rq_in, in, rq_state, curr_state); MLX5_SET(rqc, rqc, state, next_state); err = mlx5_core_modify_rq(mdev, in, inlen); kvfree(in); return (err); } static void mlx5e_disable_rq(struct mlx5e_rq *rq) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; mlx5_core_destroy_rq(mdev, rq->rqn); } static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_wq_ll *wq = &rq->wq; int i; for (i = 0; i < 1000; i++) { if (wq->cur_sz >= priv->params.min_rx_wqes) return (0); msleep(4); } return (-ETIMEDOUT); } static int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { int err; err = mlx5e_create_rq(c, param, rq); if (err) return (err); err = mlx5e_enable_rq(rq, param); if (err) goto err_destroy_rq; err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) goto err_disable_rq; c->rq.enabled = 1; return (0); err_disable_rq: mlx5e_disable_rq(rq); err_destroy_rq: mlx5e_destroy_rq(rq); return (err); } static void mlx5e_close_rq(struct mlx5e_rq *rq) { mtx_lock(&rq->mtx); rq->enabled = 0; callout_stop(&rq->watchdog); mtx_unlock(&rq->mtx); mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); } 
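/*
 * RQ teardown is split in two: mlx5e_close_rq() above stops the receive
 * path and moves the RQ into the error state, while mlx5e_close_rq_wait()
 * below destroys the hardware queue, its completion queue and the DIM
 * work once the queue has been quiesced.
 */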
static void mlx5e_close_rq_wait(struct mlx5e_rq *rq) { mlx5e_disable_rq(rq); mlx5e_close_cq(&rq->cq); cancel_work_sync(&rq->dim.work); mlx5e_destroy_rq(rq); } void mlx5e_free_sq_db(struct mlx5e_sq *sq) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int x; for (x = 0; x != wq_sz; x++) { if (unlikely(sq->mbuf[x].p_refcount != NULL)) { atomic_add_int(sq->mbuf[x].p_refcount, -1); sq->mbuf[x].p_refcount = NULL; } if (sq->mbuf[x].mbuf != NULL) { bus_dmamap_unload(sq->dma_tag, sq->mbuf[x].dma_map); m_freem(sq->mbuf[x].mbuf); } bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map); } free(sq->mbuf, M_MLX5EN); } int mlx5e_alloc_sq_db(struct mlx5e_sq *sq) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int err; int x; sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO); /* Create DMA descriptor MAPs */ for (x = 0; x != wq_sz; x++) { err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map); if (err != 0) { while (x--) bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map); free(sq->mbuf, M_MLX5EN); return (err); } } return (0); } static const char *mlx5e_sq_stats_desc[] = { MLX5E_SQ_STATS(MLX5E_STATS_DESC) }; void mlx5e_update_sq_inline(struct mlx5e_sq *sq) { sq->max_inline = sq->priv->params.tx_max_inline; sq->min_inline_mode = sq->priv->params.tx_min_inline_mode; /* * Check if trust state is DSCP or if inline mode is NONE which * indicates CX-5 or newer hardware. */ if (sq->priv->params_ethtool.trust_state != MLX5_QPTS_TRUST_PCP || sq->min_inline_mode == MLX5_INLINE_MODE_NONE) { if (MLX5_CAP_ETH(sq->priv->mdev, wqe_vlan_insert)) sq->min_insert_caps = MLX5E_INSERT_VLAN | MLX5E_INSERT_NON_VLAN; else sq->min_insert_caps = MLX5E_INSERT_NON_VLAN; } else { sq->min_insert_caps = 0; } } static void mlx5e_refresh_sq_inline_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c) { int i; for (i = 0; i != priv->num_tc; i++) { mtx_lock(&c->sq[i].lock); mlx5e_update_sq_inline(&c->sq[i]); mtx_unlock(&c->sq[i].lock); } } void mlx5e_refresh_sq_inline(struct mlx5e_priv *priv) { int i; /* check if channels are closed */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return; for (i = 0; i < priv->params.num_channels; i++) mlx5e_refresh_sq_inline_sub(priv, &priv->channel[i]); } static int mlx5e_create_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, struct mlx5e_sq *sq) { struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; char buffer[16]; void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); int err; /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */ MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */ MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sq->dma_tag))) goto done; err = mlx5_alloc_map_uar(mdev, &sq->uar); if (err) goto err_free_dma_tag; err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) goto err_unmap_free_uar; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; err = mlx5e_alloc_sq_db(sq); if (err) goto err_sq_wq_destroy; sq->mkey_be = cpu_to_be32(priv->mr.key); sq->ifp = priv->ifp; sq->priv = priv; sq->tc = tc; mlx5e_update_sq_inline(sq); snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc); mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), 
buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM, sq->stats.arg); return (0); err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); err_unmap_free_uar: mlx5_unmap_free_uar(mdev, &sq->uar); err_free_dma_tag: bus_dma_tag_destroy(sq->dma_tag); done: return (err); } static void mlx5e_destroy_sq(struct mlx5e_sq *sq) { /* destroy all sysctl nodes */ sysctl_ctx_free(&sq->stats.ctx); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar); bus_dma_tag_destroy(sq->dma_tag); } int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param, int tis_num) { void *in; void *sqc; void *wq; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * sq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); wq = MLX5_ADDR_OF(sqc, sqc, wq); memcpy(sqc, param->sqc, sizeof(param->sqc)); MLX5_SET(sqc, sqc, tis_num_0, tis_num); MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, tis_lst_sz, 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, uar_page, sq->uar.index); MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); mlx5_fill_page_array(&sq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(wq, wq, pas)); err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn); kvfree(in); return (err); } int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state) { void *in; void *sqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_sq_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); MLX5_SET(modify_sq_in, in, sqn, sq->sqn); MLX5_SET(modify_sq_in, in, sq_state, curr_state); MLX5_SET(sqc, sqc, state, next_state); err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen); kvfree(in); return (err); } void mlx5e_disable_sq(struct mlx5e_sq *sq) { mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn); } static int mlx5e_open_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, struct mlx5e_sq *sq) { int err; sq->cev_factor = c->priv->params_ethtool.tx_completion_fact; /* ensure the TX completion event factor is not zero */ if (sq->cev_factor == 0) sq->cev_factor = 1; err = mlx5e_create_sq(c, tc, param, sq); if (err) return (err); err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]); if (err) goto err_destroy_sq; err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); if (err) goto err_disable_sq; WRITE_ONCE(sq->running, 1); return (0); err_disable_sq: mlx5e_disable_sq(sq); err_destroy_sq: mlx5e_destroy_sq(sq); return (err); } static void mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep) { /* fill up remainder with NOPs */ while (sq->cev_counter != 0) { while (!mlx5e_sq_has_room_for(sq, 1)) { if (can_sleep != 0) { mtx_unlock(&sq->lock); msleep(4); mtx_lock(&sq->lock); } else { goto done; } } /* send a single NOP */ mlx5e_send_nop(sq, 1); atomic_thread_fence_rel(); } done: /* Check if we need to write the doorbell */ if (likely(sq->doorbell.d64 != 0)) { mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0); sq->doorbell.d64 = 0; } } void mlx5e_sq_cev_timeout(void *arg) { struct mlx5e_sq *sq = arg; mtx_assert(&sq->lock, MA_OWNED); /* check next state */ switch (sq->cev_next_state) { case MLX5E_CEV_STATE_SEND_NOPS: /* fill TX ring with NOPs, if any */ mlx5e_sq_send_nops_locked(sq, 0); /* check if completed */ if (sq->cev_counter == 0) { 
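/* NOP fill-up completed; return to the initial state without re-arming */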
sq->cev_next_state = MLX5E_CEV_STATE_INITIAL; return; } break; default: /* send NOPs on next timeout */ sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS; break; } /* restart timer */ callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq); } void mlx5e_drain_sq(struct mlx5e_sq *sq) { int error; struct mlx5_core_dev *mdev= sq->priv->mdev; /* * Check if already stopped. * * NOTE: Serialization of this function is managed by the * caller ensuring the priv's state lock is locked or in case * of rate limit support, a single thread manages drain and * resume of SQs. The "running" variable can therefore safely * be read without any locks. */ if (READ_ONCE(sq->running) == 0) return; /* don't put more packets into the SQ */ WRITE_ONCE(sq->running, 0); /* serialize access to DMA rings */ mtx_lock(&sq->lock); /* teardown event factor timer, if any */ sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS; callout_stop(&sq->cev_callout); /* send dummy NOPs in order to flush the transmit ring */ mlx5e_sq_send_nops_locked(sq, 1); mtx_unlock(&sq->lock); /* wait till SQ is empty or link is down */ mtx_lock(&sq->lock); while (sq->cc != sq->pc && (sq->priv->media_status_last & IFM_ACTIVE) != 0 && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { mtx_unlock(&sq->lock); msleep(1); sq->cq.mcq.comp(&sq->cq.mcq); mtx_lock(&sq->lock); } mtx_unlock(&sq->lock); /* error out remaining requests */ error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); if (error != 0) { mlx5_en_err(sq->ifp, "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error); } /* wait till SQ is empty */ mtx_lock(&sq->lock); while (sq->cc != sq->pc && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { mtx_unlock(&sq->lock); msleep(1); sq->cq.mcq.comp(&sq->cq.mcq); mtx_lock(&sq->lock); } mtx_unlock(&sq->lock); } static void mlx5e_close_sq_wait(struct mlx5e_sq *sq) { mlx5e_drain_sq(sq); mlx5e_disable_sq(sq); mlx5e_destroy_sq(sq); } static int mlx5e_create_cq(struct mlx5e_priv *priv, struct mlx5e_cq_param *param, struct mlx5e_cq *cq, mlx5e_cq_comp_t *comp, int eq_ix) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_core_cq *mcq = &cq->mcq; int eqn_not_used; int irqn; int err; u32 i; param->wq.buf_numa_node = 0; param->wq.db_numa_node = 0; err = mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn); if (err) return (err); err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, &cq->wq_ctrl); if (err) return (err); mcq->cqe_sz = 64; mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; *mcq->set_ci_db = 0; *mcq->arm_db = 0; mcq->vector = eq_ix; mcq->comp = comp; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; mcq->uar = &priv->cq_uar; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); cqe->op_own = 0xf1; } cq->priv = priv; return (0); } static void mlx5e_destroy_cq(struct mlx5e_cq *cq) { mlx5_wq_destroy(&cq->wq_ctrl); } static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix) { struct mlx5_core_cq *mcq = &cq->mcq; void *in; void *cqc; int inlen; int irqn_not_used; int eqn; int err; inlen = MLX5_ST_SZ_BYTES(create_cq_in) + sizeof(u64) * cq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); memcpy(cqc, param->cqc, sizeof(param->cqc)); mlx5_fill_page_array(&cq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas)); mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used); MLX5_SET(cqc, cqc, c_eqn, eqn); MLX5_SET(cqc, cqc, 
uar_page, mcq->uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen); kvfree(in); if (err) return (err); mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock)); return (0); } static void mlx5e_disable_cq(struct mlx5e_cq *cq) { mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq); } int mlx5e_open_cq(struct mlx5e_priv *priv, struct mlx5e_cq_param *param, struct mlx5e_cq *cq, mlx5e_cq_comp_t *comp, int eq_ix) { int err; err = mlx5e_create_cq(priv, param, cq, comp, eq_ix); if (err) return (err); err = mlx5e_enable_cq(cq, param, eq_ix); if (err) goto err_destroy_cq; return (0); err_destroy_cq: mlx5e_destroy_cq(cq); return (err); } void mlx5e_close_cq(struct mlx5e_cq *cq) { mlx5e_disable_cq(cq); mlx5e_destroy_cq(cq); } static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_channel_param *cparam) { int err; int tc; for (tc = 0; tc < c->priv->num_tc; tc++) { /* open completion queue */ err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq, &mlx5e_tx_cq_comp, c->ix); if (err) goto err_close_tx_cqs; } return (0); err_close_tx_cqs: for (tc--; tc >= 0; tc--) mlx5e_close_cq(&c->sq[tc].cq); return (err); } static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) { int tc; for (tc = 0; tc < c->priv->num_tc; tc++) mlx5e_close_cq(&c->sq[tc].cq); } static int mlx5e_open_sqs(struct mlx5e_channel *c, struct mlx5e_channel_param *cparam) { int err; int tc; for (tc = 0; tc < c->priv->num_tc; tc++) { err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]); if (err) goto err_close_sqs; } return (0); err_close_sqs: for (tc--; tc >= 0; tc--) mlx5e_close_sq_wait(&c->sq[tc]); return (err); } static void mlx5e_close_sqs_wait(struct mlx5e_channel *c) { int tc; for (tc = 0; tc < c->priv->num_tc; tc++) mlx5e_close_sq_wait(&c->sq[tc]); } static void mlx5e_chan_static_init(struct mlx5e_priv *priv, struct mlx5e_channel *c, int ix) { int tc; /* setup priv and channel number */ c->priv = priv; c->ix = ix; /* setup send tag */ c->tag.type = IF_SND_TAG_TYPE_UNLIMITED; m_snd_tag_init(&c->tag.m_snd_tag, c->priv->ifp); init_completion(&c->completion); mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0); for (tc = 0; tc != MLX5E_MAX_TX_NUM_TC; tc++) { struct mlx5e_sq *sq = c->sq + tc; mtx_init(&sq->lock, "mlx5tx", MTX_NETWORK_LOCK " TX", MTX_DEF); mtx_init(&sq->comp_lock, "mlx5comp", MTX_NETWORK_LOCK " TX", MTX_DEF); callout_init_mtx(&sq->cev_callout, &sq->lock, 0); } } static void mlx5e_chan_wait_for_completion(struct mlx5e_channel *c) { m_snd_tag_rele(&c->tag.m_snd_tag); wait_for_completion(&c->completion); } static void mlx5e_priv_wait_for_completion(struct mlx5e_priv *priv, const uint32_t channels) { uint32_t x; for (x = 0; x != channels; x++) mlx5e_chan_wait_for_completion(&priv->channel[x]); } static void mlx5e_chan_static_destroy(struct mlx5e_channel *c) { int tc; callout_drain(&c->rq.watchdog); mtx_destroy(&c->rq.mtx); for (tc = 0; tc != MLX5E_MAX_TX_NUM_TC; tc++) { callout_drain(&c->sq[tc].cev_callout); mtx_destroy(&c->sq[tc].lock); mtx_destroy(&c->sq[tc].comp_lock); } } static int mlx5e_open_channel(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam, struct mlx5e_channel *c) { struct epoch_tracker et; int i, err; /* zero non-persistant data */ MLX5E_ZERO(&c->rq, mlx5e_rq_zero_start); for (i = 0; i != priv->num_tc; i++) MLX5E_ZERO(&c->sq[i], mlx5e_sq_zero_start); /* open transmit completion queue */ 
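/*
 * Completion queues are created before the SQs and the RQ because queue
 * creation records the CQ number (cq.mcq.cqn) in the queue context.
 */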
err = mlx5e_open_tx_cqs(c, cparam); if (err) goto err_free; /* open receive completion queue */ err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq, &mlx5e_rx_cq_comp, c->ix); if (err) goto err_close_tx_cqs; err = mlx5e_open_sqs(c, cparam); if (err) goto err_close_rx_cq; err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) goto err_close_sqs; /* poll receive queue initially */ NET_EPOCH_ENTER(et); c->rq.cq.mcq.comp(&c->rq.cq.mcq); NET_EPOCH_EXIT(et); return (0); err_close_sqs: mlx5e_close_sqs_wait(c); err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); err_close_tx_cqs: mlx5e_close_tx_cqs(c); err_free: return (err); } static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); } static void mlx5e_close_channel_wait(struct mlx5e_channel *c) { mlx5e_close_rq_wait(&c->rq); mlx5e_close_sqs_wait(c); mlx5e_close_tx_cqs(c); } static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs) { u32 r, n; r = priv->params.hw_lro_en ? priv->params.lro_wqe_sz : MLX5E_SW2MB_MTU(priv->ifp->if_mtu); if (r > MJUM16BYTES) return (-ENOMEM); if (r > MJUM9BYTES) r = MJUM16BYTES; else if (r > MJUMPAGESIZE) r = MJUM9BYTES; else if (r > MCLBYTES) r = MJUMPAGESIZE; else r = MCLBYTES; /* * n + 1 must be a power of two, because stride size must be. * Stride size is 16 * (n + 1), as the first segment is * control. */ for (n = howmany(r, MLX5E_MAX_RX_BYTES); !powerof2(n + 1); n++) ; if (n > MLX5E_MAX_BUSDMA_RX_SEGS) return (-ENOMEM); *wqe_sz = r; *nsegs = n; return (0); } static void mlx5e_build_rq_param(struct mlx5e_priv *priv, struct mlx5e_rq_param *param) { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 wqe_sz, nsegs; mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) + nsegs * sizeof(struct mlx5_wqe_data_seg))); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); MLX5_SET(wq, wq, pd, priv->pdn); param->wq.buf_numa_node = 0; param->wq.db_numa_node = 0; param->wq.linear = 1; } static void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, pd, priv->pdn); param->wq.buf_numa_node = 0; param->wq.db_numa_node = 0; param->wq.linear = 1; } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { void *cqc = param->cqc; MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index); } static void mlx5e_get_default_profile(struct mlx5e_priv *priv, int mode, struct net_dim_cq_moder *ptr) { *ptr = net_dim_get_profile(mode, MLX5E_DIM_DEFAULT_PROFILE); /* apply LRO restrictions */ if (priv->params.hw_lro_en && ptr->pkts > MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO) { ptr->pkts = MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO; } } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { struct net_dim_cq_moder curr; void *cqc = param->cqc; /* * We use MLX5_CQE_FORMAT_HASH because the RX hash mini CQE * format is more beneficial for FreeBSD use case. * * Adding support for MLX5_CQE_FORMAT_CSUM will require changes * in mlx5e_decompress_cqe. 
*/ if (priv->params.cqe_zipping_en) { MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_HASH); MLX5_SET(cqc, cqc, cqe_compression_en, 1); } MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size); switch (priv->params.rx_cq_moderation_mode) { case 0: MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec); MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts); MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; case 1: MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec); MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts); if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); else MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; case 2: mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE, &curr); MLX5_SET(cqc, cqc, cq_period, curr.usec); MLX5_SET(cqc, cqc, cq_max_count, curr.pkts); MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; case 3: mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE, &curr); MLX5_SET(cqc, cqc, cq_period, curr.usec); MLX5_SET(cqc, cqc, cq_max_count, curr.pkts); if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); else MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; default: break; } mlx5e_dim_build_cq_param(priv, param); mlx5e_build_common_cq_param(priv, param); } static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { void *cqc = param->cqc; MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec); MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts); switch (priv->params.tx_cq_moderation_mode) { case 0: MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; default: if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); else MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; } mlx5e_build_common_cq_param(priv, param); } static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) { memset(cparam, 0, sizeof(*cparam)); mlx5e_build_rq_param(priv, &cparam->rq); mlx5e_build_sq_param(priv, &cparam->sq); mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); } static int mlx5e_open_channels(struct mlx5e_priv *priv) { struct mlx5e_channel_param *cparam; int err; int i; int j; cparam = malloc(sizeof(*cparam), M_MLX5EN, M_WAITOK); mlx5e_build_channel_param(priv, cparam); for (i = 0; i < priv->params.num_channels; i++) { err = mlx5e_open_channel(priv, cparam, &priv->channel[i]); if (err) goto err_close_channels; } for (j = 0; j < priv->params.num_channels; j++) { err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j].rq); if (err) goto err_close_channels; } free(cparam, M_MLX5EN); return (0); err_close_channels: while (i--) { mlx5e_close_channel(&priv->channel[i]); mlx5e_close_channel_wait(&priv->channel[i]); } free(cparam, M_MLX5EN); return (err); } static void mlx5e_close_channels(struct mlx5e_priv *priv) { int i; for (i = 0; i < priv->params.num_channels; i++) mlx5e_close_channel(&priv->channel[i]); for (i = 0; i < priv->params.num_channels; i++) mlx5e_close_channel_wait(&priv->channel[i]); } static 
int mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq) { if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) { uint8_t cq_mode; switch (priv->params.tx_cq_moderation_mode) { case 0: case 2: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; break; default: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE; break; } return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq, priv->params.tx_cq_moderation_usec, priv->params.tx_cq_moderation_pkts, cq_mode)); } return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq, priv->params.tx_cq_moderation_usec, priv->params.tx_cq_moderation_pkts)); } static int mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq) { if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) { uint8_t cq_mode; uint8_t dim_mode; int retval; switch (priv->params.rx_cq_moderation_mode) { case 0: case 2: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; break; default: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE; dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE; break; } /* tear down dynamic interrupt moderation */ mtx_lock(&rq->mtx); rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED; mtx_unlock(&rq->mtx); /* wait for dynamic interrupt moderation work task, if any */ cancel_work_sync(&rq->dim.work); if (priv->params.rx_cq_moderation_mode >= 2) { struct net_dim_cq_moder curr; mlx5e_get_default_profile(priv, dim_mode, &curr); retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq, curr.usec, curr.pkts, cq_mode); /* set dynamic interrupt moderation mode and zero defaults */ mtx_lock(&rq->mtx); rq->dim.mode = dim_mode; rq->dim.state = 0; rq->dim.profile_ix = MLX5E_DIM_DEFAULT_PROFILE; mtx_unlock(&rq->mtx); } else { retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq, priv->params.rx_cq_moderation_usec, priv->params.rx_cq_moderation_pkts, cq_mode); } return (retval); } return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq, priv->params.rx_cq_moderation_usec, priv->params.rx_cq_moderation_pkts)); } static int mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c) { int err; int i; err = mlx5e_refresh_rq_params(priv, &c->rq); if (err) goto done; for (i = 0; i != priv->num_tc; i++) { err = mlx5e_refresh_sq_params(priv, &c->sq[i]); if (err) goto done; } done: return (err); } int mlx5e_refresh_channel_params(struct mlx5e_priv *priv) { int i; /* check if channels are closed */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return (EINVAL); for (i = 0; i < priv->params.num_channels; i++) { int err; err = mlx5e_refresh_channel_params_sub(priv, &priv->channel[i]); if (err) return (err); } return (0); } static int mlx5e_open_tis(struct mlx5e_priv *priv, int tc) { struct mlx5_core_dev *mdev = priv->mdev; u32 in[MLX5_ST_SZ_DW(create_tis_in)]; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); memset(in, 0, sizeof(in)); MLX5_SET(tisc, tisc, prio, tc); MLX5_SET(tisc, tisc, transport_domain, priv->tdn); return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc])); } static void mlx5e_close_tis(struct mlx5e_priv *priv, int tc) { mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]); } static int mlx5e_open_tises(struct mlx5e_priv *priv) { int num_tc = priv->num_tc; int err; int tc; for (tc = 0; tc < num_tc; tc++) { err = mlx5e_open_tis(priv, tc); if (err) goto err_close_tises; } return (0); err_close_tises: for (tc--; tc >= 0; tc--) mlx5e_close_tis(priv, tc); return (err); } static void mlx5e_close_tises(struct mlx5e_priv *priv) { int 
num_tc = priv->num_tc; int tc; for (tc = 0; tc < num_tc; tc++) mlx5e_close_tis(priv, tc); } static int mlx5e_open_rqt(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u32 *in; u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0}; void *rqtc; int inlen; int err; int sz; int i; sz = 1 << priv->params.rx_hash_log_tbl_sz; inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); MLX5_SET(rqtc, rqtc, rqt_max_size, sz); for (i = 0; i < sz; i++) { int ix = i; #ifdef RSS ix = rss_get_indirection_to_bucket(ix); #endif /* ensure we don't overflow */ ix %= priv->params.num_channels; /* apply receive side scaling stride, if any */ ix -= ix % (int)priv->params.channels_rsss; MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix].rq.rqn); } MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); if (!err) priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn); kvfree(in); return (err); } static void mlx5e_close_rqt(struct mlx5e_priv *priv) { u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0}; u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0}; MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn); mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)); } static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt) { void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); __be32 *hkey; MLX5_SET(tirc, tirc, transport_domain, priv->tdn); #define ROUGH_MAX_L2_L3_HDR_SZ 256 #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) #define MLX5_HASH_ALL (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_L4_SPORT |\ MLX5_HASH_FIELD_SEL_L4_DPORT) #define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) if (priv->params.hw_lro_en) { MLX5_SET(tirc, tirc, lro_enable_mask, MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); MLX5_SET(tirc, tirc, lro_max_msg_sz, (priv->params.lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); /* TODO: add the option to choose timer value dynamically */ MLX5_SET(tirc, tirc, lro_timeout_period_usecs, MLX5_CAP_ETH(priv->mdev, lro_timer_supported_periods[2])); } /* setup parameters for hashing TIR type, if any */ switch (tt) { case MLX5E_TT_ANY: MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); MLX5_SET(tirc, tirc, inline_rqn, priv->channel[0].rq.rqn); break; default: MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->rqtn); MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ); hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); #ifdef RSS /* * The FreeBSD RSS implementation does currently not * support symmetric Toeplitz hashes: */ MLX5_SET(tirc, tirc, rx_hash_symmetric, 0); rss_getkey((uint8_t *)hkey); #else MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); hkey[0] = cpu_to_be32(0xD181C62C); hkey[1] = cpu_to_be32(0xF7F4DB5B); hkey[2] = cpu_to_be32(0x1983A2FC); hkey[3] = cpu_to_be32(0x943E1ADB); hkey[4] = cpu_to_be32(0xD9389E6B); hkey[5] = cpu_to_be32(0xD1039C2C); hkey[6] = cpu_to_be32(0xA74499AD); hkey[7] = cpu_to_be32(0x593D56D9); hkey[8] = cpu_to_be32(0xF3253C06); hkey[9] = cpu_to_be32(0x2ADC1FFC); #endif break; } switch (tt) { case MLX5E_TT_IPV4_TCP: 
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) { MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV6_TCP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) { MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV4_UDP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_UDP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) { MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV6_UDP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_UDP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) { MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV4_IPSEC_AH: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV6_IPSEC_AH: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV4_IPSEC_ESP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV6_IPSEC_ESP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV4: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); break; case MLX5E_TT_IPV6: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); break; default: break; } } static int mlx5e_open_tir(struct mlx5e_priv *priv, int tt) { struct mlx5_core_dev *mdev = priv->mdev; u32 *in; void *tirc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(create_tir_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context); mlx5e_build_tir_ctx(priv, tirc, tt); err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]); kvfree(in); return (err); } static void mlx5e_close_tir(struct mlx5e_priv *priv, int tt) { mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]); } static int mlx5e_open_tirs(struct mlx5e_priv *priv) { int err; int i; for (i = 0; i < MLX5E_NUM_TT; i++) { err = mlx5e_open_tir(priv, i); if (err) goto err_close_tirs; } return (0); err_close_tirs: for (i--; i >= 0; i--) mlx5e_close_tir(priv, i); return (err); } static void 
mlx5e_close_tirs(struct mlx5e_priv *priv) { int i; for (i = 0; i < MLX5E_NUM_TT; i++) mlx5e_close_tir(priv, i); } /* * SW MTU does not include headers, * HW MTU includes all headers and checksums. */ static int mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu) { struct mlx5e_priv *priv = ifp->if_softc; struct mlx5_core_dev *mdev = priv->mdev; int hw_mtu; int err; hw_mtu = MLX5E_SW2HW_MTU(sw_mtu); err = mlx5_set_port_mtu(mdev, hw_mtu); if (err) { mlx5_en_err(ifp, "mlx5_set_port_mtu failed setting %d, err=%d\n", sw_mtu, err); return (err); } /* Update vport context MTU */ err = mlx5_set_vport_mtu(mdev, hw_mtu); if (err) { mlx5_en_err(ifp, "Failed updating vport context with MTU size, err=%d\n", err); } ifp->if_mtu = sw_mtu; err = mlx5_query_vport_mtu(mdev, &hw_mtu); if (err || !hw_mtu) { /* fallback to port oper mtu */ err = mlx5_query_port_oper_mtu(mdev, &hw_mtu); } if (err) { mlx5_en_err(ifp, "Query port MTU, after setting new MTU value, failed\n"); return (err); } else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) { err = -E2BIG, mlx5_en_err(ifp, "Port MTU %d is smaller than ifp mtu %d\n", hw_mtu, sw_mtu); } else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) { err = -EINVAL; mlx5_en_err(ifp, "Port MTU %d is bigger than ifp mtu %d\n", hw_mtu, sw_mtu); } priv->params_ethtool.hw_mtu = hw_mtu; /* compute MSB */ while (hw_mtu & (hw_mtu - 1)) hw_mtu &= (hw_mtu - 1); priv->params_ethtool.hw_mtu_msb = hw_mtu; return (err); } int mlx5e_open_locked(struct ifnet *ifp) { struct mlx5e_priv *priv = ifp->if_softc; int err; u16 set_id; /* check if already opened */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0) return (0); #ifdef RSS if (rss_getnumbuckets() > priv->params.num_channels) { mlx5_en_info(ifp, "NOTE: There are more RSS buckets(%u) than channels(%u) available\n", rss_getnumbuckets(), priv->params.num_channels); } #endif err = mlx5e_open_tises(priv); if (err) { mlx5_en_err(ifp, "mlx5e_open_tises failed, %d\n", err); return (err); } err = mlx5_vport_alloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, &set_id); if (err) { mlx5_en_err(priv->ifp, "mlx5_vport_alloc_q_counter failed: %d\n", err); goto err_close_tises; } /* store counter set ID */ priv->counter_set_id = set_id; err = mlx5e_open_channels(priv); if (err) { mlx5_en_err(ifp, "mlx5e_open_channels failed, %d\n", err); goto err_dalloc_q_counter; } err = mlx5e_open_rqt(priv); if (err) { mlx5_en_err(ifp, "mlx5e_open_rqt failed, %d\n", err); goto err_close_channels; } err = mlx5e_open_tirs(priv); if (err) { mlx5_en_err(ifp, "mlx5e_open_tir failed, %d\n", err); goto err_close_rqls; } err = mlx5e_open_flow_table(priv); if (err) { mlx5_en_err(ifp, "mlx5e_open_flow_table failed, %d\n", err); goto err_close_tirs; } err = mlx5e_add_all_vlan_rules(priv); if (err) { mlx5_en_err(ifp, "mlx5e_add_all_vlan_rules failed, %d\n", err); goto err_close_flow_table; } set_bit(MLX5E_STATE_OPENED, &priv->state); mlx5e_update_carrier(priv); mlx5e_set_rx_mode_core(priv); return (0); err_close_flow_table: mlx5e_close_flow_table(priv); err_close_tirs: mlx5e_close_tirs(priv); err_close_rqls: mlx5e_close_rqt(priv); err_close_channels: mlx5e_close_channels(priv); err_dalloc_q_counter: mlx5_vport_dealloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id); err_close_tises: mlx5e_close_tises(priv); return (err); } static void mlx5e_open(void *arg) { struct mlx5e_priv *priv = arg; PRIV_LOCK(priv); if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP)) mlx5_en_err(priv->ifp, "Setting port status to up failed\n"); mlx5e_open_locked(priv->ifp); 
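/*
 * Editor's aside: mlx5e_set_dev_port_mtu() above derives
 * params_ethtool.hw_mtu_msb by repeatedly clearing the lowest set bit
 * of the hardware MTU until only its most significant bit remains
 * ("while (hw_mtu & (hw_mtu - 1)) hw_mtu &= (hw_mtu - 1);").  A
 * standalone sketch of that loop, with a few sample MTU values chosen
 * for illustration only:
 */
#include <stdio.h>

static unsigned
msb_only(unsigned x)
{
	/* clear the lowest set bit until a single bit remains */
	while (x & (x - 1))
		x &= (x - 1);
	return (x);
}

int
main(void)
{
	unsigned samples[] = { 1522, 9018, 9216 };
	unsigned i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("hw_mtu %u -> msb %u\n", samples[i], msb_only(samples[i]));
	return (0);
}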
priv->ifp->if_drv_flags |= IFF_DRV_RUNNING; PRIV_UNLOCK(priv); } int mlx5e_close_locked(struct ifnet *ifp) { struct mlx5e_priv *priv = ifp->if_softc; /* check if already closed */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return (0); clear_bit(MLX5E_STATE_OPENED, &priv->state); mlx5e_set_rx_mode_core(priv); mlx5e_del_all_vlan_rules(priv); if_link_state_change(priv->ifp, LINK_STATE_DOWN); mlx5e_close_flow_table(priv); mlx5e_close_tirs(priv); mlx5e_close_rqt(priv); mlx5e_close_channels(priv); mlx5_vport_dealloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id); mlx5e_close_tises(priv); return (0); } #if (__FreeBSD_version >= 1100000) static uint64_t mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt) { struct mlx5e_priv *priv = ifp->if_softc; u64 retval; /* PRIV_LOCK(priv); XXX not allowed */ switch (cnt) { case IFCOUNTER_IPACKETS: retval = priv->stats.vport.rx_packets; break; case IFCOUNTER_IERRORS: retval = priv->stats.pport.in_range_len_errors + priv->stats.pport.out_of_range_len + priv->stats.pport.too_long_errors + priv->stats.pport.check_seq_err + priv->stats.pport.alignment_err; break; case IFCOUNTER_IQDROPS: retval = priv->stats.vport.rx_out_of_buffer; break; case IFCOUNTER_OPACKETS: retval = priv->stats.vport.tx_packets; break; case IFCOUNTER_OERRORS: retval = priv->stats.port_stats_debug.out_discards; break; case IFCOUNTER_IBYTES: retval = priv->stats.vport.rx_bytes; break; case IFCOUNTER_OBYTES: retval = priv->stats.vport.tx_bytes; break; case IFCOUNTER_IMCASTS: retval = priv->stats.vport.rx_multicast_packets; break; case IFCOUNTER_OMCASTS: retval = priv->stats.vport.tx_multicast_packets; break; case IFCOUNTER_OQDROPS: retval = priv->stats.vport.tx_queue_dropped; break; case IFCOUNTER_COLLISIONS: retval = priv->stats.pport.collisions; break; default: retval = if_get_counter_default(ifp, cnt); break; } /* PRIV_UNLOCK(priv); XXX not allowed */ return (retval); } #endif static void mlx5e_set_rx_mode(struct ifnet *ifp) { struct mlx5e_priv *priv = ifp->if_softc; queue_work(priv->wq, &priv->set_rx_mode_work); } static int mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct mlx5e_priv *priv; struct ifreq *ifr; struct ifi2creq i2c; int error = 0; int mask = 0; int size_read = 0; int module_status; int module_num; int max_mtu; uint8_t read_addr; priv = ifp->if_softc; /* check if detaching */ if (priv == NULL || priv->gone != 0) return (ENXIO); switch (command) { case SIOCSIFMTU: ifr = (struct ifreq *)data; PRIV_LOCK(priv); mlx5_query_port_max_mtu(priv->mdev, &max_mtu); if (ifr->ifr_mtu >= MLX5E_MTU_MIN && ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) { int was_opened; was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); if (was_opened) mlx5e_close_locked(ifp); /* set new MTU */ mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu); if (was_opened) mlx5e_open_locked(ifp); } else { error = EINVAL; mlx5_en_err(ifp, "Invalid MTU value. 
Min val: %d, Max val: %d\n", MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu)); } PRIV_UNLOCK(priv); break; case SIOCSIFFLAGS: if ((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { mlx5e_set_rx_mode(ifp); break; } PRIV_LOCK(priv); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) mlx5e_open_locked(ifp); ifp->if_drv_flags |= IFF_DRV_RUNNING; mlx5_set_port_status(priv->mdev, MLX5_PORT_UP); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { mlx5_set_port_status(priv->mdev, MLX5_PORT_DOWN); if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0) mlx5e_close_locked(ifp); mlx5e_update_carrier(priv); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; } } PRIV_UNLOCK(priv); break; case SIOCADDMULTI: case SIOCDELMULTI: mlx5e_set_rx_mode(ifp); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: ifr = (struct ifreq *)data; error = ifmedia_ioctl(ifp, ifr, &priv->media, command); break; case SIOCSIFCAP: ifr = (struct ifreq *)data; PRIV_LOCK(priv); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_IP_TSO; mlx5_en_err(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO6; ifp->if_hwassist &= ~CSUM_IP6_TSO; mlx5_en_err(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_NOMAP) ifp->if_capenable ^= IFCAP_NOMAP; if (mask & IFCAP_TXTLS4) ifp->if_capenable ^= IFCAP_TXTLS4; if (mask & IFCAP_TXTLS6) ifp->if_capenable ^= IFCAP_TXTLS6; if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { mlx5_en_err(ifp, "enable txcsum first.\n"); error = EAGAIN; goto out; } ifp->if_capenable ^= IFCAP_TSO4; ifp->if_hwassist ^= CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { mlx5_en_err(ifp, "enable txcsum6 first.\n"); error = EAGAIN; goto out; } ifp->if_capenable ^= IFCAP_TSO6; ifp->if_hwassist ^= CSUM_IP6_TSO; } if (mask & IFCAP_VLAN_HWFILTER) { if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) mlx5e_disable_vlan_filter(priv); else mlx5e_enable_vlan_filter(priv); ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; } if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_WOL_MAGIC) ifp->if_capenable ^= IFCAP_WOL_MAGIC; VLAN_CAPABILITIES(ifp); /* turn off LRO means also turn of HW LRO - if it's on */ if (mask & IFCAP_LRO) { int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); bool need_restart = false; ifp->if_capenable ^= IFCAP_LRO; /* figure out if updating HW LRO is needed */ if (!(ifp->if_capenable & IFCAP_LRO)) { if (priv->params.hw_lro_en) { priv->params.hw_lro_en = false; need_restart = true; } } else { if (priv->params.hw_lro_en == false && priv->params_ethtool.hw_lro != 0) { priv->params.hw_lro_en = true; need_restart = true; } } if (was_opened && need_restart) { mlx5e_close_locked(ifp); mlx5e_open_locked(ifp); } } if (mask & IFCAP_HWRXTSTMP) { ifp->if_capenable 
^= IFCAP_HWRXTSTMP; if (ifp->if_capenable & IFCAP_HWRXTSTMP) { if (priv->clbr_done == 0) mlx5e_reset_calibration_callout(priv); } else { callout_drain(&priv->tstmp_clbr); priv->clbr_done = 0; } } out: PRIV_UNLOCK(priv); break; case SIOCGI2C: ifr = (struct ifreq *)data; /* * Copy from the user-space address ifr_data to the * kernel-space address i2c */ error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (error) break; if (i2c.len > sizeof(i2c.data)) { error = EINVAL; break; } PRIV_LOCK(priv); /* Get module_num which is required for the query_eeprom */ error = mlx5_query_module_num(priv->mdev, &module_num); if (error) { mlx5_en_err(ifp, "Query module num failed, eeprom reading is not supported\n"); error = EINVAL; goto err_i2c; } /* Check if module is present before doing an access */ module_status = mlx5_query_module_status(priv->mdev, module_num); if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED) { error = EINVAL; goto err_i2c; } /* * Currently 0XA0 and 0xA2 are the only addresses permitted. * The internal conversion is as follows: */ if (i2c.dev_addr == 0xA0) read_addr = MLX5_I2C_ADDR_LOW; else if (i2c.dev_addr == 0xA2) read_addr = MLX5_I2C_ADDR_HIGH; else { mlx5_en_err(ifp, "Query eeprom failed, Invalid Address: %X\n", i2c.dev_addr); error = EINVAL; goto err_i2c; } error = mlx5_query_eeprom(priv->mdev, read_addr, MLX5_EEPROM_LOW_PAGE, (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num, (uint32_t *)i2c.data, &size_read); if (error) { mlx5_en_err(ifp, "Query eeprom failed, eeprom reading is not supported\n"); error = EINVAL; goto err_i2c; } if (i2c.len > MLX5_EEPROM_MAX_BYTES) { error = mlx5_query_eeprom(priv->mdev, read_addr, MLX5_EEPROM_LOW_PAGE, (uint32_t)(i2c.offset + size_read), (uint32_t)(i2c.len - size_read), module_num, (uint32_t *)(i2c.data + size_read), &size_read); } if (error) { mlx5_en_err(ifp, "Query eeprom failed, eeprom reading is not supported\n"); error = EINVAL; goto err_i2c; } error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); err_i2c: PRIV_UNLOCK(priv); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) { /* * TODO: uncoment once FW really sets all these bits if * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap || * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap || * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return * -ENOTSUPP; */ /* TODO: add more must-to-have features */ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return (-ENODEV); return (0); } static u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev) { uint32_t bf_buf_size = (1U << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2U; bf_buf_size -= sizeof(struct mlx5e_tx_wqe) - 2; /* verify against driver hardware limit */ if (bf_buf_size > MLX5E_MAX_TX_INLINE) bf_buf_size = MLX5E_MAX_TX_INLINE; return (bf_buf_size); } static int mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv, int num_comp_vectors) { int err; /* * TODO: Consider link speed for setting "log_sq_size", * "log_rq_size" and "cq_moderation_xxx": */ priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; priv->params.rx_cq_moderation_usec = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE : MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; priv->params.rx_cq_moderation_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 
1 : 0; priv->params.rx_cq_moderation_pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; priv->params.tx_cq_moderation_usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; priv->params.tx_cq_moderation_pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; priv->params.min_rx_wqes = MLX5E_PARAMS_DEFAULT_MIN_RX_WQES; priv->params.rx_hash_log_tbl_sz = (order_base_2(num_comp_vectors) > MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ? order_base_2(num_comp_vectors) : MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ; priv->params.num_tc = 1; priv->params.default_vlan_prio = 0; priv->counter_set_id = -1; priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); err = mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode); if (err) return (err); /* * hw lro is currently defaulted to off. when it won't anymore we * will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)" */ priv->params.hw_lro_en = false; priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; /* * CQE zipping is currently defaulted to off. when it won't * anymore we will consider the HW capability: * "!!MLX5_CAP_GEN(mdev, cqe_compression)" */ priv->params.cqe_zipping_en = false; priv->mdev = mdev; priv->params.num_channels = num_comp_vectors; priv->params.channels_rsss = 1; priv->order_base_2_num_channels = order_base_2(num_comp_vectors); priv->queue_mapping_channel_mask = roundup_pow_of_two(num_comp_vectors) - 1; priv->num_tc = priv->params.num_tc; priv->default_vlan_prio = priv->params.default_vlan_prio; INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work); INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); return (0); } static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, struct mlx5_core_mr *mkey) { struct ifnet *ifp = priv->ifp; struct mlx5_core_dev *mdev = priv->mdev; int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); void *mkc; u32 *in; int err; in = mlx5_vzalloc(inlen); if (in == NULL) { mlx5_en_err(ifp, "failed to allocate inbox\n"); return (-ENOMEM); } mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA); MLX5_SET(mkc, mkc, umr_en, 1); /* used by HW TLS */ MLX5_SET(mkc, mkc, lw, 1); MLX5_SET(mkc, mkc, lr, 1); MLX5_SET(mkc, mkc, pd, pdn); MLX5_SET(mkc, mkc, length64, 1); MLX5_SET(mkc, mkc, qpn, 0xffffff); err = mlx5_core_create_mkey(mdev, mkey, in, inlen); if (err) mlx5_en_err(ifp, "mlx5_core_create_mkey failed, %d\n", err); kvfree(in); return (err); } static const char *mlx5e_vport_stats_desc[] = { MLX5E_VPORT_STATS(MLX5E_STATS_DESC) }; static const char *mlx5e_pport_stats_desc[] = { MLX5E_PPORT_STATS(MLX5E_STATS_DESC) }; static void mlx5e_priv_static_init(struct mlx5e_priv *priv, const uint32_t channels) { uint32_t x; mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF); sx_init(&priv->state_lock, "mlx5state"); callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0); MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock); for (x = 0; x != channels; x++) mlx5e_chan_static_init(priv, &priv->channel[x], x); } static void mlx5e_priv_static_destroy(struct mlx5e_priv *priv, const uint32_t channels) { uint32_t x; for (x = 0; x != channels; x++) mlx5e_chan_static_destroy(&priv->channel[x]); callout_drain(&priv->watchdog); mtx_destroy(&priv->async_events_mtx); sx_destroy(&priv->state_lock); } static int sysctl_firmware(SYSCTL_HANDLER_ARGS) { /* * %d.%d%.d the string format. * fw_rev_{maj,min,sub} return u16, 2^16 = 65536. * We need at most 5 chars to store that. 
* It also has: two "." and NULL at the end, which means we need 18 * (5*3 + 3) chars at most. */ char fw[18]; struct mlx5e_priv *priv = arg1; int error; snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev), fw_rev_sub(priv->mdev)); error = sysctl_handle_string(oidp, fw, sizeof(fw), req); return (error); } static void mlx5e_disable_tx_dma(struct mlx5e_channel *ch) { int i; for (i = 0; i < ch->priv->num_tc; i++) mlx5e_drain_sq(&ch->sq[i]); } static void mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq) { sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP); sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8); mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0); sq->doorbell.d64 = 0; } void mlx5e_resume_sq(struct mlx5e_sq *sq) { int err; /* check if already enabled */ if (READ_ONCE(sq->running) != 0) return; err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR, MLX5_SQC_STATE_RST); if (err != 0) { mlx5_en_err(sq->ifp, "mlx5e_modify_sq() from ERR to RST failed: %d\n", err); } sq->cc = 0; sq->pc = 0; /* reset doorbell prior to moving from RST to RDY */ mlx5e_reset_sq_doorbell_record(sq); err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); if (err != 0) { mlx5_en_err(sq->ifp, "mlx5e_modify_sq() from RST to RDY failed: %d\n", err); } sq->cev_next_state = MLX5E_CEV_STATE_INITIAL; WRITE_ONCE(sq->running, 1); } static void mlx5e_enable_tx_dma(struct mlx5e_channel *ch) { int i; for (i = 0; i < ch->priv->num_tc; i++) mlx5e_resume_sq(&ch->sq[i]); } static void mlx5e_disable_rx_dma(struct mlx5e_channel *ch) { struct mlx5e_rq *rq = &ch->rq; struct epoch_tracker et; int err; mtx_lock(&rq->mtx); rq->enabled = 0; callout_stop(&rq->watchdog); mtx_unlock(&rq->mtx); err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); if (err != 0) { mlx5_en_err(rq->ifp, "mlx5e_modify_rq() from RDY to RST failed: %d\n", err); } while (!mlx5_wq_ll_is_empty(&rq->wq)) { msleep(1); NET_EPOCH_ENTER(et); rq->cq.mcq.comp(&rq->cq.mcq); NET_EPOCH_EXIT(et); } /* * Transitioning into RST state will allow the FW to track less ERR state queues, * thus reducing the recv queue flushing time */ err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST); if (err != 0) { mlx5_en_err(rq->ifp, "mlx5e_modify_rq() from ERR to RST failed: %d\n", err); } } static void mlx5e_enable_rx_dma(struct mlx5e_channel *ch) { struct mlx5e_rq *rq = &ch->rq; struct epoch_tracker et; int err; rq->wq.wqe_ctr = 0; mlx5_wq_ll_update_db_record(&rq->wq); err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err != 0) { mlx5_en_err(rq->ifp, "mlx5e_modify_rq() from RST to RDY failed: %d\n", err); } rq->enabled = 1; NET_EPOCH_ENTER(et); rq->cq.mcq.comp(&rq->cq.mcq); NET_EPOCH_EXIT(et); } void mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value) { int i; if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return; for (i = 0; i < priv->params.num_channels; i++) { if (value) mlx5e_disable_tx_dma(&priv->channel[i]); else mlx5e_enable_tx_dma(&priv->channel[i]); } } void mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value) { int i; if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return; for (i = 0; i < priv->params.num_channels; i++) { if (value) mlx5e_disable_rx_dma(&priv->channel[i]); else mlx5e_enable_rx_dma(&priv->channel[i]); } } static void mlx5e_add_hw_stats(struct mlx5e_priv *priv) { SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw), OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0, sysctl_firmware, "A", "HCA firmware version"); 
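/*
 * Editor's aside: the sysctl_firmware() handler registered just above
 * sizes its buffer as 3 * 5 + 3 = 18 bytes: three u16 revision fields
 * of at most five decimal digits each, two '.' separators and the
 * terminating NUL.  The standalone sketch below checks that worst case
 * with snprintf(); the 65535 sample values are the u16 maximum, used
 * here purely for illustration.
 */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	char fw[18];
	uint16_t maj = 65535, min = 65535, sub = 65535;
	int n;

	n = snprintf(fw, sizeof(fw), "%d.%d.%d", maj, min, sub);
	printf("\"%s\": %d characters + NUL fit in %zu bytes\n",
	    fw, n, sizeof(fw));
	return (0);
}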
SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw), OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0, "Board ID"); } static int mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; uint8_t temp[MLX5E_MAX_PRIORITY]; uint32_t tx_pfc; int err; int i; PRIV_LOCK(priv); tx_pfc = priv->params.tx_priority_flow_control; for (i = 0; i != MLX5E_MAX_PRIORITY; i++) temp[i] = (tx_pfc >> i) & 1; err = SYSCTL_OUT(req, temp, MLX5E_MAX_PRIORITY); if (err || !req->newptr) goto done; err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY); if (err) goto done; priv->params.tx_priority_flow_control = 0; /* range check input value */ for (i = 0; i != MLX5E_MAX_PRIORITY; i++) { if (temp[i] > 1) { err = ERANGE; goto done; } priv->params.tx_priority_flow_control |= (temp[i] << i); } /* check if update is required */ if (tx_pfc != priv->params.tx_priority_flow_control) err = -mlx5e_set_port_pfc(priv); done: if (err != 0) priv->params.tx_priority_flow_control= tx_pfc; PRIV_UNLOCK(priv); return (err); } static int mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; uint8_t temp[MLX5E_MAX_PRIORITY]; uint32_t rx_pfc; int err; int i; PRIV_LOCK(priv); rx_pfc = priv->params.rx_priority_flow_control; for (i = 0; i != MLX5E_MAX_PRIORITY; i++) temp[i] = (rx_pfc >> i) & 1; err = SYSCTL_OUT(req, temp, MLX5E_MAX_PRIORITY); if (err || !req->newptr) goto done; err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY); if (err) goto done; priv->params.rx_priority_flow_control = 0; /* range check input value */ for (i = 0; i != MLX5E_MAX_PRIORITY; i++) { if (temp[i] > 1) { err = ERANGE; goto done; } priv->params.rx_priority_flow_control |= (temp[i] << i); } /* check if update is required */ if (rx_pfc != priv->params.rx_priority_flow_control) { err = -mlx5e_set_port_pfc(priv); if (err == 0 && priv->sw_is_port_buf_owner) err = mlx5e_update_buf_lossy(priv); } done: if (err != 0) priv->params.rx_priority_flow_control= rx_pfc; PRIV_UNLOCK(priv); return (err); } static void mlx5e_setup_pauseframes(struct mlx5e_priv *priv) { #if (__FreeBSD_version < 1100000) char path[96]; #endif int error; /* enable pauseframes by default */ priv->params.tx_pauseframe_control = 1; priv->params.rx_pauseframe_control = 1; /* disable ports flow control, PFC, by default */ priv->params.tx_priority_flow_control = 0; priv->params.rx_priority_flow_control = 0; #if (__FreeBSD_version < 1100000) /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control", device_get_unit(priv->mdev->pdev->dev.bsddev)); /* try to fetch tunable, if any */ TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control); /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control", device_get_unit(priv->mdev->pdev->dev.bsddev)); /* try to fetch tunable, if any */ TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control); #endif /* register pauseframe SYSCTLs */ SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN, &priv->params.tx_pauseframe_control, 0, "Set to enable TX pause frames. Clear to disable."); SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN, &priv->params.rx_pauseframe_control, 0, "Set to enable RX pause frames. 
Clear to disable."); /* register priority flow control, PFC, SYSCTLs */ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "tx_priority_flow_control", CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, &mlx5e_sysctl_tx_priority_flow_control, "CU", "Set to enable TX ports flow control frames for priorities 0..7. Clear to disable."); SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "rx_priority_flow_control", CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, &mlx5e_sysctl_rx_priority_flow_control, "CU", "Set to enable RX ports flow control frames for priorities 0..7. Clear to disable."); PRIV_LOCK(priv); /* range check */ priv->params.tx_pauseframe_control = priv->params.tx_pauseframe_control ? 1 : 0; priv->params.rx_pauseframe_control = priv->params.rx_pauseframe_control ? 1 : 0; /* update firmware */ error = mlx5e_set_port_pause_and_pfc(priv); if (error == -EINVAL) { mlx5_en_err(priv->ifp, "Global pauseframes must be disabled before enabling PFC.\n"); priv->params.rx_priority_flow_control = 0; priv->params.tx_priority_flow_control = 0; /* update firmware */ (void) mlx5e_set_port_pause_and_pfc(priv); } PRIV_UNLOCK(priv); } int mlx5e_ul_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) { struct mlx5e_priv *priv; struct mlx5e_channel *pch; priv = ifp->if_softc; if (unlikely(priv->gone || params->hdr.flowtype == M_HASHTYPE_NONE)) { return (EOPNOTSUPP); } else { /* keep this code synced with mlx5e_select_queue() */ u32 ch = priv->params.num_channels; #ifdef RSS u32 temp; if (rss_hash2bucket(params->hdr.flowid, params->hdr.flowtype, &temp) == 0) ch = temp % ch; else #endif ch = (params->hdr.flowid % 128) % ch; /* * NOTE: The channels array is only freed at detach * and it safe to return a pointer to the send tag * inside the channels structure as long as we * reference the priv. 
*/ pch = priv->channel + ch; /* check if send queue is not running */ if (unlikely(pch->sq[0].running == 0)) return (ENXIO); m_snd_tag_ref(&pch->tag.m_snd_tag); *ppmt = &pch->tag.m_snd_tag; return (0); } } int mlx5e_ul_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params) { struct mlx5e_channel *pch = container_of(pmt, struct mlx5e_channel, tag.m_snd_tag); params->unlimited.max_rate = -1ULL; params->unlimited.queue_level = mlx5e_sq_queue_level(&pch->sq[0]); return (0); } void mlx5e_ul_snd_tag_free(struct m_snd_tag *pmt) { struct mlx5e_channel *pch = container_of(pmt, struct mlx5e_channel, tag.m_snd_tag); complete(&pch->completion); } static int mlx5e_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) { switch (params->hdr.type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: return (mlx5e_rl_snd_tag_alloc(ifp, params, ppmt)); #if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: return (mlx5e_tls_snd_tag_alloc(ifp, params, ppmt)); #endif #endif case IF_SND_TAG_TYPE_UNLIMITED: return (mlx5e_ul_snd_tag_alloc(ifp, params, ppmt)); #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: return (mlx5e_tls_snd_tag_alloc(ifp, params, ppmt)); #endif default: return (EOPNOTSUPP); } } static int mlx5e_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params) { struct mlx5e_snd_tag *tag = container_of(pmt, struct mlx5e_snd_tag, m_snd_tag); switch (tag->type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: return (mlx5e_rl_snd_tag_modify(pmt, params)); #if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: return (mlx5e_tls_snd_tag_modify(pmt, params)); #endif #endif case IF_SND_TAG_TYPE_UNLIMITED: #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: #endif default: return (EOPNOTSUPP); } } static int mlx5e_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params) { struct mlx5e_snd_tag *tag = container_of(pmt, struct mlx5e_snd_tag, m_snd_tag); switch (tag->type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: return (mlx5e_rl_snd_tag_query(pmt, params)); #if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: return (mlx5e_tls_snd_tag_query(pmt, params)); #endif #endif case IF_SND_TAG_TYPE_UNLIMITED: return (mlx5e_ul_snd_tag_query(pmt, params)); #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: return (mlx5e_tls_snd_tag_query(pmt, params)); #endif default: return (EOPNOTSUPP); } } #ifdef RATELIMIT #define NUM_HDWR_RATES_MLX 13 static const uint64_t adapter_rates_mlx[NUM_HDWR_RATES_MLX] = { 135375, /* 1,083,000 */ 180500, /* 1,444,000 */ 270750, /* 2,166,000 */ 361000, /* 2,888,000 */ 541500, /* 4,332,000 */ 721875, /* 5,775,000 */ 1082875, /* 8,663,000 */ 1443875, /* 11,551,000 */ 2165750, /* 17,326,000 */ 2887750, /* 23,102,000 */ 4331625, /* 34,653,000 */ 5775500, /* 46,204,000 */ 8663125 /* 69,305,000 */ }; static void mlx5e_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q) { /* * This function needs updating by the driver maintainer! * For the MLX card there are currently (ConectX-4?) 13 * pre-set rates and others i.e. ConnectX-5, 6, 7?? * * This will change based on later adapters * and this code should be updated to look at ifp * and figure out the specific adapter type * settings i.e. how many rates as well * as if they are fixed (as is shown here) or * if they are dynamic (example chelsio t4). 
Also if there * is a maximum number of flows that the adapter * can handle that too needs to be updated in * the max_flows field. */ q->rate_table = adapter_rates_mlx; q->flags = RT_IS_FIXED_TABLE; q->max_flows = 0; /* mlx has no limit */ q->number_of_rates = NUM_HDWR_RATES_MLX; q->min_segment_burst = 1; } #endif static void mlx5e_snd_tag_free(struct m_snd_tag *pmt) { struct mlx5e_snd_tag *tag = container_of(pmt, struct mlx5e_snd_tag, m_snd_tag); switch (tag->type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: mlx5e_rl_snd_tag_free(pmt); break; #if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: mlx5e_tls_snd_tag_free(pmt); break; #endif #endif case IF_SND_TAG_TYPE_UNLIMITED: mlx5e_ul_snd_tag_free(pmt); break; #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: mlx5e_tls_snd_tag_free(pmt); break; #endif default: break; } } static void * mlx5e_create_ifp(struct mlx5_core_dev *mdev) { struct ifnet *ifp; struct mlx5e_priv *priv; u8 dev_addr[ETHER_ADDR_LEN] __aligned(4); u8 connector_type; struct sysctl_oid_list *child; int ncv = mdev->priv.eq_table.num_comp_vectors; char unit[16]; struct pfil_head_args pa; int err; int i,j; u32 eth_proto_cap; u32 out[MLX5_ST_SZ_DW(ptys_reg)]; bool ext = 0; u32 speeds_num; struct media media_entry = {}; if (mlx5e_check_required_hca_cap(mdev)) { mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n"); return (NULL); } /* * Try to allocate the priv and make room for worst-case * number of channel structures: */ priv = malloc(sizeof(*priv) + (sizeof(priv->channel[0]) * mdev->priv.eq_table.num_comp_vectors), M_MLX5EN, M_WAITOK | M_ZERO); ifp = priv->ifp = if_alloc_dev(IFT_ETHER, mdev->pdev->dev.bsddev); if (ifp == NULL) { mlx5_core_err(mdev, "if_alloc() failed\n"); goto err_free_priv; } /* setup all static fields */ mlx5e_priv_static_init(priv, mdev->priv.eq_table.num_comp_vectors); ifp->if_softc = priv; if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev)); ifp->if_mtu = ETHERMTU; ifp->if_init = mlx5e_open; - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | + IFF_KNOWSEPOCH; ifp->if_ioctl = mlx5e_ioctl; ifp->if_transmit = mlx5e_xmit; ifp->if_qflush = if_qflush; #if (__FreeBSD_version >= 1100000) ifp->if_get_counter = mlx5e_get_counter; #endif ifp->if_snd.ifq_maxlen = ifqmaxlen; /* * Set driver features */ ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6; ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_LRO; ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO; ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP; ifp->if_capabilities |= IFCAP_NOMAP; ifp->if_capabilities |= IFCAP_TXTLS4 | IFCAP_TXTLS6; ifp->if_capabilities |= IFCAP_TXRTLMT; ifp->if_snd_tag_alloc = mlx5e_snd_tag_alloc; ifp->if_snd_tag_free = mlx5e_snd_tag_free; ifp->if_snd_tag_modify = mlx5e_snd_tag_modify; ifp->if_snd_tag_query = mlx5e_snd_tag_query; #ifdef RATELIMIT ifp->if_ratelimit_query = mlx5e_ratelimit_query; #endif /* set TSO limits so that we don't have to drop TX packets */ ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */; ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE; ifp->if_capenable = ifp->if_capabilities; ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO) 
ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); /* ifnet sysctl tree */ sysctl_ctx_init(&priv->sysctl_ctx); priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev), OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name"); if (priv->sysctl_ifnet == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } snprintf(unit, sizeof(unit), "%d", ifp->if_dunit); priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit"); if (priv->sysctl_ifnet == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } /* HW sysctl tree */ child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev)); priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child, OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw"); if (priv->sysctl_hw == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } err = mlx5e_build_ifp_priv(mdev, priv, ncv); if (err) { mlx5_core_err(mdev, "mlx5e_build_ifp_priv() failed (%d)\n", err); goto err_free_sysctl; } /* reuse mlx5core's watchdog workqueue */ priv->wq = mdev->priv.health.wq_watchdog; err = mlx5_alloc_map_uar(mdev, &priv->cq_uar); if (err) { mlx5_en_err(ifp, "mlx5_alloc_map_uar failed, %d\n", err); goto err_free_wq; } err = mlx5_core_alloc_pd(mdev, &priv->pdn); if (err) { mlx5_en_err(ifp, "mlx5_core_alloc_pd failed, %d\n", err); goto err_unmap_free_uar; } err = mlx5_alloc_transport_domain(mdev, &priv->tdn); if (err) { mlx5_en_err(ifp, "mlx5_alloc_transport_domain failed, %d\n", err); goto err_dealloc_pd; } err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr); if (err) { mlx5_en_err(ifp, "mlx5e_create_mkey failed, %d\n", err); goto err_dealloc_transport_domain; } mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr); /* check if we should generate a random MAC address */ if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 && is_zero_ether_addr(dev_addr)) { random_ether_addr(dev_addr); mlx5_en_err(ifp, "Assigned random MAC address\n"); } err = mlx5e_rl_init(priv); if (err) { mlx5_en_err(ifp, "mlx5e_rl_init failed, %d\n", err); goto err_create_mkey; } err = mlx5e_tls_init(priv); if (err) { if_printf(ifp, "%s: mlx5e_tls_init failed\n", __func__); goto err_rl_init; } /* set default MTU */ mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu); /* Set default media status */ priv->media_status_last = IFM_AVALID; priv->media_active_last = IFM_ETHER | IFM_AUTO | IFM_ETH_RXPAUSE | IFM_FDX; /* setup default pauseframes configuration */ mlx5e_setup_pauseframes(priv); /* Setup supported medias */ //TODO: If we failed to query ptys is it ok to proceed?? if (!mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1)) { ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_capability); if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) connector_type = MLX5_GET(ptys_reg, out, connector_type); } else { eth_proto_cap = 0; mlx5_en_err(ifp, "Query port media capability failed, %d\n", err); } ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK, mlx5e_media_change, mlx5e_media_status); speeds_num = ext ? 
MLX5E_EXT_LINK_SPEEDS_NUMBER : MLX5E_LINK_SPEEDS_NUMBER; for (i = 0; i != speeds_num; i++) { for (j = 0; j < MLX5E_LINK_MODES_NUMBER ; ++j) { media_entry = ext ? mlx5e_ext_mode_table[i][j] : mlx5e_mode_table[i][j]; if (media_entry.baudrate == 0) continue; if (MLX5E_PROT_MASK(i) & eth_proto_cap) { ifmedia_add(&priv->media, media_entry.subtype | IFM_ETHER, 0, NULL); ifmedia_add(&priv->media, media_entry.subtype | IFM_ETHER | IFM_FDX | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL); } } } ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL); /* Set autoselect by default */ ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE); DEBUGNET_SET(ifp, mlx5_en); ether_ifattach(ifp, dev_addr); /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); /* Link is down by default */ if_link_state_change(ifp, LINK_STATE_DOWN); mlx5e_enable_async_events(priv); mlx5e_add_hw_stats(priv); mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM, priv->stats.vport.arg); mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM, priv->stats.pport.arg); mlx5e_create_ethtool(priv); mtx_lock(&priv->async_events_mtx); mlx5e_update_stats(priv); mtx_unlock(&priv->async_events_mtx); SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "rx_clbr_done", CTLFLAG_RD, &priv->clbr_done, 0, "RX timestamps calibration state"); callout_init(&priv->tstmp_clbr, CALLOUT_DIRECT); mlx5e_reset_calibration_callout(priv); pa.pa_version = PFIL_VERSION; pa.pa_flags = PFIL_IN; pa.pa_type = PFIL_TYPE_ETHERNET; pa.pa_headname = ifp->if_xname; priv->pfil = pfil_head_register(&pa); return (priv); err_rl_init: mlx5e_rl_cleanup(priv); err_create_mkey: mlx5_core_destroy_mkey(priv->mdev, &priv->mr); err_dealloc_transport_domain: mlx5_dealloc_transport_domain(mdev, priv->tdn); err_dealloc_pd: mlx5_core_dealloc_pd(mdev, priv->pdn); err_unmap_free_uar: mlx5_unmap_free_uar(mdev, &priv->cq_uar); err_free_wq: flush_workqueue(priv->wq); err_free_sysctl: sysctl_ctx_free(&priv->sysctl_ctx); if (priv->sysctl_debug) sysctl_ctx_free(&priv->stats.port_stats_debug.ctx); mlx5e_priv_static_destroy(priv, mdev->priv.eq_table.num_comp_vectors); if_free(ifp); err_free_priv: free(priv, M_MLX5EN); return (NULL); } static void mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv) { struct mlx5e_priv *priv = vpriv; struct ifnet *ifp = priv->ifp; /* don't allow more IOCTLs */ priv->gone = 1; /* XXX wait a bit to allow IOCTL handlers to complete */ pause("W", hz); #ifdef RATELIMIT /* * The kernel can have reference(s) via the m_snd_tag's into * the ratelimit channels, and these must go away before * detaching: */ while (READ_ONCE(priv->rl.stats.tx_active_connections) != 0) { mlx5_en_err(priv->ifp, "Waiting for all ratelimit connections to terminate\n"); pause("W", hz); } #endif /* wait for all unlimited send tags to complete */ mlx5e_priv_wait_for_completion(priv, mdev->priv.eq_table.num_comp_vectors); /* stop watchdog timer */ callout_drain(&priv->watchdog); callout_drain(&priv->tstmp_clbr); if (priv->vlan_attach != NULL) 
EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); if (priv->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); /* make sure device gets closed */ PRIV_LOCK(priv); mlx5e_close_locked(ifp); PRIV_UNLOCK(priv); /* deregister pfil */ if (priv->pfil != NULL) { pfil_head_unregister(priv->pfil); priv->pfil = NULL; } /* unregister device */ ifmedia_removeall(&priv->media); ether_ifdetach(ifp); mlx5e_tls_cleanup(priv); mlx5e_rl_cleanup(priv); /* destroy all remaining sysctl nodes */ sysctl_ctx_free(&priv->stats.vport.ctx); sysctl_ctx_free(&priv->stats.pport.ctx); if (priv->sysctl_debug) sysctl_ctx_free(&priv->stats.port_stats_debug.ctx); sysctl_ctx_free(&priv->sysctl_ctx); mlx5_core_destroy_mkey(priv->mdev, &priv->mr); mlx5_dealloc_transport_domain(priv->mdev, priv->tdn); mlx5_core_dealloc_pd(priv->mdev, priv->pdn); mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); mlx5e_disable_async_events(priv); flush_workqueue(priv->wq); mlx5e_priv_static_destroy(priv, mdev->priv.eq_table.num_comp_vectors); if_free(ifp); free(priv, M_MLX5EN); } #ifdef DEBUGNET static void mlx5_en_debugnet_init(struct ifnet *dev, int *nrxr, int *ncl, int *clsize) { struct mlx5e_priv *priv = if_getsoftc(dev); PRIV_LOCK(priv); *nrxr = priv->params.num_channels; *ncl = DEBUGNET_MAX_IN_FLIGHT; *clsize = MLX5E_MAX_RX_BYTES; PRIV_UNLOCK(priv); } static void mlx5_en_debugnet_event(struct ifnet *dev, enum debugnet_ev event) { } static int mlx5_en_debugnet_transmit(struct ifnet *dev, struct mbuf *m) { struct mlx5e_priv *priv = if_getsoftc(dev); struct mlx5e_sq *sq; int err; if ((if_getdrvflags(dev) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (priv->media_status_last & IFM_ACTIVE) == 0) return (ENOENT); sq = &priv->channel[0].sq[0]; if (sq->running == 0) { m_freem(m); return (ENOENT); } if (mlx5e_sq_xmit(sq, &m) != 0) { m_freem(m); err = ENOBUFS; } else { err = 0; } if (likely(sq->doorbell.d64 != 0)) { mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0); sq->doorbell.d64 = 0; } return (err); } static int mlx5_en_debugnet_poll(struct ifnet *dev, int count) { struct mlx5e_priv *priv = if_getsoftc(dev); if ((if_getdrvflags(dev) & IFF_DRV_RUNNING) == 0 || (priv->media_status_last & IFM_ACTIVE) == 0) return (ENOENT); mlx5_poll_interrupts(priv->mdev); return (0); } #endif /* DEBUGNET */ static void * mlx5e_get_ifp(void *vpriv) { struct mlx5e_priv *priv = vpriv; return (priv->ifp); } static struct mlx5_interface mlx5e_interface = { .add = mlx5e_create_ifp, .remove = mlx5e_destroy_ifp, .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, .get_dev = mlx5e_get_ifp, }; void mlx5e_init(void) { mlx5_register_interface(&mlx5e_interface); } void mlx5e_cleanup(void) { mlx5_unregister_interface(&mlx5e_interface); } static void mlx5e_show_version(void __unused *arg) { printf("%s", mlx5e_version); } SYSINIT(mlx5e_show_version, SI_SUB_DRIVERS, SI_ORDER_ANY, mlx5e_show_version, NULL); module_init_order(mlx5e_init, SI_ORDER_THIRD); module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD); #if (__FreeBSD_version >= 1100000) MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1); #endif MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1); MODULE_VERSION(mlx5en, 1); diff --git a/sys/dev/ntb/if_ntb/if_ntb.c b/sys/dev/ntb/if_ntb/if_ntb.c index 1e1f98a54132..3bae01aae49d 100644 --- a/sys/dev/ntb/if_ntb/if_ntb.c +++ b/sys/dev/ntb/if_ntb/if_ntb.c @@ -1,514 +1,513 @@ /*- * Copyright (c) 2016 Alexander Motin * Copyright (C) 2013 Intel Corporation * Copyright (C) 2015 EMC Corporation * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * The Non-Transparent Bridge (NTB) is a device that allows you to connect * two or more systems using a PCI-e links, providing remote memory access. * * This module contains a driver for simulated Ethernet device, using * underlying NTB Transport device. * * NOTE: Much of the code in this module is shared with Linux. Any patches may * be picked up and redistributed in Linux with a dual GPL/BSD license. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../ntb_transport.h" #define KTR_NTB KTR_SPARE3 #define NTB_MEDIATYPE (IFM_ETHER | IFM_AUTO | IFM_FDX) #define NTB_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP) #define NTB_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6) #define NTB_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \ CSUM_PSEUDO_HDR | \ CSUM_IP_CHECKED | CSUM_IP_VALID | \ CSUM_SCTP_VALID) static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW, 0, "if_ntb"); static unsigned g_if_ntb_num_queues = UINT_MAX; SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN, &g_if_ntb_num_queues, 0, "Number of queues per interface"); struct ntb_net_queue { struct ntb_net_ctx *sc; if_t ifp; struct ntb_transport_qp *qp; struct buf_ring *br; struct task tx_task; struct taskqueue *tx_tq; struct mtx tx_lock; struct callout queue_full; }; struct ntb_net_ctx { if_t ifp; struct ifmedia media; u_char eaddr[ETHER_ADDR_LEN]; int num_queues; struct ntb_net_queue *queues; int mtu; }; static int ntb_net_probe(device_t dev); static int ntb_net_attach(device_t dev); static int ntb_net_detach(device_t dev); static void ntb_net_init(void *arg); static int ntb_ifmedia_upd(struct ifnet *); static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *); static int ntb_ioctl(if_t ifp, u_long command, caddr_t data); static int ntb_transmit(if_t ifp, struct mbuf *m); static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len); static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len); static void ntb_net_event_handler(void *data, enum ntb_link_event status); static void ntb_handle_tx(void *arg, int 
pending); static void ntb_qp_full(void *arg); static void ntb_qflush(if_t ifp); static void create_random_local_eui48(u_char *eaddr); static int ntb_net_probe(device_t dev) { device_set_desc(dev, "NTB Network Interface"); return (0); } static int ntb_net_attach(device_t dev) { struct ntb_net_ctx *sc = device_get_softc(dev); struct ntb_net_queue *q; if_t ifp; struct ntb_queue_handlers handlers = { ntb_net_rx_handler, ntb_net_tx_handler, ntb_net_event_handler }; int i; ifp = sc->ifp = if_gethandle(IFT_ETHER); if (ifp == NULL) { printf("ntb: Cannot allocate ifnet structure\n"); return (ENOMEM); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setdev(ifp, dev); sc->num_queues = min(g_if_ntb_num_queues, ntb_transport_queue_count(dev)); sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue), M_DEVBUF, M_WAITOK | M_ZERO); sc->mtu = INT_MAX; for (i = 0; i < sc->num_queues; i++) { q = &sc->queues[i]; q->sc = sc; q->ifp = ifp; q->qp = ntb_transport_create_queue(dev, i, &handlers, q); if (q->qp == NULL) break; sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp)); mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF); q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock); TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q); q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT, taskqueue_thread_enqueue, &q->tx_tq); taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d", device_get_nameunit(dev), i); callout_init(&q->queue_full, 1); } sc->num_queues = i; device_printf(dev, "%d queue(s)\n", sc->num_queues); if_setinitfn(ifp, ntb_net_init); if_setsoftc(ifp, sc); - if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | - IFF_NEEDSEPOCH); + if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setioctlfn(ifp, ntb_ioctl); if_settransmitfn(ifp, ntb_transmit); if_setqflushfn(ifp, ntb_qflush); create_random_local_eui48(sc->eaddr); ether_ifattach(ifp, sc->eaddr); if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_JUMBO_MTU | IFCAP_LINKSTATE); if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE); if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN); ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd, ntb_ifmedia_sts); ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL); ifmedia_set(&sc->media, NTB_MEDIATYPE); for (i = 0; i < sc->num_queues; i++) ntb_transport_link_up(sc->queues[i].qp); return (0); } static int ntb_net_detach(device_t dev) { struct ntb_net_ctx *sc = device_get_softc(dev); struct ntb_net_queue *q; int i; for (i = 0; i < sc->num_queues; i++) ntb_transport_link_down(sc->queues[i].qp); ether_ifdetach(sc->ifp); if_free(sc->ifp); ifmedia_removeall(&sc->media); for (i = 0; i < sc->num_queues; i++) { q = &sc->queues[i]; ntb_transport_free_queue(q->qp); buf_ring_free(q->br, M_DEVBUF); callout_drain(&q->queue_full); taskqueue_drain_all(q->tx_tq); mtx_destroy(&q->tx_lock); } free(sc->queues, M_DEVBUF); return (0); } /* Network device interface */ static void ntb_net_init(void *arg) { struct ntb_net_ctx *sc = arg; if_t ifp = sc->ifp; if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); if_setbaudrate(ifp, ntb_transport_link_speed(sc->queues[0].qp)); if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ? 
LINK_STATE_UP : LINK_STATE_DOWN); } static int ntb_ioctl(if_t ifp, u_long command, caddr_t data) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; int error = 0; switch (command) { case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCSIFMTU: { if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) { error = EINVAL; break; } if_setmtu(ifp, ifr->ifr_mtu); break; } case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->media, command); break; case SIOCSIFCAP: if (ifr->ifr_reqcap & IFCAP_RXCSUM) if_setcapenablebit(ifp, IFCAP_RXCSUM, 0); else if_setcapenablebit(ifp, 0, IFCAP_RXCSUM); if (ifr->ifr_reqcap & IFCAP_TXCSUM) { if_setcapenablebit(ifp, IFCAP_TXCSUM, 0); if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0); } else { if_setcapenablebit(ifp, 0, IFCAP_TXCSUM); if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES); } if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6) if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0); else if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6); if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) { if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0); if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0); } else { if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6); if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6); } break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int ntb_ifmedia_upd(struct ifnet *ifp) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ifmedia *ifm = &sc->media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); return (0); } static void ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct ntb_net_ctx *sc = if_getsoftc(ifp); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = NTB_MEDIATYPE; if (ntb_transport_link_query(sc->queues[0].qp)) ifmr->ifm_status |= IFM_ACTIVE; } static void ntb_transmit_locked(struct ntb_net_queue *q) { if_t ifp = q->ifp; struct mbuf *m; int rc, len; short mflags; CTR0(KTR_NTB, "TX: ntb_transmit_locked"); while ((m = drbr_peek(ifp, q->br)) != NULL) { CTR1(KTR_NTB, "TX: start mbuf %p", m); if_etherbpfmtap(ifp, m); len = m->m_pkthdr.len; mflags = m->m_flags; rc = ntb_transport_tx_enqueue(q->qp, m, m, len); if (rc != 0) { CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc); if (rc == EAGAIN) { drbr_putback(ifp, q->br, m); callout_reset_sbt(&q->queue_full, SBT_1MS / 4, SBT_1MS / 4, ntb_qp_full, q, 0); } else { m_freem(m); drbr_advance(ifp, q->br); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } break; } drbr_advance(ifp, q->br); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if (mflags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } } static int ntb_transmit(if_t ifp, struct mbuf *m) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ntb_net_queue *q; int error, i; CTR0(KTR_NTB, "TX: ntb_transmit"); if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % sc->num_queues; else i = curcpu % sc->num_queues; q = &sc->queues[i]; error = drbr_enqueue(ifp, q->br, m); if (error) return (error); if (mtx_trylock(&q->tx_lock)) { ntb_transmit_locked(q); mtx_unlock(&q->tx_lock); } else taskqueue_enqueue(q->tx_tq, &q->tx_task); return (0); } static void ntb_handle_tx(void *arg, int pending) { struct ntb_net_queue *q = arg; mtx_lock(&q->tx_lock); ntb_transmit_locked(q); mtx_unlock(&q->tx_lock); } static void ntb_qp_full(void *arg) { struct ntb_net_queue *q = arg; CTR0(KTR_NTB, "TX: qp_full callout"); if (ntb_transport_tx_free_entry(q->qp) > 0) taskqueue_enqueue(q->tx_tq, &q->tx_task); else 
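/*
 * Transport ring still full: re-arm the ~250us (SBT_1MS / 4) callout and keep
 * polling until ntb_transport_tx_free_entry() reports free slots, at which
 * point the tx task is queued to drain the buf_ring.
 */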
callout_schedule_sbt(&q->queue_full, SBT_1MS / 4, SBT_1MS / 4, 0); } static void ntb_qflush(if_t ifp) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ntb_net_queue *q; struct mbuf *m; int i; for (i = 0; i < sc->num_queues; i++) { q = &sc->queues[i]; mtx_lock(&q->tx_lock); while ((m = buf_ring_dequeue_sc(q->br)) != NULL) m_freem(m); mtx_unlock(&q->tx_lock); } if_qflush(ifp); } /* Network Device Callbacks */ static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len) { m_freem(data); CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data); } static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len) { struct ntb_net_queue *q = qp_data; struct ntb_net_ctx *sc = q->sc; struct mbuf *m = data; if_t ifp = q->ifp; uint16_t proto; CTR1(KTR_NTB, "RX: rx handler (%d)", len); if (len < 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } m->m_pkthdr.rcvif = ifp; if (sc->num_queues > 1) { m->m_pkthdr.flowid = q - sc->queues; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); } if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { m_copydata(m, 12, 2, (void *)&proto); switch (ntohs(proto)) { case ETHERTYPE_IP: if (if_getcapenable(ifp) & IFCAP_RXCSUM) { m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = NTB_CSUM_SET; } break; case ETHERTYPE_IPV6: if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) { m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = NTB_CSUM_SET; } break; } } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_input(ifp, m); } static void ntb_net_event_handler(void *data, enum ntb_link_event status) { struct ntb_net_queue *q = data; if_setbaudrate(q->ifp, ntb_transport_link_speed(q->qp)); if_link_state_change(q->ifp, (status == NTB_LINK_UP) ? LINK_STATE_UP : LINK_STATE_DOWN); } /* Helper functions */ /* TODO: This too should really be part of the kernel */ #define EUI48_MULTICAST 1 << 0 #define EUI48_LOCALLY_ADMINISTERED 1 << 1 static void create_random_local_eui48(u_char *eaddr) { static uint8_t counter = 0; eaddr[0] = EUI48_LOCALLY_ADMINISTERED; arc4rand(&eaddr[1], 4, 0); eaddr[5] = counter++; } static device_method_t ntb_net_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ntb_net_probe), DEVMETHOD(device_attach, ntb_net_attach), DEVMETHOD(device_detach, ntb_net_detach), DEVMETHOD_END }; devclass_t ntb_net_devclass; static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods, sizeof(struct ntb_net_ctx)); DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, ntb_net_devclass, NULL, NULL); MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1); MODULE_VERSION(if_ntb, 1); diff --git a/sys/dev/sbni/if_sbni.c b/sys/dev/sbni/if_sbni.c index 62b86112b26f..267001f7897d 100644 --- a/sys/dev/sbni/if_sbni.c +++ b/sys/dev/sbni/if_sbni.c @@ -1,1278 +1,1277 @@ /*- * Copyright (c) 1997-2001 Granch, Ltd. All rights reserved. * Author: Denis I.Timofeev * * Redistributon and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Device driver for Granch SBNI12 leased line adapters * * Revision 2.0.0 1997/08/06 * Initial revision by Alexey Zverev * * Revision 2.0.1 1997/08/11 * Additional internal statistics support (tx statistics) * * Revision 2.0.2 1997/11/05 * if_bpf bug has been fixed * * Revision 2.0.3 1998/12/20 * Memory leakage has been eliminated in * the sbni_st and sbni_timeout routines. * * Revision 3.0 2000/08/10 by Yaroslav Polyakov * Support for PCI cards. 4.1 modification. * * Revision 3.1 2000/09/12 * Removed extra #defines around bpf functions * * Revision 4.0 2000/11/23 by Denis Timofeev * Completely redesigned the buffer management * * Revision 4.1 2001/01/21 * Support for PCI Dual cards and new SBNI12D-10, -11 Dual/ISA cards * * Written with reference to NE2000 driver developed by David Greenman. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void sbni_init(void *); static void sbni_init_locked(struct sbni_softc *); static void sbni_start(struct ifnet *); static void sbni_start_locked(struct ifnet *); static int sbni_ioctl(struct ifnet *, u_long, caddr_t); static void sbni_stop(struct sbni_softc *); static void handle_channel(struct sbni_softc *); static void card_start(struct sbni_softc *); static int recv_frame(struct sbni_softc *); static void send_frame(struct sbni_softc *); static int upload_data(struct sbni_softc *, u_int, u_int, u_int, u_int32_t); static int skip_tail(struct sbni_softc *, u_int, u_int32_t); static void interpret_ack(struct sbni_softc *, u_int); static void download_data(struct sbni_softc *, u_int32_t *); static void prepare_to_send(struct sbni_softc *); static void drop_xmit_queue(struct sbni_softc *); static int get_rx_buf(struct sbni_softc *); static void indicate_pkt(struct sbni_softc *); static void change_level(struct sbni_softc *); static int check_fhdr(struct sbni_softc *, u_int *, u_int *, u_int *, u_int *, u_int32_t *); static int append_frame_to_pkt(struct sbni_softc *, u_int, u_int32_t); static void timeout_change_level(struct sbni_softc *); static void send_frame_header(struct sbni_softc *, u_int32_t *); static void set_initial_values(struct sbni_softc *, struct sbni_flags); static u_int32_t calc_crc32(u_int32_t, caddr_t, u_int); static callout_func_t sbni_timeout; static __inline u_char sbni_inb(struct sbni_softc *, enum sbni_reg); static __inline void sbni_outb(struct sbni_softc *, enum sbni_reg, u_char); static __inline void sbni_insb(struct sbni_softc *, u_char *, u_int); static __inline void sbni_outsb(struct sbni_softc *, u_char *, u_int); static u_int32_t crc32tab[]; #ifdef SBNI_DUAL_COMPOUND static 
struct mtx headlist_lock; MTX_SYSINIT(headlist_lock, &headlist_lock, "sbni headlist", MTX_DEF); static struct sbni_softc *sbni_headlist; #endif /* -------------------------------------------------------------------------- */ static __inline u_char sbni_inb(struct sbni_softc *sc, enum sbni_reg reg) { return bus_space_read_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + reg); } static __inline void sbni_outb(struct sbni_softc *sc, enum sbni_reg reg, u_char value) { bus_space_write_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + reg, value); } static __inline void sbni_insb(struct sbni_softc *sc, u_char *to, u_int len) { bus_space_read_multi_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + DAT, to, len); } static __inline void sbni_outsb(struct sbni_softc *sc, u_char *from, u_int len) { bus_space_write_multi_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + DAT, from, len); } /* Valid combinations in CSR0 (for probing): VALID_DECODER 0000,0011,1011,1010 ; 0 ; - TR_REQ ; 1 ; + TR_RDY ; 2 ; - TR_RDY TR_REQ ; 3 ; + BU_EMP ; 4 ; + BU_EMP TR_REQ ; 5 ; + BU_EMP TR_RDY ; 6 ; - BU_EMP TR_RDY TR_REQ ; 7 ; + RC_RDY ; 8 ; + RC_RDY TR_REQ ; 9 ; + RC_RDY TR_RDY ; 10 ; - RC_RDY TR_RDY TR_REQ ; 11 ; - RC_RDY BU_EMP ; 12 ; - RC_RDY BU_EMP TR_REQ ; 13 ; - RC_RDY BU_EMP TR_RDY ; 14 ; - RC_RDY BU_EMP TR_RDY TR_REQ ; 15 ; - */ #define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200) int sbni_probe(struct sbni_softc *sc) { u_char csr0; csr0 = sbni_inb(sc, CSR0); if (csr0 != 0xff && csr0 != 0x00) { csr0 &= ~EN_INT; if (csr0 & BU_EMP) csr0 |= EN_INT; if (VALID_DECODER & (1 << (csr0 >> 4))) return (0); } return (ENXIO); } /* * Install interface into kernel networking data structures */ int sbni_attach(struct sbni_softc *sc, int unit, struct sbni_flags flags) { struct ifnet *ifp; u_char csr0; ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) return (ENOMEM); sbni_outb(sc, CSR0, 0); set_initial_values(sc, flags); /* Initialize ifnet structure */ ifp->if_softc = sc; if_initname(ifp, "sbni", unit); ifp->if_init = sbni_init; ifp->if_start = sbni_start; ifp->if_ioctl = sbni_ioctl; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); /* report real baud rate */ csr0 = sbni_inb(sc, CSR0); ifp->if_baudrate = (csr0 & 0x01 ? 
500000 : 2000000) / (1 << flags.rate); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | - IFF_NEEDSEPOCH; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; mtx_init(&sc->lock, ifp->if_xname, MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->wch, &sc->lock, 0); ether_ifattach(ifp, sc->enaddr); /* device attach does transition from UNCONFIGURED to IDLE state */ if_printf(ifp, "speed %ju, rxl ", (uintmax_t)ifp->if_baudrate); if (sc->delta_rxl) printf("auto\n"); else printf("%d (fixed)\n", sc->cur_rxl_index); return (0); } void sbni_detach(struct sbni_softc *sc) { SBNI_LOCK(sc); sbni_stop(sc); SBNI_UNLOCK(sc); callout_drain(&sc->wch); ether_ifdetach(sc->ifp); if (sc->irq_handle) bus_teardown_intr(sc->dev, sc->irq_res, sc->irq_handle); mtx_destroy(&sc->lock); if_free(sc->ifp); } void sbni_release_resources(struct sbni_softc *sc) { if (sc->irq_res) bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); if (sc->io_res && sc->io_off == 0) bus_release_resource(sc->dev, SYS_RES_IOPORT, sc->io_rid, sc->io_res); } /* -------------------------------------------------------------------------- */ static void sbni_init(void *xsc) { struct sbni_softc *sc; sc = (struct sbni_softc *)xsc; SBNI_LOCK(sc); sbni_init_locked(sc); SBNI_UNLOCK(sc); } static void sbni_init_locked(struct sbni_softc *sc) { struct ifnet *ifp; ifp = sc->ifp; /* * kludge to avoid multiple initialization when more than once * protocols configured */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; card_start(sc); callout_reset(&sc->wch, hz/SBNI_HZ, sbni_timeout, sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; /* attempt to start output */ sbni_start_locked(ifp); } static void sbni_start(struct ifnet *ifp) { struct sbni_softc *sc = ifp->if_softc; SBNI_LOCK(sc); sbni_start_locked(ifp); SBNI_UNLOCK(sc); } static void sbni_start_locked(struct ifnet *ifp) { struct sbni_softc *sc = ifp->if_softc; if (sc->tx_frameno == 0) prepare_to_send(sc); } static void sbni_stop(struct sbni_softc *sc) { sbni_outb(sc, CSR0, 0); drop_xmit_queue(sc); if (sc->rx_buf_p) { m_freem(sc->rx_buf_p); sc->rx_buf_p = NULL; } callout_stop(&sc->wch); sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } /* -------------------------------------------------------------------------- */ /* interrupt handler */ /* * SBNI12D-10, -11/ISA boards within "common interrupt" mode could not * be looked as two independent single-channel devices. Every channel seems * as Ethernet interface but interrupt handler must be common. Really, first * channel ("master") driver only registers the handler. In it's struct softc * it has got pointer to "slave" channel's struct softc and handles that's * interrupts too. * softc of successfully attached ISA SBNI boards is linked to list. * While next board driver is initialized, it scans this list. If one * has found softc with same irq and ioaddr different by 4 then it assumes * this board to be "master". 
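 * Under SBNI_DUAL_COMPOUND that list is sbni_headlist (protected by
 * headlist_lock): sbni_add() links a newly attached channel onto it and
 * connect_to_master() pairs softcs whose I/O bases differ by 4. sbni_intr()
 * below then services the master and, if present, the slave channel for as
 * long as either reports RC_RDY or TR_RDY in CSR0.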
*/ void sbni_intr(void *arg) { struct sbni_softc *sc; int repeat; sc = (struct sbni_softc *)arg; do { repeat = 0; SBNI_LOCK(sc); if (sbni_inb(sc, CSR0) & (RC_RDY | TR_RDY)) { handle_channel(sc); repeat = 1; } SBNI_UNLOCK(sc); if (sc->slave_sc) { /* second channel present */ SBNI_LOCK(sc->slave_sc); if (sbni_inb(sc->slave_sc, CSR0) & (RC_RDY | TR_RDY)) { handle_channel(sc->slave_sc); repeat = 1; } SBNI_UNLOCK(sc->slave_sc); } } while (repeat); } static void handle_channel(struct sbni_softc *sc) { int req_ans; u_char csr0; sbni_outb(sc, CSR0, (sbni_inb(sc, CSR0) & ~EN_INT) | TR_REQ); sc->timer_ticks = CHANGE_LEVEL_START_TICKS; for (;;) { csr0 = sbni_inb(sc, CSR0); if ((csr0 & (RC_RDY | TR_RDY)) == 0) break; req_ans = !(sc->state & FL_PREV_OK); if (csr0 & RC_RDY) req_ans = recv_frame(sc); /* * TR_RDY always equals 1 here because we have owned the marker, * and we set TR_REQ when disabled interrupts */ csr0 = sbni_inb(sc, CSR0); if ((csr0 & TR_RDY) == 0 || (csr0 & RC_RDY) != 0) if_printf(sc->ifp, "internal error!\n"); /* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */ if (req_ans || sc->tx_frameno != 0) send_frame(sc); else { /* send the marker without any data */ sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) & ~TR_REQ); } } sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) | EN_INT); } /* * Routine returns 1 if it need to acknoweledge received frame. * Empty frame received without errors won't be acknoweledged. */ static int recv_frame(struct sbni_softc *sc) { u_int32_t crc; u_int framelen, frameno, ack; u_int is_first, frame_ok; crc = CRC32_INITIAL; if (check_fhdr(sc, &framelen, &frameno, &ack, &is_first, &crc)) { frame_ok = framelen > 4 ? upload_data(sc, framelen, frameno, is_first, crc) : skip_tail(sc, framelen, crc); if (frame_ok) interpret_ack(sc, ack); } else { framelen = 0; frame_ok = 0; } sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) ^ CT_ZER); if (frame_ok) { sc->state |= FL_PREV_OK; if (framelen > 4) sc->in_stats.all_rx_number++; } else { sc->state &= ~FL_PREV_OK; change_level(sc); sc->in_stats.all_rx_number++; sc->in_stats.bad_rx_number++; } return (!frame_ok || framelen > 4); } static void send_frame(struct sbni_softc *sc) { u_int32_t crc; u_char csr0; crc = CRC32_INITIAL; if (sc->state & FL_NEED_RESEND) { /* if frame was sended but not ACK'ed - resend it */ if (sc->trans_errors) { sc->trans_errors--; if (sc->framelen != 0) sc->in_stats.resend_tx_number++; } else { /* cannot xmit with many attempts */ drop_xmit_queue(sc); goto do_send; } } else sc->trans_errors = TR_ERROR_COUNT; send_frame_header(sc, &crc); sc->state |= FL_NEED_RESEND; /* * FL_NEED_RESEND will be cleared after ACK, but if empty * frame sended then in prepare_to_send next frame */ if (sc->framelen) { download_data(sc, &crc); sc->in_stats.all_tx_number++; sc->state |= FL_WAIT_ACK; } sbni_outsb(sc, (u_char *)&crc, sizeof crc); do_send: csr0 = sbni_inb(sc, CSR0); sbni_outb(sc, CSR0, csr0 & ~TR_REQ); if (sc->tx_frameno) { /* next frame exists - request to send */ sbni_outb(sc, CSR0, csr0 | TR_REQ); } } static void download_data(struct sbni_softc *sc, u_int32_t *crc_p) { struct mbuf *m; caddr_t data_p; u_int data_len, pos, slice; data_p = NULL; /* initialized to avoid warn */ pos = 0; for (m = sc->tx_buf_p; m != NULL && pos < sc->pktlen; m = m->m_next) { if (pos + m->m_len > sc->outpos) { data_len = m->m_len - (sc->outpos - pos); data_p = mtod(m, caddr_t) + (sc->outpos - pos); goto do_copy; } else pos += m->m_len; } data_len = 0; do_copy: pos = 0; do { if (data_len) { slice = min(data_len, sc->framelen - pos); sbni_outsb(sc, data_p, 
slice); *crc_p = calc_crc32(*crc_p, data_p, slice); pos += slice; if (data_len -= slice) data_p += slice; else { do { m = m->m_next; } while (m != NULL && m->m_len == 0); if (m) { data_len = m->m_len; data_p = mtod(m, caddr_t); } } } else { /* frame too short - zero padding */ pos = sc->framelen - pos; while (pos--) { sbni_outb(sc, DAT, 0); *crc_p = CRC32(0, *crc_p); } return; } } while (pos < sc->framelen); } static int upload_data(struct sbni_softc *sc, u_int framelen, u_int frameno, u_int is_first, u_int32_t crc) { int frame_ok; if (is_first) { sc->wait_frameno = frameno; sc->inppos = 0; } if (sc->wait_frameno == frameno) { if (sc->inppos + framelen <= ETHER_MAX_LEN) { frame_ok = append_frame_to_pkt(sc, framelen, crc); /* * if CRC is right but framelen incorrect then transmitter * error was occurred... drop entire packet */ } else if ((frame_ok = skip_tail(sc, framelen, crc)) != 0) { sc->wait_frameno = 0; sc->inppos = 0; if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); /* now skip all frames until is_first != 0 */ } } else frame_ok = skip_tail(sc, framelen, crc); if (is_first && !frame_ok) { /* * Frame has been violated, but we have stored * is_first already... Drop entire packet. */ sc->wait_frameno = 0; if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); } return (frame_ok); } static __inline void send_complete(struct sbni_softc *); static __inline void send_complete(struct sbni_softc *sc) { m_freem(sc->tx_buf_p); sc->tx_buf_p = NULL; if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1); } static void interpret_ack(struct sbni_softc *sc, u_int ack) { if (ack == FRAME_SENT_OK) { sc->state &= ~FL_NEED_RESEND; if (sc->state & FL_WAIT_ACK) { sc->outpos += sc->framelen; if (--sc->tx_frameno) { sc->framelen = min( sc->maxframe, sc->pktlen - sc->outpos); } else { send_complete(sc); prepare_to_send(sc); } } } sc->state &= ~FL_WAIT_ACK; } /* * Glue received frame with previous fragments of packet. * Indicate packet when last frame would be accepted. */ static int append_frame_to_pkt(struct sbni_softc *sc, u_int framelen, u_int32_t crc) { caddr_t p; if (sc->inppos + framelen > ETHER_MAX_LEN) return (0); if (!sc->rx_buf_p && !get_rx_buf(sc)) return (0); p = sc->rx_buf_p->m_data + sc->inppos; sbni_insb(sc, p, framelen); if (calc_crc32(crc, p, framelen) != CRC32_REMAINDER) return (0); sc->inppos += framelen - 4; if (--sc->wait_frameno == 0) { /* last frame received */ indicate_pkt(sc); if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1); } return (1); } /* * Prepare to start output on adapter. Current priority must be set to splimp * before this routine is called. * Transmitter will be actually activated when marker has been accepted. */ static void prepare_to_send(struct sbni_softc *sc) { struct mbuf *m; u_int len; /* sc->tx_buf_p == NULL here! */ if (sc->tx_buf_p) printf("sbni: memory leak!\n"); sc->outpos = 0; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); for (;;) { IF_DEQUEUE(&sc->ifp->if_snd, sc->tx_buf_p); if (!sc->tx_buf_p) { /* nothing to transmit... 
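 * reset the frame bookkeeping and clear IFF_DRV_OACTIVE before returning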
*/ sc->pktlen = 0; sc->tx_frameno = 0; sc->framelen = 0; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; return; } for (len = 0, m = sc->tx_buf_p; m; m = m->m_next) len += m->m_len; if (len != 0) break; m_freem(sc->tx_buf_p); } if (len < SBNI_MIN_LEN) len = SBNI_MIN_LEN; sc->pktlen = len; sc->tx_frameno = howmany(len, sc->maxframe); sc->framelen = min(len, sc->maxframe); sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) | TR_REQ); sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; BPF_MTAP(sc->ifp, sc->tx_buf_p); } static void drop_xmit_queue(struct sbni_softc *sc) { struct mbuf *m; if (sc->tx_buf_p) { m_freem(sc->tx_buf_p); sc->tx_buf_p = NULL; if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); } for (;;) { IF_DEQUEUE(&sc->ifp->if_snd, m); if (m == NULL) break; m_freem(m); if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); } sc->tx_frameno = 0; sc->framelen = 0; sc->outpos = 0; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } static void send_frame_header(struct sbni_softc *sc, u_int32_t *crc_p) { u_int32_t crc; u_int len_field; u_char value; crc = *crc_p; len_field = sc->framelen + 6; /* CRC + frameno + reserved */ if (sc->state & FL_NEED_RESEND) len_field |= FRAME_RETRY; /* non-first attempt... */ if (sc->outpos == 0) len_field |= FRAME_FIRST; len_field |= (sc->state & FL_PREV_OK) ? FRAME_SENT_OK : FRAME_SENT_BAD; sbni_outb(sc, DAT, SBNI_SIG); value = (u_char)len_field; sbni_outb(sc, DAT, value); crc = CRC32(value, crc); value = (u_char)(len_field >> 8); sbni_outb(sc, DAT, value); crc = CRC32(value, crc); sbni_outb(sc, DAT, sc->tx_frameno); crc = CRC32(sc->tx_frameno, crc); sbni_outb(sc, DAT, 0); crc = CRC32(0, crc); *crc_p = crc; } /* * if frame tail not needed (incorrect number or received twice), * it won't store, but CRC will be calculated */ static int skip_tail(struct sbni_softc *sc, u_int tail_len, u_int32_t crc) { while (tail_len--) crc = CRC32(sbni_inb(sc, DAT), crc); return (crc == CRC32_REMAINDER); } static int check_fhdr(struct sbni_softc *sc, u_int *framelen, u_int *frameno, u_int *ack, u_int *is_first, u_int32_t *crc_p) { u_int32_t crc; u_char value; crc = *crc_p; if (sbni_inb(sc, DAT) != SBNI_SIG) return (0); value = sbni_inb(sc, DAT); *framelen = (u_int)value; crc = CRC32(value, crc); value = sbni_inb(sc, DAT); *framelen |= ((u_int)value) << 8; crc = CRC32(value, crc); *ack = *framelen & FRAME_ACK_MASK; *is_first = (*framelen & FRAME_FIRST) != 0; if ((*framelen &= FRAME_LEN_MASK) < 6 || *framelen > SBNI_MAX_FRAME - 3) return (0); value = sbni_inb(sc, DAT); *frameno = (u_int)value; crc = CRC32(value, crc); crc = CRC32(sbni_inb(sc, DAT), crc); /* reserved byte */ *framelen -= 2; *crc_p = crc; return (1); } static int get_rx_buf(struct sbni_softc *sc) { struct mbuf *m; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { if_printf(sc->ifp, "cannot allocate header mbuf\n"); return (0); } /* * We always put the received packet in a single buffer - * either with just an mbuf header or in a cluster attached * to the header. The +2 is to compensate for the alignment * fixup below. */ if (ETHER_MAX_LEN + 2 > MHLEN) { /* Attach an mbuf cluster */ if (!(MCLGET(m, M_NOWAIT))) { m_freem(m); return (0); } } m->m_pkthdr.len = m->m_len = ETHER_MAX_LEN + 2; /* * The +2 is to longword align the start of the real packet. * (sizeof ether_header == 14) * This is important for NFS. 
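 * (2 + the 14-byte Ethernet header = 16, so the IP header and the
 * payload behind it end up longword aligned.)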
*/ m_adj(m, 2); sc->rx_buf_p = m; return (1); } static void indicate_pkt(struct sbni_softc *sc) { struct ifnet *ifp = sc->ifp; struct mbuf *m; m = sc->rx_buf_p; m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = sc->inppos; sc->rx_buf_p = NULL; SBNI_UNLOCK(sc); (*ifp->if_input)(ifp, m); SBNI_LOCK(sc); } /* -------------------------------------------------------------------------- */ /* * Routine checks periodically wire activity and regenerates marker if * connect was inactive for a long time. */ static void sbni_timeout(void *xsc) { struct sbni_softc *sc; u_char csr0; sc = (struct sbni_softc *)xsc; SBNI_ASSERT_LOCKED(sc); csr0 = sbni_inb(sc, CSR0); if (csr0 & RC_CHK) { if (sc->timer_ticks) { if (csr0 & (RC_RDY | BU_EMP)) /* receiving not active */ sc->timer_ticks--; } else { sc->in_stats.timeout_number++; if (sc->delta_rxl) timeout_change_level(sc); sbni_outb(sc, CSR1, *(u_char *)&sc->csr1 | PR_RES); csr0 = sbni_inb(sc, CSR0); } } sbni_outb(sc, CSR0, csr0 | RC_CHK); callout_reset(&sc->wch, hz/SBNI_HZ, sbni_timeout, sc); } /* -------------------------------------------------------------------------- */ static void card_start(struct sbni_softc *sc) { sc->timer_ticks = CHANGE_LEVEL_START_TICKS; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); sc->state |= FL_PREV_OK; sc->inppos = 0; sc->wait_frameno = 0; sbni_outb(sc, CSR1, *(u_char *)&sc->csr1 | PR_RES); sbni_outb(sc, CSR0, EN_INT); } /* -------------------------------------------------------------------------- */ static u_char rxl_tab[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, 0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f }; #define SIZE_OF_TIMEOUT_RXL_TAB 4 static u_char timeout_rxl_tab[] = { 0x03, 0x05, 0x08, 0x0b }; static void set_initial_values(struct sbni_softc *sc, struct sbni_flags flags) { if (flags.fixed_rxl) { sc->delta_rxl = 0; /* disable receive level autodetection */ sc->cur_rxl_index = flags.rxl; } else { sc->delta_rxl = DEF_RXL_DELTA; sc->cur_rxl_index = DEF_RXL; } sc->csr1.rate = flags.fixed_rate ? 
flags.rate : DEFAULT_RATE; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index]; sc->maxframe = DEFAULT_FRAME_LEN; /* * generate Ethernet address (0x00ff01xxxxxx) */ *(u_int16_t *) sc->enaddr = htons(0x00ff); if (flags.mac_addr) { *(u_int32_t *) (sc->enaddr + 2) = htonl(flags.mac_addr | 0x01000000); } else { *(u_char *) (sc->enaddr + 2) = 0x01; read_random(sc->enaddr + 3, 3); } } #ifdef SBNI_DUAL_COMPOUND void sbni_add(struct sbni_softc *sc) { mtx_lock(&headlist_lock); sc->link = sbni_headlist; sbni_headlist = sc; mtx_unlock(&headlist_lock); } struct sbni_softc * connect_to_master(struct sbni_softc *sc) { struct sbni_softc *p, *p_prev; mtx_lock(&headlist_lock); for (p = sbni_headlist, p_prev = NULL; p; p_prev = p, p = p->link) { if (rman_get_start(p->io_res) == rman_get_start(sc->io_res) + 4 || rman_get_start(p->io_res) == rman_get_start(sc->io_res) - 4) { p->slave_sc = sc; if (p_prev) p_prev->link = p->link; else sbni_headlist = p->link; mtx_unlock(&headlist_lock); return p; } } mtx_unlock(&headlist_lock); return (NULL); } #endif /* SBNI_DUAL_COMPOUND */ /* Receive level auto-selection */ static void change_level(struct sbni_softc *sc) { if (sc->delta_rxl == 0) /* do not auto-negotiate RxL */ return; if (sc->cur_rxl_index == 0) sc->delta_rxl = 1; else if (sc->cur_rxl_index == 15) sc->delta_rxl = -1; else if (sc->cur_rxl_rcvd < sc->prev_rxl_rcvd) sc->delta_rxl = -sc->delta_rxl; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index += sc->delta_rxl]; sbni_inb(sc, CSR0); /* it needed for PCI cards */ sbni_outb(sc, CSR1, *(u_char *)&sc->csr1); sc->prev_rxl_rcvd = sc->cur_rxl_rcvd; sc->cur_rxl_rcvd = 0; } static void timeout_change_level(struct sbni_softc *sc) { sc->cur_rxl_index = timeout_rxl_tab[sc->timeout_rxl]; if (++sc->timeout_rxl >= 4) sc->timeout_rxl = 0; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index]; sbni_inb(sc, CSR0); sbni_outb(sc, CSR1, *(u_char *)&sc->csr1); sc->prev_rxl_rcvd = sc->cur_rxl_rcvd; sc->cur_rxl_rcvd = 0; } /* -------------------------------------------------------------------------- */ /* * Process an ioctl request. This code needs some work - it looks * pretty ugly. */ static int sbni_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct sbni_softc *sc; struct ifreq *ifr; struct thread *td; struct sbni_in_stats *in_stats; struct sbni_flags flags; int error; sc = ifp->if_softc; ifr = (struct ifreq *)data; td = curthread; error = 0; switch (command) { case SIOCSIFFLAGS: /* * If the interface is marked up and stopped, then start it. * If it is marked down and running, then stop it. */ SBNI_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) sbni_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { sbni_stop(sc); } } SBNI_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: /* * Multicast list has changed; set the hardware filter * accordingly. 
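 * Nothing is programmed here; the request is simply accepted
 * (error = 0).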
*/ error = 0; /* if (ifr == NULL) error = EAFNOSUPPORT; */ break; /* * SBNI specific ioctl */ case SIOCGHWFLAGS: /* get flags */ SBNI_LOCK(sc); bcopy((caddr_t)IF_LLADDR(sc->ifp)+3, (caddr_t) &flags, 3); flags.rxl = sc->cur_rxl_index; flags.rate = sc->csr1.rate; flags.fixed_rxl = (sc->delta_rxl == 0); flags.fixed_rate = 1; SBNI_UNLOCK(sc); bcopy(&flags, &ifr->ifr_ifru, sizeof(flags)); break; case SIOCGINSTATS: in_stats = malloc(sizeof(struct sbni_in_stats), M_DEVBUF, M_WAITOK); SBNI_LOCK(sc); bcopy(&sc->in_stats, in_stats, sizeof(struct sbni_in_stats)); SBNI_UNLOCK(sc); error = copyout(in_stats, ifr_data_get_ptr(ifr), sizeof(struct sbni_in_stats)); free(in_stats, M_DEVBUF); break; case SIOCSHWFLAGS: /* set flags */ /* root only */ error = priv_check(td, PRIV_DRIVER); if (error) break; bcopy(&ifr->ifr_ifru, &flags, sizeof(flags)); SBNI_LOCK(sc); if (flags.fixed_rxl) { sc->delta_rxl = 0; sc->cur_rxl_index = flags.rxl; } else { sc->delta_rxl = DEF_RXL_DELTA; sc->cur_rxl_index = DEF_RXL; } sc->csr1.rxl = rxl_tab[sc->cur_rxl_index]; sc->csr1.rate = flags.fixed_rate ? flags.rate : DEFAULT_RATE; if (flags.mac_addr) bcopy((caddr_t) &flags, (caddr_t) IF_LLADDR(sc->ifp)+3, 3); /* Don't be afraid... */ sbni_outb(sc, CSR1, *(char*)(&sc->csr1) | PR_RES); SBNI_UNLOCK(sc); break; case SIOCRINSTATS: SBNI_LOCK(sc); if (!(error = priv_check(td, PRIV_DRIVER))) /* root only */ bzero(&sc->in_stats, sizeof(struct sbni_in_stats)); SBNI_UNLOCK(sc); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } /* -------------------------------------------------------------------------- */ static u_int32_t calc_crc32(u_int32_t crc, caddr_t p, u_int len) { while (len--) crc = CRC32(*p++, crc); return (crc); } static u_int32_t crc32tab[] __aligned(8) = { 0xD202EF8D, 0xA505DF1B, 0x3C0C8EA1, 0x4B0BBE37, 0xD56F2B94, 0xA2681B02, 0x3B614AB8, 0x4C667A2E, 0xDCD967BF, 0xABDE5729, 0x32D70693, 0x45D03605, 0xDBB4A3A6, 0xACB39330, 0x35BAC28A, 0x42BDF21C, 0xCFB5FFE9, 0xB8B2CF7F, 0x21BB9EC5, 0x56BCAE53, 0xC8D83BF0, 0xBFDF0B66, 0x26D65ADC, 0x51D16A4A, 0xC16E77DB, 0xB669474D, 0x2F6016F7, 0x58672661, 0xC603B3C2, 0xB1048354, 0x280DD2EE, 0x5F0AE278, 0xE96CCF45, 0x9E6BFFD3, 0x0762AE69, 0x70659EFF, 0xEE010B5C, 0x99063BCA, 0x000F6A70, 0x77085AE6, 0xE7B74777, 0x90B077E1, 0x09B9265B, 0x7EBE16CD, 0xE0DA836E, 0x97DDB3F8, 0x0ED4E242, 0x79D3D2D4, 0xF4DBDF21, 0x83DCEFB7, 0x1AD5BE0D, 0x6DD28E9B, 0xF3B61B38, 0x84B12BAE, 0x1DB87A14, 0x6ABF4A82, 0xFA005713, 0x8D076785, 0x140E363F, 0x630906A9, 0xFD6D930A, 0x8A6AA39C, 0x1363F226, 0x6464C2B0, 0xA4DEAE1D, 0xD3D99E8B, 0x4AD0CF31, 0x3DD7FFA7, 0xA3B36A04, 0xD4B45A92, 0x4DBD0B28, 0x3ABA3BBE, 0xAA05262F, 0xDD0216B9, 0x440B4703, 0x330C7795, 0xAD68E236, 0xDA6FD2A0, 0x4366831A, 0x3461B38C, 0xB969BE79, 0xCE6E8EEF, 0x5767DF55, 0x2060EFC3, 0xBE047A60, 0xC9034AF6, 0x500A1B4C, 0x270D2BDA, 0xB7B2364B, 0xC0B506DD, 0x59BC5767, 0x2EBB67F1, 0xB0DFF252, 0xC7D8C2C4, 0x5ED1937E, 0x29D6A3E8, 0x9FB08ED5, 0xE8B7BE43, 0x71BEEFF9, 0x06B9DF6F, 0x98DD4ACC, 0xEFDA7A5A, 0x76D32BE0, 0x01D41B76, 0x916B06E7, 0xE66C3671, 0x7F6567CB, 0x0862575D, 0x9606C2FE, 0xE101F268, 0x7808A3D2, 0x0F0F9344, 0x82079EB1, 0xF500AE27, 0x6C09FF9D, 0x1B0ECF0B, 0x856A5AA8, 0xF26D6A3E, 0x6B643B84, 0x1C630B12, 0x8CDC1683, 0xFBDB2615, 0x62D277AF, 0x15D54739, 0x8BB1D29A, 0xFCB6E20C, 0x65BFB3B6, 0x12B88320, 0x3FBA6CAD, 0x48BD5C3B, 0xD1B40D81, 0xA6B33D17, 0x38D7A8B4, 0x4FD09822, 0xD6D9C998, 0xA1DEF90E, 0x3161E49F, 0x4666D409, 0xDF6F85B3, 0xA868B525, 0x360C2086, 0x410B1010, 0xD80241AA, 0xAF05713C, 0x220D7CC9, 0x550A4C5F, 0xCC031DE5, 0xBB042D73, 
0x2560B8D0, 0x52678846, 0xCB6ED9FC, 0xBC69E96A, 0x2CD6F4FB, 0x5BD1C46D, 0xC2D895D7, 0xB5DFA541, 0x2BBB30E2, 0x5CBC0074, 0xC5B551CE, 0xB2B26158, 0x04D44C65, 0x73D37CF3, 0xEADA2D49, 0x9DDD1DDF, 0x03B9887C, 0x74BEB8EA, 0xEDB7E950, 0x9AB0D9C6, 0x0A0FC457, 0x7D08F4C1, 0xE401A57B, 0x930695ED, 0x0D62004E, 0x7A6530D8, 0xE36C6162, 0x946B51F4, 0x19635C01, 0x6E646C97, 0xF76D3D2D, 0x806A0DBB, 0x1E0E9818, 0x6909A88E, 0xF000F934, 0x8707C9A2, 0x17B8D433, 0x60BFE4A5, 0xF9B6B51F, 0x8EB18589, 0x10D5102A, 0x67D220BC, 0xFEDB7106, 0x89DC4190, 0x49662D3D, 0x3E611DAB, 0xA7684C11, 0xD06F7C87, 0x4E0BE924, 0x390CD9B2, 0xA0058808, 0xD702B89E, 0x47BDA50F, 0x30BA9599, 0xA9B3C423, 0xDEB4F4B5, 0x40D06116, 0x37D75180, 0xAEDE003A, 0xD9D930AC, 0x54D13D59, 0x23D60DCF, 0xBADF5C75, 0xCDD86CE3, 0x53BCF940, 0x24BBC9D6, 0xBDB2986C, 0xCAB5A8FA, 0x5A0AB56B, 0x2D0D85FD, 0xB404D447, 0xC303E4D1, 0x5D677172, 0x2A6041E4, 0xB369105E, 0xC46E20C8, 0x72080DF5, 0x050F3D63, 0x9C066CD9, 0xEB015C4F, 0x7565C9EC, 0x0262F97A, 0x9B6BA8C0, 0xEC6C9856, 0x7CD385C7, 0x0BD4B551, 0x92DDE4EB, 0xE5DAD47D, 0x7BBE41DE, 0x0CB97148, 0x95B020F2, 0xE2B71064, 0x6FBF1D91, 0x18B82D07, 0x81B17CBD, 0xF6B64C2B, 0x68D2D988, 0x1FD5E91E, 0x86DCB8A4, 0xF1DB8832, 0x616495A3, 0x1663A535, 0x8F6AF48F, 0xF86DC419, 0x660951BA, 0x110E612C, 0x88073096, 0xFF000000 }; diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c index ceb3ffaaf2b4..b388e43d92a6 100644 --- a/sys/dev/virtio/network/if_vtnet.c +++ b/sys/dev/virtio/network/if_vtnet.c @@ -1,4090 +1,4091 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011, Bryan Venteicher * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Driver for VirtIO network devices. 
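 * The device provides paired receive/transmit virtqueues (several pairs when
 * VIRTIO_NET_F_MQ is negotiated) plus an optional control virtqueue used for
 * MAC, RX-mode and VLAN filter programming.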
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "virtio_if.h" #include "opt_inet.h" #include "opt_inet6.h" static int vtnet_modevent(module_t, int, void *); static int vtnet_probe(device_t); static int vtnet_attach(device_t); static int vtnet_detach(device_t); static int vtnet_suspend(device_t); static int vtnet_resume(device_t); static int vtnet_shutdown(device_t); static int vtnet_attach_completed(device_t); static int vtnet_config_change(device_t); static void vtnet_negotiate_features(struct vtnet_softc *); static void vtnet_setup_features(struct vtnet_softc *); static int vtnet_init_rxq(struct vtnet_softc *, int); static int vtnet_init_txq(struct vtnet_softc *, int); static int vtnet_alloc_rxtx_queues(struct vtnet_softc *); static void vtnet_free_rxtx_queues(struct vtnet_softc *); static int vtnet_alloc_rx_filters(struct vtnet_softc *); static void vtnet_free_rx_filters(struct vtnet_softc *); static int vtnet_alloc_virtqueues(struct vtnet_softc *); static int vtnet_setup_interface(struct vtnet_softc *); static int vtnet_change_mtu(struct vtnet_softc *, int); static int vtnet_ioctl(struct ifnet *, u_long, caddr_t); static uint64_t vtnet_get_counter(struct ifnet *, ift_counter); static int vtnet_rxq_populate(struct vtnet_rxq *); static void vtnet_rxq_free_mbufs(struct vtnet_rxq *); static struct mbuf * vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **); static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *, struct mbuf *, int); static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int); static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *); static int vtnet_rxq_new_buf(struct vtnet_rxq *); static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *, struct virtio_net_hdr *); static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int); static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *); static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int); static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *, struct virtio_net_hdr *); static int vtnet_rxq_eof(struct vtnet_rxq *); static void vtnet_rx_vq_intr(void *); static void vtnet_rxq_tq_intr(void *, int); static int vtnet_txq_below_threshold(struct vtnet_txq *); static int vtnet_txq_notify(struct vtnet_txq *); static void vtnet_txq_free_mbufs(struct vtnet_txq *); static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *, int *, int *, int *); static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int, int, struct virtio_net_hdr *); static struct mbuf * vtnet_txq_offload(struct vtnet_txq *, struct mbuf *, struct virtio_net_hdr *); static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **, struct vtnet_tx_header *); static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int); #ifdef VTNET_LEGACY_TX static void vtnet_start_locked(struct vtnet_txq *, struct ifnet *); static void vtnet_start(struct ifnet *); #else static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *); static int vtnet_txq_mq_start(struct ifnet *, struct mbuf *); static void vtnet_txq_tq_deferred(void *, int); #endif static void 
vtnet_txq_start(struct vtnet_txq *); static void vtnet_txq_tq_intr(void *, int); static int vtnet_txq_eof(struct vtnet_txq *); static void vtnet_tx_vq_intr(void *); static void vtnet_tx_start_all(struct vtnet_softc *); #ifndef VTNET_LEGACY_TX static void vtnet_qflush(struct ifnet *); #endif static int vtnet_watchdog(struct vtnet_txq *); static void vtnet_accum_stats(struct vtnet_softc *, struct vtnet_rxq_stats *, struct vtnet_txq_stats *); static void vtnet_tick(void *); static void vtnet_start_taskqueues(struct vtnet_softc *); static void vtnet_free_taskqueues(struct vtnet_softc *); static void vtnet_drain_taskqueues(struct vtnet_softc *); static void vtnet_drain_rxtx_queues(struct vtnet_softc *); static void vtnet_stop_rendezvous(struct vtnet_softc *); static void vtnet_stop(struct vtnet_softc *); static int vtnet_virtio_reinit(struct vtnet_softc *); static void vtnet_init_rx_filters(struct vtnet_softc *); static int vtnet_init_rx_queues(struct vtnet_softc *); static int vtnet_init_tx_queues(struct vtnet_softc *); static int vtnet_init_rxtx_queues(struct vtnet_softc *); static void vtnet_set_active_vq_pairs(struct vtnet_softc *); static int vtnet_reinit(struct vtnet_softc *); static void vtnet_init_locked(struct vtnet_softc *); static void vtnet_init(void *); static void vtnet_free_ctrl_vq(struct vtnet_softc *); static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *, struct sglist *, int, int); static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *); static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t); static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int); static int vtnet_set_promisc(struct vtnet_softc *, int); static int vtnet_set_allmulti(struct vtnet_softc *, int); static void vtnet_attach_disable_promisc(struct vtnet_softc *); static void vtnet_rx_filter(struct vtnet_softc *); static void vtnet_rx_filter_mac(struct vtnet_softc *); static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t); static void vtnet_rx_filter_vlan(struct vtnet_softc *); static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t); static void vtnet_register_vlan(void *, struct ifnet *, uint16_t); static void vtnet_unregister_vlan(void *, struct ifnet *, uint16_t); static int vtnet_is_link_up(struct vtnet_softc *); static void vtnet_update_link_status(struct vtnet_softc *); static int vtnet_ifmedia_upd(struct ifnet *); static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *); static void vtnet_get_hwaddr(struct vtnet_softc *); static void vtnet_set_hwaddr(struct vtnet_softc *); static void vtnet_vlan_tag_remove(struct mbuf *); static void vtnet_set_rx_process_limit(struct vtnet_softc *); static void vtnet_set_tx_intr_threshold(struct vtnet_softc *); static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct vtnet_rxq *); static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct vtnet_txq *); static void vtnet_setup_queue_sysctl(struct vtnet_softc *); static void vtnet_setup_sysctl(struct vtnet_softc *); static int vtnet_rxq_enable_intr(struct vtnet_rxq *); static void vtnet_rxq_disable_intr(struct vtnet_rxq *); static int vtnet_txq_enable_intr(struct vtnet_txq *); static void vtnet_txq_disable_intr(struct vtnet_txq *); static void vtnet_enable_rx_interrupts(struct vtnet_softc *); static void vtnet_enable_tx_interrupts(struct vtnet_softc *); static void vtnet_enable_interrupts(struct vtnet_softc *); static void vtnet_disable_rx_interrupts(struct vtnet_softc *); static void 
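/*
 * The hw.vtnet.* knobs declared below are read-only boot tunables
 * (CTLFLAG_RDTUN); set them from /boot/loader.conf, e.g. (illustrative
 * values):
 *
 *	hw.vtnet.lro_disable=1
 *	hw.vtnet.mq_max_pairs=4
 */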
vtnet_disable_tx_interrupts(struct vtnet_softc *); static void vtnet_disable_interrupts(struct vtnet_softc *); static int vtnet_tunable_int(struct vtnet_softc *, const char *, int); DEBUGNET_DEFINE(vtnet); /* Tunables. */ static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VNET driver parameters"); static int vtnet_csum_disable = 0; TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable); SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN, &vtnet_csum_disable, 0, "Disables receive and send checksum offload"); static int vtnet_tso_disable = 0; TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable); SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable, 0, "Disables TCP Segmentation Offload"); static int vtnet_lro_disable = 0; TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable); SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable, 0, "Disables TCP Large Receive Offload"); static int vtnet_mq_disable = 0; TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable); SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable, 0, "Disables Multi Queue support"); static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS; TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs); SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN, &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs"); static int vtnet_rx_process_limit = 512; TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit); SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &vtnet_rx_process_limit, 0, "Limits the number RX segments processed in a single pass"); static uma_zone_t vtnet_tx_header_zone; static struct virtio_feature_desc vtnet_feature_desc[] = { { VIRTIO_NET_F_CSUM, "TxChecksum" }, { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" }, { VIRTIO_NET_F_MAC, "MacAddress" }, { VIRTIO_NET_F_GSO, "TxAllGSO" }, { VIRTIO_NET_F_GUEST_TSO4, "RxTSOv4" }, { VIRTIO_NET_F_GUEST_TSO6, "RxTSOv6" }, { VIRTIO_NET_F_GUEST_ECN, "RxECN" }, { VIRTIO_NET_F_GUEST_UFO, "RxUFO" }, { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" }, { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" }, { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" }, { VIRTIO_NET_F_HOST_UFO, "TxUFO" }, { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" }, { VIRTIO_NET_F_STATUS, "Status" }, { VIRTIO_NET_F_CTRL_VQ, "ControlVq" }, { VIRTIO_NET_F_CTRL_RX, "RxMode" }, { VIRTIO_NET_F_CTRL_VLAN, "VLanFilter" }, { VIRTIO_NET_F_CTRL_RX_EXTRA, "RxModeExtra" }, { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, { VIRTIO_NET_F_MQ, "Multiqueue" }, { VIRTIO_NET_F_CTRL_MAC_ADDR, "SetMacAddress" }, { 0, NULL } }; static device_method_t vtnet_methods[] = { /* Device methods. */ DEVMETHOD(device_probe, vtnet_probe), DEVMETHOD(device_attach, vtnet_attach), DEVMETHOD(device_detach, vtnet_detach), DEVMETHOD(device_suspend, vtnet_suspend), DEVMETHOD(device_resume, vtnet_resume), DEVMETHOD(device_shutdown, vtnet_shutdown), /* VirtIO methods. 
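 * (virtio_attach_completed fires once device attach has finished and is
 * used here to run vtnet_attach_disable_promisc(); virtio_config_change
 * fires when the host updates the device configuration, e.g. link status,
 * and triggers vtnet_update_link_status().)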
*/ DEVMETHOD(virtio_attach_completed, vtnet_attach_completed), DEVMETHOD(virtio_config_change, vtnet_config_change), DEVMETHOD_END }; #ifdef DEV_NETMAP #include #endif /* DEV_NETMAP */ static driver_t vtnet_driver = { "vtnet", vtnet_methods, sizeof(struct vtnet_softc) }; static devclass_t vtnet_devclass; DRIVER_MODULE(vtnet, virtio_mmio, vtnet_driver, vtnet_devclass, vtnet_modevent, 0); DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass, vtnet_modevent, 0); MODULE_VERSION(vtnet, 1); MODULE_DEPEND(vtnet, virtio, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(vtnet, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ VIRTIO_SIMPLE_PNPTABLE(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter"); VIRTIO_SIMPLE_PNPINFO(virtio_mmio, vtnet); VIRTIO_SIMPLE_PNPINFO(virtio_pci, vtnet); static int vtnet_modevent(module_t mod, int type, void *unused) { int error = 0; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded++ == 0) { vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr", sizeof(struct vtnet_tx_header), NULL, NULL, NULL, NULL, 0, 0); #ifdef DEBUGNET /* * We need to allocate from this zone in the transmit path, so ensure * that we have at least one item per header available. * XXX add a separate zone like we do for mbufs? otherwise we may alloc * buckets */ uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2); uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2); #endif } break; case MOD_QUIESCE: if (uma_zone_get_cur(vtnet_tx_header_zone) > 0) error = EBUSY; break; case MOD_UNLOAD: if (--loaded == 0) { uma_zdestroy(vtnet_tx_header_zone); vtnet_tx_header_zone = NULL; } break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } static int vtnet_probe(device_t dev) { return (VIRTIO_SIMPLE_PROBE(dev, vtnet)); } static int vtnet_attach(device_t dev) { struct vtnet_softc *sc; int error; sc = device_get_softc(dev); sc->vtnet_dev = dev; /* Register our feature descriptions. */ virtio_set_feature_desc(dev, vtnet_feature_desc); VTNET_CORE_LOCK_INIT(sc); callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0); vtnet_setup_sysctl(sc); vtnet_setup_features(sc); error = vtnet_alloc_rx_filters(sc); if (error) { device_printf(dev, "cannot allocate Rx filters\n"); goto fail; } error = vtnet_alloc_rxtx_queues(sc); if (error) { device_printf(dev, "cannot allocate queues\n"); goto fail; } error = vtnet_alloc_virtqueues(sc); if (error) { device_printf(dev, "cannot allocate virtqueues\n"); goto fail; } error = vtnet_setup_interface(sc); if (error) { device_printf(dev, "cannot setup interface\n"); goto fail; } error = virtio_setup_intr(dev, INTR_TYPE_NET); if (error) { device_printf(dev, "cannot setup virtqueue interrupts\n"); /* BMV: This will crash if during boot! 
*/ ether_ifdetach(sc->vtnet_ifp); goto fail; } #ifdef DEV_NETMAP vtnet_netmap_attach(sc); #endif /* DEV_NETMAP */ vtnet_start_taskqueues(sc); fail: if (error) vtnet_detach(dev); return (error); } static int vtnet_detach(device_t dev) { struct vtnet_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->vtnet_ifp; if (device_is_attached(dev)) { VTNET_CORE_LOCK(sc); vtnet_stop(sc); VTNET_CORE_UNLOCK(sc); callout_drain(&sc->vtnet_tick_ch); vtnet_drain_taskqueues(sc); ether_ifdetach(ifp); } #ifdef DEV_NETMAP netmap_detach(ifp); #endif /* DEV_NETMAP */ vtnet_free_taskqueues(sc); if (sc->vtnet_vlan_attach != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach); sc->vtnet_vlan_attach = NULL; } if (sc->vtnet_vlan_detach != NULL) { EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach); sc->vtnet_vlan_detach = NULL; } ifmedia_removeall(&sc->vtnet_media); if (ifp != NULL) { if_free(ifp); sc->vtnet_ifp = NULL; } vtnet_free_rxtx_queues(sc); vtnet_free_rx_filters(sc); if (sc->vtnet_ctrl_vq != NULL) vtnet_free_ctrl_vq(sc); VTNET_CORE_LOCK_DESTROY(sc); return (0); } static int vtnet_suspend(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); VTNET_CORE_LOCK(sc); vtnet_stop(sc); sc->vtnet_flags |= VTNET_FLAG_SUSPENDED; VTNET_CORE_UNLOCK(sc); return (0); } static int vtnet_resume(device_t dev) { struct vtnet_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->vtnet_ifp; VTNET_CORE_LOCK(sc); if (ifp->if_flags & IFF_UP) vtnet_init_locked(sc); sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED; VTNET_CORE_UNLOCK(sc); return (0); } static int vtnet_shutdown(device_t dev) { /* * Suspend already does all of what we need to * do here; we just never expect to be resumed. */ return (vtnet_suspend(dev)); } static int vtnet_attach_completed(device_t dev) { vtnet_attach_disable_promisc(device_get_softc(dev)); return (0); } static int vtnet_config_change(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); VTNET_CORE_LOCK(sc); vtnet_update_link_status(sc); if (sc->vtnet_link_active != 0) vtnet_tx_start_all(sc); VTNET_CORE_UNLOCK(sc); return (0); } static void vtnet_negotiate_features(struct vtnet_softc *sc) { device_t dev; uint64_t mask, features; dev = sc->vtnet_dev; mask = 0; /* * TSO and LRO are only available when their corresponding checksum * offload feature is also negotiated. */ if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) { mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM; mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES; } if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable)) mask |= VTNET_TSO_FEATURES; if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable)) mask |= VTNET_LRO_FEATURES; #ifndef VTNET_LEGACY_TX if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable)) mask |= VIRTIO_NET_F_MQ; #else mask |= VIRTIO_NET_F_MQ; #endif features = VTNET_FEATURES & ~mask; sc->vtnet_features = virtio_negotiate_features(dev, features); if (virtio_with_feature(dev, VTNET_LRO_FEATURES) && virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) { /* * LRO without mergeable buffers requires special care. This * is not ideal because every receive buffer must be large * enough to hold the maximum TCP packet, the Ethernet header, * and the header. This requires up to 34 descriptors with * MCLBYTES clusters. If we do not have indirect descriptors, * LRO is disabled since the virtqueue will not contain very * many receive buffers. 
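 * (A rough sketch of that arithmetic, assuming the usual sizes: a 64 KB
 * TCP frame split across 2 KB MCLBYTES clusters needs about 32 data
 * segments, plus a couple more for the virtio-net header and Ethernet
 * header area, which is roughly where the figure of 34 descriptors per
 * receive buffer comes from.)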
*/ if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) { device_printf(dev, "LRO disabled due to both mergeable buffers and " "indirect descriptors not negotiated\n"); features &= ~VTNET_LRO_FEATURES; sc->vtnet_features = virtio_negotiate_features(dev, features); } else sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG; } } static void vtnet_setup_features(struct vtnet_softc *sc) { device_t dev; dev = sc->vtnet_dev; vtnet_negotiate_features(sc); if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) sc->vtnet_flags |= VTNET_FLAG_INDIRECT; if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX)) sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX; if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) { /* This feature should always be negotiated. */ sc->vtnet_flags |= VTNET_FLAG_MAC; } if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) { sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS; sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); } else sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr); if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS; else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS; else sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS; if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS; else sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) sc->vtnet_flags |= VTNET_FLAG_CTRL_RX; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN)) sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR)) sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC; } if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) && sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); } else sc->vtnet_max_vq_pairs = 1; if (sc->vtnet_max_vq_pairs > 1) { /* * Limit the maximum number of queue pairs to the lower of * the number of CPUs and the configured maximum. * The actual number of queues that get used may be less. */ int max; max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) { if (max > mp_ncpus) max = mp_ncpus; if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) max = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX; if (max > 1) { sc->vtnet_requested_vq_pairs = max; sc->vtnet_flags |= VTNET_FLAG_MULTIQ; } } } } static int vtnet_init_rxq(struct vtnet_softc *sc, int id) { struct vtnet_rxq *rxq; rxq = &sc->vtnet_rxqs[id]; snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d", device_get_nameunit(sc->vtnet_dev), id); mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF); rxq->vtnrx_sc = sc; rxq->vtnrx_id = id; rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT); if (rxq->vtnrx_sg == NULL) return (ENOMEM); NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq); rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT, taskqueue_thread_enqueue, &rxq->vtnrx_tq); return (rxq->vtnrx_tq == NULL ? 
ENOMEM : 0); } static int vtnet_init_txq(struct vtnet_softc *sc, int id) { struct vtnet_txq *txq; txq = &sc->vtnet_txqs[id]; snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d", device_get_nameunit(sc->vtnet_dev), id); mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF); txq->vtntx_sc = sc; txq->vtntx_id = id; txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT); if (txq->vtntx_sg == NULL) return (ENOMEM); #ifndef VTNET_LEGACY_TX txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF, M_NOWAIT, &txq->vtntx_mtx); if (txq->vtntx_br == NULL) return (ENOMEM); TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq); #endif TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq); txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT, taskqueue_thread_enqueue, &txq->vtntx_tq); if (txq->vtntx_tq == NULL) return (ENOMEM); return (0); } static int vtnet_alloc_rxtx_queues(struct vtnet_softc *sc) { int i, npairs, error; npairs = sc->vtnet_max_vq_pairs; sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF, M_NOWAIT | M_ZERO); sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL) return (ENOMEM); for (i = 0; i < npairs; i++) { error = vtnet_init_rxq(sc, i); if (error) return (error); error = vtnet_init_txq(sc, i); if (error) return (error); } vtnet_setup_queue_sysctl(sc); return (0); } static void vtnet_destroy_rxq(struct vtnet_rxq *rxq) { rxq->vtnrx_sc = NULL; rxq->vtnrx_id = -1; if (rxq->vtnrx_sg != NULL) { sglist_free(rxq->vtnrx_sg); rxq->vtnrx_sg = NULL; } if (mtx_initialized(&rxq->vtnrx_mtx) != 0) mtx_destroy(&rxq->vtnrx_mtx); } static void vtnet_destroy_txq(struct vtnet_txq *txq) { txq->vtntx_sc = NULL; txq->vtntx_id = -1; if (txq->vtntx_sg != NULL) { sglist_free(txq->vtntx_sg); txq->vtntx_sg = NULL; } #ifndef VTNET_LEGACY_TX if (txq->vtntx_br != NULL) { buf_ring_free(txq->vtntx_br, M_DEVBUF); txq->vtntx_br = NULL; } #endif if (mtx_initialized(&txq->vtntx_mtx) != 0) mtx_destroy(&txq->vtntx_mtx); } static void vtnet_free_rxtx_queues(struct vtnet_softc *sc) { int i; if (sc->vtnet_rxqs != NULL) { for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_destroy_rxq(&sc->vtnet_rxqs[i]); free(sc->vtnet_rxqs, M_DEVBUF); sc->vtnet_rxqs = NULL; } if (sc->vtnet_txqs != NULL) { for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_destroy_txq(&sc->vtnet_txqs[i]); free(sc->vtnet_txqs, M_DEVBUF); sc->vtnet_txqs = NULL; } } static int vtnet_alloc_rx_filters(struct vtnet_softc *sc) { if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter), M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_mac_filter == NULL) return (ENOMEM); } if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) * VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_vlan_filter == NULL) return (ENOMEM); } return (0); } static void vtnet_free_rx_filters(struct vtnet_softc *sc) { if (sc->vtnet_mac_filter != NULL) { free(sc->vtnet_mac_filter, M_DEVBUF); sc->vtnet_mac_filter = NULL; } if (sc->vtnet_vlan_filter != NULL) { free(sc->vtnet_vlan_filter, M_DEVBUF); sc->vtnet_vlan_filter = NULL; } } static int vtnet_alloc_virtqueues(struct vtnet_softc *sc) { device_t dev; struct vq_alloc_info *info; struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i, idx, flags, nvqs, error; dev = sc->vtnet_dev; flags = 0; nvqs = sc->vtnet_max_vq_pairs * 2; if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) nvqs++; info = 
malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT); if (info == NULL) return (ENOMEM); for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) { rxq = &sc->vtnet_rxqs[i]; VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs, vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq, "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id); txq = &sc->vtnet_txqs[i]; VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs, vtnet_tx_vq_intr, txq, &txq->vtntx_vq, "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id); } if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL, &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev)); } /* * Enable interrupt binding if this is multiqueue. This only matters * when per-vq MSIX is available. */ if (sc->vtnet_flags & VTNET_FLAG_MULTIQ) flags |= 0; error = virtio_alloc_virtqueues(dev, flags, nvqs, info); free(info, M_TEMP); return (error); } static int vtnet_setup_interface(struct vtnet_softc *sc) { device_t dev; struct pfil_head_args pa; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "cannot allocate ifnet structure\n"); return (ENOSPC); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_baudrate = IF_Gbps(10); /* Approx. */ ifp->if_softc = sc; - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | + IFF_KNOWSEPOCH; ifp->if_init = vtnet_init; ifp->if_ioctl = vtnet_ioctl; ifp->if_get_counter = vtnet_get_counter; #ifndef VTNET_LEGACY_TX ifp->if_transmit = vtnet_txq_mq_start; ifp->if_qflush = vtnet_qflush; #else struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq; ifp->if_start = vtnet_start; IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1); ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1; IFQ_SET_READY(&ifp->if_snd); #endif ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd, vtnet_ifmedia_sts); ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL); ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE); /* Read (or generate) the MAC address for the adapter. */ vtnet_get_hwaddr(sc); ether_ifattach(ifp, sc->vtnet_hwaddr); if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) ifp->if_capabilities |= IFCAP_LINKSTATE; /* Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU; if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) { ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6; if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) { ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6; sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; } else { if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) ifp->if_capabilities |= IFCAP_TSO4; if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) ifp->if_capabilities |= IFCAP_TSO6; if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; } if (ifp->if_capabilities & IFCAP_TSO) ifp->if_capabilities |= IFCAP_VLAN_HWTSO; } if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) { ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6)) ifp->if_capabilities |= IFCAP_LRO; } if (ifp->if_capabilities & IFCAP_HWCSUM) { /* * VirtIO does not support VLAN tagging, but we can fake * it by inserting and removing the 802.1Q header during * transmit and receive. 
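 * (On transmit, vtnet_txq_encap() reinserts the tag itself with
 * ether_vlanencap() before handing the frame to the host; on receive,
 * vtnet_rxq_input() strips it again via vtnet_vlan_tag_remove().)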
We are then able to do checksum * offloading of VLAN frames. */ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; } ifp->if_capenable = ifp->if_capabilities; /* * Capabilities after here are not enabled by default. */ if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); } vtnet_set_rx_process_limit(sc); vtnet_set_tx_intr_threshold(sc); DEBUGNET_SET(ifp, vtnet); pa.pa_version = PFIL_VERSION; pa.pa_flags = PFIL_IN; pa.pa_type = PFIL_TYPE_ETHERNET; pa.pa_headname = ifp->if_xname; sc->vtnet_pfil = pfil_head_register(&pa); return (0); } static int vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu) { struct ifnet *ifp; int frame_size, clsize; ifp = sc->vtnet_ifp; if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU) return (EINVAL); frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) + new_mtu; /* * Based on the new MTU (and hence frame size) determine which * cluster size is most appropriate for the receive queues. */ if (frame_size <= MCLBYTES) { clsize = MCLBYTES; } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { /* Avoid going past 9K jumbos. */ if (frame_size > MJUM9BYTES) return (EINVAL); clsize = MJUM9BYTES; } else clsize = MJUMPAGESIZE; ifp->if_mtu = new_mtu; sc->vtnet_rx_new_clsize = clsize; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vtnet_init_locked(sc); } return (0); } static int vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct vtnet_softc *sc; struct ifreq *ifr; int reinit, mask, error; sc = ifp->if_softc; ifr = (struct ifreq *) data; error = 0; switch (cmd) { case SIOCSIFMTU: if (ifp->if_mtu != ifr->ifr_mtu) { VTNET_CORE_LOCK(sc); error = vtnet_change_mtu(sc, ifr->ifr_mtu); VTNET_CORE_UNLOCK(sc); } break; case SIOCSIFFLAGS: VTNET_CORE_LOCK(sc); if ((ifp->if_flags & IFF_UP) == 0) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vtnet_stop(sc); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if ((ifp->if_flags ^ sc->vtnet_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) vtnet_rx_filter(sc); else { ifp->if_flags |= IFF_PROMISC; if ((ifp->if_flags ^ sc->vtnet_if_flags) & IFF_ALLMULTI) error = ENOTSUP; } } } else vtnet_init_locked(sc); if (error == 0) sc->vtnet_if_flags = ifp->if_flags; VTNET_CORE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) break; VTNET_CORE_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) vtnet_rx_filter_mac(sc); VTNET_CORE_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd); break; case SIOCSIFCAP: VTNET_CORE_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) ifp->if_capenable ^= IFCAP_TXCSUM; if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO | IFCAP_VLAN_HWFILTER)) { /* These Rx features require us to renegotiate. 
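 * They correspond to VirtIO features negotiated with the host
 * (VIRTIO_NET_F_GUEST_CSUM, the LRO features, VIRTIO_NET_F_CTRL_VLAN),
 * so toggling them tears the interface down and goes through
 * vtnet_virtio_reinit(), which re-negotiates with the matching feature
 * bits masked out.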
*/ reinit = 1; if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; } else reinit = 0; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vtnet_init_locked(sc); } VTNET_CORE_UNLOCK(sc); VLAN_CAPABILITIES(ifp); break; default: error = ether_ioctl(ifp, cmd, data); break; } VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc); return (error); } static int vtnet_rxq_populate(struct vtnet_rxq *rxq) { struct virtqueue *vq; int nbufs, error; #ifdef DEV_NETMAP error = vtnet_netmap_rxq_populate(rxq); if (error >= 0) return (error); #endif /* DEV_NETMAP */ vq = rxq->vtnrx_vq; error = ENOSPC; for (nbufs = 0; !virtqueue_full(vq); nbufs++) { error = vtnet_rxq_new_buf(rxq); if (error) break; } if (nbufs > 0) { virtqueue_notify(vq); /* * EMSGSIZE signifies the virtqueue did not have enough * entries available to hold the last mbuf. This is not * an error. */ if (error == EMSGSIZE) error = 0; } return (error); } static void vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq) { struct virtqueue *vq; struct mbuf *m; int last; #ifdef DEV_NETMAP int netmap_bufs = vtnet_netmap_queue_on(rxq->vtnrx_sc, NR_RX, rxq->vtnrx_id); #else /* !DEV_NETMAP */ int netmap_bufs = 0; #endif /* !DEV_NETMAP */ vq = rxq->vtnrx_vq; last = 0; while ((m = virtqueue_drain(vq, &last)) != NULL) { if (!netmap_bufs) m_freem(m); } KASSERT(virtqueue_empty(vq), ("%s: mbufs remaining in rx queue %p", __func__, rxq)); } static struct mbuf * vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) { struct mbuf *m_head, *m_tail, *m; int i, clsize; clsize = sc->vtnet_rx_clsize; KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs)); m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize); if (m_head == NULL) goto fail; m_head->m_len = clsize; m_tail = m_head; /* Allocate the rest of the chain. */ for (i = 1; i < nbufs; i++) { m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize); if (m == NULL) goto fail; m->m_len = clsize; m_tail->m_next = m; m_tail = m; } if (m_tailp != NULL) *m_tailp = m_tail; return (m_head); fail: sc->vtnet_stats.mbuf_alloc_failed++; m_freem(m_head); return (NULL); } /* * Slow path for when LRO without mergeable buffers is negotiated. */ static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0, int len0) { struct vtnet_softc *sc; struct mbuf *m, *m_prev; struct mbuf *m_new, *m_tail; int len, clsize, nreplace, error; sc = rxq->vtnrx_sc; clsize = sc->vtnet_rx_clsize; m_prev = NULL; m_tail = NULL; nreplace = 0; m = m0; len = len0; /* * Since these mbuf chains are so large, we avoid allocating an * entire replacement chain if possible. When the received frame * did not consume the entire chain, the unused mbufs are moved * to the replacement chain. */ while (len > 0) { /* * Something is seriously wrong if we received a frame * larger than the chain. Drop it. */ if (m == NULL) { sc->vtnet_stats.rx_frame_too_large++; return (EMSGSIZE); } /* We always allocate the same cluster size. 
*/ KASSERT(m->m_len == clsize, ("%s: mbuf size %d is not the cluster size %d", __func__, m->m_len, clsize)); m->m_len = MIN(m->m_len, len); len -= m->m_len; m_prev = m; m = m->m_next; nreplace++; } KASSERT(nreplace <= sc->vtnet_rx_nmbufs, ("%s: too many replacement mbufs %d max %d", __func__, nreplace, sc->vtnet_rx_nmbufs)); m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail); if (m_new == NULL) { m_prev->m_len = clsize; return (ENOBUFS); } /* * Move any unused mbufs from the received chain onto the end * of the new chain. */ if (m_prev->m_next != NULL) { m_tail->m_next = m_prev->m_next; m_prev->m_next = NULL; } error = vtnet_rxq_enqueue_buf(rxq, m_new); if (error) { /* * BAD! We could not enqueue the replacement mbuf chain. We * must restore the m0 chain to the original state if it was * modified so we can subsequently discard it. * * NOTE: The replacement is suppose to be an identical copy * to the one just dequeued so this is an unexpected error. */ sc->vtnet_stats.rx_enq_replacement_failed++; if (m_tail->m_next != NULL) { m_prev->m_next = m_tail->m_next; m_tail->m_next = NULL; } m_prev->m_len = clsize; m_freem(m_new); } return (error); } static int vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len) { struct vtnet_softc *sc; struct mbuf *m_new; int error; sc = rxq->vtnrx_sc; KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, ("%s: chained mbuf without LRO_NOMRG", __func__)); if (m->m_next == NULL) { /* Fast-path for the common case of just one mbuf. */ if (m->m_len < len) return (EINVAL); m_new = vtnet_rx_alloc_buf(sc, 1, NULL); if (m_new == NULL) return (ENOBUFS); error = vtnet_rxq_enqueue_buf(rxq, m_new); if (error) { /* * The new mbuf is suppose to be an identical * copy of the one just dequeued so this is an * unexpected error. */ m_freem(m_new); sc->vtnet_stats.rx_enq_replacement_failed++; } else m->m_len = len; } else error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len); return (error); } static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m) { struct vtnet_softc *sc; struct sglist *sg; struct vtnet_rx_header *rxhdr; uint8_t *mdata; int offset, error; sc = rxq->vtnrx_sc; sg = rxq->vtnrx_sg; mdata = mtod(m, uint8_t *); VTNET_RXQ_LOCK_ASSERT(rxq); KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, ("%s: chained mbuf without LRO_NOMRG", __func__)); KASSERT(m->m_len == sc->vtnet_rx_clsize, ("%s: unexpected cluster size %d/%d", __func__, m->m_len, sc->vtnet_rx_clsize)); sglist_reset(sg); if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr)); rxhdr = (struct vtnet_rx_header *) mdata; sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size); offset = sizeof(struct vtnet_rx_header); } else offset = 0; sglist_append(sg, mdata + offset, m->m_len - offset); if (m->m_next != NULL) { error = sglist_append_mbuf(sg, m->m_next); MPASS(error == 0); } error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg); return (error); } static int vtnet_rxq_new_buf(struct vtnet_rxq *rxq) { struct vtnet_softc *sc; struct mbuf *m; int error; sc = rxq->vtnrx_sc; m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL); if (m == NULL) return (ENOBUFS); error = vtnet_rxq_enqueue_buf(rxq, m); if (error) m_freem(m); return (error); } /* * Use the checksum offset in the VirtIO header to set the * correct CSUM_* flags. 
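 * This works because the transport checksum field sits at a different
 * offset in the two headers we care about:
 * offsetof(struct udphdr, uh_sum) is 6, while
 * offsetof(struct tcphdr, th_sum) is 16,
 * so csum_offset alone is enough to tell UDP from TCP without parsing
 * the packet payload.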
*/ static int vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; #if defined(INET) || defined(INET6) int offset = hdr->csum_start + hdr->csum_offset; #endif sc = rxq->vtnrx_sc; /* Only do a basic sanity check on the offset. */ switch (eth_type) { #if defined(INET) case ETHERTYPE_IP: if (__predict_false(offset < ip_start + sizeof(struct ip))) return (1); break; #endif #if defined(INET6) case ETHERTYPE_IPV6: if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr))) return (1); break; #endif default: sc->vtnet_stats.rx_csum_bad_ethtype++; return (1); } /* * Use the offset to determine the appropriate CSUM_* flags. This is * a bit dirty, but we can get by with it since the checksum offsets * happen to be different. We assume the host host does not do IPv4 * header checksum offloading. */ switch (hdr->csum_offset) { case offsetof(struct udphdr, uh_sum): case offsetof(struct tcphdr, th_sum): m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; default: sc->vtnet_stats.rx_csum_bad_offset++; return (1); } return (0); } static int vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; int offset, proto; sc = rxq->vtnrx_sc; switch (eth_type) { #if defined(INET) case ETHERTYPE_IP: { struct ip *ip; if (__predict_false(m->m_len < ip_start + sizeof(struct ip))) return (1); ip = (struct ip *)(m->m_data + ip_start); proto = ip->ip_p; offset = ip_start + (ip->ip_hl << 2); break; } #endif #if defined(INET6) case ETHERTYPE_IPV6: if (__predict_false(m->m_len < ip_start + sizeof(struct ip6_hdr))) return (1); offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto); if (__predict_false(offset < 0)) return (1); break; #endif default: sc->vtnet_stats.rx_csum_bad_ethtype++; return (1); } switch (proto) { case IPPROTO_TCP: if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) return (1); m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; case IPPROTO_UDP: if (__predict_false(m->m_len < offset + sizeof(struct udphdr))) return (1); m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; default: /* * For the remaining protocols, FreeBSD does not support * checksum offloading, so the checksum will be recomputed. */ #if 0 if_printf(sc->vtnet_ifp, "cksum offload of unsupported " "protocol eth_type=%#x proto=%d csum_start=%d " "csum_offset=%d\n", __func__, eth_type, proto, hdr->csum_start, hdr->csum_offset); #endif break; } return (0); } /* * Set the appropriate CSUM_* flags. Unfortunately, the information * provided is not directly useful to us. The VirtIO header gives the * offset of the checksum, which is all Linux needs, but this is not * how FreeBSD does things. We are forced to peek inside the packet * a bit. * * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD * could accept the offsets and let the stack figure it out. */ static int vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct ether_header *eh; struct ether_vlan_header *evh; uint16_t eth_type; int offset, error; eh = mtod(m, struct ether_header *); eth_type = ntohs(eh->ether_type); if (eth_type == ETHERTYPE_VLAN) { /* BMV: We should handle nested VLAN tags too. 
*/ evh = mtod(m, struct ether_vlan_header *); eth_type = ntohs(evh->evl_proto); offset = sizeof(struct ether_vlan_header); } else offset = sizeof(struct ether_header); if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr); else error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr); return (error); } static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs) { struct mbuf *m; while (--nbufs > 0) { m = virtqueue_dequeue(rxq->vtnrx_vq, NULL); if (m == NULL) break; vtnet_rxq_discard_buf(rxq, m); } } static void vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m) { int error; /* * Requeue the discarded mbuf. This should always be successful * since it was just dequeued. */ error = vtnet_rxq_enqueue_buf(rxq, m); KASSERT(error == 0, ("%s: cannot requeue discarded mbuf %d", __func__, error)); } static int vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs) { struct vtnet_softc *sc; struct virtqueue *vq; struct mbuf *m, *m_tail; int len; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; m_tail = m_head; while (--nbufs > 0) { m = virtqueue_dequeue(vq, &len); if (m == NULL) { rxq->vtnrx_stats.vrxs_ierrors++; goto fail; } if (vtnet_rxq_new_buf(rxq) != 0) { rxq->vtnrx_stats.vrxs_iqdrops++; vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) vtnet_rxq_discard_merged_bufs(rxq, nbufs); goto fail; } if (m->m_len < len) len = m->m_len; m->m_len = len; m->m_flags &= ~M_PKTHDR; m_head->m_pkthdr.len += len; m_tail->m_next = m; m_tail = m; } return (0); fail: sc->vtnet_stats.rx_mergeable_failed++; m_freem(m_head); return (1); } static void vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; struct ifnet *ifp; struct ether_header *eh; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { eh = mtod(m, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { vtnet_vlan_tag_remove(m); /* * With the 802.1Q header removed, update the * checksum starting location accordingly. */ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) hdr->csum_start -= ETHER_VLAN_ENCAP_LEN; } } m->m_pkthdr.flowid = rxq->vtnrx_id; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); /* * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum * distinction that Linux does. Need to reevaluate if performing * offloading for the NEEDS_CSUM case is really appropriate. 
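 * Roughly: VIRTIO_NET_HDR_F_DATA_VALID means the host already verified
 * the checksum (Linux CHECKSUM_UNNECESSARY), while NEEDS_CSUM means the
 * checksum was left partial and we are expected to complete it (Linux
 * CHECKSUM_PARTIAL); for now both cases are handled by marking the
 * mbuf's checksum data as valid.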
*/ if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) { if (vtnet_rxq_csum(rxq, m, hdr) == 0) rxq->vtnrx_stats.vrxs_csum++; else rxq->vtnrx_stats.vrxs_csum_failed++; } rxq->vtnrx_stats.vrxs_ipackets++; rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len; VTNET_RXQ_UNLOCK(rxq); (*ifp->if_input)(ifp, m); VTNET_RXQ_LOCK(rxq); } static int vtnet_rxq_eof(struct vtnet_rxq *rxq) { struct virtio_net_hdr lhdr, *hdr; struct vtnet_softc *sc; struct ifnet *ifp; struct virtqueue *vq; struct mbuf *m, *mr; struct virtio_net_hdr_mrg_rxbuf *mhdr; int len, deq, nbufs, adjsz, count; pfil_return_t pfil; bool pfil_done; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; ifp = sc->vtnet_ifp; hdr = &lhdr; deq = 0; count = sc->vtnet_rx_process_limit; VTNET_RXQ_LOCK_ASSERT(rxq); while (count-- > 0) { m = virtqueue_dequeue(vq, &len); if (m == NULL) break; deq++; if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) { rxq->vtnrx_stats.vrxs_ierrors++; vtnet_rxq_discard_buf(rxq, m); continue; } if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { nbufs = 1; adjsz = sizeof(struct vtnet_rx_header); /* * Account for our pad inserted between the header * and the actual start of the frame. */ len += VTNET_RX_HEADER_PAD; } else { mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *); nbufs = mhdr->num_buffers; adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); } /* * If we have enough data in first mbuf, run it through * pfil as a memory buffer before dequeueing the rest. */ if (PFIL_HOOKED_IN(sc->vtnet_pfil) && len - adjsz >= ETHER_HDR_LEN + max_protohdr) { pfil = pfil_run_hooks(sc->vtnet_pfil, m->m_data + adjsz, ifp, (len - adjsz) | PFIL_MEMPTR | PFIL_IN, NULL); switch (pfil) { case PFIL_REALLOCED: mr = pfil_mem2mbuf(m->m_data + adjsz); vtnet_rxq_input(rxq, mr, hdr); /* FALLTHROUGH */ case PFIL_DROPPED: case PFIL_CONSUMED: vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) vtnet_rxq_discard_merged_bufs(rxq, nbufs); continue; default: KASSERT(pfil == PFIL_PASS, ("Filter returned %d!\n", pfil)); }; pfil_done = true; } else pfil_done = false; if (vtnet_rxq_replace_buf(rxq, m, len) != 0) { rxq->vtnrx_stats.vrxs_iqdrops++; vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) vtnet_rxq_discard_merged_bufs(rxq, nbufs); continue; } m->m_pkthdr.len = len; m->m_pkthdr.rcvif = ifp; m->m_pkthdr.csum_flags = 0; if (nbufs > 1) { /* Dequeue the rest of chain. */ if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0) continue; } /* * Save copy of header before we strip it. For both mergeable * and non-mergeable, the header is at the beginning of the * mbuf data. We no longer need num_buffers, so always use a * regular header. * * BMV: Is this memcpy() expensive? We know the mbuf data is * still valid even after the m_adj(). */ memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr)); m_adj(m, adjsz); if (PFIL_HOOKED_IN(sc->vtnet_pfil) && pfil_done == false) { pfil = pfil_run_hooks(sc->vtnet_pfil, &m, ifp, PFIL_IN, NULL); switch (pfil) { case PFIL_DROPPED: case PFIL_CONSUMED: continue; default: KASSERT(pfil == PFIL_PASS, ("Filter returned %d!\n", pfil)); } } vtnet_rxq_input(rxq, m, hdr); /* Must recheck after dropping the Rx lock. */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } if (deq > 0) virtqueue_notify(vq); return (count > 0 ? 0 : EAGAIN); } static void vtnet_rx_vq_intr(void *xrxq) { struct vtnet_softc *sc; struct vtnet_rxq *rxq; struct ifnet *ifp; int tries, more; rxq = xrxq; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; tries = 0; if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) { /* * Ignore this interrupt. 
Either this is a spurious interrupt * or multiqueue without per-VQ MSIX so every queue needs to * be polled (a brain dead configuration we could try harder * to avoid). */ vtnet_rxq_disable_intr(rxq); return; } #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, rxq->vtnrx_id, &more) != NM_IRQ_PASS) return; #endif /* DEV_NETMAP */ VTNET_RXQ_LOCK(rxq); again: if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_RXQ_UNLOCK(rxq); return; } more = vtnet_rxq_eof(rxq); if (more || vtnet_rxq_enable_intr(rxq) != 0) { if (!more) vtnet_rxq_disable_intr(rxq); /* * This is an occasional condition or race (when !more), * so retry a few times before scheduling the taskqueue. */ if (tries++ < VTNET_INTR_DISABLE_RETRIES) goto again; VTNET_RXQ_UNLOCK(rxq); rxq->vtnrx_stats.vrxs_rescheduled++; taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } else VTNET_RXQ_UNLOCK(rxq); } static void vtnet_rxq_tq_intr(void *xrxq, int pending) { struct vtnet_softc *sc; struct vtnet_rxq *rxq; struct ifnet *ifp; int more; rxq = xrxq; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; VTNET_RXQ_LOCK(rxq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_RXQ_UNLOCK(rxq); return; } more = vtnet_rxq_eof(rxq); if (more || vtnet_rxq_enable_intr(rxq) != 0) { if (!more) vtnet_rxq_disable_intr(rxq); rxq->vtnrx_stats.vrxs_rescheduled++; taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } VTNET_RXQ_UNLOCK(rxq); } static int vtnet_txq_below_threshold(struct vtnet_txq *txq) { struct vtnet_softc *sc; struct virtqueue *vq; sc = txq->vtntx_sc; vq = txq->vtntx_vq; return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh); } static int vtnet_txq_notify(struct vtnet_txq *txq) { struct virtqueue *vq; vq = txq->vtntx_vq; txq->vtntx_watchdog = VTNET_TX_TIMEOUT; virtqueue_notify(vq); if (vtnet_txq_enable_intr(txq) == 0) return (0); /* * Drain frames that were completed since last checked. If this * causes the queue to go above the threshold, the caller should * continue transmitting. */ if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) { virtqueue_disable_intr(vq); return (1); } return (0); } static void vtnet_txq_free_mbufs(struct vtnet_txq *txq) { struct virtqueue *vq; struct vtnet_tx_header *txhdr; int last; #ifdef DEV_NETMAP int netmap_bufs = vtnet_netmap_queue_on(txq->vtntx_sc, NR_TX, txq->vtntx_id); #else /* !DEV_NETMAP */ int netmap_bufs = 0; #endif /* !DEV_NETMAP */ vq = txq->vtntx_vq; last = 0; while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { if (!netmap_bufs) { m_freem(txhdr->vth_mbuf); uma_zfree(vtnet_tx_header_zone, txhdr); } } KASSERT(virtqueue_empty(vq), ("%s: mbufs remaining in tx queue %p", __func__, txq)); } /* * BMV: Much of this can go away once we finally have offsets in * the mbuf packet header. Bug andre@. */ static int vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype, int *proto, int *start) { struct vtnet_softc *sc; struct ether_vlan_header *evh; int offset; sc = txq->vtntx_sc; evh = mtod(m, struct ether_vlan_header *); if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { /* BMV: We should handle nested VLAN tags too. 
*/ *etype = ntohs(evh->evl_proto); offset = sizeof(struct ether_vlan_header); } else { *etype = ntohs(evh->evl_encap_proto); offset = sizeof(struct ether_header); } switch (*etype) { #if defined(INET) case ETHERTYPE_IP: { struct ip *ip, iphdr; if (__predict_false(m->m_len < offset + sizeof(struct ip))) { m_copydata(m, offset, sizeof(struct ip), (caddr_t) &iphdr); ip = &iphdr; } else ip = (struct ip *)(m->m_data + offset); *proto = ip->ip_p; *start = offset + (ip->ip_hl << 2); break; } #endif #if defined(INET6) case ETHERTYPE_IPV6: *proto = -1; *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); /* Assert the network stack sent us a valid packet. */ KASSERT(*start > offset, ("%s: mbuf %p start %d offset %d proto %d", __func__, m, *start, offset, *proto)); break; #endif default: sc->vtnet_stats.tx_csum_bad_ethtype++; return (EINVAL); } return (0); } static int vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type, int offset, struct virtio_net_hdr *hdr) { static struct timeval lastecn; static int curecn; struct vtnet_softc *sc; struct tcphdr *tcp, tcphdr; sc = txq->vtntx_sc; if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); tcp = &tcphdr; } else tcp = (struct tcphdr *)(m->m_data + offset); hdr->hdr_len = offset + (tcp->th_off << 2); hdr->gso_size = m->m_pkthdr.tso_segsz; hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : VIRTIO_NET_HDR_GSO_TCPV6; if (tcp->th_flags & TH_CWR) { /* * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD, * ECN support is not on a per-interface basis, but globally via * the net.inet.tcp.ecn.enable sysctl knob. The default is off. */ if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { if (ppsratecheck(&lastecn, &curecn, 1)) if_printf(sc->vtnet_ifp, "TSO with ECN not negotiated with host\n"); return (ENOTSUP); } hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } txq->vtntx_stats.vtxs_tso++; return (0); } static struct mbuf * vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; int flags, etype, csum_start, proto, error; sc = txq->vtntx_sc; flags = m->m_pkthdr.csum_flags; error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start); if (error) goto drop; if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) || (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) { /* * We could compare the IP protocol vs the CSUM_ flag too, * but that really should not be necessary. */ hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; hdr->csum_start = csum_start; hdr->csum_offset = m->m_pkthdr.csum_data; txq->vtntx_stats.vtxs_csum++; } if (flags & CSUM_TSO) { if (__predict_false(proto != IPPROTO_TCP)) { /* Likely failed to correctly parse the mbuf. 
*/ sc->vtnet_stats.tx_tso_not_tcp++; goto drop; } KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM, ("%s: mbuf %p TSO without checksum offload %#x", __func__, m, flags)); error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr); if (error) goto drop; } return (m); drop: m_freem(m); return (NULL); } static int vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, struct vtnet_tx_header *txhdr) { struct vtnet_softc *sc; struct virtqueue *vq; struct sglist *sg; struct mbuf *m; int error; sc = txq->vtntx_sc; vq = txq->vtntx_vq; sg = txq->vtntx_sg; m = *m_head; sglist_reset(sg); error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); KASSERT(error == 0 && sg->sg_nseg == 1, ("%s: error %d adding header to sglist", __func__, error)); error = sglist_append_mbuf(sg, m); if (error) { m = m_defrag(m, M_NOWAIT); if (m == NULL) goto fail; *m_head = m; sc->vtnet_stats.tx_defragged++; error = sglist_append_mbuf(sg, m); if (error) goto fail; } txhdr->vth_mbuf = m; error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0); return (error); fail: sc->vtnet_stats.tx_defrag_failed++; m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } static int vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags) { struct vtnet_tx_header *txhdr; struct virtio_net_hdr *hdr; struct mbuf *m; int error; m = *m_head; M_ASSERTPKTHDR(m); txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO); if (txhdr == NULL) { m_freem(m); *m_head = NULL; return (ENOMEM); } /* * Always use the non-mergeable header, regardless if the feature * was negotiated. For transmit, num_buffers is always zero. The * vtnet_hdr_size is used to enqueue the correct header size. */ hdr = &txhdr->vth_uhdr.hdr; if (m->m_flags & M_VLANTAG) { m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); if ((*m_head = m) == NULL) { error = ENOBUFS; goto fail; } m->m_flags &= ~M_VLANTAG; } if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { m = vtnet_txq_offload(txq, m, hdr); if ((*m_head = m) == NULL) { error = ENOBUFS; goto fail; } } error = vtnet_txq_enqueue_buf(txq, m_head, txhdr); if (error == 0) return (0); fail: uma_zfree(vtnet_tx_header_zone, txhdr); return (error); } #ifdef VTNET_LEGACY_TX static void vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp) { struct vtnet_softc *sc; struct virtqueue *vq; struct mbuf *m0; int tries, enq; sc = txq->vtntx_sc; vq = txq->vtntx_vq; tries = 0; VTNET_TXQ_LOCK_ASSERT(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->vtnet_link_active == 0) return; vtnet_txq_eof(txq); again: enq = 0; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if (virtqueue_full(vq)) break; IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) { if (m0 != NULL) IFQ_DRV_PREPEND(&ifp->if_snd, m0); break; } enq++; ETHER_BPF_MTAP(ifp, m0); } if (enq > 0 && vtnet_txq_notify(txq) != 0) { if (tries++ < VTNET_NOTIFY_RETRIES) goto again; txq->vtntx_stats.vtxs_rescheduled++; taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); } } static void vtnet_start(struct ifnet *ifp) { struct vtnet_softc *sc; struct vtnet_txq *txq; sc = ifp->if_softc; txq = &sc->vtnet_txqs[0]; VTNET_TXQ_LOCK(txq); vtnet_start_locked(txq, ifp); VTNET_TXQ_UNLOCK(txq); } #else /* !VTNET_LEGACY_TX */ static int vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) { struct vtnet_softc *sc; struct virtqueue *vq; struct buf_ring *br; struct ifnet *ifp; int enq, tries, error; sc = txq->vtntx_sc; vq = txq->vtntx_vq; br = txq->vtntx_br; ifp = sc->vtnet_ifp; tries = 0; error = 0; 
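	/*
	 * Multiqueue (if_transmit) send path used when VTNET_LEGACY_TX is
	 * not defined: frames are staged on the per-queue buf_ring and
	 * drained here while the queue lock is held.  If the caller in
	 * vtnet_txq_mq_start() cannot take the lock, the frame is only
	 * enqueued and the deferred task drains it later.
	 */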
VTNET_TXQ_LOCK_ASSERT(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->vtnet_link_active == 0) { if (m != NULL) error = drbr_enqueue(ifp, br, m); return (error); } if (m != NULL) { error = drbr_enqueue(ifp, br, m); if (error) return (error); } vtnet_txq_eof(txq); again: enq = 0; while ((m = drbr_peek(ifp, br)) != NULL) { if (virtqueue_full(vq)) { drbr_putback(ifp, br, m); break; } if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) { if (m != NULL) drbr_putback(ifp, br, m); else drbr_advance(ifp, br); break; } drbr_advance(ifp, br); enq++; ETHER_BPF_MTAP(ifp, m); } if (enq > 0 && vtnet_txq_notify(txq) != 0) { if (tries++ < VTNET_NOTIFY_RETRIES) goto again; txq->vtntx_stats.vtxs_rescheduled++; taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); } return (0); } static int vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m) { struct vtnet_softc *sc; struct vtnet_txq *txq; int i, npairs, error; sc = ifp->if_softc; npairs = sc->vtnet_act_vq_pairs; /* check if flowid is set */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % npairs; else i = curcpu % npairs; txq = &sc->vtnet_txqs[i]; if (VTNET_TXQ_TRYLOCK(txq) != 0) { error = vtnet_txq_mq_start_locked(txq, m); VTNET_TXQ_UNLOCK(txq); } else { error = drbr_enqueue(ifp, txq->vtntx_br, m); taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask); } return (error); } static void vtnet_txq_tq_deferred(void *xtxq, int pending) { struct vtnet_softc *sc; struct vtnet_txq *txq; txq = xtxq; sc = txq->vtntx_sc; VTNET_TXQ_LOCK(txq); if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br)) vtnet_txq_mq_start_locked(txq, NULL); VTNET_TXQ_UNLOCK(txq); } #endif /* VTNET_LEGACY_TX */ static void vtnet_txq_start(struct vtnet_txq *txq) { struct vtnet_softc *sc; struct ifnet *ifp; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; #ifdef VTNET_LEGACY_TX if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) vtnet_start_locked(txq, ifp); #else if (!drbr_empty(ifp, txq->vtntx_br)) vtnet_txq_mq_start_locked(txq, NULL); #endif } static void vtnet_txq_tq_intr(void *xtxq, int pending) { struct vtnet_softc *sc; struct vtnet_txq *txq; struct ifnet *ifp; txq = xtxq; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; VTNET_TXQ_LOCK(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_TXQ_UNLOCK(txq); return; } vtnet_txq_eof(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } static int vtnet_txq_eof(struct vtnet_txq *txq) { struct virtqueue *vq; struct vtnet_tx_header *txhdr; struct mbuf *m; int deq; vq = txq->vtntx_vq; deq = 0; VTNET_TXQ_LOCK_ASSERT(txq); while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { m = txhdr->vth_mbuf; deq++; txq->vtntx_stats.vtxs_opackets++; txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; if (m->m_flags & M_MCAST) txq->vtntx_stats.vtxs_omcasts++; m_freem(m); uma_zfree(vtnet_tx_header_zone, txhdr); } if (virtqueue_empty(vq)) txq->vtntx_watchdog = 0; return (deq); } static void vtnet_tx_vq_intr(void *xtxq) { struct vtnet_softc *sc; struct vtnet_txq *txq; struct ifnet *ifp; txq = xtxq; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { /* * Ignore this interrupt. Either this is a spurious interrupt * or multiqueue without per-VQ MSIX so every queue needs to * be polled (a brain dead configuration we could try harder * to avoid). 
*/ vtnet_txq_disable_intr(txq); return; } #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS) return; #endif /* DEV_NETMAP */ VTNET_TXQ_LOCK(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_TXQ_UNLOCK(txq); return; } vtnet_txq_eof(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } static void vtnet_tx_start_all(struct vtnet_softc *sc) { struct vtnet_txq *txq; int i; VTNET_CORE_LOCK_ASSERT(sc); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } } #ifndef VTNET_LEGACY_TX static void vtnet_qflush(struct ifnet *ifp) { struct vtnet_softc *sc; struct vtnet_txq *txq; struct mbuf *m; int i; sc = ifp->if_softc; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL) m_freem(m); VTNET_TXQ_UNLOCK(txq); } if_qflush(ifp); } #endif static int vtnet_watchdog(struct vtnet_txq *txq) { struct ifnet *ifp; ifp = txq->vtntx_sc->vtnet_ifp; VTNET_TXQ_LOCK(txq); if (txq->vtntx_watchdog == 1) { /* * Only drain completed frames if the watchdog is about to * expire. If any frames were drained, there may be enough * free descriptors now available to transmit queued frames. * In that case, the timer will immediately be decremented * below, but the timeout is generous enough that should not * be a problem. */ if (vtnet_txq_eof(txq) != 0) vtnet_txq_start(txq); } if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) { VTNET_TXQ_UNLOCK(txq); return (0); } VTNET_TXQ_UNLOCK(txq); if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id); return (1); } static void vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc, struct vtnet_txq_stats *txacc) { bzero(rxacc, sizeof(struct vtnet_rxq_stats)); bzero(txacc, sizeof(struct vtnet_txq_stats)); for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) { struct vtnet_rxq_stats *rxst; struct vtnet_txq_stats *txst; rxst = &sc->vtnet_rxqs[i].vtnrx_stats; rxacc->vrxs_ipackets += rxst->vrxs_ipackets; rxacc->vrxs_ibytes += rxst->vrxs_ibytes; rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops; rxacc->vrxs_csum += rxst->vrxs_csum; rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed; rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled; txst = &sc->vtnet_txqs[i].vtntx_stats; txacc->vtxs_opackets += txst->vtxs_opackets; txacc->vtxs_obytes += txst->vtxs_obytes; txacc->vtxs_csum += txst->vtxs_csum; txacc->vtxs_tso += txst->vtxs_tso; txacc->vtxs_rescheduled += txst->vtxs_rescheduled; } } static uint64_t vtnet_get_counter(if_t ifp, ift_counter cnt) { struct vtnet_softc *sc; struct vtnet_rxq_stats rxaccum; struct vtnet_txq_stats txaccum; sc = if_getsoftc(ifp); vtnet_accum_stats(sc, &rxaccum, &txaccum); switch (cnt) { case IFCOUNTER_IPACKETS: return (rxaccum.vrxs_ipackets); case IFCOUNTER_IQDROPS: return (rxaccum.vrxs_iqdrops); case IFCOUNTER_IERRORS: return (rxaccum.vrxs_ierrors); case IFCOUNTER_OPACKETS: return (txaccum.vtxs_opackets); #ifndef VTNET_LEGACY_TX case IFCOUNTER_OBYTES: return (txaccum.vtxs_obytes); case IFCOUNTER_OMCASTS: return (txaccum.vtxs_omcasts); #endif default: return (if_get_counter_default(ifp, cnt)); } } static void vtnet_tick(void *xsc) { struct vtnet_softc *sc; struct ifnet *ifp; int i, timedout; sc = xsc; ifp = sc->vtnet_ifp; timedout = 0; VTNET_CORE_LOCK_ASSERT(sc); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]); if (timedout != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vtnet_init_locked(sc); } else 
callout_schedule(&sc->vtnet_tick_ch, hz); } static void vtnet_start_taskqueues(struct vtnet_softc *sc) { device_t dev; struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i, error; dev = sc->vtnet_dev; /* * Errors here are very difficult to recover from - we cannot * easily fail because, if this is during boot, we will hang * when freeing any successfully started taskqueues because * the scheduler isn't up yet. * * Most drivers just ignore the return value - it only fails * with ENOMEM so an error is not likely. */ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET, "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id); if (error) { device_printf(dev, "failed to start rx taskq %d\n", rxq->vtnrx_id); } txq = &sc->vtnet_txqs[i]; error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET, "%s txq %d", device_get_nameunit(dev), txq->vtntx_id); if (error) { device_printf(dev, "failed to start tx taskq %d\n", txq->vtntx_id); } } } static void vtnet_free_taskqueues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; if (rxq->vtnrx_tq != NULL) { taskqueue_free(rxq->vtnrx_tq); rxq->vtnrx_tq = NULL; } txq = &sc->vtnet_txqs[i]; if (txq->vtntx_tq != NULL) { taskqueue_free(txq->vtntx_tq); txq->vtntx_tq = NULL; } } } static void vtnet_drain_taskqueues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; if (rxq->vtnrx_tq != NULL) taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); txq = &sc->vtnet_txqs[i]; if (txq->vtntx_tq != NULL) { taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask); #ifndef VTNET_LEGACY_TX taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask); #endif } } } static void vtnet_drain_rxtx_queues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; vtnet_rxq_free_mbufs(rxq); txq = &sc->vtnet_txqs[i]; vtnet_txq_free_mbufs(txq); } } static void vtnet_stop_rendezvous(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; /* * Lock and unlock the per-queue mutex so we known the stop * state is visible. Doing only the active queues should be * sufficient, but it does not cost much extra to do all the * queues. Note we hold the core mutex here too. */ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; VTNET_RXQ_LOCK(rxq); VTNET_RXQ_UNLOCK(rxq); txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); VTNET_TXQ_UNLOCK(txq); } } static void vtnet_stop(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sc->vtnet_link_active = 0; callout_stop(&sc->vtnet_tick_ch); /* Only advisory. */ vtnet_disable_interrupts(sc); /* * Stop the host adapter. This resets it to the pre-initialized * state. It will not generate any interrupts until after it is * reinitialized. */ virtio_stop(dev); vtnet_stop_rendezvous(sc); /* Free any mbufs left in the virtqueues. 
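 * By this point virtio_stop() has reset the device, so the host side
 * should no longer be using any of the descriptors and the queued
 * mbufs can be reclaimed safely.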
*/ vtnet_drain_rxtx_queues(sc); } static int vtnet_virtio_reinit(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; uint64_t features; int mask, error; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; features = sc->vtnet_features; mask = 0; #if defined(INET) mask |= IFCAP_RXCSUM; #endif #if defined (INET6) mask |= IFCAP_RXCSUM_IPV6; #endif /* * Re-negotiate with the host, removing any disabled receive * features. Transmit features are disabled only on our side * via if_capenable and if_hwassist. */ if (ifp->if_capabilities & mask) { /* * We require both IPv4 and IPv6 offloading to be enabled * in order to negotiated it: VirtIO does not distinguish * between the two. */ if ((ifp->if_capenable & mask) != mask) features &= ~VIRTIO_NET_F_GUEST_CSUM; } if (ifp->if_capabilities & IFCAP_LRO) { if ((ifp->if_capenable & IFCAP_LRO) == 0) features &= ~VTNET_LRO_FEATURES; } if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) { if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) features &= ~VIRTIO_NET_F_CTRL_VLAN; } error = virtio_reinit(dev, features); if (error) device_printf(dev, "virtio reinit error %d\n", error); return (error); } static void vtnet_init_rx_filters(struct vtnet_softc *sc) { struct ifnet *ifp; ifp = sc->vtnet_ifp; if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { /* Restore promiscuous and all-multicast modes. */ vtnet_rx_filter(sc); /* Restore filtered MAC addresses. */ vtnet_rx_filter_mac(sc); } if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) vtnet_rx_filter_vlan(sc); } static int vtnet_init_rx_queues(struct vtnet_softc *sc) { device_t dev; struct vtnet_rxq *rxq; int i, clsize, error; dev = sc->vtnet_dev; /* * Use the new cluster size if one has been set (via a MTU * change). Otherwise, use the standard 2K clusters. * * BMV: It might make sense to use page sized clusters as * the default (depending on the features negotiated). */ if (sc->vtnet_rx_new_clsize != 0) { clsize = sc->vtnet_rx_new_clsize; sc->vtnet_rx_new_clsize = 0; } else clsize = MCLBYTES; sc->vtnet_rx_clsize = clsize; sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize); KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS || sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, ("%s: too many rx mbufs %d for %d segments", __func__, sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; /* Hold the lock to satisfy asserts. */ VTNET_RXQ_LOCK(rxq); error = vtnet_rxq_populate(rxq); VTNET_RXQ_UNLOCK(rxq); if (error) { device_printf(dev, "cannot allocate mbufs for Rx queue %d\n", i); return (error); } } return (0); } static int vtnet_init_tx_queues(struct vtnet_softc *sc) { struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; txq->vtntx_watchdog = 0; } return (0); } static int vtnet_init_rxtx_queues(struct vtnet_softc *sc) { int error; error = vtnet_init_rx_queues(sc); if (error) return (error); error = vtnet_init_tx_queues(sc); if (error) return (error); return (0); } static void vtnet_set_active_vq_pairs(struct vtnet_softc *sc) { device_t dev; int npairs; dev = sc->vtnet_dev; if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) { sc->vtnet_act_vq_pairs = 1; return; } npairs = sc->vtnet_requested_vq_pairs; if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { device_printf(dev, "cannot set active queue pairs to %d\n", npairs); npairs = 1; } sc->vtnet_act_vq_pairs = npairs; } static int vtnet_reinit(struct vtnet_softc *sc) { struct ifnet *ifp; int error; ifp = sc->vtnet_ifp; /* Use the current MAC address. 
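 * The lladdr may have been changed while the interface was down, so
 * copy whatever the ifnet currently reports back into vtnet_hwaddr and
 * push it to the host via vtnet_set_hwaddr().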
*/ bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); vtnet_set_hwaddr(sc); vtnet_set_active_vq_pairs(sc); ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= VTNET_CSUM_OFFLOAD; if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) vtnet_init_rx_filters(sc); error = vtnet_init_rxtx_queues(sc); if (error) return (error); vtnet_enable_interrupts(sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; return (0); } static void vtnet_init_locked(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; vtnet_stop(sc); /* Reinitialize with the host. */ if (vtnet_virtio_reinit(sc) != 0) goto fail; if (vtnet_reinit(sc) != 0) goto fail; virtio_reinit_complete(dev); vtnet_update_link_status(sc); callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); return; fail: vtnet_stop(sc); } static void vtnet_init(void *xsc) { struct vtnet_softc *sc; sc = xsc; VTNET_CORE_LOCK(sc); vtnet_init_locked(sc); VTNET_CORE_UNLOCK(sc); } static void vtnet_free_ctrl_vq(struct vtnet_softc *sc) { struct virtqueue *vq; vq = sc->vtnet_ctrl_vq; /* * The control virtqueue is only polled and therefore it should * already be empty. */ KASSERT(virtqueue_empty(vq), ("%s: ctrl vq %p not empty", __func__, vq)); } static void vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie, struct sglist *sg, int readable, int writable) { struct virtqueue *vq; vq = sc->vtnet_ctrl_vq; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ, ("%s: CTRL_VQ feature not negotiated", __func__)); if (!virtqueue_empty(vq)) return; if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0) return; /* * Poll for the response, but the command is likely already * done when we return from the notify. */ virtqueue_notify(vq); virtqueue_poll(vq, NULL); } static int vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) { struct virtio_net_ctrl_hdr hdr __aligned(2); struct sglist_seg segs[3]; struct sglist sg; uint8_t ack; int error; hdr.class = VIRTIO_NET_CTRL_MAC; hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN); error |= sglist_append(&sg, &ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding set MAC msg to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); return (ack == VIRTIO_NET_OK ? 
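/*
 * Sketch of the control-virtqueue buffer convention used by the
 * vtnet_ctrl_*_cmd() helpers, based on how the segments are handed to
 * vtnet_exec_ctrl_cmd() here:
 *
 *   seg 0 .. n-2: device-readable (struct virtio_net_ctrl_hdr + payload)
 *   seg n-1:      device-writable ack byte, preset to VIRTIO_NET_ERR
 *
 * Hence the "sg.sg_nseg - 1" readable / 1 writable split, and the
 * command is treated as successful only if the host overwrote the
 * ack byte with VIRTIO_NET_OK.
 */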
0 : EIO); } static int vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; struct virtio_net_ctrl_mq mq; uint8_t pad2; uint8_t ack; } s __aligned(2); int error; s.hdr.class = VIRTIO_NET_CTRL_MQ; s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; s.mq.virtqueue_pairs = npairs; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding MQ message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; uint8_t onoff; uint8_t pad2; uint8_t ack; } s __aligned(2); int error; KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, ("%s: CTRL_RX feature not negotiated", __func__)); s.hdr.class = VIRTIO_NET_CTRL_RX; s.hdr.cmd = cmd; s.onoff = !!on; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding Rx message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_set_promisc(struct vtnet_softc *sc, int on) { return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on)); } static int vtnet_set_allmulti(struct vtnet_softc *sc, int on) { return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); } /* * The device defaults to promiscuous mode for backwards compatibility. * Turn it off at attach time if possible. */ static void vtnet_attach_disable_promisc(struct vtnet_softc *sc) { struct ifnet *ifp; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK(sc); if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) { ifp->if_flags |= IFF_PROMISC; } else if (vtnet_set_promisc(sc, 0) != 0) { ifp->if_flags |= IFF_PROMISC; device_printf(sc->vtnet_dev, "cannot disable default promiscuous mode\n"); } VTNET_CORE_UNLOCK(sc); } static void vtnet_rx_filter(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) device_printf(dev, "cannot %s promiscuous mode\n", ifp->if_flags & IFF_PROMISC ? "enable" : "disable"); if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) device_printf(dev, "cannot %s all-multicast mode\n", ifp->if_flags & IFF_ALLMULTI ? 
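/*
 * vtnet_rx_filter() mirrors the interface flags into the
 * VIRTIO_NET_CTRL_RX class: IFF_PROMISC maps to
 * VIRTIO_NET_CTRL_RX_PROMISC and IFF_ALLMULTI to
 * VIRTIO_NET_CTRL_RX_ALLMULTI via vtnet_set_promisc() and
 * vtnet_set_allmulti(). A failure of either command is only
 * reported; the interface flags themselves are left unchanged.
 */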
"enable" : "disable"); } static u_int vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt) { struct vtnet_softc *sc = arg; if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0) return (0); if (ucnt < VTNET_MAX_MAC_ENTRIES) bcopy(LLADDR(sdl), &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN); return (1); } static u_int vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt) { struct vtnet_mac_filter *filter = arg; if (mcnt < VTNET_MAX_MAC_ENTRIES) bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN); return (1); } static void vtnet_rx_filter_mac(struct vtnet_softc *sc) { struct virtio_net_ctrl_hdr hdr __aligned(2); struct vtnet_mac_filter *filter; struct sglist_seg segs[4]; struct sglist sg; struct ifnet *ifp; bool promisc, allmulti; u_int ucnt, mcnt; int error; uint8_t ack; ifp = sc->vtnet_ifp; filter = sc->vtnet_mac_filter; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, ("%s: CTRL_RX feature not negotiated", __func__)); /* Unicast MAC addresses: */ ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc); promisc = (ucnt > VTNET_MAX_MAC_ENTRIES); if (promisc) { filter->vmf_unicast.nentries = 0; if_printf(ifp, "more than %d MAC addresses assigned, " "falling back to promiscuous mode\n", VTNET_MAX_MAC_ENTRIES); } else filter->vmf_unicast.nentries = ucnt; /* Multicast MAC addresses: */ mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter); allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES); if (allmulti) { filter->vmf_multicast.nentries = 0; if_printf(ifp, "more than %d multicast MAC addresses " "assigned, falling back to all-multicast mode\n", VTNET_MAX_MAC_ENTRIES); } else filter->vmf_multicast.nentries = mcnt; if (promisc && allmulti) goto out; hdr.class = VIRTIO_NET_CTRL_MAC; hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; ack = VIRTIO_NET_ERR; sglist_init(&sg, 4, segs); error = 0; error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &filter->vmf_unicast, sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN); error |= sglist_append(&sg, &filter->vmf_multicast, sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN); error |= sglist_append(&sg, &ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 4, ("%s: error %d adding MAC filter msg to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); if (ack != VIRTIO_NET_OK) if_printf(ifp, "error setting host MAC filter table\n"); out: if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0) if_printf(ifp, "cannot enable promiscuous mode\n"); if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0) if_printf(ifp, "cannot enable all-multicast mode\n"); } static int vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; uint16_t tag; uint8_t pad2; uint8_t ack; } s __aligned(2); int error; s.hdr.class = VIRTIO_NET_CTRL_VLAN; s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; s.tag = tag; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding VLAN message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); } static void vtnet_rx_filter_vlan(struct vtnet_softc *sc) { uint32_t w; uint16_t tag; int i, bit; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER, ("%s: VLAN_FILTER feature not negotiated", __func__)); /* Enable the filter for each configured VLAN. */ for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { w = sc->vtnet_vlan_filter[i]; while ((bit = ffs(w) - 1) != -1) { w &= ~(1 << bit); tag = sizeof(w) * CHAR_BIT * i + bit; if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) { device_printf(sc->vtnet_dev, "cannot enable VLAN %d filter\n", tag); } } } } static void vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { struct ifnet *ifp; int idx, bit; ifp = sc->vtnet_ifp; idx = (tag >> 5) & 0x7F; bit = tag & 0x1F; if (tag == 0 || tag > 4095) return; VTNET_CORE_LOCK(sc); if (add) sc->vtnet_vlan_filter[idx] |= (1 << bit); else sc->vtnet_vlan_filter[idx] &= ~(1 << bit); if (ifp->if_capenable & IFCAP_VLAN_HWFILTER && ifp->if_drv_flags & IFF_DRV_RUNNING && vtnet_exec_vlan_filter(sc, add, tag) != 0) { device_printf(sc->vtnet_dev, "cannot %s VLAN %d %s the host filter table\n", add ? "add" : "remove", tag, add ? "to" : "from"); } VTNET_CORE_UNLOCK(sc); } static void vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag) { if (ifp->if_softc != arg) return; vtnet_update_vlan_filter(arg, 1, tag); } static void vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag) { if (ifp->if_softc != arg) return; vtnet_update_vlan_filter(arg, 0, tag); } static int vtnet_is_link_up(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; uint16_t status; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0) status = VIRTIO_NET_S_LINK_UP; else status = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, status)); return ((status & VIRTIO_NET_S_LINK_UP) != 0); } static void vtnet_update_link_status(struct vtnet_softc *sc) { struct ifnet *ifp; int link; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); link = vtnet_is_link_up(sc); /* Notify if the link status has changed. 
*/ if (link != 0 && sc->vtnet_link_active == 0) { sc->vtnet_link_active = 1; if_link_state_change(ifp, LINK_STATE_UP); } else if (link == 0 && sc->vtnet_link_active != 0) { sc->vtnet_link_active = 0; if_link_state_change(ifp, LINK_STATE_DOWN); } } static int vtnet_ifmedia_upd(struct ifnet *ifp) { struct vtnet_softc *sc; struct ifmedia *ifm; sc = ifp->if_softc; ifm = &sc->vtnet_media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); return (0); } static void vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vtnet_softc *sc; sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; VTNET_CORE_LOCK(sc); if (vtnet_is_link_up(sc) != 0) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= VTNET_MEDIATYPE; } else ifmr->ifm_active |= IFM_NONE; VTNET_CORE_UNLOCK(sc); } static void vtnet_set_hwaddr(struct vtnet_softc *sc) { device_t dev; int i; dev = sc->vtnet_dev; if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0) device_printf(dev, "unable to set MAC address\n"); } else if (sc->vtnet_flags & VTNET_FLAG_MAC) { for (i = 0; i < ETHER_ADDR_LEN; i++) { virtio_write_dev_config_1(dev, offsetof(struct virtio_net_config, mac) + i, sc->vtnet_hwaddr[i]); } } } static void vtnet_get_hwaddr(struct vtnet_softc *sc) { device_t dev; int i; dev = sc->vtnet_dev; if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) { /* * Generate a random locally administered unicast address. * * It would be nice to generate the same MAC address across * reboots, but it seems all the hosts currently available * support the MAC feature, so this isn't too important. */ sc->vtnet_hwaddr[0] = 0xB2; arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); vtnet_set_hwaddr(sc); return; } for (i = 0; i < ETHER_ADDR_LEN; i++) { sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev, offsetof(struct virtio_net_config, mac) + i); } } static void vtnet_vlan_tag_remove(struct mbuf *m) { struct ether_vlan_header *evh; evh = mtod(m, struct ether_vlan_header *); m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); m->m_flags |= M_VLANTAG; /* Strip the 802.1Q header. */ bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); } static void vtnet_set_rx_process_limit(struct vtnet_softc *sc) { int limit; limit = vtnet_tunable_int(sc, "rx_process_limit", vtnet_rx_process_limit); if (limit < 0) limit = INT_MAX; sc->vtnet_rx_process_limit = limit; } static void vtnet_set_tx_intr_threshold(struct vtnet_softc *sc) { int size, thresh; size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq); /* * The Tx interrupt is disabled until the queue free count falls * below our threshold. Completed frames are drained from the Tx * virtqueue before transmitting new frames and in the watchdog * callout, so the frequency of Tx interrupts is greatly reduced, * at the cost of not freeing mbufs as quickly as they otherwise * would be. * * N.B. We assume all the Tx queues are the same size. */ thresh = size / 4; /* * Without indirect descriptors, leave enough room for the most * segments we handle. 
*/ if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 && thresh < sc->vtnet_tx_nsegs) thresh = sc->vtnet_tx_nsegs; sc->vtnet_tx_intr_thresh = thresh; } static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_rxq *rxq) { struct sysctl_oid *node; struct sysctl_oid_list *list; struct vtnet_rxq_stats *stats; char namebuf[16]; snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Receive Queue"); list = SYSCTL_CHILDREN(node); stats = &rxq->vtnrx_stats; SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, &stats->vrxs_ipackets, "Receive packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, &stats->vrxs_ibytes, "Receive bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, &stats->vrxs_iqdrops, "Receive drops"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, &stats->vrxs_ierrors, "Receive errors"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, &stats->vrxs_csum, "Receive checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, &stats->vrxs_csum_failed, "Receive checksum offload failed"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, &stats->vrxs_rescheduled, "Receive interrupt handler rescheduled"); } static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_txq *txq) { struct sysctl_oid *node; struct sysctl_oid_list *list; struct vtnet_txq_stats *stats; char namebuf[16]; snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Transmit Queue"); list = SYSCTL_CHILDREN(node); stats = &txq->vtntx_stats; SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, &stats->vtxs_opackets, "Transmit packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, &stats->vtxs_obytes, "Transmit bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, &stats->vtxs_omcasts, "Transmit multicasts"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, &stats->vtxs_csum, "Transmit checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, &stats->vtxs_tso, "Transmit segmentation offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, &stats->vtxs_rescheduled, "Transmit interrupt handler rescheduled"); } static void vtnet_setup_queue_sysctl(struct vtnet_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; int i; dev = sc->vtnet_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); } } static void vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_softc *sc) { struct vtnet_statistics *stats; struct vtnet_rxq_stats rxaccum; struct vtnet_txq_stats txaccum; vtnet_accum_stats(sc, &rxaccum, &txaccum); stats = &sc->vtnet_stats; stats->rx_csum_offloaded = rxaccum.vrxs_csum; stats->rx_csum_failed = rxaccum.vrxs_csum_failed; stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled; stats->tx_csum_offloaded = txaccum.vtxs_csum; stats->tx_tso_offloaded = txaccum.vtxs_tso; stats->tx_task_rescheduled = txaccum.vtxs_rescheduled; SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, 
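/*
 * Note on the softc-wide counters exported below:
 * vtnet_accum_stats() (called just above) folds the per-queue vrxs_
 * and vtxs_ counters into sc->vtnet_stats, so the global
 * rx_csum_offloaded, tx_tso_offloaded and task_rescheduled values
 * seeded here reflect the sum over all queue pairs at the time the
 * read-only sysctl nodes are created.
 */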
"mbuf_alloc_failed", CTLFLAG_RD, &stats->mbuf_alloc_failed, "Mbuf cluster allocation failures"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large", CTLFLAG_RD, &stats->rx_frame_too_large, "Received frame larger than the mbuf chain"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", CTLFLAG_RD, &stats->rx_enq_replacement_failed, "Enqueuing the replacement receive mbuf failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", CTLFLAG_RD, &stats->rx_mergeable_failed, "Mergeable buffers receive failures"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", CTLFLAG_RD, &stats->rx_csum_bad_ethtype, "Received checksum offloaded buffer with unsupported " "Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", CTLFLAG_RD, &stats->rx_csum_bad_ipproto, "Received checksum offloaded buffer with incorrect IP protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset", CTLFLAG_RD, &stats->rx_csum_bad_offset, "Received checksum offloaded buffer with incorrect offset"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto", CTLFLAG_RD, &stats->rx_csum_bad_proto, "Received checksum offloaded buffer with incorrect protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed", CTLFLAG_RD, &stats->rx_csum_failed, "Received buffer checksum offload failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded", CTLFLAG_RD, &stats->rx_csum_offloaded, "Received buffer checksum offload succeeded"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled", CTLFLAG_RD, &stats->rx_task_rescheduled, "Times the receive interrupt task rescheduled itself"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype", CTLFLAG_RD, &stats->tx_csum_bad_ethtype, "Aborted transmit of checksum offloaded buffer with unknown " "Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype", CTLFLAG_RD, &stats->tx_tso_bad_ethtype, "Aborted transmit of TSO buffer with unknown Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", CTLFLAG_RD, &stats->tx_tso_not_tcp, "Aborted transmit of TSO buffer with non TCP protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged", CTLFLAG_RD, &stats->tx_defragged, "Transmit mbufs defragged"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed", CTLFLAG_RD, &stats->tx_defrag_failed, "Aborted transmit of buffer because defrag failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded", CTLFLAG_RD, &stats->tx_csum_offloaded, "Offloaded checksum of transmitted buffer"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded", CTLFLAG_RD, &stats->tx_tso_offloaded, "Segmentation offload of transmitted buffer"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled", CTLFLAG_RD, &stats->tx_task_rescheduled, "Times the transmit interrupt task rescheduled itself"); } static void vtnet_setup_sysctl(struct vtnet_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; dev = sc->vtnet_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, "Maximum number of supported virtqueue pairs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "requested_vq_pairs", CTLFLAG_RD, &sc->vtnet_requested_vq_pairs, 0, "Requested number of virtqueue pairs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, "Number of active 
virtqueue pairs"); vtnet_setup_stat_sysctl(ctx, child, sc); } static int vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) { return (virtqueue_enable_intr(rxq->vtnrx_vq)); } static void vtnet_rxq_disable_intr(struct vtnet_rxq *rxq) { virtqueue_disable_intr(rxq->vtnrx_vq); } static int vtnet_txq_enable_intr(struct vtnet_txq *txq) { struct virtqueue *vq; vq = txq->vtntx_vq; if (vtnet_txq_below_threshold(txq) != 0) return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG)); /* * The free count is above our threshold. Keep the Tx interrupt * disabled until the queue is fuller. */ return (0); } static void vtnet_txq_disable_intr(struct vtnet_txq *txq) { virtqueue_disable_intr(txq->vtntx_vq); } static void vtnet_enable_rx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]); } static void vtnet_enable_tx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_txq_enable_intr(&sc->vtnet_txqs[i]); } static void vtnet_enable_interrupts(struct vtnet_softc *sc) { vtnet_enable_rx_interrupts(sc); vtnet_enable_tx_interrupts(sc); } static void vtnet_disable_rx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); } static void vtnet_disable_tx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); } static void vtnet_disable_interrupts(struct vtnet_softc *sc) { vtnet_disable_rx_interrupts(sc); vtnet_disable_tx_interrupts(sc); } static int vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) { char path[64]; snprintf(path, sizeof(path), "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob); TUNABLE_INT_FETCH(path, &def); return (def); } #ifdef DEBUGNET static void vtnet_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) { struct vtnet_softc *sc; sc = if_getsoftc(ifp); VTNET_CORE_LOCK(sc); *nrxr = sc->vtnet_max_vq_pairs; *ncl = DEBUGNET_MAX_IN_FLIGHT; *clsize = sc->vtnet_rx_clsize; VTNET_CORE_UNLOCK(sc); } static void vtnet_debugnet_event(struct ifnet *ifp __unused, enum debugnet_ev event __unused) { } static int vtnet_debugnet_transmit(struct ifnet *ifp, struct mbuf *m) { struct vtnet_softc *sc; struct vtnet_txq *txq; int error; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); txq = &sc->vtnet_txqs[0]; error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE); if (error == 0) (void)vtnet_txq_notify(txq); return (error); } static int vtnet_debugnet_poll(struct ifnet *ifp, int count) { struct vtnet_softc *sc; int i; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); (void)vtnet_txq_eof(&sc->vtnet_txqs[0]); for (i = 0; i < sc->vtnet_max_vq_pairs; i++) (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]); return (0); } #endif /* DEBUGNET */ diff --git a/sys/mips/nlm/dev/net/xlpge.c b/sys/mips/nlm/dev/net/xlpge.c index e1beb9ad79aa..ac0c4d6e843d 100644 --- a/sys/mips/nlm/dev/net/xlpge.c +++ b/sys/mips/nlm/dev/net/xlpge.c @@ -1,1544 +1,1543 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2012 Broadcom Corporation * All Rights Reserved * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY BROADCOM ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define __RMAN_RESOURCE_VISIBLE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for DELAY */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "miidevs.h" #include #include "miibus_if.h" #include #include /*#define XLP_DRIVER_LOOPBACK*/ static struct nae_port_config nae_port_config[64]; int poe_cl_tbl[MAX_POE_CLASSES] = { 0x0, 0x249249, 0x492492, 0x6db6db, 0x924924, 0xb6db6d, 0xdb6db6, 0xffffff }; /* #define DUMP_PACKET */ static uint64_t nlm_paddr_ld(uint64_t paddr) { uint64_t xkaddr = 0x9800000000000000 | paddr; return (nlm_load_dword_daddr(xkaddr)); } struct nlm_xlp_portdata ifp_ports[64]; static uma_zone_t nl_tx_desc_zone; /* This implementation will register the following tree of device * registration: * pcibus * | * xlpnae (1 instance - virtual entity) * | * xlpge * (18 sgmii / 4 xaui / 2 interlaken instances) * | * miibus */ static int nlm_xlpnae_probe(device_t); static int nlm_xlpnae_attach(device_t); static int nlm_xlpnae_detach(device_t); static int nlm_xlpnae_suspend(device_t); static int nlm_xlpnae_resume(device_t); static int nlm_xlpnae_shutdown(device_t); static device_method_t nlm_xlpnae_methods[] = { /* Methods from the device interface */ DEVMETHOD(device_probe, nlm_xlpnae_probe), DEVMETHOD(device_attach, nlm_xlpnae_attach), DEVMETHOD(device_detach, nlm_xlpnae_detach), DEVMETHOD(device_suspend, nlm_xlpnae_suspend), DEVMETHOD(device_resume, nlm_xlpnae_resume), DEVMETHOD(device_shutdown, nlm_xlpnae_shutdown), DEVMETHOD(bus_driver_added, bus_generic_driver_added), DEVMETHOD_END }; static driver_t nlm_xlpnae_driver = { "xlpnae", nlm_xlpnae_methods, sizeof(struct nlm_xlpnae_softc) }; static devclass_t nlm_xlpnae_devclass; static int nlm_xlpge_probe(device_t); static int nlm_xlpge_attach(device_t); static int nlm_xlpge_detach(device_t); static int nlm_xlpge_suspend(device_t); static int nlm_xlpge_resume(device_t); static int nlm_xlpge_shutdown(device_t); /* mii override functions */ static int nlm_xlpge_mii_read(device_t, int, int); static int 
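/*
 * These MII prototypes are the custom accessors wired into
 * nlm_xlpge_methods[] below as miibus_readreg, miibus_writereg and
 * miibus_statchg, so the generic miibus code drives the PHYs through
 * the NAE MDIO block rather than a conventional MAC register window.
 */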
nlm_xlpge_mii_write(device_t, int, int, int); static void nlm_xlpge_mii_statchg(device_t); static device_method_t nlm_xlpge_methods[] = { /* Methods from the device interface */ DEVMETHOD(device_probe, nlm_xlpge_probe), DEVMETHOD(device_attach, nlm_xlpge_attach), DEVMETHOD(device_detach, nlm_xlpge_detach), DEVMETHOD(device_suspend, nlm_xlpge_suspend), DEVMETHOD(device_resume, nlm_xlpge_resume), DEVMETHOD(device_shutdown, nlm_xlpge_shutdown), /* Methods from the nexus bus needed for explicitly * probing children when driver is loaded as a kernel module */ DEVMETHOD(miibus_readreg, nlm_xlpge_mii_read), DEVMETHOD(miibus_writereg, nlm_xlpge_mii_write), DEVMETHOD(miibus_statchg, nlm_xlpge_mii_statchg), /* Terminate method list */ DEVMETHOD_END }; static driver_t nlm_xlpge_driver = { "xlpge", nlm_xlpge_methods, sizeof(struct nlm_xlpge_softc) }; static devclass_t nlm_xlpge_devclass; DRIVER_MODULE(xlpnae, pci, nlm_xlpnae_driver, nlm_xlpnae_devclass, 0, 0); DRIVER_MODULE(xlpge, xlpnae, nlm_xlpge_driver, nlm_xlpge_devclass, 0, 0); DRIVER_MODULE(miibus, xlpge, miibus_driver, miibus_devclass, 0, 0); MODULE_DEPEND(pci, xlpnae, 1, 1, 1); MODULE_DEPEND(xlpnae, xlpge, 1, 1, 1); MODULE_DEPEND(xlpge, ether, 1, 1, 1); MODULE_DEPEND(xlpge, miibus, 1, 1, 1); #define SGMII_RCV_CONTEXT_WIDTH 8 /* prototypes */ static void nlm_xlpge_msgring_handler(int vc, int size, int code, int srcid, struct nlm_fmn_msg *msg, void *data); static void nlm_xlpge_submit_rx_free_desc(struct nlm_xlpge_softc *sc, int num); static void nlm_xlpge_init(void *addr); static void nlm_xlpge_port_disable(struct nlm_xlpge_softc *sc); static void nlm_xlpge_port_enable(struct nlm_xlpge_softc *sc); /* globals */ int dbg_on = 1; int cntx2port[524]; static __inline void atomic_incr_long(unsigned long *addr) { atomic_add_long(addr, 1); } /* * xlpnae driver implementation */ static int nlm_xlpnae_probe(device_t dev) { if (pci_get_vendor(dev) != PCI_VENDOR_NETLOGIC || pci_get_device(dev) != PCI_DEVICE_ID_NLM_NAE) return (ENXIO); return (BUS_PROBE_DEFAULT); } static void nlm_xlpnae_print_frin_desc_carving(struct nlm_xlpnae_softc *sc) { int intf; uint32_t value; int start, size; /* XXXJC: use max_ports instead of 20 ? 
*/ for (intf = 0; intf < 20; intf++) { nlm_write_nae_reg(sc->base, NAE_FREE_IN_FIFO_CFG, (0x80000000 | intf)); value = nlm_read_nae_reg(sc->base, NAE_FREE_IN_FIFO_CFG); size = 2 * ((value >> 20) & 0x3ff); start = 2 * ((value >> 8) & 0x1ff); } } static void nlm_config_egress(struct nlm_xlpnae_softc *sc, int nblock, int context_base, int hwport, int max_channels) { int offset, num_channels; uint32_t data; num_channels = sc->portcfg[hwport].num_channels; data = (2048 << 12) | (hwport << 4) | 1; nlm_write_nae_reg(sc->base, NAE_TX_IF_BURSTMAX_CMD, data); data = ((context_base + num_channels - 1) << 22) | (context_base << 12) | (hwport << 4) | 1; nlm_write_nae_reg(sc->base, NAE_TX_DDR_ACTVLIST_CMD, data); config_egress_fifo_carvings(sc->base, hwport, context_base, num_channels, max_channels, sc->portcfg); config_egress_fifo_credits(sc->base, hwport, context_base, num_channels, max_channels, sc->portcfg); data = nlm_read_nae_reg(sc->base, NAE_DMA_TX_CREDIT_TH); data |= (1 << 25) | (1 << 24); nlm_write_nae_reg(sc->base, NAE_DMA_TX_CREDIT_TH, data); for (offset = 0; offset < num_channels; offset++) { nlm_write_nae_reg(sc->base, NAE_TX_SCHED_MAP_CMD1, NAE_DRR_QUANTA); data = (hwport << 15) | ((context_base + offset) << 5); if (sc->cmplx_type[nblock] == ILC) data |= (offset << 20); nlm_write_nae_reg(sc->base, NAE_TX_SCHED_MAP_CMD0, data | 1); nlm_write_nae_reg(sc->base, NAE_TX_SCHED_MAP_CMD0, data); } } static int xlpnae_get_maxchannels(struct nlm_xlpnae_softc *sc) { int maxchans = 0; int i; for (i = 0; i < sc->max_ports; i++) { if (sc->portcfg[i].type == UNKNOWN) continue; maxchans += sc->portcfg[i].num_channels; } return (maxchans); } static void nlm_setup_interface(struct nlm_xlpnae_softc *sc, int nblock, int port, uint32_t cur_flow_base, uint32_t flow_mask, int max_channels, int context) { uint64_t nae_base = sc->base; int mtu = 1536; /* XXXJC: don't hard code */ uint32_t ucore_mask; if (sc->cmplx_type[nblock] == XAUIC) nlm_config_xaui(nae_base, nblock, mtu, mtu, sc->portcfg[port].vlan_pri_en); nlm_config_freein_fifo_uniq_cfg(nae_base, port, sc->portcfg[port].free_desc_sizes); nlm_config_ucore_iface_mask_cfg(nae_base, port, sc->portcfg[port].ucore_mask); nlm_program_flow_cfg(nae_base, port, cur_flow_base, flow_mask); if (sc->cmplx_type[nblock] == SGMIIC) nlm_configure_sgmii_interface(nae_base, nblock, port, mtu, 0); nlm_config_egress(sc, nblock, context, port, max_channels); nlm_nae_init_netior(nae_base, sc->nblocks); nlm_nae_open_if(nae_base, nblock, sc->cmplx_type[nblock], port, sc->portcfg[port].free_desc_sizes); /* XXXJC: check mask calculation */ ucore_mask = (1 << sc->nucores) - 1; nlm_nae_init_ucore(nae_base, port, ucore_mask); } static void nlm_setup_interfaces(struct nlm_xlpnae_softc *sc) { uint64_t nae_base; uint32_t cur_slot, cur_slot_base; uint32_t cur_flow_base, port, flow_mask; int max_channels; int i, context; cur_slot = 0; cur_slot_base = 0; cur_flow_base = 0; nae_base = sc->base; flow_mask = nlm_get_flow_mask(sc->total_num_ports); /* calculate max_channels */ max_channels = xlpnae_get_maxchannels(sc); port = 0; context = 0; for (i = 0; i < sc->max_ports; i++) { if (sc->portcfg[i].type == UNKNOWN) continue; nlm_setup_interface(sc, sc->portcfg[i].block, i, cur_flow_base, flow_mask, max_channels, context); cur_flow_base += sc->per_port_num_flows; context += sc->portcfg[i].num_channels; } } static void nlm_xlpnae_init(int node, struct nlm_xlpnae_softc *sc) { uint64_t nae_base; uint32_t ucoremask = 0; uint32_t val; int i; nae_base = sc->base; nlm_nae_flush_free_fifo(nae_base, 
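/*
 * nlm_setup_interfaces() above walks every configured port and hands
 * it a contiguous block of resources: cur_flow_base advances by
 * sc->per_port_num_flows flow IDs per port, and the egress context
 * base advances by that port's num_channels, matching the carving
 * programmed by nlm_config_egress().  The nlm_xlpnae_init() sequence
 * started here then loads the ucores, sets the NAE frequency and
 * brings up the SGMII/XAUI PCS blocks before the interfaces are
 * opened.
 */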
sc->nblocks); nlm_deflate_frin_fifo_carving(nae_base, sc->max_ports); nlm_reset_nae(node); for (i = 0; i < sc->nucores; i++) /* XXXJC: code repeated below */ ucoremask |= (0x1 << i); printf("Loading 0x%x ucores with microcode\n", ucoremask); nlm_ucore_load_all(nae_base, ucoremask, 1); val = nlm_set_device_frequency(node, DFS_DEVICE_NAE, sc->freq); printf("Setup NAE frequency to %dMHz\n", val); nlm_mdio_reset_all(nae_base); printf("Initialze SGMII PCS for blocks 0x%x\n", sc->sgmiimask); nlm_sgmii_pcs_init(nae_base, sc->sgmiimask); printf("Initialze XAUI PCS for blocks 0x%x\n", sc->xauimask); nlm_xaui_pcs_init(nae_base, sc->xauimask); /* clear NETIOR soft reset */ nlm_write_nae_reg(nae_base, NAE_LANE_CFG_SOFTRESET, 0x0); /* Disable RX enable bit in RX_CONFIG */ val = nlm_read_nae_reg(nae_base, NAE_RX_CONFIG); val &= 0xfffffffe; nlm_write_nae_reg(nae_base, NAE_RX_CONFIG, val); if (nlm_is_xlp8xx_ax() == 0) { val = nlm_read_nae_reg(nae_base, NAE_TX_CONFIG); val &= ~(1 << 3); nlm_write_nae_reg(nae_base, NAE_TX_CONFIG, val); } nlm_setup_poe_class_config(nae_base, MAX_POE_CLASSES, sc->ncontexts, poe_cl_tbl); nlm_setup_vfbid_mapping(nae_base); nlm_setup_flow_crc_poly(nae_base, sc->flow_crc_poly); nlm_setup_rx_cal_cfg(nae_base, sc->max_ports, sc->portcfg); /* note: xlp8xx Ax does not have Tx Calendering */ if (!nlm_is_xlp8xx_ax()) nlm_setup_tx_cal_cfg(nae_base, sc->max_ports, sc->portcfg); nlm_setup_interfaces(sc); nlm_config_poe(sc->poe_base, sc->poedv_base); if (sc->hw_parser_en) nlm_enable_hardware_parser(nae_base); if (sc->prepad_en) nlm_prepad_enable(nae_base, sc->prepad_size); if (sc->ieee_1588_en) nlm_setup_1588_timer(sc->base, sc->portcfg); } static void nlm_xlpnae_update_pde(void *dummy __unused) { struct nlm_xlpnae_softc *sc; uint32_t dv[NUM_WORDS_PER_DV]; device_t dev; int vec; dev = devclass_get_device(devclass_find("xlpnae"), 0); sc = device_get_softc(dev); nlm_write_poe_reg(sc->poe_base, POE_DISTR_EN, 0); for (vec = 0; vec < NUM_DIST_VEC; vec++) { if (nlm_get_poe_distvec(vec, dv) != 0) continue; nlm_write_poe_distvec(sc->poedv_base, vec, dv); } nlm_write_poe_reg(sc->poe_base, POE_DISTR_EN, 1); } SYSINIT(nlm_xlpnae_update_pde, SI_SUB_SMP, SI_ORDER_ANY, nlm_xlpnae_update_pde, NULL); /* configuration common for sgmii, xaui, ilaken goes here */ static void nlm_setup_portcfg(struct nlm_xlpnae_softc *sc, struct xlp_nae_ivars *naep, int block, int port) { int i; uint32_t ucore_mask = 0; struct xlp_block_ivars *bp; struct xlp_port_ivars *p; bp = &(naep->block_ivars[block]); p = &(bp->port_ivars[port & 0x3]); sc->portcfg[port].node = p->node; sc->portcfg[port].block = p->block; sc->portcfg[port].port = p->port; sc->portcfg[port].type = p->type; sc->portcfg[port].mdio_bus = p->mdio_bus; sc->portcfg[port].phy_addr = p->phy_addr; sc->portcfg[port].loopback_mode = p->loopback_mode; sc->portcfg[port].num_channels = p->num_channels; if (p->free_desc_sizes != MCLBYTES) { printf("[%d, %d] Error: free_desc_sizes %d != %d\n", block, port, p->free_desc_sizes, MCLBYTES); return; } sc->portcfg[port].free_desc_sizes = p->free_desc_sizes; for (i = 0; i < sc->nucores; i++) /* XXXJC: configure this */ ucore_mask |= (0x1 << i); sc->portcfg[port].ucore_mask = ucore_mask; sc->portcfg[port].vlan_pri_en = p->vlan_pri_en; sc->portcfg[port].num_free_descs = p->num_free_descs; sc->portcfg[port].iface_fifo_size = p->iface_fifo_size; sc->portcfg[port].rxbuf_size = p->rxbuf_size; sc->portcfg[port].rx_slots_reqd = p->rx_slots_reqd; sc->portcfg[port].tx_slots_reqd = p->tx_slots_reqd; sc->portcfg[port].pseq_fifo_size = 
p->pseq_fifo_size; sc->portcfg[port].stg2_fifo_size = p->stg2_fifo_size; sc->portcfg[port].eh_fifo_size = p->eh_fifo_size; sc->portcfg[port].frout_fifo_size = p->frout_fifo_size; sc->portcfg[port].ms_fifo_size = p->ms_fifo_size; sc->portcfg[port].pkt_fifo_size = p->pkt_fifo_size; sc->portcfg[port].pktlen_fifo_size = p->pktlen_fifo_size; sc->portcfg[port].max_stg2_offset = p->max_stg2_offset; sc->portcfg[port].max_eh_offset = p->max_eh_offset; sc->portcfg[port].max_frout_offset = p->max_frout_offset; sc->portcfg[port].max_ms_offset = p->max_ms_offset; sc->portcfg[port].max_pmem_offset = p->max_pmem_offset; sc->portcfg[port].stg1_2_credit = p->stg1_2_credit; sc->portcfg[port].stg2_eh_credit = p->stg2_eh_credit; sc->portcfg[port].stg2_frout_credit = p->stg2_frout_credit; sc->portcfg[port].stg2_ms_credit = p->stg2_ms_credit; sc->portcfg[port].ieee1588_inc_intg = p->ieee1588_inc_intg; sc->portcfg[port].ieee1588_inc_den = p->ieee1588_inc_den; sc->portcfg[port].ieee1588_inc_num = p->ieee1588_inc_num; sc->portcfg[port].ieee1588_userval = p->ieee1588_userval; sc->portcfg[port].ieee1588_ptpoff = p->ieee1588_ptpoff; sc->portcfg[port].ieee1588_tmr1 = p->ieee1588_tmr1; sc->portcfg[port].ieee1588_tmr2 = p->ieee1588_tmr2; sc->portcfg[port].ieee1588_tmr3 = p->ieee1588_tmr3; sc->total_free_desc += sc->portcfg[port].free_desc_sizes; sc->total_num_ports++; } static int nlm_xlpnae_attach(device_t dev) { struct xlp_nae_ivars *nae_ivars; struct nlm_xlpnae_softc *sc; device_t tmpd; uint32_t dv[NUM_WORDS_PER_DV]; int port, i, j, nchan, nblock, node, qstart, qnum; int offset, context, txq_base, rxvcbase; uint64_t poe_pcibase, nae_pcibase; node = pci_get_slot(dev) / 8; nae_ivars = &xlp_board_info.nodes[node].nae_ivars; sc = device_get_softc(dev); sc->xlpnae_dev = dev; sc->node = nae_ivars->node; sc->base = nlm_get_nae_regbase(sc->node); sc->poe_base = nlm_get_poe_regbase(sc->node); sc->poedv_base = nlm_get_poedv_regbase(sc->node); sc->portcfg = nae_port_config; sc->blockmask = nae_ivars->blockmask; sc->ilmask = nae_ivars->ilmask; sc->xauimask = nae_ivars->xauimask; sc->sgmiimask = nae_ivars->sgmiimask; sc->nblocks = nae_ivars->nblocks; sc->freq = nae_ivars->freq; /* flow table generation is done by CRC16 polynomial */ sc->flow_crc_poly = nae_ivars->flow_crc_poly; sc->hw_parser_en = nae_ivars->hw_parser_en; sc->prepad_en = nae_ivars->prepad_en; sc->prepad_size = nae_ivars->prepad_size; sc->ieee_1588_en = nae_ivars->ieee_1588_en; nae_pcibase = nlm_get_nae_pcibase(sc->node); sc->ncontexts = nlm_read_reg(nae_pcibase, XLP_PCI_DEVINFO_REG5); sc->nucores = nlm_num_uengines(nae_pcibase); for (nblock = 0; nblock < sc->nblocks; nblock++) { sc->cmplx_type[nblock] = nae_ivars->block_ivars[nblock].type; sc->portmask[nblock] = nae_ivars->block_ivars[nblock].portmask; } for (i = 0; i < sc->ncontexts; i++) cntx2port[i] = 18; /* 18 is an invalid port */ if (sc->nblocks == 5) sc->max_ports = 18; /* 8xx has a block 4 with 2 ports */ else sc->max_ports = sc->nblocks * PORTS_PER_CMPLX; for (i = 0; i < sc->max_ports; i++) sc->portcfg[i].type = UNKNOWN; /* Port Not Present */ /* * Now setup all internal fifo carvings based on * total number of ports in the system */ sc->total_free_desc = 0; sc->total_num_ports = 0; port = 0; context = 0; txq_base = nlm_qidstart(nae_pcibase); rxvcbase = txq_base + sc->ncontexts; for (i = 0; i < sc->nblocks; i++) { uint32_t portmask; if ((nae_ivars->blockmask & (1 << i)) == 0) { port += 4; continue; } portmask = nae_ivars->block_ivars[i].portmask; for (j = 0; j < PORTS_PER_CMPLX; j++, port++) { if 
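/*
 * Per-port queue wiring done in this loop: each present port gets
 * num_channels consecutive FMN contexts starting at 'context', its
 * transmit queue is txq_base + context and its receive
 * free-descriptor queue is rxvcbase + port.  cntx2port[] records the
 * reverse mapping so later message handling can translate a source
 * context back to the owning port.
 */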
((portmask & (1 << j)) == 0) continue; nlm_setup_portcfg(sc, nae_ivars, i, port); nchan = sc->portcfg[port].num_channels; for (offset = 0; offset < nchan; offset++) cntx2port[context + offset] = port; sc->portcfg[port].txq = txq_base + context; sc->portcfg[port].rxfreeq = rxvcbase + port; context += nchan; } } poe_pcibase = nlm_get_poe_pcibase(sc->node); sc->per_port_num_flows = nlm_poe_max_flows(poe_pcibase) / sc->total_num_ports; /* zone for P2P descriptors */ nl_tx_desc_zone = uma_zcreate("NL Tx Desc", sizeof(struct xlpge_tx_desc), NULL, NULL, NULL, NULL, NAE_CACHELINE_SIZE, 0); /* NAE FMN messages have CMS src station id's in the * range of qstart to qnum. */ qstart = nlm_qidstart(nae_pcibase); qnum = nlm_qnum(nae_pcibase); if (register_msgring_handler(qstart, qstart + qnum - 1, nlm_xlpge_msgring_handler, sc)) { panic("Couldn't register NAE msgring handler\n"); } /* POE FMN messages have CMS src station id's in the * range of qstart to qnum. */ qstart = nlm_qidstart(poe_pcibase); qnum = nlm_qnum(poe_pcibase); if (register_msgring_handler(qstart, qstart + qnum - 1, nlm_xlpge_msgring_handler, sc)) { panic("Couldn't register POE msgring handler\n"); } nlm_xlpnae_init(node, sc); for (i = 0; i < sc->max_ports; i++) { char desc[32]; int block, port; if (sc->portcfg[i].type == UNKNOWN) continue; block = sc->portcfg[i].block; port = sc->portcfg[i].port; tmpd = device_add_child(dev, "xlpge", i); device_set_ivars(tmpd, &(nae_ivars->block_ivars[block].port_ivars[port])); sprintf(desc, "XLP NAE Port %d,%d", block, port); device_set_desc_copy(tmpd, desc); } nlm_setup_iface_fifo_cfg(sc->base, sc->max_ports, sc->portcfg); nlm_setup_rx_base_config(sc->base, sc->max_ports, sc->portcfg); nlm_setup_rx_buf_config(sc->base, sc->max_ports, sc->portcfg); nlm_setup_freein_fifo_cfg(sc->base, sc->portcfg); nlm_program_nae_parser_seq_fifo(sc->base, sc->max_ports, sc->portcfg); nlm_xlpnae_print_frin_desc_carving(sc); bus_generic_probe(dev); bus_generic_attach(dev); /* * Enable only boot cpu at this point, full distribution comes * only after SMP is started */ nlm_write_poe_reg(sc->poe_base, POE_DISTR_EN, 0); nlm_calc_poe_distvec(0x1, 0, 0, 0, 0x1 << XLPGE_RX_VC, dv); nlm_write_poe_distvec(sc->poedv_base, 0, dv); nlm_write_poe_reg(sc->poe_base, POE_DISTR_EN, 1); return (0); } static int nlm_xlpnae_detach(device_t dev) { /* TODO - free zone here */ return (0); } static int nlm_xlpnae_suspend(device_t dev) { return (0); } static int nlm_xlpnae_resume(device_t dev) { return (0); } static int nlm_xlpnae_shutdown(device_t dev) { return (0); } /* * xlpge driver implementation */ static void nlm_xlpge_mac_set_rx_mode(struct nlm_xlpge_softc *sc) { if (sc->if_flags & IFF_PROMISC) { if (sc->type == SGMIIC) nlm_nae_setup_rx_mode_sgmii(sc->base_addr, sc->block, sc->port, sc->type, 1 /* broadcast */, 1/* multicast */, 0 /* pause */, 1 /* promisc */); else nlm_nae_setup_rx_mode_xaui(sc->base_addr, sc->block, sc->port, sc->type, 1 /* broadcast */, 1/* multicast */, 0 /* pause */, 1 /* promisc */); } else { if (sc->type == SGMIIC) nlm_nae_setup_rx_mode_sgmii(sc->base_addr, sc->block, sc->port, sc->type, 1 /* broadcast */, 1/* multicast */, 0 /* pause */, 0 /* promisc */); else nlm_nae_setup_rx_mode_xaui(sc->base_addr, sc->block, sc->port, sc->type, 1 /* broadcast */, 1/* multicast */, 0 /* pause */, 0 /* promisc */); } } static int nlm_xlpge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct mii_data *mii; struct nlm_xlpge_softc *sc; struct ifreq *ifr; int error; sc = ifp->if_softc; error = 0; ifr = (struct ifreq 
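/*
 * Ioctl handling sketch: SIOCSIFFLAGS brings the port up through
 * nlm_xlpge_init() or nlm_xlpge_port_enable() and refreshes the RX
 * mode, or takes it down through nlm_xlpge_port_disable();
 * SIOCSIFMEDIA and SIOCGIFMEDIA are delegated to the attached mii
 * bus when one exists, and everything else falls through to
 * ether_ioctl().
 */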
*)data; switch (command) { case SIOCSIFFLAGS: XLPGE_LOCK(sc); sc->if_flags = ifp->if_flags; if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) nlm_xlpge_init(sc); else nlm_xlpge_port_enable(sc); nlm_xlpge_mac_set_rx_mode(sc); sc->link = NLM_LINK_UP; } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) nlm_xlpge_port_disable(sc); sc->link = NLM_LINK_DOWN; } XLPGE_UNLOCK(sc); error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: if (sc->mii_bus != NULL) { mii = device_get_softc(sc->mii_bus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); } break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int xlpge_tx(struct ifnet *ifp, struct mbuf *mbuf_chain) { struct nlm_fmn_msg msg; struct xlpge_tx_desc *p2p; struct nlm_xlpge_softc *sc; struct mbuf *m; vm_paddr_t paddr; int fbid, dst, pos, err; int ret = 0, tx_msgstatus, retries; err = 0; if (mbuf_chain == NULL) return (0); sc = ifp->if_softc; p2p = NULL; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING) || ifp->if_drv_flags & IFF_DRV_OACTIVE) { err = ENXIO; goto fail; } /* free a few in coming messages on the fb vc */ xlp_handle_msg_vc(1 << XLPGE_FB_VC, 2); /* vfb id table is setup to map cpu to vc 3 of the cpu */ fbid = nlm_cpuid(); dst = sc->txq; pos = 0; p2p = uma_zalloc(nl_tx_desc_zone, M_NOWAIT); if (p2p == NULL) { printf("alloc fail\n"); err = ENOBUFS; goto fail; } for (m = mbuf_chain; m != NULL; m = m->m_next) { vm_offset_t buf = (vm_offset_t) m->m_data; int len = m->m_len; int frag_sz; uint64_t desc; /*printf("m_data = %p len %d\n", m->m_data, len); */ while (len) { if (pos == XLP_NTXFRAGS - 3) { device_printf(sc->xlpge_dev, "packet defrag %d\n", m_length(mbuf_chain, NULL)); err = ENOBUFS; /* TODO fix error */ goto fail; } paddr = vtophys(buf); frag_sz = PAGE_SIZE - (buf & PAGE_MASK); if (len < frag_sz) frag_sz = len; desc = nae_tx_desc(P2D_NEOP, 0, 127, frag_sz, paddr); p2p->frag[pos] = htobe64(desc); pos++; len -= frag_sz; buf += frag_sz; } } KASSERT(pos != 0, ("Zero-length mbuf chain?\n")); /* Make the last one P2D EOP */ p2p->frag[pos-1] |= htobe64((uint64_t)P2D_EOP << 62); /* stash useful pointers in the desc */ p2p->frag[XLP_NTXFRAGS-3] = 0xf00bad; p2p->frag[XLP_NTXFRAGS-2] = (uintptr_t)p2p; p2p->frag[XLP_NTXFRAGS-1] = (uintptr_t)mbuf_chain; paddr = vtophys(p2p); msg.msg[0] = nae_tx_desc(P2P, 0, fbid, pos, paddr); for (retries = 16; retries > 0; retries--) { ret = nlm_fmn_msgsend(dst, 1, FMN_SWCODE_NAE, &msg); if (ret == 0) return (0); } fail: if (ret != 0) { tx_msgstatus = nlm_read_c2_txmsgstatus(); if ((tx_msgstatus >> 24) & 0x1) device_printf(sc->xlpge_dev, "Transmit queue full - "); if ((tx_msgstatus >> 3) & 0x1) device_printf(sc->xlpge_dev, "ECC error - "); if ((tx_msgstatus >> 2) & 0x1) device_printf(sc->xlpge_dev, "Pending Sync - "); if ((tx_msgstatus >> 1) & 0x1) device_printf(sc->xlpge_dev, "Insufficient input queue credits - "); if (tx_msgstatus & 0x1) device_printf(sc->xlpge_dev, "Insufficient output queue credits - "); } device_printf(sc->xlpge_dev, "Send failed! 
err = %d\n", err); if (p2p) uma_zfree(nl_tx_desc_zone, p2p); m_freem(mbuf_chain); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (err); } static int nlm_xlpge_gmac_config_speed(struct nlm_xlpge_softc *sc) { struct mii_data *mii; if (sc->type == XAUIC || sc->type == ILC) return (0); if (sc->mii_bus) { mii = device_get_softc(sc->mii_bus); mii_pollstat(mii); } return (0); } static void nlm_xlpge_port_disable(struct nlm_xlpge_softc *sc) { struct ifnet *ifp; ifp = sc->xlpge_if; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; callout_stop(&sc->xlpge_callout); nlm_mac_disable(sc->base_addr, sc->block, sc->type, sc->port); } static void nlm_mii_pollstat(void *arg) { struct nlm_xlpge_softc *sc = (struct nlm_xlpge_softc *)arg; struct mii_data *mii = NULL; if (sc->mii_bus) { mii = device_get_softc(sc->mii_bus); KASSERT(mii != NULL, ("mii ptr is NULL")); mii_pollstat(mii); callout_reset(&sc->xlpge_callout, hz, nlm_mii_pollstat, sc); } } static void nlm_xlpge_port_enable(struct nlm_xlpge_softc *sc) { if ((sc->type != SGMIIC) && (sc->type != XAUIC)) return; nlm_mac_enable(sc->base_addr, sc->block, sc->type, sc->port); nlm_mii_pollstat((void *)sc); } static void nlm_xlpge_init(void *addr) { struct nlm_xlpge_softc *sc; struct ifnet *ifp; struct mii_data *mii = NULL; sc = (struct nlm_xlpge_softc *)addr; ifp = sc->xlpge_if; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; if (sc->mii_bus) { mii = device_get_softc(sc->mii_bus); mii_mediachg(mii); } nlm_xlpge_gmac_config_speed(sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; nlm_xlpge_port_enable(sc); /* start the callout */ callout_reset(&sc->xlpge_callout, hz, nlm_mii_pollstat, sc); } /* * Read the MAC address from FDT or board eeprom. */ static void xlpge_read_mac_addr(struct nlm_xlpge_softc *sc) { xlpge_get_macaddr(sc->dev_addr); /* last octet is port specific */ sc->dev_addr[5] += (sc->block * 4) + sc->port; if (sc->type == SGMIIC) nlm_nae_setup_mac_addr_sgmii(sc->base_addr, sc->block, sc->port, sc->type, sc->dev_addr); else if (sc->type == XAUIC) nlm_nae_setup_mac_addr_xaui(sc->base_addr, sc->block, sc->port, sc->type, sc->dev_addr); } static int xlpge_mediachange(struct ifnet *ifp) { return (0); } static void xlpge_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) { struct nlm_xlpge_softc *sc; struct mii_data *md; md = NULL; sc = ifp->if_softc; if (sc->mii_bus) md = device_get_softc(sc->mii_bus); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (sc->link == NLM_LINK_DOWN) return; if (md != NULL) ifmr->ifm_active = md->mii_media.ifm_cur->ifm_media; ifmr->ifm_status |= IFM_ACTIVE; } static int nlm_xlpge_ifinit(struct nlm_xlpge_softc *sc) { struct ifnet *ifp; device_t dev; int port = sc->block * 4 + sc->port; dev = sc->xlpge_dev; ifp = sc->xlpge_if = if_alloc(IFT_ETHER); /*(sc->network_sc)->ifp_ports[port].xlpge_if = ifp;*/ ifp_ports[port].xlpge_if = ifp; if (ifp == NULL) { device_printf(dev, "cannot if_alloc()\n"); return (ENOSPC); } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | - IFF_NEEDSEPOCH; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; sc->if_flags = ifp->if_flags; /*ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_VLAN_HWTAGGING;*/ ifp->if_capabilities = 0; ifp->if_capenable = ifp->if_capabilities; ifp->if_ioctl = nlm_xlpge_ioctl; ifp->if_init = nlm_xlpge_init ; ifp->if_hwassist = 0; ifp->if_snd.ifq_drv_maxlen = NLM_XLPGE_TXQ_SIZE; /* TODO: make this a sysint */ IFQ_SET_MAXLEN(&ifp->if_snd, 
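/*
 * Transmit path recap for the ifnet being set up here: if_transmit
 * is overridden to xlpge_tx() after ether_ifattach(), so the if_snd
 * length configured here matters only for code paths that still use
 * the legacy queue.  xlpge_tx() builds a P2P descriptor with up to
 * XLP_NTXFRAGS - 3 P2D fragments (the last one tagged P2D_EOP) and
 * stashes a 0xf00bad magic, the descriptor pointer and the mbuf
 * chain pointer in the final three slots so that
 * nlm_xlpge_release_mbuf() can reclaim both on completion.
 */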
ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&ifp->if_snd); ifmedia_init(&sc->xlpge_mii.mii_media, 0, xlpge_mediachange, xlpge_mediastatus); ifmedia_add(&sc->xlpge_mii.mii_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->xlpge_mii.mii_media, IFM_ETHER | IFM_AUTO); sc->xlpge_mii.mii_media.ifm_media = sc->xlpge_mii.mii_media.ifm_cur->ifm_media; xlpge_read_mac_addr(sc); ether_ifattach(ifp, sc->dev_addr); /* override if_transmit : per ifnet(9), do it after if_attach */ ifp->if_transmit = xlpge_tx; return (0); } static int nlm_xlpge_probe(device_t dev) { return (BUS_PROBE_DEFAULT); } static void * get_buf(void) { struct mbuf *m_new; uint64_t *md; #ifdef INVARIANTS vm_paddr_t temp1, temp2; #endif if ((m_new = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR)) == NULL) return (NULL); m_new->m_len = m_new->m_pkthdr.len = MCLBYTES; KASSERT(((uintptr_t)m_new->m_data & (NAE_CACHELINE_SIZE - 1)) == 0, ("m_new->m_data is not cacheline aligned")); md = (uint64_t *)m_new->m_data; md[0] = (intptr_t)m_new; /* Back Ptr */ md[1] = 0xf00bad; m_adj(m_new, NAE_CACHELINE_SIZE); #ifdef INVARIANTS temp1 = vtophys((vm_offset_t) m_new->m_data); temp2 = vtophys((vm_offset_t) m_new->m_data + 1536); KASSERT((temp1 + 1536) == temp2, ("Alloced buffer is not contiguous")); #endif return ((void *)m_new->m_data); } static void nlm_xlpge_mii_init(device_t dev, struct nlm_xlpge_softc *sc) { int error; error = mii_attach(dev, &sc->mii_bus, sc->xlpge_if, xlpge_mediachange, xlpge_mediastatus, BMSR_DEFCAPMASK, sc->phy_addr, MII_OFFSET_ANY, 0); if (error) { device_printf(dev, "attaching PHYs failed\n"); sc->mii_bus = NULL; } if (sc->mii_bus != NULL) { /* enable MDIO interrupts in the PHY */ /* XXXJC: TODO */ } } static int xlpge_stats_sysctl(SYSCTL_HANDLER_ARGS) { struct nlm_xlpge_softc *sc; uint32_t val; int reg, field; sc = arg1; field = arg2; reg = SGMII_STATS_MLR(sc->block, sc->port) + field; val = nlm_read_nae_reg(sc->base_addr, reg); return (sysctl_handle_int(oidp, &val, 0, req)); } static void nlm_xlpge_setup_stats_sysctl(device_t dev, struct nlm_xlpge_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *child; struct sysctl_oid *tree; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); #define XLPGE_STAT(name, offset, desc) \ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, name, \ CTLTYPE_UINT | CTLFLAG_RD, sc, offset, \ xlpge_stats_sysctl, "IU", desc) XLPGE_STAT("tr127", nlm_sgmii_stats_tr127, "TxRx 64 - 127 Bytes"); XLPGE_STAT("tr255", nlm_sgmii_stats_tr255, "TxRx 128 - 255 Bytes"); XLPGE_STAT("tr511", nlm_sgmii_stats_tr511, "TxRx 256 - 511 Bytes"); XLPGE_STAT("tr1k", nlm_sgmii_stats_tr1k, "TxRx 512 - 1023 Bytes"); XLPGE_STAT("trmax", nlm_sgmii_stats_trmax, "TxRx 1024 - 1518 Bytes"); XLPGE_STAT("trmgv", nlm_sgmii_stats_trmgv, "TxRx 1519 - 1522 Bytes"); XLPGE_STAT("rbyt", nlm_sgmii_stats_rbyt, "Rx Bytes"); XLPGE_STAT("rpkt", nlm_sgmii_stats_rpkt, "Rx Packets"); XLPGE_STAT("rfcs", nlm_sgmii_stats_rfcs, "Rx FCS Error"); XLPGE_STAT("rmca", nlm_sgmii_stats_rmca, "Rx Multicast Packets"); XLPGE_STAT("rbca", nlm_sgmii_stats_rbca, "Rx Broadcast Packets"); XLPGE_STAT("rxcf", nlm_sgmii_stats_rxcf, "Rx Control Frames"); XLPGE_STAT("rxpf", nlm_sgmii_stats_rxpf, "Rx Pause Frames"); XLPGE_STAT("rxuo", nlm_sgmii_stats_rxuo, "Rx Unknown Opcode"); XLPGE_STAT("raln", nlm_sgmii_stats_raln, "Rx Alignment Errors"); XLPGE_STAT("rflr", nlm_sgmii_stats_rflr, "Rx Framelength Errors"); XLPGE_STAT("rcde", nlm_sgmii_stats_rcde, "Rx Code Errors"); XLPGE_STAT("rcse", nlm_sgmii_stats_rcse, "Rx Carrier Sense 
Errors"); XLPGE_STAT("rund", nlm_sgmii_stats_rund, "Rx Undersize Packet Errors"); XLPGE_STAT("rovr", nlm_sgmii_stats_rovr, "Rx Oversize Packet Errors"); XLPGE_STAT("rfrg", nlm_sgmii_stats_rfrg, "Rx Fragments"); XLPGE_STAT("rjbr", nlm_sgmii_stats_rjbr, "Rx Jabber"); XLPGE_STAT("tbyt", nlm_sgmii_stats_tbyt, "Tx Bytes"); XLPGE_STAT("tpkt", nlm_sgmii_stats_tpkt, "Tx Packets"); XLPGE_STAT("tmca", nlm_sgmii_stats_tmca, "Tx Multicast Packets"); XLPGE_STAT("tbca", nlm_sgmii_stats_tbca, "Tx Broadcast Packets"); XLPGE_STAT("txpf", nlm_sgmii_stats_txpf, "Tx Pause Frame"); XLPGE_STAT("tdfr", nlm_sgmii_stats_tdfr, "Tx Deferral Packets"); XLPGE_STAT("tedf", nlm_sgmii_stats_tedf, "Tx Excessive Deferral Pkts"); XLPGE_STAT("tscl", nlm_sgmii_stats_tscl, "Tx Single Collisions"); XLPGE_STAT("tmcl", nlm_sgmii_stats_tmcl, "Tx Multiple Collisions"); XLPGE_STAT("tlcl", nlm_sgmii_stats_tlcl, "Tx Late Collision Pkts"); XLPGE_STAT("txcl", nlm_sgmii_stats_txcl, "Tx Excessive Collisions"); XLPGE_STAT("tncl", nlm_sgmii_stats_tncl, "Tx Total Collisions"); XLPGE_STAT("tjbr", nlm_sgmii_stats_tjbr, "Tx Jabber Frames"); XLPGE_STAT("tfcs", nlm_sgmii_stats_tfcs, "Tx FCS Errors"); XLPGE_STAT("txcf", nlm_sgmii_stats_txcf, "Tx Control Frames"); XLPGE_STAT("tovr", nlm_sgmii_stats_tovr, "Tx Oversize Frames"); XLPGE_STAT("tund", nlm_sgmii_stats_tund, "Tx Undersize Frames"); XLPGE_STAT("tfrg", nlm_sgmii_stats_tfrg, "Tx Fragments"); #undef XLPGE_STAT } static int nlm_xlpge_attach(device_t dev) { struct xlp_port_ivars *pv; struct nlm_xlpge_softc *sc; int port; pv = device_get_ivars(dev); sc = device_get_softc(dev); sc->xlpge_dev = dev; sc->mii_bus = NULL; sc->block = pv->block; sc->node = pv->node; sc->port = pv->port; sc->type = pv->type; sc->xlpge_if = NULL; sc->phy_addr = pv->phy_addr; sc->mdio_bus = pv->mdio_bus; sc->portcfg = nae_port_config; sc->hw_parser_en = pv->hw_parser_en; /* default settings */ sc->speed = NLM_SGMII_SPEED_10; sc->duplexity = NLM_SGMII_DUPLEX_FULL; sc->link = NLM_LINK_DOWN; sc->flowctrl = NLM_FLOWCTRL_DISABLED; sc->network_sc = device_get_softc(device_get_parent(dev)); sc->base_addr = sc->network_sc->base; sc->prepad_en = sc->network_sc->prepad_en; sc->prepad_size = sc->network_sc->prepad_size; callout_init(&sc->xlpge_callout, 1); XLPGE_LOCK_INIT(sc, device_get_nameunit(dev)); port = (sc->block*4)+sc->port; sc->nfree_desc = nae_port_config[port].num_free_descs; sc->txq = nae_port_config[port].txq; sc->rxfreeq = nae_port_config[port].rxfreeq; nlm_xlpge_submit_rx_free_desc(sc, sc->nfree_desc); if (sc->hw_parser_en) nlm_enable_hardware_parser_per_port(sc->base_addr, sc->block, sc->port); nlm_xlpge_ifinit(sc); ifp_ports[port].xlpge_sc = sc; nlm_xlpge_mii_init(dev, sc); nlm_xlpge_setup_stats_sysctl(dev, sc); return (0); } static int nlm_xlpge_detach(device_t dev) { return (0); } static int nlm_xlpge_suspend(device_t dev) { return (0); } static int nlm_xlpge_resume(device_t dev) { return (0); } static int nlm_xlpge_shutdown(device_t dev) { return (0); } /* * miibus function with custom implementation */ static int nlm_xlpge_mii_read(device_t dev, int phyaddr, int regidx) { struct nlm_xlpge_softc *sc; int val; sc = device_get_softc(dev); if (sc->type == SGMIIC) val = nlm_gmac_mdio_read(sc->base_addr, sc->mdio_bus, BLOCK_7, LANE_CFG, phyaddr, regidx); else val = 0xffff; return (val); } static int nlm_xlpge_mii_write(device_t dev, int phyaddr, int regidx, int val) { struct nlm_xlpge_softc *sc; sc = device_get_softc(dev); if (sc->type == SGMIIC) nlm_gmac_mdio_write(sc->base_addr, sc->mdio_bus, BLOCK_7, LANE_CFG, 
phyaddr, regidx, val); return (0); } static void nlm_xlpge_mii_statchg(device_t dev) { struct nlm_xlpge_softc *sc; struct mii_data *mii; char *speed, *duplexity; sc = device_get_softc(dev); if (sc->mii_bus == NULL) return; mii = device_get_softc(sc->mii_bus); if (mii->mii_media_status & IFM_ACTIVE) { if (IFM_SUBTYPE(mii->mii_media_active) == IFM_10_T) { sc->speed = NLM_SGMII_SPEED_10; speed = "10Mbps"; } else if (IFM_SUBTYPE(mii->mii_media_active) == IFM_100_TX) { sc->speed = NLM_SGMII_SPEED_100; speed = "100Mbps"; } else { /* default to 1G */ sc->speed = NLM_SGMII_SPEED_1000; speed = "1Gbps"; } if ((mii->mii_media_active & IFM_GMASK) == IFM_FDX) { sc->duplexity = NLM_SGMII_DUPLEX_FULL; duplexity = "full"; } else { sc->duplexity = NLM_SGMII_DUPLEX_HALF; duplexity = "half"; } printf("Port [%d, %d] setup with speed=%s duplex=%s\n", sc->block, sc->port, speed, duplexity); nlm_nae_setup_mac(sc->base_addr, sc->block, sc->port, 0, 1, 1, sc->speed, sc->duplexity); } } /* * xlpge support function implementations */ static void nlm_xlpge_release_mbuf(uint64_t paddr) { uint64_t mag, desc, mbuf; paddr += (XLP_NTXFRAGS - 3) * sizeof(uint64_t); mag = nlm_paddr_ld(paddr); desc = nlm_paddr_ld(paddr + sizeof(uint64_t)); mbuf = nlm_paddr_ld(paddr + 2 * sizeof(uint64_t)); if (mag != 0xf00bad) { /* somebody else packet Error - FIXME in intialization */ printf("cpu %d: ERR Tx packet paddr %jx, mag %jx, desc %jx mbuf %jx\n", nlm_cpuid(), (uintmax_t)paddr, (uintmax_t)mag, (intmax_t)desc, (uintmax_t)mbuf); return; } m_freem((struct mbuf *)(uintptr_t)mbuf); uma_zfree(nl_tx_desc_zone, (void *)(uintptr_t)desc); } static void nlm_xlpge_rx(struct nlm_xlpge_softc *sc, int port, vm_paddr_t paddr, int len) { struct ifnet *ifp; struct mbuf *m; vm_offset_t temp; unsigned long mag; int prepad_size; ifp = sc->xlpge_if; temp = nlm_paddr_ld(paddr - NAE_CACHELINE_SIZE); mag = nlm_paddr_ld(paddr - NAE_CACHELINE_SIZE + sizeof(uint64_t)); m = (struct mbuf *)(intptr_t)temp; if (mag != 0xf00bad) { /* somebody else packet Error - FIXME in intialization */ printf("cpu %d: ERR Rx packet paddr %jx, temp %p, mag %lx\n", nlm_cpuid(), (uintmax_t)paddr, (void *)temp, mag); return; } m->m_pkthdr.rcvif = ifp; #ifdef DUMP_PACKET { int i = 0, j = 64; unsigned char *buf = (char *)m->m_data; printf("(cpu_%d: nlge_rx, !RX_COPY) Rx Packet: length=%d\n", nlm_cpuid(), len); if (len < j) j = len; if (sc->prepad_en) j += ((sc->prepad_size + 1) * 16); for (i = 0; i < j; i++) { if (i && (i % 16) == 0) printf("\n"); printf("%02x ", buf[i]); } printf("\n"); } #endif if (sc->prepad_en) { prepad_size = ((sc->prepad_size + 1) * 16); m->m_data += prepad_size; m->m_pkthdr.len = m->m_len = (len - prepad_size); } else m->m_pkthdr.len = m->m_len = len; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); #ifdef XLP_DRIVER_LOOPBACK if (port == 16 || port == 17) (*ifp->if_input)(ifp, m); else xlpge_tx(ifp, m); #else (*ifp->if_input)(ifp, m); #endif } void nlm_xlpge_submit_rx_free_desc(struct nlm_xlpge_softc *sc, int num) { int i, size, ret, n; struct nlm_fmn_msg msg; void *ptr; for(i = 0; i < num; i++) { memset(&msg, 0, sizeof(msg)); ptr = get_buf(); if (!ptr) { device_printf(sc->xlpge_dev, "Cannot allocate mbuf\n"); break; } msg.msg[0] = vtophys(ptr); if (msg.msg[0] == 0) { printf("Bad ptr for %p\n", ptr); break; } size = 1; n = 0; while (1) { /* on success returns 1, else 0 */ ret = nlm_fmn_msgsend(sc->rxfreeq, size, 0, &msg); if (ret == 0) break; if (n++ > 10000) { printf("Too many credit fails for send free desc\n"); break; } } } } void nlm_xlpge_msgring_handler(int vc, 
int size, int code, int src_id, struct nlm_fmn_msg *msg, void *data) { uint64_t phys_addr; struct nlm_xlpnae_softc *sc; struct nlm_xlpge_softc *xlpge_sc; struct ifnet *ifp; uint32_t context; uint32_t port = 0; uint32_t length; sc = (struct nlm_xlpnae_softc *)data; KASSERT(sc != NULL, ("Null sc in msgring handler")); if (size == 1) { /* process transmit complete */ phys_addr = msg->msg[0] & 0xffffffffffULL; /* context is SGMII_RCV_CONTEXT_NUM + three bit vlan type * or vlan priority */ context = (msg->msg[0] >> 40) & 0x3fff; port = cntx2port[context]; if (port >= XLP_MAX_PORTS) { printf("%s:%d Bad port %d (context=%d)\n", __func__, __LINE__, port, context); return; } ifp = ifp_ports[port].xlpge_if; xlpge_sc = ifp_ports[port].xlpge_sc; nlm_xlpge_release_mbuf(phys_addr); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); } else if (size > 1) { /* Recieve packet */ phys_addr = msg->msg[1] & 0xffffffffc0ULL; length = (msg->msg[1] >> 40) & 0x3fff; length -= MAC_CRC_LEN; /* context is SGMII_RCV_CONTEXT_NUM + three bit vlan type * or vlan priority */ context = (msg->msg[1] >> 54) & 0x3ff; port = cntx2port[context]; if (port >= XLP_MAX_PORTS) { printf("%s:%d Bad port %d (context=%d)\n", __func__, __LINE__, port, context); return; } ifp = ifp_ports[port].xlpge_if; xlpge_sc = ifp_ports[port].xlpge_sc; nlm_xlpge_rx(xlpge_sc, port, phys_addr, length); /* return back a free descriptor to NA */ nlm_xlpge_submit_rx_free_desc(xlpge_sc, 1); } } diff --git a/sys/net/if.c b/sys/net/if.c index 42d34bb21ddb..d1c3cfba811c 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1,4575 +1,4573 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)if.c 8.5 (Berkeley) 1/9/95 * $FreeBSD$ */ #include "opt_bpf.h" #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #ifdef INET6 #include #include #endif /* INET6 */ #endif /* INET || INET6 */ #include /* * Consumers of struct ifreq such as tcpdump assume no pad between ifr_name * and ifr_ifru when it is used in SIOCGIFCONF. */ _Static_assert(sizeof(((struct ifreq *)0)->ifr_name) == offsetof(struct ifreq, ifr_ifru), "gap between ifr_name and ifr_ifru"); __read_mostly epoch_t net_epoch_preempt; #ifdef COMPAT_FREEBSD32 #include #include struct ifreq_buffer32 { uint32_t length; /* (size_t) */ uint32_t buffer; /* (void *) */ }; /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq32 { char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer32 ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; uint32_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; } ifr_ifru; }; CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32)); CTASSERT(__offsetof(struct ifreq, ifr_ifru) == __offsetof(struct ifreq32, ifr_ifru)); struct ifgroupreq32 { char ifgr_name[IFNAMSIZ]; u_int ifgr_len; union { char ifgru_group[IFNAMSIZ]; uint32_t ifgru_groups; } ifgr_ifgru; }; struct ifmediareq32 { char ifm_name[IFNAMSIZ]; int ifm_current; int ifm_mask; int ifm_status; int ifm_active; int ifm_count; uint32_t ifm_ulist; /* (int *) */ }; #define SIOCGIFMEDIA32 _IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32) #define SIOCGIFXMEDIA32 _IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32) #define _CASE_IOC_IFGROUPREQ_32(cmd) \ _IOC_NEWTYPE((cmd), struct ifgroupreq32): case #else /* !COMPAT_FREEBSD32 */ #define _CASE_IOC_IFGROUPREQ_32(cmd) #endif /* !COMPAT_FREEBSD32 */ #define CASE_IOC_IFGROUPREQ(cmd) \ _CASE_IOC_IFGROUPREQ_32(cmd) \ (cmd) union ifreq_union { struct ifreq ifr; #ifdef COMPAT_FREEBSD32 struct ifreq32 ifr32; #endif }; union ifgroupreq_union { struct ifgroupreq ifgr; #ifdef COMPAT_FREEBSD32 struct ifgroupreq32 ifgr32; #endif }; SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN, &ifqmaxlen, 0, "max send queue size"); /* Log link state change events */ static int log_link_state_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW, &log_link_state_change, 0, "log interface link state change events"); /* Log promiscuous mode change events */ static int log_promisc_mode_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN, &log_promisc_mode_change, 1, "log promiscuous mode change events"); /* Interface description */ static unsigned int ifdescr_maxlen = 1024; SYSCTL_UINT(_net, 
OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, &ifdescr_maxlen, 0, "administrative maximum length for interface description"); static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); /* global sx for non-critical path ifdescr */ static struct sx ifdescr_sx; SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr"); void (*ng_ether_link_state_p)(struct ifnet *ifp, int state); void (*lagg_linkstate_p)(struct ifnet *ifp, int state); /* These are external hooks for CARP. */ void (*carp_linkstate_p)(struct ifnet *ifp); void (*carp_demote_adj_p)(int, char *); int (*carp_master_p)(struct ifaddr *); #if defined(INET) || defined(INET6) int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost); int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa); int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); int (*carp_attach_p)(struct ifaddr *, int); void (*carp_detach_p)(struct ifaddr *, bool); #endif #ifdef INET int (*carp_iamatch_p)(struct ifaddr *, uint8_t **); #endif #ifdef INET6 struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6); caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr); #endif struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL; /* * XXX: Style; these should be sorted alphabetically, and unprototyped * static functions should be prototyped. Currently they are sorted by * declaration order. */ static void if_attachdomain(void *); static void if_attachdomain1(struct ifnet *); static int ifconf(u_long, caddr_t); static void *if_grow(void); static void if_input_default(struct ifnet *, struct mbuf *); static int if_requestencap_default(struct ifnet *, struct if_encap_req *); static void if_route(struct ifnet *, int flag, int fam); static int if_setflag(struct ifnet *, int, int, int *, int); static int if_transmit(struct ifnet *ifp, struct mbuf *m); static void if_unroute(struct ifnet *, int flag, int fam); static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int); static void do_link_state_change(void *, int); static int if_getgroup(struct ifgroupreq *, struct ifnet *); static int if_getgroupmembers(struct ifgroupreq *); static void if_delgroups(struct ifnet *); static void if_attach_internal(struct ifnet *, int, struct if_clone *); static int if_detach_internal(struct ifnet *, int, struct if_clone **); static void if_siocaddmulti(void *, int); #ifdef VIMAGE static int if_vmove(struct ifnet *, struct vnet *); #endif #ifdef INET6 /* * XXX: declare here to avoid to include many inet6 related files.. * should be more generalized? */ extern void nd6_setmtu(struct ifnet *); #endif /* ipsec helper hooks */ VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); VNET_DEFINE(int, if_index); int ifqmaxlen = IFQ_MAXLEN; VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */ VNET_DEFINE(struct ifgrouphead, ifg_head); VNET_DEFINE_STATIC(int, if_indexlim) = 8; /* Table of ifnet by index. */ VNET_DEFINE(struct ifnet **, ifindex_table); #define V_if_indexlim VNET(if_indexlim) #define V_ifindex_table VNET(ifindex_table) /* * The global network interface list (V_ifnet) and related state (such as * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and * an rwlock. Either may be acquired shared to stablize the list, but both * must be acquired writable to modify the list. 
This model allows us to * both stablize the interface list during interrupt thread processing, but * also to stablize it over long-running ioctls, without introducing priority * inversions and deadlocks. */ struct rwlock ifnet_rwlock; RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE); struct sx ifnet_sxlock; SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE); /* * The allocation of network interfaces is a rather non-atomic affair; we * need to select an index before we are ready to expose the interface for * use, so will use this pointer value to indicate reservation. */ #define IFNET_HOLD (void *)(uintptr_t)(-1) #ifdef VIMAGE #define VNET_IS_SHUTTING_DOWN(_vnet) \ ((_vnet)->vnet_shutdown && (_vnet)->vnet_state < SI_SUB_VNET_DONE) #endif static if_com_alloc_t *if_com_alloc[256]; static if_com_free_t *if_com_free[256]; static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals"); MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); struct ifnet * ifnet_byindex(u_short idx) { struct ifnet *ifp; if (__predict_false(idx > V_if_index)) return (NULL); ifp = *(struct ifnet * const volatile *)(V_ifindex_table + idx); return (__predict_false(ifp == IFNET_HOLD) ? NULL : ifp); } struct ifnet * ifnet_byindex_ref(u_short idx) { struct ifnet *ifp; NET_EPOCH_ASSERT(); ifp = ifnet_byindex(idx); if (ifp == NULL || (ifp->if_flags & IFF_DYING)) return (NULL); if_ref(ifp); return (ifp); } /* * Allocate an ifindex array entry; return 0 on success or an error on * failure. */ static u_short ifindex_alloc(void **old) { u_short idx; IFNET_WLOCK_ASSERT(); /* * Try to find an empty slot below V_if_index. If we fail, take the * next slot. */ for (idx = 1; idx <= V_if_index; idx++) { if (V_ifindex_table[idx] == NULL) break; } /* Catch if_index overflow. */ if (idx >= V_if_indexlim) { *old = if_grow(); return (USHRT_MAX); } if (idx > V_if_index) V_if_index = idx; return (idx); } static void ifindex_free_locked(u_short idx) { IFNET_WLOCK_ASSERT(); V_ifindex_table[idx] = NULL; while (V_if_index > 0 && V_ifindex_table[V_if_index] == NULL) V_if_index--; } static void ifindex_free(u_short idx) { IFNET_WLOCK(); ifindex_free_locked(idx); IFNET_WUNLOCK(); } static void ifnet_setbyindex(u_short idx, struct ifnet *ifp) { V_ifindex_table[idx] = ifp; } struct ifaddr * ifaddr_byindex(u_short idx) { struct ifnet *ifp; struct ifaddr *ifa = NULL; NET_EPOCH_ASSERT(); ifp = ifnet_byindex(idx); if (ifp != NULL && (ifa = ifp->if_addr) != NULL) ifa_ref(ifa); return (ifa); } /* * Network interface utility routines. * * Routines with ifa_ifwith* names take sockaddr *'s as * parameters. 
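As a concrete illustration of that convention, a caller outside this file would typically enter the network epoch before using one of the ifa_ifwith*() lookups and take its own reference on anything it keeps past the epoch section. The helper below is a hypothetical sketch, not code from this change; example_ifp_for_addr() is an invented name, and it only uses functions that appear in this file (ifa_ifwithaddr(), if_ref()).

#include <sys/param.h>
#include <sys/socket.h>
#include <sys/epoch.h>
#include <net/if.h>
#include <net/if_var.h>

/*
 * Hypothetical caller of the ifa_ifwith*() lookups: enter the network
 * epoch to stabilize the interface and address lists, and take a
 * reference on anything that must outlive the epoch section.
 */
static struct ifnet *
example_ifp_for_addr(const struct sockaddr *sa)
{
	struct epoch_tracker et;
	struct ifaddr *ifa;
	struct ifnet *ifp = NULL;

	NET_EPOCH_ENTER(et);
	ifa = ifa_ifwithaddr(sa);
	if (ifa != NULL) {
		ifp = ifa->ifa_ifp;
		if_ref(ifp);	/* hold the ifnet past NET_EPOCH_EXIT() */
	}
	NET_EPOCH_EXIT(et);
	return (ifp);
}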
*/ static void vnet_if_init(const void *unused __unused) { void *old; CK_STAILQ_INIT(&V_ifnet); CK_STAILQ_INIT(&V_ifg_head); IFNET_WLOCK(); old = if_grow(); /* create initial table */ IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); free(old, M_IFNET); vnet_if_clone_init(); } VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, NULL); #ifdef VIMAGE static void vnet_if_uninit(const void *unused __unused) { VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p " "not empty", __func__, __LINE__, &V_ifnet)); VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p " "not empty", __func__, __LINE__, &V_ifg_head)); free((caddr_t)V_ifindex_table, M_IFNET); } VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_uninit, NULL); static void vnet_if_return(const void *unused __unused) { struct ifnet *ifp, *nifp; /* Return all inherited interfaces to their parent vnets. */ CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { if (ifp->if_home_vnet != ifp->if_vnet) if_vmove(ifp, ifp->if_home_vnet); } } VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY, vnet_if_return, NULL); #endif static void * if_grow(void) { int oldlim; u_int n; struct ifnet **e; void *old; old = NULL; IFNET_WLOCK_ASSERT(); oldlim = V_if_indexlim; IFNET_WUNLOCK(); n = (oldlim << 1) * sizeof(*e); e = malloc(n, M_IFNET, M_WAITOK | M_ZERO); IFNET_WLOCK(); if (V_if_indexlim != oldlim) { free(e, M_IFNET); return (NULL); } if (V_ifindex_table != NULL) { memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2); old = V_ifindex_table; } V_if_indexlim <<= 1; V_ifindex_table = e; return (old); } /* * Allocate a struct ifnet and an index for an interface. A layer 2 * common structure will also be allocated if an allocation routine is * registered for the passed type. */ struct ifnet * if_alloc_domain(u_char type, int numa_domain) { struct ifnet *ifp; u_short idx; void *old; KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large")); if (numa_domain == IF_NODOM) ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK | M_ZERO); else ifp = malloc_domainset(sizeof(struct ifnet), M_IFNET, DOMAINSET_PREF(numa_domain), M_WAITOK | M_ZERO); restart: IFNET_WLOCK(); idx = ifindex_alloc(&old); if (__predict_false(idx == USHRT_MAX)) { IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); free(old, M_IFNET); goto restart; } ifnet_setbyindex(idx, IFNET_HOLD); IFNET_WUNLOCK(); ifp->if_index = idx; ifp->if_type = type; ifp->if_alloctype = type; ifp->if_numa_domain = numa_domain; #ifdef VIMAGE ifp->if_vnet = curvnet; #endif - /* XXX */ - ifp->if_flags |= IFF_NEEDSEPOCH; if (if_com_alloc[type] != NULL) { ifp->if_l2com = if_com_alloc[type](type, ifp); if (ifp->if_l2com == NULL) { free(ifp, M_IFNET); ifindex_free(idx); return (NULL); } } IF_ADDR_LOCK_INIT(ifp); TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp); TASK_INIT(&ifp->if_addmultitask, 0, if_siocaddmulti, ifp); ifp->if_afdata_initialized = 0; IF_AFDATA_LOCK_INIT(ifp); CK_STAILQ_INIT(&ifp->if_addrhead); CK_STAILQ_INIT(&ifp->if_multiaddrs); CK_STAILQ_INIT(&ifp->if_groups); #ifdef MAC mac_ifnet_init(ifp); #endif ifq_init(&ifp->if_snd, ifp); refcount_init(&ifp->if_refcount, 1); /* Index reference. 
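For context on that initial reference, the allocation path above is normally consumed by a driver roughly as sketched below. This is a hypothetical, heavily condensed example (foo_softc, foo_attach() and foo_detach() are invented); a real driver also fills in if_initname(), if_flags and the if_init/if_ioctl/if_transmit methods before calling ether_ifattach().

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/ethernet.h>

/* Hypothetical driver softc; only the fields used here. */
struct foo_softc {
	struct ifnet	*ifp;
	uint8_t		 lladdr[ETHER_ADDR_LEN];
};

static int
foo_attach(struct foo_softc *sc)
{
	struct ifnet *ifp;

	ifp = if_alloc(IFT_ETHER);	/* holds the "index reference" */
	if (ifp == NULL)
		return (ENOMEM);
	sc->ifp = ifp;
	ifp->if_softc = sc;
	/* ... if_initname(), if_flags, if_init/if_ioctl/if_transmit ... */
	ether_ifattach(ifp, sc->lladdr);
	return (0);
}

static void
foo_detach(struct foo_softc *sc)
{
	ether_ifdetach(sc->ifp);
	if_free(sc->ifp);	/* releases the if_index slot and last ref */
}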
*/ for (int i = 0; i < IFCOUNTERS; i++) ifp->if_counters[i] = counter_u64_alloc(M_WAITOK); ifp->if_get_counter = if_get_counter_default; ifp->if_pcp = IFNET_PCP_NONE; ifnet_setbyindex(ifp->if_index, ifp); return (ifp); } struct ifnet * if_alloc_dev(u_char type, device_t dev) { int numa_domain; if (dev == NULL || bus_get_domain(dev, &numa_domain) != 0) return (if_alloc_domain(type, IF_NODOM)); return (if_alloc_domain(type, numa_domain)); } struct ifnet * if_alloc(u_char type) { return (if_alloc_domain(type, IF_NODOM)); } /* * Do the actual work of freeing a struct ifnet, and layer 2 common * structure. This call is made when the last reference to an * interface is released. */ static void if_free_internal(struct ifnet *ifp) { KASSERT((ifp->if_flags & IFF_DYING), ("if_free_internal: interface not dying")); if (if_com_free[ifp->if_alloctype] != NULL) if_com_free[ifp->if_alloctype](ifp->if_l2com, ifp->if_alloctype); #ifdef MAC mac_ifnet_destroy(ifp); #endif /* MAC */ IF_AFDATA_DESTROY(ifp); IF_ADDR_LOCK_DESTROY(ifp); ifq_delete(&ifp->if_snd); for (int i = 0; i < IFCOUNTERS; i++) counter_u64_free(ifp->if_counters[i]); free(ifp->if_description, M_IFDESCR); free(ifp->if_hw_addr, M_IFADDR); if (ifp->if_numa_domain == IF_NODOM) free(ifp, M_IFNET); else free_domain(ifp, M_IFNET); } static void if_destroy(epoch_context_t ctx) { struct ifnet *ifp; ifp = __containerof(ctx, struct ifnet, if_epoch_ctx); if_free_internal(ifp); } /* * Deregister an interface and free the associated storage. */ void if_free(struct ifnet *ifp) { ifp->if_flags |= IFF_DYING; /* XXX: Locking */ CURVNET_SET_QUIET(ifp->if_vnet); IFNET_WLOCK(); KASSERT(ifp == ifnet_byindex(ifp->if_index), ("%s: freeing unallocated ifnet", ifp->if_xname)); ifindex_free_locked(ifp->if_index); IFNET_WUNLOCK(); if (refcount_release(&ifp->if_refcount)) NET_EPOCH_CALL(if_destroy, &ifp->if_epoch_ctx); CURVNET_RESTORE(); } /* * Interfaces to keep an ifnet type-stable despite the possibility of the * driver calling if_free(). If there are additional references, we defer * freeing the underlying data structure. */ void if_ref(struct ifnet *ifp) { /* We don't assert the ifnet list lock here, but arguably should. */ refcount_acquire(&ifp->if_refcount); } void if_rele(struct ifnet *ifp) { if (!refcount_release(&ifp->if_refcount)) return; NET_EPOCH_CALL(if_destroy, &ifp->if_epoch_ctx); } void ifq_init(struct ifaltq *ifq, struct ifnet *ifp) { mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF); if (ifq->ifq_maxlen == 0) ifq->ifq_maxlen = ifqmaxlen; ifq->altq_type = 0; ifq->altq_disc = NULL; ifq->altq_flags &= ALTQF_CANTCHANGE; ifq->altq_tbr = NULL; ifq->altq_ifp = ifp; } void ifq_delete(struct ifaltq *ifq) { mtx_destroy(&ifq->ifq_mtx); } /* * Perform generic interface initialization tasks and attach the interface * to the list of "active" interfaces. If vmove flag is set on entry * to if_attach_internal(), perform only a limited subset of initialization * tasks, given that we are moving from one vnet to another an ifnet which * has already been fully initialized. * * Note that if_detach_internal() removes group membership unconditionally * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL. * Thus, when if_vmove() is applied to a cloned interface, group membership * is lost while a cloned one always joins a group whose name is * ifc->ifc_name. To recover this after if_detach_internal() and * if_attach_internal(), the cloner should be specified to * if_attach_internal() via ifc. 
If it is non-NULL, if_attach_internal() * attempts to join a group whose name is ifc->ifc_name. * * XXX: * - The decision to return void and thus require this function to * succeed is questionable. * - We should probably do more sanity checking. For instance we don't * do anything to insure if_xname is unique or non-empty. */ void if_attach(struct ifnet *ifp) { if_attach_internal(ifp, 0, NULL); } /* * Compute the least common TSO limit. */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax) { /* * 1) If there is no limit currently, take the limit from * the network adapter. * * 2) If the network adapter has a limit below the current * limit, apply it. */ if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 && ifp->if_hw_tsomax < pmax->tsomaxbytes)) { pmax->tsomaxbytes = ifp->if_hw_tsomax; } if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 && ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) { pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; } if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 && ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) { pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } /* * Update TSO limit of a network adapter. * * Returns zero if no change. Else non-zero. */ int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax) { int retval = 0; if (ifp->if_hw_tsomax != pmax->tsomaxbytes) { ifp->if_hw_tsomax = pmax->tsomaxbytes; retval++; } if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) { ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize; retval++; } if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) { ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount; retval++; } return (retval); } static void if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc) { unsigned socksize, ifasize; int namelen, masklen; struct sockaddr_dl *sdl; struct ifaddr *ifa; if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index)) panic ("%s: BUG: if_attach called without if_alloc'd input()\n", ifp->if_xname); #ifdef VIMAGE ifp->if_vnet = curvnet; if (ifp->if_home_vnet == NULL) ifp->if_home_vnet = curvnet; #endif if_addgroup(ifp, IFG_ALL); /* Restore group membership for cloned interfaces. */ if (vmove && ifc != NULL) if_clone_addgroup(ifp, ifc); getmicrotime(&ifp->if_lastchange); ifp->if_epoch = time_uptime; KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) || (ifp->if_transmit != NULL && ifp->if_qflush != NULL), ("transmit and qflush must both either be set or both be NULL")); if (ifp->if_transmit == NULL) { ifp->if_transmit = if_transmit; ifp->if_qflush = if_qflush; } if (ifp->if_input == NULL) ifp->if_input = if_input_default; if (ifp->if_requestencap == NULL) ifp->if_requestencap = if_requestencap_default; if (!vmove) { #ifdef MAC mac_ifnet_create(ifp); #endif /* * Create a Link Level name for this device. */ namelen = strlen(ifp->if_xname); /* * Always save enough space for any possiable name so we * can do a rename in place later. 
*/ masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ; socksize = masklen + ifp->if_addrlen; if (socksize < sizeof(*sdl)) socksize = sizeof(*sdl); socksize = roundup2(socksize, sizeof(long)); ifasize = sizeof(*ifa) + 2 * socksize; ifa = ifa_alloc(ifasize, M_WAITOK); sdl = (struct sockaddr_dl *)(ifa + 1); sdl->sdl_len = socksize; sdl->sdl_family = AF_LINK; bcopy(ifp->if_xname, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; ifp->if_addr = ifa; ifa->ifa_ifp = ifp; ifa->ifa_addr = (struct sockaddr *)sdl; sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); ifa->ifa_netmask = (struct sockaddr *)sdl; sdl->sdl_len = masklen; while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; CK_STAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); /* Reliably crash if used uninitialized. */ ifp->if_broadcastaddr = NULL; if (ifp->if_type == IFT_ETHER) { ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR, M_WAITOK | M_ZERO); } #if defined(INET) || defined(INET6) /* Use defaults for TSO, if nothing is set */ if (ifp->if_hw_tsomax == 0 && ifp->if_hw_tsomaxsegcount == 0 && ifp->if_hw_tsomaxsegsize == 0) { /* * The TSO defaults needs to be such that an * NFS mbuf list of 35 mbufs totalling just * below 64K works and that a chain of mbufs * can be defragged into at most 32 segments: */ ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); ifp->if_hw_tsomaxsegcount = 35; ifp->if_hw_tsomaxsegsize = 2048; /* 2K */ /* XXX some drivers set IFCAP_TSO after ethernet attach */ if (ifp->if_capabilities & IFCAP_TSO) { if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); } } #endif } #ifdef VIMAGE else { /* * Update the interface index in the link layer address * of the interface. */ for (ifa = ifp->if_addr; ifa != NULL; ifa = CK_STAILQ_NEXT(ifa, ifa_link)) { if (ifa->ifa_addr->sa_family == AF_LINK) { sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_index = ifp->if_index; } } } #endif IFNET_WLOCK(); CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link); #ifdef VIMAGE curvnet->vnet_ifcnt++; #endif IFNET_WUNLOCK(); if (domain_init_status >= 2) if_attachdomain1(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); /* Announce the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); } static void if_epochalloc(void *dummy __unused) { net_epoch_preempt = epoch_alloc("Net preemptible", EPOCH_PREEMPT); } SYSINIT(ifepochalloc, SI_SUB_EPOCH, SI_ORDER_ANY, if_epochalloc, NULL); static void if_attachdomain(void *dummy) { struct ifnet *ifp; CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) if_attachdomain1(ifp); } SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND, if_attachdomain, NULL); static void if_attachdomain1(struct ifnet *ifp) { struct domain *dp; /* * Since dp->dom_ifattach calls malloc() with M_WAITOK, we * cannot lock ifp->if_afdata initialization, entirely. 
*/ IF_AFDATA_LOCK(ifp); if (ifp->if_afdata_initialized >= domain_init_status) { IF_AFDATA_UNLOCK(ifp); log(LOG_WARNING, "%s called more than once on %s\n", __func__, ifp->if_xname); return; } ifp->if_afdata_initialized = domain_init_status; IF_AFDATA_UNLOCK(ifp); /* address family dependent data region */ bzero(ifp->if_afdata, sizeof(ifp->if_afdata)); for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_ifattach) ifp->if_afdata[dp->dom_family] = (*dp->dom_ifattach)(ifp); } } /* * Remove any unicast or broadcast network addresses from an interface. */ void if_purgeaddrs(struct ifnet *ifp) { struct ifaddr *ifa; while (1) { struct epoch_tracker et; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) break; } NET_EPOCH_EXIT(et); if (ifa == NULL) break; #ifdef INET /* XXX: Ugly!! ad hoc just for INET */ if (ifa->ifa_addr->sa_family == AF_INET) { struct ifaliasreq ifr; bzero(&ifr, sizeof(ifr)); ifr.ifra_addr = *ifa->ifa_addr; if (ifa->ifa_dstaddr) ifr.ifra_broadaddr = *ifa->ifa_dstaddr; if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp, NULL) == 0) continue; } #endif /* INET */ #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) { in6_purgeaddr(ifa); /* ifp_addrhead is already updated */ continue; } #endif /* INET6 */ IF_ADDR_WLOCK(ifp); CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } } /* * Remove any multicast network addresses from an interface when an ifnet * is going away. */ static void if_purgemaddrs(struct ifnet *ifp) { struct ifmultiaddr *ifma; IF_ADDR_WLOCK(ifp); while (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) { ifma = CK_STAILQ_FIRST(&ifp->if_multiaddrs); CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); if_delmulti_locked(ifp, ifma, 1); } IF_ADDR_WUNLOCK(ifp); } /* * Detach an interface, removing it from the list of "active" interfaces. * If vmove flag is set on entry to if_detach_internal(), perform only a * limited subset of cleanup tasks, given that we are moving an ifnet from * one vnet to another, where it must be fully operational. * * XXXRW: There are some significant questions about event ordering, and * how to prevent things from starting to use the interface during detach. */ void if_detach(struct ifnet *ifp) { CURVNET_SET_QUIET(ifp->if_vnet); if_detach_internal(ifp, 0, NULL); CURVNET_RESTORE(); } /* * The vmove flag, if set, indicates that we are called from a callpath * that is moving an interface to a different vnet instance. * * The shutdown flag, if set, indicates that we are called in the * process of shutting down a vnet instance. Currently only the * vnet_if_return SYSUNINIT function sets it. Note: we can be called * on a vnet instance shutdown without this flag being set, e.g., when * the cloned interfaces are destoyed as first thing of teardown. */ static int if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) { struct ifaddr *ifa; int i; struct domain *dp; struct ifnet *iter; int found = 0; #ifdef VIMAGE bool shutdown; shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet); #endif IFNET_WLOCK(); CK_STAILQ_FOREACH(iter, &V_ifnet, if_link) if (iter == ifp) { CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link); if (!vmove) ifp->if_flags |= IFF_DYING; found = 1; break; } IFNET_WUNLOCK(); if (!found) { /* * While we would want to panic here, we cannot * guarantee that the interface is indeed still on * the list given we don't hold locks all the way. 
*/ return (ENOENT); #if 0 if (vmove) panic("%s: ifp=%p not on the ifnet tailq %p", __func__, ifp, &V_ifnet); else return; /* XXX this should panic as well? */ #endif } /* * At this point we know the interface still was on the ifnet list * and we removed it so we are in a stable state. */ #ifdef VIMAGE curvnet->vnet_ifcnt--; #endif epoch_wait_preempt(net_epoch_preempt); /* * Ensure all pending EPOCH(9) callbacks have been executed. This * fixes issues about late destruction of multicast options * which lead to leave group calls, which in turn access the * belonging ifnet structure: */ epoch_drain_callbacks(net_epoch_preempt); /* * In any case (destroy or vmove) detach us from the groups * and remove/wait for pending events on the taskq. * XXX-BZ in theory an interface could still enqueue a taskq change? */ if_delgroups(ifp); taskqueue_drain(taskqueue_swi, &ifp->if_linktask); taskqueue_drain(taskqueue_swi, &ifp->if_addmultitask); /* * Check if this is a cloned interface or not. Must do even if * shutting down as a if_vmove_reclaim() would move the ifp and * the if_clone_addgroup() will have a corrupted string overwise * from a gibberish pointer. */ if (vmove && ifcp != NULL) *ifcp = if_clone_findifc(ifp); if_down(ifp); #ifdef VIMAGE /* * On VNET shutdown abort here as the stack teardown will do all * the work top-down for us. */ if (shutdown) { /* Give interface users the chance to clean up. */ EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); /* * In case of a vmove we are done here without error. * If we would signal an error it would lead to the same * abort as if we did not find the ifnet anymore. * if_detach() calls us in void context and does not care * about an early abort notification, so life is splendid :) */ goto finish_vnet_shutdown; } #endif /* * At this point we are not tearing down a VNET and are either * going to destroy or vmove the interface and have to cleanup * accordingly. */ /* * Remove routes and flush queues. */ #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) altq_disable(&ifp->if_snd); if (ALTQ_IS_ATTACHED(&ifp->if_snd)) altq_detach(&ifp->if_snd); #endif if_purgeaddrs(ifp); #ifdef INET in_ifdetach(ifp); #endif #ifdef INET6 /* * Remove all IPv6 kernel structs related to ifp. This should be done * before removing routing entries below, since IPv6 interface direct * routes are expected to be removed by the IPv6-specific kernel API. * Otherwise, the kernel will detect some inconsistency and bark it. */ in6_ifdetach(ifp); #endif if_purgemaddrs(ifp); /* Announce that the interface is gone. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); if (!vmove) { /* * Prevent further calls into the device driver via ifnet. */ if_dead(ifp); /* * Clean up all addresses. */ IF_ADDR_WLOCK(ifp); if (!CK_STAILQ_EMPTY(&ifp->if_addrhead)) { ifa = CK_STAILQ_FIRST(&ifp->if_addrhead); CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } else IF_ADDR_WUNLOCK(ifp); } rt_flushifroutes(ifp); #ifdef VIMAGE finish_vnet_shutdown: #endif /* * We cannot hold the lock over dom_ifdetach calls as they might * sleep, for example trying to drain a callout, thus open up the * theoretical race with re-attaching. 
*/ IF_AFDATA_LOCK(ifp); i = ifp->if_afdata_initialized; ifp->if_afdata_initialized = 0; IF_AFDATA_UNLOCK(ifp); for (dp = domains; i > 0 && dp; dp = dp->dom_next) { if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) { (*dp->dom_ifdetach)(ifp, ifp->if_afdata[dp->dom_family]); ifp->if_afdata[dp->dom_family] = NULL; } } return (0); } #ifdef VIMAGE /* * if_vmove() performs a limited version of if_detach() in current * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg. * An attempt is made to shrink if_index in current vnet, find an * unused if_index in target vnet and calls if_grow() if necessary, * and finally find an unused if_xname for the target vnet. */ static int if_vmove(struct ifnet *ifp, struct vnet *new_vnet) { struct if_clone *ifc; #ifdef DEV_BPF u_int bif_dlt, bif_hdrlen; #endif void *old; int rc; #ifdef DEV_BPF /* * if_detach_internal() will call the eventhandler to notify * interface departure. That will detach if_bpf. We need to * safe the dlt and hdrlen so we can re-attach it later. */ bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen); #endif /* * Detach from current vnet, but preserve LLADDR info, do not * mark as dead etc. so that the ifnet can be reattached later. * If we cannot find it, we lost the race to someone else. */ rc = if_detach_internal(ifp, 1, &ifc); if (rc != 0) return (rc); /* * Unlink the ifnet from ifindex_table[] in current vnet, and shrink * the if_index for that vnet if possible. * * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized, * or we'd lock on one vnet and unlock on another. */ IFNET_WLOCK(); ifindex_free_locked(ifp->if_index); IFNET_WUNLOCK(); /* * Perform interface-specific reassignment tasks, if provided by * the driver. */ if (ifp->if_reassign != NULL) ifp->if_reassign(ifp, new_vnet, NULL); /* * Switch to the context of the target vnet. */ CURVNET_SET_QUIET(new_vnet); restart: IFNET_WLOCK(); ifp->if_index = ifindex_alloc(&old); if (__predict_false(ifp->if_index == USHRT_MAX)) { IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); free(old, M_IFNET); goto restart; } ifnet_setbyindex(ifp->if_index, ifp); IFNET_WUNLOCK(); if_attach_internal(ifp, 1, ifc); #ifdef DEV_BPF if (ifp->if_bpf == NULL) bpfattach(ifp, bif_dlt, bif_hdrlen); #endif CURVNET_RESTORE(); return (0); } /* * Move an ifnet to or from another child prison/vnet, specified by the jail id. */ static int if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) { struct prison *pr; struct ifnet *difp; int error; bool shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Do not try to move the iface from and to the same prison. */ if (pr->pr_vnet == ifp->if_vnet) { prison_free(pr); return (EEXIST); } /* Make sure the named iface does not exists in the dst. prison/vnet. */ /* XXX Lock interfaces to avoid races. */ CURVNET_SET_QUIET(pr->pr_vnet); difp = ifunit(ifname); if (difp != NULL) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } /* Make sure the VNET is stable. */ shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet); if (shutdown) { CURVNET_RESTORE(); prison_free(pr); return (EBUSY); } CURVNET_RESTORE(); /* Move the interface into the child jail/vnet. */ error = if_vmove(ifp, pr->pr_vnet); /* Report the new if_xname back to the userland on success. 
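The usual initiator of this path is userland moving an interface into a vnet jail. The program below is illustrative only and is not part of this change; SIOCSIFVNET and the ifr_jid field are assumed from the stock sys/sockio.h and net/if.h headers, so treat both as assumptions and adjust for your tree. It must run with privilege, against a jail created with its own vnet.

/*
 * Userland sketch: push an existing interface into a vnet jail, which
 * lands in if_vmove_loan() above.  SIOCSIFVNET and ifr_jid are assumed
 * from the stock <sys/sockio.h> and <net/if.h>.
 */
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	struct ifreq ifr;
	int s;

	if (argc != 3)
		errx(1, "usage: %s ifname jid", argv[0]);
	if ((s = socket(AF_LOCAL, SOCK_DGRAM, 0)) == -1)
		err(1, "socket");
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name));
	ifr.ifr_jid = atoi(argv[2]);
	if (ioctl(s, SIOCSIFVNET, &ifr) == -1)
		err(1, "SIOCSIFVNET");
	/* On success the kernel wrote the (possibly renamed) name back. */
	printf("moved as %s\n", ifr.ifr_name);
	close(s);
	return (0);
}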
*/ if (error == 0) sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (error); } static int if_vmove_reclaim(struct thread *td, char *ifname, int jid) { struct prison *pr; struct vnet *vnet_dst; struct ifnet *ifp; int error; bool shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Make sure the named iface exists in the source prison/vnet. */ CURVNET_SET(pr->pr_vnet); ifp = ifunit(ifname); /* XXX Lock to avoid races. */ if (ifp == NULL) { CURVNET_RESTORE(); prison_free(pr); return (ENXIO); } /* Do not try to move the iface from and to the same prison. */ vnet_dst = TD_TO_VNET(td); if (vnet_dst == ifp->if_vnet) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } /* Make sure the VNET is stable. */ shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet); if (shutdown) { CURVNET_RESTORE(); prison_free(pr); return (EBUSY); } /* Get interface back from child jail/vnet. */ error = if_vmove(ifp, vnet_dst); CURVNET_RESTORE(); /* Report the new if_xname back to the userland on success. */ if (error == 0) sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (error); } #endif /* VIMAGE */ /* * Add a group to an interface */ int if_addgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_group *ifg = NULL; struct ifg_member *ifgm; int new = 0; if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' && groupname[strlen(groupname) - 1] <= '9') return (EINVAL); IFNET_WLOCK(); CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) { IFNET_WUNLOCK(); return (EEXIST); } if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL) { IFNET_WUNLOCK(); return (ENOMEM); } if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, groupname)) break; if (ifg == NULL) { if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); free(ifgm, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); ifg->ifg_refcnt = 0; CK_STAILQ_INIT(&ifg->ifg_members); CK_STAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next); new = 1; } ifg->ifg_refcnt++; ifgl->ifgl_group = ifg; ifgm->ifgm_ifp = ifp; IF_ADDR_WLOCK(ifp); CK_STAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); CK_STAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); IFNET_WUNLOCK(); if (new) EVENTHANDLER_INVOKE(group_attach_event, ifg); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Helper function to remove a group out of an interface. Expects the global * ifnet lock to be write-locked, and drops it before returning. 
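For reference, kernel consumers pair if_addgroup() with if_delgroup() around attach and detach. The fragment below is a hypothetical sketch (the group name "offload" and the example_* function names are invented); the error handling follows the return values visible in if_addgroup() above.

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>

/*
 * Hypothetical driver/cloner hooks: make the interface visible under a
 * group name for the lifetime of the device.
 */
static void
example_join_group(struct ifnet *ifp)
{
	int error;

	/* Names ending in a digit are rejected by if_addgroup(). */
	error = if_addgroup(ifp, "offload");
	if (error != 0 && error != EEXIST)
		if_printf(ifp, "could not join group: %d\n", error);
}

static void
example_leave_group(struct ifnet *ifp)
{
	/* ENOENT just means the group was already gone. */
	(void)if_delgroup(ifp, "offload");
}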
*/ static void _if_delgroup_locked(struct ifnet *ifp, struct ifg_list *ifgl, const char *groupname) { struct ifg_member *ifgm; bool freeifgl; IFNET_WLOCK_ASSERT(); IF_ADDR_WLOCK(ifp); CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next); IF_ADDR_WUNLOCK(ifp); CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) { if (ifgm->ifgm_ifp == ifp) { CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifg_member, ifgm_next); break; } } if (--ifgl->ifgl_group->ifg_refcnt == 0) { CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group, ifg_next); freeifgl = true; } else { freeifgl = false; } IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); if (freeifgl) { EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } free(ifgm, M_TEMP); free(ifgl, M_TEMP); EVENTHANDLER_INVOKE(group_change_event, groupname); } /* * Remove a group from an interface */ int if_delgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; IFNET_WLOCK(); CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) break; if (ifgl == NULL) { IFNET_WUNLOCK(); return (ENOENT); } _if_delgroup_locked(ifp, ifgl, groupname); return (0); } /* * Remove an interface from all groups */ static void if_delgroups(struct ifnet *ifp) { struct ifg_list *ifgl; char groupname[IFNAMSIZ]; IFNET_WLOCK(); while ((ifgl = CK_STAILQ_FIRST(&ifp->if_groups)) != NULL) { strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ); _if_delgroup_locked(ifp, ifgl, groupname); IFNET_WLOCK(); } IFNET_WUNLOCK(); } static char * ifgr_group_get(void *ifgrp) { union ifgroupreq_union *ifgrup; ifgrup = ifgrp; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return (&ifgrup->ifgr32.ifgr_ifgru.ifgru_group[0]); #endif return (&ifgrup->ifgr.ifgr_ifgru.ifgru_group[0]); } static struct ifg_req * ifgr_groups_get(void *ifgrp) { union ifgroupreq_union *ifgrup; ifgrup = ifgrp; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((struct ifg_req *)(uintptr_t) ifgrup->ifgr32.ifgr_ifgru.ifgru_groups); #endif return (ifgrup->ifgr.ifgr_ifgru.ifgru_groups); } /* * Stores all groups from an interface in memory pointed to by ifgr. 
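The function that follows implements the kernel half of a two-pass protocol: a first call with ifgr_len set to zero reports the required buffer size, and a second call copies out one ifg_req per group. A minimal userland sketch of that protocol is shown below; it is illustrative only, and SIOCGIFGROUP plus the ifgr_groups accessor are assumed from the stock headers rather than taken from this change.

/*
 * Userland sketch of the two-pass SIOCGIFGROUP protocol served by
 * if_getgroup() below.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(int argc, char **argv)
{
	struct ifgroupreq ifgr;
	struct ifg_req *ifg;
	u_int i, n;
	int s;

	if (argc != 2)
		errx(1, "usage: %s ifname", argv[0]);
	if ((s = socket(AF_LOCAL, SOCK_DGRAM, 0)) == -1)
		err(1, "socket");

	memset(&ifgr, 0, sizeof(ifgr));
	strlcpy(ifgr.ifgr_name, argv[1], sizeof(ifgr.ifgr_name));

	/* Pass 1: ifgr_len == 0 asks the kernel for the required size. */
	if (ioctl(s, SIOCGIFGROUP, &ifgr) == -1)
		err(1, "SIOCGIFGROUP (size)");
	if ((ifg = calloc(1, ifgr.ifgr_len)) == NULL)
		err(1, "calloc");
	ifgr.ifgr_groups = ifg;

	/* Pass 2: the kernel copies out one ifg_req per group. */
	if (ioctl(s, SIOCGIFGROUP, &ifgr) == -1)
		err(1, "SIOCGIFGROUP (fetch)");
	n = ifgr.ifgr_len / sizeof(*ifg);
	for (i = 0; i < n; i++)
		printf("%s\n", ifg[i].ifgrq_group);

	free(ifg);
	return (0);
}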
*/ static int if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp) { int len, error; struct ifg_list *ifgl; struct ifg_req ifgrq, *ifgp; NET_EPOCH_ASSERT(); if (ifgr->ifgr_len == 0) { CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) ifgr->ifgr_len += sizeof(struct ifg_req); return (0); } len = ifgr->ifgr_len; ifgp = ifgr_groups_get(ifgr); /* XXX: wire */ CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { if (len < sizeof(ifgrq)) return (EINVAL); bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group, sizeof(ifgrq.ifgrq_group)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) return (error); len -= sizeof(ifgrq); ifgp++; } return (0); } /* * Stores all members of a group in memory pointed to by igfr */ static int if_getgroupmembers(struct ifgroupreq *ifgr) { struct ifg_group *ifg; struct ifg_member *ifgm; struct ifg_req ifgrq, *ifgp; int len, error; IFNET_RLOCK(); CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0) break; if (ifg == NULL) { IFNET_RUNLOCK(); return (ENOENT); } if (ifgr->ifgr_len == 0) { CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) ifgr->ifgr_len += sizeof(ifgrq); IFNET_RUNLOCK(); return (0); } len = ifgr->ifgr_len; ifgp = ifgr_groups_get(ifgr); CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { if (len < sizeof(ifgrq)) { IFNET_RUNLOCK(); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname, sizeof(ifgrq.ifgrq_member)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { IFNET_RUNLOCK(); return (error); } len -= sizeof(ifgrq); ifgp++; } IFNET_RUNLOCK(); return (0); } /* * Return counter values from counter(9)s stored in ifnet. */ uint64_t if_get_counter_default(struct ifnet *ifp, ift_counter cnt) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); return (counter_u64_fetch(ifp->if_counters[cnt])); } /* * Increase an ifnet counter. Usually used for counters shared * between the stack and a driver, but function supports them all. */ void if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); counter_u64_add(ifp->if_counters[cnt], inc); } /* * Copy data from ifnet to userland API structure if_data. 
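The snapshot built by the next function is what eventually reaches userland, for example through getifaddrs(3), which exposes a struct if_data via ifa_data for AF_LINK entries. The program below is a small sketch under that assumption and is not part of this change.

/*
 * Userland sketch: the counters and metadata assembled by if_data_copy()
 * are what getifaddrs(3) hands back through ifa_data for AF_LINK
 * entries.  Illustrative only.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <net/if.h>
#include <ifaddrs.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	struct ifaddrs *ifap, *ifa;
	const struct if_data *ifd;

	if (getifaddrs(&ifap) == -1)
		err(1, "getifaddrs");
	for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
		if (ifa->ifa_addr == NULL ||
		    ifa->ifa_addr->sa_family != AF_LINK)
			continue;
		ifd = ifa->ifa_data;
		printf("%-16s ipackets %ju opackets %ju\n", ifa->ifa_name,
		    (uintmax_t)ifd->ifi_ipackets,
		    (uintmax_t)ifd->ifi_opackets);
	}
	freeifaddrs(ifap);
	return (0);
}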
*/ void if_data_copy(struct ifnet *ifp, struct if_data *ifd) { ifd->ifi_type = ifp->if_type; ifd->ifi_physical = 0; ifd->ifi_addrlen = ifp->if_addrlen; ifd->ifi_hdrlen = ifp->if_hdrlen; ifd->ifi_link_state = ifp->if_link_state; ifd->ifi_vhid = 0; ifd->ifi_datalen = sizeof(struct if_data); ifd->ifi_mtu = ifp->if_mtu; ifd->ifi_metric = ifp->if_metric; ifd->ifi_baudrate = ifp->if_baudrate; ifd->ifi_hwassist = ifp->if_hwassist; ifd->ifi_epoch = ifp->if_epoch; ifd->ifi_lastchange = ifp->if_lastchange; ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS); ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS); ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS); ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES); ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES); ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS); ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS); ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS); ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS); ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO); } /* * Initialization, destruction and refcounting functions for ifaddrs. */ struct ifaddr * ifa_alloc(size_t size, int flags) { struct ifaddr *ifa; KASSERT(size >= sizeof(struct ifaddr), ("%s: invalid size %zu", __func__, size)); ifa = malloc(size, M_IFADDR, M_ZERO | flags); if (ifa == NULL) return (NULL); if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL) goto fail; refcount_init(&ifa->ifa_refcnt, 1); return (ifa); fail: /* free(NULL) is okay */ counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); return (NULL); } void ifa_ref(struct ifaddr *ifa) { refcount_acquire(&ifa->ifa_refcnt); } static void ifa_destroy(epoch_context_t ctx) { struct ifaddr *ifa; ifa = __containerof(ctx, struct ifaddr, ifa_epoch_ctx); counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); } void ifa_free(struct ifaddr *ifa) { if (refcount_release(&ifa->ifa_refcnt)) NET_EPOCH_CALL(ifa_destroy, &ifa->ifa_epoch_ctx); } static int ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa, struct sockaddr *ia) { struct epoch_tracker et; int error; struct rt_addrinfo info; struct sockaddr_dl null_sdl; struct ifnet *ifp; struct ifaddr *rti_ifa = NULL; ifp = ifa->ifa_ifp; bzero(&info, sizeof(info)); if (cmd != RTM_DELETE) info.rti_ifp = V_loif; if (cmd == RTM_ADD) { /* explicitly specify (loopback) ifa */ if (info.rti_ifp != NULL) { NET_EPOCH_ENTER(et); rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp); if (rti_ifa != NULL) ifa_ref(rti_ifa); info.rti_ifa = rti_ifa; NET_EPOCH_EXIT(et); } } info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED; info.rti_info[RTAX_DST] = ia; info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type); error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib); if (rti_ifa != NULL) ifa_free(rti_ifa); if (error == 0 || 
(cmd == RTM_ADD && error == EEXIST) || (cmd == RTM_DELETE && (error == ENOENT || error == ESRCH))) return (error); log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n", __func__, otype, if_name(ifp), error); return (error); } int ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia)); } int ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia)); } int ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia)); } /* * XXX: Because sockaddr_dl has deeper structure than the sockaddr * structs used to represent other address families, it is necessary * to perform a different comparison. */ #define sa_dl_equal(a1, a2) \ ((((const struct sockaddr_dl *)(a1))->sdl_len == \ ((const struct sockaddr_dl *)(a2))->sdl_len) && \ (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)), \ CLLADDR((const struct sockaddr_dl *)(a2)), \ ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0)) /* * Locate an interface based on a complete address. */ /*ARGSUSED*/ struct ifaddr * ifa_ifwithaddr(const struct sockaddr *addr) { struct ifnet *ifp; struct ifaddr *ifa; NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (sa_equal(addr, ifa->ifa_addr)) { goto done; } /* IP6 doesn't have broadcast */ if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { goto done; } } } ifa = NULL; done: return (ifa); } int ifa_ifwithaddr_check(const struct sockaddr *addr) { struct epoch_tracker et; int rc; NET_EPOCH_ENTER(et); rc = (ifa_ifwithaddr(addr) != NULL); NET_EPOCH_EXIT(et); return (rc); } /* * Locate an interface based on the broadcast address. */ /* ARGSUSED */ struct ifaddr * ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { goto done; } } } ifa = NULL; done: return (ifa); } /* * Locate the point to point interface with a given destination address. */ /*ARGSUSED*/ struct ifaddr * ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0) continue; if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { goto done; } } } ifa = NULL; done: return (ifa); } /* * Find an interface on a specific network. If many, choice * is most specific found. 
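The matching rule described here is a byte-at-a-time masked comparison. Pulled out on its own it looks like the hypothetical helper below (example_prefix_match() is an invented name); it restates the inner loop of ifa_ifwithnet() that follows, where a candidate matches when no bit covered by the netmask differs.

#include <sys/param.h>
#include <sys/socket.h>

/*
 * Hypothetical restatement of the masked comparison used by
 * ifa_ifwithnet(): an address lies on an ifaddr's network when no bit
 * covered by the netmask differs.  Returns 1 on a match, 0 otherwise.
 */
static int
example_prefix_match(const struct sockaddr *addr, const struct sockaddr *net,
    const struct sockaddr *mask)
{
	const char *cp = addr->sa_data;
	const char *cp2 = net->sa_data;
	const char *cp3 = mask->sa_data;
	const char *cplim = (const char *)mask + mask->sa_len;

	while (cp3 < cplim)
		if ((*cp++ ^ *cp2++) & *cp3++)
			return (0);	/* a masked bit disagrees */
	return (1);
}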
*/ struct ifaddr * ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; const char *addr_data = addr->sa_data, *cplim; NET_EPOCH_ASSERT(); /* * AF_LINK addresses can be looked up directly by their index number, * so do that if we can. */ if (af == AF_LINK) { const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr; if (sdl->sdl_index && sdl->sdl_index <= V_if_index) return (ifaddr_byindex(sdl->sdl_index)); } /* * Scan though each interface, looking for ones that have addresses * in this address family and the requested fib. */ CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { const char *cp, *cp2, *cp3; if (ifa->ifa_addr->sa_family != af) next: continue; if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) { /* * This is a bit broken as it doesn't * take into account that the remote end may * be a single node in the network we are * looking for. * The trouble is that we don't know the * netmask for the remote end. */ if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { goto done; } } else { /* * Scan all the bits in the ifa's address. * If a bit dissagrees with what we are * looking for, mask it with the netmask * to see if it really matters. * (A byte at a time) */ if (ifa->ifa_netmask == 0) continue; cp = addr_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; while (cp3 < cplim) if ((*cp++ ^ *cp2++) & *cp3++) goto next; /* next address! */ /* * If the netmask of what we just found * is more specific than what we had before * (if we had one), or if the virtual status * of new prefix is better than of the old one, * then remember the new one before continuing * to search for an even better one. */ if (ifa_maybe == NULL || ifa_preferred(ifa_maybe, ifa) || rn_refines((caddr_t)ifa->ifa_netmask, (caddr_t)ifa_maybe->ifa_netmask)) { ifa_maybe = ifa; } } } } ifa = ifa_maybe; ifa_maybe = NULL; done: return (ifa); } /* * Find an interface address specific to an interface best matching * a given address. */ struct ifaddr * ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) { struct ifaddr *ifa; const char *cp, *cp2, *cp3; char *cplim; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; if (af >= AF_MAX) return (NULL); NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != af) continue; if (ifa_maybe == NULL) ifa_maybe = ifa; if (ifa->ifa_netmask == 0) { if (sa_equal(addr, ifa->ifa_addr) || (ifa->ifa_dstaddr && sa_equal(addr, ifa->ifa_dstaddr))) goto done; continue; } if (ifp->if_flags & IFF_POINTOPOINT) { if (sa_equal(addr, ifa->ifa_dstaddr)) goto done; } else { cp = addr->sa_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; for (; cp3 < cplim; cp3++) if ((*cp++ ^ *cp2++) & *cp3) break; if (cp3 == cplim) goto done; } } ifa = ifa_maybe; done: return (ifa); } /* * See whether new ifa is better than current one: * 1) A non-virtual one is preferred over virtual. * 2) A virtual in master state preferred over any other state. * * Used in several address selecting functions. 
*/ int ifa_preferred(struct ifaddr *cur, struct ifaddr *next) { return (cur->ifa_carp && (!next->ifa_carp || ((*carp_master_p)(next) && !(*carp_master_p)(cur)))); } struct sockaddr_dl * link_alloc_sdl(size_t size, int flags) { return (malloc(size, M_TEMP, flags)); } void link_free_sdl(struct sockaddr *sa) { free(sa, M_TEMP); } /* * Fills in given sdl with interface basic info. * Returns pointer to filled sdl. */ struct sockaddr_dl * link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)paddr; memset(sdl, 0, sizeof(struct sockaddr_dl)); sdl->sdl_len = sizeof(struct sockaddr_dl); sdl->sdl_family = AF_LINK; sdl->sdl_index = ifp->if_index; sdl->sdl_type = iftype; return (sdl); } /* * Mark an interface down and notify protocols of * the transition. */ static void if_unroute(struct ifnet *ifp, int flag, int fam) { struct ifaddr *ifa; KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP")); ifp->if_flags &= ~flag; getmicrotime(&ifp->if_lastchange); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFDOWN, ifa->ifa_addr); ifp->if_qflush(ifp); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); } /* * Mark an interface up and notify protocols of * the transition. */ static void if_route(struct ifnet *ifp, int flag, int fam) { struct ifaddr *ifa; KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP")); ifp->if_flags |= flag; getmicrotime(&ifp->if_lastchange); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFUP, ifa->ifa_addr); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); #ifdef INET6 in6_if_up(ifp); #endif } void (*vlan_link_state_p)(struct ifnet *); /* XXX: private from if_vlan */ void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */ struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t); int (*vlan_tag_p)(struct ifnet *, uint16_t *); int (*vlan_pcp_p)(struct ifnet *, uint16_t *); int (*vlan_setcookie_p)(struct ifnet *, void *); void *(*vlan_cookie_p)(struct ifnet *); /* * Handle a change in the interface link state. To avoid LORs * between driver lock and upper layer locks, as well as possible * recursions, we post event to taskqueue, and all job * is done in static do_link_state_change(). */ void if_link_state_change(struct ifnet *ifp, int link_state) { /* Return if state hasn't changed. */ if (ifp->if_link_state == link_state) return; ifp->if_link_state = link_state; /* XXXGL: reference ifp? */ taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask); } static void do_link_state_change(void *arg, int pending) { struct ifnet *ifp; int link_state; ifp = arg; link_state = ifp->if_link_state; CURVNET_SET(ifp->if_vnet); rt_ifmsg(ifp); if (ifp->if_vlantrunk != NULL) (*vlan_link_state_p)(ifp); if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) && ifp->if_l2com != NULL) (*ng_ether_link_state_p)(ifp, link_state); if (ifp->if_carp) (*carp_linkstate_p)(ifp); if (ifp->if_bridge) ifp->if_bridge_linkstate(ifp); if (ifp->if_lagg) (*lagg_linkstate_p)(ifp, link_state); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL); if (pending > 1) if_printf(ifp, "%d link states coalesced\n", pending); if (log_link_state_change) if_printf(ifp, "link state changed to %s\n", (link_state == LINK_STATE_UP) ? 
"UP" : "DOWN" ); EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state); CURVNET_RESTORE(); } /* * Mark an interface down and notify protocols of * the transition. */ void if_down(struct ifnet *ifp) { EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN); if_unroute(ifp, IFF_UP, AF_UNSPEC); } /* * Mark an interface up and notify protocols of * the transition. */ void if_up(struct ifnet *ifp) { if_route(ifp, IFF_UP, AF_UNSPEC); EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP); } /* * Flush an interface queue. */ void if_qflush(struct ifnet *ifp) { struct mbuf *m, *n; struct ifaltq *ifq; ifq = &ifp->if_snd; IFQ_LOCK(ifq); #ifdef ALTQ if (ALTQ_IS_ENABLED(ifq)) ALTQ_PURGE(ifq); #endif n = ifq->ifq_head; while ((m = n) != NULL) { n = m->m_nextpkt; m_freem(m); } ifq->ifq_head = 0; ifq->ifq_tail = 0; ifq->ifq_len = 0; IFQ_UNLOCK(ifq); } /* * Map interface name to interface structure pointer, with or without * returning a reference. */ struct ifnet * ifunit_ref(const char *name) { struct epoch_tracker et; struct ifnet *ifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 && !(ifp->if_flags & IFF_DYING)) break; } if (ifp != NULL) if_ref(ifp); NET_EPOCH_EXIT(et); return (ifp); } struct ifnet * ifunit(const char *name) { struct epoch_tracker et; struct ifnet *ifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0) break; } NET_EPOCH_EXIT(et); return (ifp); } static void * ifr_buffer_get_buffer(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((void *)(uintptr_t) ifrup->ifr32.ifr_ifru.ifru_buffer.buffer); #endif return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer); } static void ifr_buffer_set_buffer_null(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0; else #endif ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL; } static size_t ifr_buffer_get_length(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return (ifrup->ifr32.ifr_ifru.ifru_buffer.length); #endif return (ifrup->ifr.ifr_ifru.ifru_buffer.length); } static void ifr_buffer_set_length(void *data, size_t len) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) ifrup->ifr32.ifr_ifru.ifru_buffer.length = len; else #endif ifrup->ifr.ifr_ifru.ifru_buffer.length = len; } void * ifr_data_get_ptr(void *ifrp) { union ifreq_union *ifrup; ifrup = ifrp; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((void *)(uintptr_t) ifrup->ifr32.ifr_ifru.ifru_data); #endif return (ifrup->ifr.ifr_ifru.ifru_data); } /* * Hardware specific interface ioctls. 
*/ int ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) { struct ifreq *ifr; int error = 0, do_ifup = 0; int new_flags, temp_flags; size_t namelen, onamelen; size_t descrlen; char *descrbuf, *odescrbuf; char new_name[IFNAMSIZ]; struct ifaddr *ifa; struct sockaddr_dl *sdl; ifr = (struct ifreq *)data; switch (cmd) { case SIOCGIFINDEX: ifr->ifr_index = ifp->if_index; break; case SIOCGIFFLAGS: temp_flags = ifp->if_flags | ifp->if_drv_flags; ifr->ifr_flags = temp_flags & 0xffff; ifr->ifr_flagshigh = temp_flags >> 16; break; case SIOCGIFCAP: ifr->ifr_reqcap = ifp->if_capabilities; ifr->ifr_curcap = ifp->if_capenable; break; #ifdef MAC case SIOCGIFMAC: error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp); break; #endif case SIOCGIFMETRIC: ifr->ifr_metric = ifp->if_metric; break; case SIOCGIFMTU: ifr->ifr_mtu = ifp->if_mtu; break; case SIOCGIFPHYS: /* XXXGL: did this ever worked? */ ifr->ifr_phys = 0; break; case SIOCGIFDESCR: error = 0; sx_slock(&ifdescr_sx); if (ifp->if_description == NULL) error = ENOMSG; else { /* space for terminating nul */ descrlen = strlen(ifp->if_description) + 1; if (ifr_buffer_get_length(ifr) < descrlen) ifr_buffer_set_buffer_null(ifr); else error = copyout(ifp->if_description, ifr_buffer_get_buffer(ifr), descrlen); ifr_buffer_set_length(ifr, descrlen); } sx_sunlock(&ifdescr_sx); break; case SIOCSIFDESCR: error = priv_check(td, PRIV_NET_SETIFDESCR); if (error) return (error); /* * Copy only (length-1) bytes to make sure that * if_description is always nul terminated. The * length parameter is supposed to count the * terminating nul in. */ if (ifr_buffer_get_length(ifr) > ifdescr_maxlen) return (ENAMETOOLONG); else if (ifr_buffer_get_length(ifr) == 0) descrbuf = NULL; else { descrbuf = malloc(ifr_buffer_get_length(ifr), M_IFDESCR, M_WAITOK | M_ZERO); error = copyin(ifr_buffer_get_buffer(ifr), descrbuf, ifr_buffer_get_length(ifr) - 1); if (error) { free(descrbuf, M_IFDESCR); break; } } sx_xlock(&ifdescr_sx); odescrbuf = ifp->if_description; ifp->if_description = descrbuf; sx_xunlock(&ifdescr_sx); getmicrotime(&ifp->if_lastchange); free(odescrbuf, M_IFDESCR); break; case SIOCGIFFIB: ifr->ifr_fib = ifp->if_fib; break; case SIOCSIFFIB: error = priv_check(td, PRIV_NET_SETIFFIB); if (error) return (error); if (ifr->ifr_fib >= rt_numfibs) return (EINVAL); ifp->if_fib = ifr->ifr_fib; break; case SIOCSIFFLAGS: error = priv_check(td, PRIV_NET_SETIFFLAGS); if (error) return (error); /* * Currently, no driver owned flags pass the IFF_CANTCHANGE * check, so we don't need special handling here yet. */ new_flags = (ifr->ifr_flags & 0xffff) | (ifr->ifr_flagshigh << 16); if (ifp->if_flags & IFF_UP && (new_flags & IFF_UP) == 0) { if_down(ifp); } else if (new_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) { do_ifup = 1; } /* See if permanently promiscuous mode bit is about to flip */ if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) { if (new_flags & IFF_PPROMISC) ifp->if_flags |= IFF_PROMISC; else if (ifp->if_pcount == 0) ifp->if_flags &= ~IFF_PROMISC; if (log_promisc_mode_change) if_printf(ifp, "permanently promiscuous mode %s\n", ((new_flags & IFF_PPROMISC) ? 
"enabled" : "disabled")); } ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | (new_flags &~ IFF_CANTCHANGE); if (ifp->if_ioctl) { (void) (*ifp->if_ioctl)(ifp, cmd, data); } if (do_ifup) if_up(ifp); getmicrotime(&ifp->if_lastchange); break; case SIOCSIFCAP: error = priv_check(td, PRIV_NET_SETIFCAP); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); if (ifr->ifr_reqcap & ~ifp->if_capabilities) return (EINVAL); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; #ifdef MAC case SIOCSIFMAC: error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp); break; #endif case SIOCSIFNAME: error = priv_check(td, PRIV_NET_SETIFNAME); if (error) return (error); error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ, NULL); if (error != 0) return (error); if (new_name[0] == '\0') return (EINVAL); if (new_name[IFNAMSIZ-1] != '\0') { new_name[IFNAMSIZ-1] = '\0'; if (strlen(new_name) == IFNAMSIZ-1) return (EINVAL); } if (strcmp(new_name, ifp->if_xname) == 0) break; if (ifunit(new_name) != NULL) return (EEXIST); /* * XXX: Locking. Nothing else seems to lock if_flags, * and there are numerous other races with the * ifunit() checks not being atomic with namespace * changes (renames, vmoves, if_attach, etc). */ ifp->if_flags |= IFF_RENAMING; /* Announce the departure of the interface. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); if_printf(ifp, "changing name to '%s'\n", new_name); IF_ADDR_WLOCK(ifp); strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); ifa = ifp->if_addr; sdl = (struct sockaddr_dl *)ifa->ifa_addr; namelen = strlen(new_name); onamelen = sdl->sdl_nlen; /* * Move the address if needed. This is safe because we * allocate space for a name of length IFNAMSIZ when we * create this in if_attach(). */ if (namelen != onamelen) { bcopy(sdl->sdl_data + onamelen, sdl->sdl_data + namelen, sdl->sdl_alen); } bcopy(new_name, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl = (struct sockaddr_dl *)ifa->ifa_netmask; bzero(sdl->sdl_data, onamelen); while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; IF_ADDR_WUNLOCK(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); /* Announce the return of the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); ifp->if_flags &= ~IFF_RENAMING; break; #ifdef VIMAGE case SIOCSIFVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error) return (error); error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid); break; #endif case SIOCSIFMETRIC: error = priv_check(td, PRIV_NET_SETIFMETRIC); if (error) return (error); ifp->if_metric = ifr->ifr_metric; getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYS: error = priv_check(td, PRIV_NET_SETIFPHYS); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFMTU: { u_long oldmtu = ifp->if_mtu; error = priv_check(td, PRIV_NET_SETIFMTU); if (error) return (error); if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) return (EINVAL); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) { getmicrotime(&ifp->if_lastchange); rt_ifmsg(ifp); #ifdef INET DEBUGNET_NOTIFY_MTU(ifp); #endif } /* * If the link MTU changed, do network layer specific procedure. 
*/ if (ifp->if_mtu != oldmtu) { #ifdef INET6 nd6_setmtu(ifp); #endif rt_updatemtu(ifp); } break; } case SIOCADDMULTI: case SIOCDELMULTI: if (cmd == SIOCADDMULTI) error = priv_check(td, PRIV_NET_ADDMULTI); else error = priv_check(td, PRIV_NET_DELMULTI); if (error) return (error); /* Don't allow group membership on non-multicast interfaces. */ if ((ifp->if_flags & IFF_MULTICAST) == 0) return (EOPNOTSUPP); /* Don't let users screw up protocols' entries. */ if (ifr->ifr_addr.sa_family != AF_LINK) return (EINVAL); if (cmd == SIOCADDMULTI) { struct epoch_tracker et; struct ifmultiaddr *ifma; /* * Userland is only permitted to join groups once * via the if_addmulti() KPI, because it cannot hold * struct ifmultiaddr * between calls. It may also * lose a race while we check if the membership * already exists. */ NET_EPOCH_ENTER(et); ifma = if_findmulti(ifp, &ifr->ifr_addr); NET_EPOCH_EXIT(et); if (ifma != NULL) error = EADDRINUSE; else error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); } else { error = if_delmulti(ifp, &ifr->ifr_addr); } if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYADDR: case SIOCDIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif case SIOCSIFMEDIA: case SIOCSIFGENERIC: error = priv_check(td, PRIV_NET_HWIOCTL); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCGIFSTATUS: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: case SIOCGIFGENERIC: case SIOCGIFRSSKEY: case SIOCGIFRSSHASH: case SIOCGIFDOWNREASON: if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); break; case SIOCSIFLLADDR: error = priv_check(td, PRIV_NET_SETLLADDR); if (error) return (error); error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len); break; case SIOCGHWADDR: error = if_gethwaddr(ifp, ifr); break; case CASE_IOC_IFGROUPREQ(SIOCAIFGROUP): error = priv_check(td, PRIV_NET_ADDIFGROUP); if (error) return (error); if ((error = if_addgroup(ifp, ifgr_group_get((struct ifgroupreq *)data)))) return (error); break; case CASE_IOC_IFGROUPREQ(SIOCGIFGROUP): { struct epoch_tracker et; NET_EPOCH_ENTER(et); error = if_getgroup((struct ifgroupreq *)data, ifp); NET_EPOCH_EXIT(et); break; } case CASE_IOC_IFGROUPREQ(SIOCDIFGROUP): error = priv_check(td, PRIV_NET_DELIFGROUP); if (error) return (error); if ((error = if_delgroup(ifp, ifgr_group_get((struct ifgroupreq *)data)))) return (error); break; default: error = ENOIOCTL; break; } return (error); } #ifdef COMPAT_FREEBSD32 struct ifconf32 { int32_t ifc_len; union { uint32_t ifcu_buf; uint32_t ifcu_req; } ifc_ifcu; }; #define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32) #endif #ifdef COMPAT_FREEBSD32 static void ifmr_init(struct ifmediareq *ifmr, caddr_t data) { struct ifmediareq32 *ifmr32; ifmr32 = (struct ifmediareq32 *)data; memcpy(ifmr->ifm_name, ifmr32->ifm_name, sizeof(ifmr->ifm_name)); ifmr->ifm_current = ifmr32->ifm_current; ifmr->ifm_mask = ifmr32->ifm_mask; ifmr->ifm_status = ifmr32->ifm_status; ifmr->ifm_active = ifmr32->ifm_active; ifmr->ifm_count = ifmr32->ifm_count; ifmr->ifm_ulist = (int *)(uintptr_t)ifmr32->ifm_ulist; } static void ifmr_update(const struct ifmediareq *ifmr, caddr_t data) { struct ifmediareq32 *ifmr32; ifmr32 = (struct ifmediareq32 *)data; ifmr32->ifm_current = ifmr->ifm_current; ifmr32->ifm_mask = ifmr->ifm_mask; ifmr32->ifm_status = ifmr->ifm_status; ifmr32->ifm_active = ifmr->ifm_active; 
ifmr32->ifm_count = ifmr->ifm_count; } #endif /* * Interface ioctls. */ int ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) { #ifdef COMPAT_FREEBSD32 caddr_t saved_data = NULL; struct ifmediareq ifmr; struct ifmediareq *ifmrp = NULL; #endif struct ifnet *ifp; struct ifreq *ifr; int error; int oif_flags; #ifdef VIMAGE bool shutdown; #endif CURVNET_SET(so->so_vnet); #ifdef VIMAGE /* Make sure the VNET is stable. */ shutdown = VNET_IS_SHUTTING_DOWN(so->so_vnet); if (shutdown) { CURVNET_RESTORE(); return (EBUSY); } #endif switch (cmd) { case SIOCGIFCONF: error = ifconf(cmd, data); goto out_noref; #ifdef COMPAT_FREEBSD32 case SIOCGIFCONF32: { struct ifconf32 *ifc32; struct ifconf ifc; ifc32 = (struct ifconf32 *)data; ifc.ifc_len = ifc32->ifc_len; ifc.ifc_buf = PTRIN(ifc32->ifc_buf); error = ifconf(SIOCGIFCONF, (void *)&ifc); if (error == 0) ifc32->ifc_len = ifc.ifc_len; goto out_noref; } #endif } #ifdef COMPAT_FREEBSD32 switch (cmd) { case SIOCGIFMEDIA32: case SIOCGIFXMEDIA32: ifmrp = &ifmr; ifmr_init(ifmrp, data); cmd = _IOC_NEWTYPE(cmd, struct ifmediareq); saved_data = data; data = (caddr_t)ifmrp; } #endif ifr = (struct ifreq *)data; switch (cmd) { #ifdef VIMAGE case SIOCSIFRVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error == 0) error = if_vmove_reclaim(td, ifr->ifr_name, ifr->ifr_jid); goto out_noref; #endif case SIOCIFCREATE: case SIOCIFCREATE2: error = priv_check(td, PRIV_NET_IFCREATE); if (error == 0) error = if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ? ifr_data_get_ptr(ifr) : NULL); goto out_noref; case SIOCIFDESTROY: error = priv_check(td, PRIV_NET_IFDESTROY); if (error == 0) error = if_clone_destroy(ifr->ifr_name); goto out_noref; case SIOCIFGCLONERS: error = if_clone_list((struct if_clonereq *)data); goto out_noref; case CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB): error = if_getgroupmembers((struct ifgroupreq *)data); goto out_noref; #if defined(INET) || defined(INET6) case SIOCSVH: case SIOCGVH: if (carp_ioctl_p == NULL) error = EPROTONOSUPPORT; else error = (*carp_ioctl_p)(ifr, cmd, td); goto out_noref; #endif } ifp = ifunit_ref(ifr->ifr_name); if (ifp == NULL) { error = ENXIO; goto out_noref; } error = ifhwioctl(cmd, ifp, data, td); if (error != ENOIOCTL) goto out_ref; oif_flags = ifp->if_flags; if (so->so_proto == NULL) { error = EOPNOTSUPP; goto out_ref; } /* * Pass the request on to the socket control method, and if the * latter returns EOPNOTSUPP, directly to the interface. * * Make an exception for the legacy SIOCSIF* requests. Drivers * trust SIOCSIFADDR et al to come from an already privileged * layer, and do not perform any credentials checks or input * validation. */ error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, ifp, td)); if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL && cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR && cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK) error = (*ifp->if_ioctl)(ifp, cmd, data); if ((oif_flags ^ ifp->if_flags) & IFF_UP) { #ifdef INET6 if (ifp->if_flags & IFF_UP) in6_if_up(ifp); #endif } out_ref: if_rele(ifp); out_noref: #ifdef COMPAT_FREEBSD32 if (ifmrp != NULL) { KASSERT((cmd == SIOCGIFMEDIA || cmd == SIOCGIFXMEDIA), ("ifmrp non-NULL, but cmd is not an ifmedia req 0x%lx", cmd)); data = saved_data; ifmr_update(ifmrp, data); } #endif CURVNET_RESTORE(); return (error); } /* * The code common to handling reference counted flags, * e.g., in ifpromisc() and if_allmulti(). 
* The "pflag" argument can specify a permanent mode flag to check, * such as IFF_PPROMISC for promiscuous mode; should be 0 if none. * * Only to be used on stack-owned flags, not driver-owned flags. */ static int if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch) { struct ifreq ifr; int error; int oldflags, oldcount; /* Sanity checks to catch programming errors */ KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0, ("%s: setting driver-owned flag %d", __func__, flag)); if (onswitch) KASSERT(*refcount >= 0, ("%s: increment negative refcount %d for flag %d", __func__, *refcount, flag)); else KASSERT(*refcount > 0, ("%s: decrement non-positive refcount %d for flag %d", __func__, *refcount, flag)); /* In case this mode is permanent, just touch refcount */ if (ifp->if_flags & pflag) { *refcount += onswitch ? 1 : -1; return (0); } /* Save ifnet parameters for if_ioctl() may fail */ oldcount = *refcount; oldflags = ifp->if_flags; /* * See if we aren't the only and touching refcount is enough. * Actually toggle interface flag if we are the first or last. */ if (onswitch) { if ((*refcount)++) return (0); ifp->if_flags |= flag; } else { if (--(*refcount)) return (0); ifp->if_flags &= ~flag; } /* Call down the driver since we've changed interface flags */ if (ifp->if_ioctl == NULL) { error = EOPNOTSUPP; goto recover; } ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); if (error) goto recover; /* Notify userland that interface flags have changed */ rt_ifmsg(ifp); return (0); recover: /* Recover after driver error */ *refcount = oldcount; ifp->if_flags = oldflags; return (error); } /* * Set/clear promiscuous mode on interface ifp based on the truth value * of pswitch. The calls are reference counted so that only the first * "on" request actually has an effect, as does the final "off" request. * Results are undefined if the "off" and "on" requests are not matched. */ int ifpromisc(struct ifnet *ifp, int pswitch) { int error; int oldflags = ifp->if_flags; error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC, &ifp->if_pcount, pswitch); /* If promiscuous mode status has changed, log a message */ if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) && log_promisc_mode_change) if_printf(ifp, "promiscuous mode %s\n", (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled"); return (error); } /* * Return interface configuration * of system. List may be used * in later ioctl's (above) to get * other information. */ /*ARGSUSED*/ static int ifconf(u_long cmd, caddr_t data) { struct ifconf *ifc = (struct ifconf *)data; struct ifnet *ifp; struct ifaddr *ifa; struct ifreq ifr; struct sbuf *sb; int error, full = 0, valid_len, max_len; /* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */ max_len = MAXPHYS - 1; /* Prevent hostile input from being able to crash the system */ if (ifc->ifc_len <= 0) return (EINVAL); again: if (ifc->ifc_len <= max_len) { max_len = ifc->ifc_len; full = 1; } sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN); max_len = 0; valid_len = 0; IFNET_RLOCK(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { struct epoch_tracker et; int addrs; /* * Zero the ifr to make sure we don't disclose the contents * of the stack. 
*/ memset(&ifr, 0, sizeof(ifr)); if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) >= sizeof(ifr.ifr_name)) { sbuf_delete(sb); IFNET_RUNLOCK(); return (ENAMETOOLONG); } addrs = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa = ifa->ifa_addr; if (prison_if(curthread->td_ucred, sa) != 0) continue; addrs++; if (sa->sa_len <= sizeof(*sa)) { if (sa->sa_len < sizeof(*sa)) { memset(&ifr.ifr_ifru.ifru_addr, 0, sizeof(ifr.ifr_ifru.ifru_addr)); memcpy(&ifr.ifr_ifru.ifru_addr, sa, sa->sa_len); } else ifr.ifr_ifru.ifru_addr = *sa; sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); } else { sbuf_bcat(sb, &ifr, offsetof(struct ifreq, ifr_addr)); max_len += offsetof(struct ifreq, ifr_addr); sbuf_bcat(sb, sa, sa->sa_len); max_len += sa->sa_len; } if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } NET_EPOCH_EXIT(et); if (addrs == 0) { sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } } IFNET_RUNLOCK(); /* * If we didn't allocate enough space (uncommon), try again. If * we have already allocated as much space as we are allowed, * return what we've got. */ if (valid_len != max_len && !full) { sbuf_delete(sb); goto again; } ifc->ifc_len = valid_len; sbuf_finish(sb); error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len); sbuf_delete(sb); return (error); } /* * Just like ifpromisc(), but for all-multicast-reception mode. */ int if_allmulti(struct ifnet *ifp, int onswitch) { return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch)); } struct ifmultiaddr * if_findmulti(struct ifnet *ifp, const struct sockaddr *sa) { struct ifmultiaddr *ifma; IF_ADDR_LOCK_ASSERT(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (sa->sa_family == AF_LINK) { if (sa_dl_equal(ifma->ifma_addr, sa)) break; } else { if (sa_equal(ifma->ifma_addr, sa)) break; } } return ifma; } /* * Allocate a new ifmultiaddr and initialize based on passed arguments. We * make copies of passed sockaddrs. The ifmultiaddr will not be added to * the ifnet multicast address list here, so the caller must do that and * other setup work (such as notifying the device driver). The reference * count is initialized to 1. */ static struct ifmultiaddr * if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa, int mflags) { struct ifmultiaddr *ifma; struct sockaddr *dupsa; ifma = malloc(sizeof *ifma, M_IFMADDR, mflags | M_ZERO); if (ifma == NULL) return (NULL); dupsa = malloc(sa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma, M_IFMADDR); return (NULL); } bcopy(sa, dupsa, sa->sa_len); ifma->ifma_addr = dupsa; ifma->ifma_ifp = ifp; ifma->ifma_refcount = 1; ifma->ifma_protospec = NULL; if (llsa == NULL) { ifma->ifma_lladdr = NULL; return (ifma); } dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); return (NULL); } bcopy(llsa, dupsa, llsa->sa_len); ifma->ifma_lladdr = dupsa; return (ifma); } /* * if_freemulti: free ifmultiaddr structure and possibly attached related * addresses. The caller is responsible for implementing reference * counting, notifying the driver, handling routing messages, and releasing * any dependent link layer state. 
*/ #ifdef MCAST_VERBOSE extern void kdb_backtrace(void); #endif static void if_freemulti_internal(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d", ifma->ifma_refcount)); if (ifma->ifma_lladdr != NULL) free(ifma->ifma_lladdr, M_IFMADDR); #ifdef MCAST_VERBOSE kdb_backtrace(); printf("%s freeing ifma: %p\n", __func__, ifma); #endif free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); } static void if_destroymulti(epoch_context_t ctx) { struct ifmultiaddr *ifma; ifma = __containerof(ctx, struct ifmultiaddr, ifma_epoch_ctx); if_freemulti_internal(ifma); } void if_freemulti(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti_epoch: refcount %d", ifma->ifma_refcount)); NET_EPOCH_CALL(if_destroymulti, &ifma->ifma_epoch_ctx); } /* * Register an additional multicast address with a network interface. * * - If the address is already present, bump the reference count on the * address and return. * - If the address is not link-layer, look up a link layer address. * - Allocate address structures for one or both addresses, and attach to the * multicast address list on the interface. If automatically adding a link * layer address, the protocol address will own a reference to the link * layer address, to be freed when it is freed. * - Notify the network device driver of an addition to the multicast address * list. * * 'sa' points to caller-owned memory with the desired multicast address. * * 'retifma' will be used to return a pointer to the resulting multicast * address reference, if desired. */ int if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma) { struct ifmultiaddr *ifma, *ll_ifma; struct sockaddr *llsa; struct sockaddr_dl sdl; int error; #ifdef INET IN_MULTI_LIST_UNLOCK_ASSERT(); #endif #ifdef INET6 IN6_MULTI_LIST_UNLOCK_ASSERT(); #endif /* * If the address is already present, return a new reference to it; * otherwise, allocate storage and set up a new address. */ IF_ADDR_WLOCK(ifp); ifma = if_findmulti(ifp, sa); if (ifma != NULL) { ifma->ifma_refcount++; if (retifma != NULL) *retifma = ifma; IF_ADDR_WUNLOCK(ifp); return (0); } /* * The address isn't already present; resolve the protocol address * into a link layer address, and then look that up, bump its * refcount or allocate an ifma for that also. * Most link layer resolving functions returns address data which * fits inside default sockaddr_dl structure. However callback * can allocate another sockaddr structure, in that case we need to * free it later. */ llsa = NULL; ll_ifma = NULL; if (ifp->if_resolvemulti != NULL) { /* Provide called function with buffer size information */ sdl.sdl_len = sizeof(sdl); llsa = (struct sockaddr *)&sdl; error = ifp->if_resolvemulti(ifp, &llsa, sa); if (error) goto unlock_out; } /* * Allocate the new address. Don't hook it up yet, as we may also * need to allocate a link layer multicast address. */ ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT); if (ifma == NULL) { error = ENOMEM; goto free_llsa_out; } /* * If a link layer address is found, we'll need to see if it's * already present in the address list, or allocate is as well. * When this block finishes, the link layer address will be on the * list. 
*/ if (llsa != NULL) { ll_ifma = if_findmulti(ifp, llsa); if (ll_ifma == NULL) { ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT); if (ll_ifma == NULL) { --ifma->ifma_refcount; if_freemulti(ifma); error = ENOMEM; goto free_llsa_out; } ll_ifma->ifma_flags |= IFMA_F_ENQUEUED; CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma, ifma_link); } else ll_ifma->ifma_refcount++; ifma->ifma_llifma = ll_ifma; } /* * We now have a new multicast address, ifma, and possibly a new or * referenced link layer address. Add the primary address to the * ifnet address list. */ ifma->ifma_flags |= IFMA_F_ENQUEUED; CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); if (retifma != NULL) *retifma = ifma; /* * Must generate the message while holding the lock so that 'ifma' * pointer is still valid. */ rt_newmaddrmsg(RTM_NEWMADDR, ifma); IF_ADDR_WUNLOCK(ifp); /* * We are certain we have added something, so call down to the * interface to let them know about it. */ if (ifp->if_ioctl != NULL) { if (THREAD_CAN_SLEEP()) (void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0); else taskqueue_enqueue(taskqueue_swi, &ifp->if_addmultitask); } if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); return (0); free_llsa_out: if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); unlock_out: IF_ADDR_WUNLOCK(ifp); return (error); } static void if_siocaddmulti(void *arg, int pending) { struct ifnet *ifp; ifp = arg; #ifdef DIAGNOSTIC if (pending > 1) if_printf(ifp, "%d SIOCADDMULTI coalesced\n", pending); #endif CURVNET_SET(ifp->if_vnet); (void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0); CURVNET_RESTORE(); } /* * Delete a multicast group membership by network-layer group address. * * Returns ENOENT if the entry could not be found. If ifp no longer * exists, results are undefined. This entry point should only be used * from subsystems which do appropriate locking to hold ifp for the * duration of the call. * Network-layer protocol domains must use if_delmulti_ifma(). */ int if_delmulti(struct ifnet *ifp, struct sockaddr *sa) { struct ifmultiaddr *ifma; int lastref; KASSERT(ifp, ("%s: NULL ifp", __func__)); IF_ADDR_WLOCK(ifp); lastref = 0; ifma = if_findmulti(ifp, sa); if (ifma != NULL) lastref = if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); if (ifma == NULL) return (ENOENT); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } return (0); } /* * Delete all multicast group membership for an interface. * Should be used to quickly flush all multicast filters. */ void if_delallmulti(struct ifnet *ifp) { struct ifmultiaddr *ifma; struct ifmultiaddr *next; IF_ADDR_WLOCK(ifp); CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); } void if_delmulti_ifma(struct ifmultiaddr *ifma) { if_delmulti_ifma_flags(ifma, 0); } /* * Delete a multicast group membership by group membership pointer. * Network-layer protocol domains must use this routine. * * It is safe to call this routine if the ifp disappeared. 
*/ void if_delmulti_ifma_flags(struct ifmultiaddr *ifma, int flags) { struct ifnet *ifp; int lastref; MCDPRINTF("%s freeing ifma: %p\n", __func__, ifma); #ifdef INET IN_MULTI_LIST_UNLOCK_ASSERT(); #endif ifp = ifma->ifma_ifp; #ifdef DIAGNOSTIC if (ifp == NULL) { printf("%s: ifma_ifp seems to be detached\n", __func__); } else { struct epoch_tracker et; struct ifnet *oifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link) if (ifp == oifp) break; NET_EPOCH_EXIT(et); if (ifp != oifp) ifp = NULL; } #endif /* * If and only if the ifnet instance exists: Acquire the address lock. */ if (ifp != NULL) IF_ADDR_WLOCK(ifp); lastref = if_delmulti_locked(ifp, ifma, flags); if (ifp != NULL) { /* * If and only if the ifnet instance exists: * Release the address lock. * If the group was left: update the hardware hash filter. */ IF_ADDR_WUNLOCK(ifp); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } } } /* * Perform deletion of network-layer and/or link-layer multicast address. * * Return 0 if the reference count was decremented. * Return 1 if the final reference was released, indicating that the * hardware hash filter should be reprogrammed. */ static int if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching) { struct ifmultiaddr *ll_ifma; if (ifp != NULL && ifma->ifma_ifp != NULL) { KASSERT(ifma->ifma_ifp == ifp, ("%s: inconsistent ifp %p", __func__, ifp)); IF_ADDR_WLOCK_ASSERT(ifp); } ifp = ifma->ifma_ifp; MCDPRINTF("%s freeing %p from %s \n", __func__, ifma, ifp ? ifp->if_xname : ""); /* * If the ifnet is detaching, null out references to ifnet, * so that upper protocol layers will notice, and not attempt * to obtain locks for an ifnet which no longer exists. The * routing socket announcement must happen before the ifnet * instance is detached from the system. */ if (detaching) { #ifdef DIAGNOSTIC printf("%s: detaching ifnet instance %p\n", __func__, ifp); #endif /* * ifp may already be nulled out if we are being reentered * to delete the ll_ifma. */ if (ifp != NULL) { rt_newmaddrmsg(RTM_DELMADDR, ifma); ifma->ifma_ifp = NULL; } } if (--ifma->ifma_refcount > 0) return 0; if (ifp != NULL && detaching == 0 && (ifma->ifma_flags & IFMA_F_ENQUEUED)) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } /* * If this ifma is a network-layer ifma, a link-layer ifma may * have been associated with it. Release it first if so. */ ll_ifma = ifma->ifma_llifma; if (ll_ifma != NULL) { KASSERT(ifma->ifma_lladdr != NULL, ("%s: llifma w/o lladdr", __func__)); if (detaching) ll_ifma->ifma_ifp = NULL; /* XXX */ if (--ll_ifma->ifma_refcount == 0) { if (ifp != NULL) { if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } } if_freemulti(ll_ifma); } } #ifdef INVARIANTS if (ifp) { struct ifmultiaddr *ifmatmp; CK_STAILQ_FOREACH(ifmatmp, &ifp->if_multiaddrs, ifma_link) MPASS(ifma != ifmatmp); } #endif if_freemulti(ifma); /* * The last reference to this instance of struct ifmultiaddr * was released; the hardware should be notified of this change. */ return 1; } /* * Set the link layer address on an interface. * * At this time we only support certain types of interfaces, * and we don't allow the length of the address to change. 
* * Set noinline to be dtrace-friendly */ __noinline int if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) { struct sockaddr_dl *sdl; struct ifaddr *ifa; struct ifreq ifr; ifa = ifp->if_addr; if (ifa == NULL) return (EINVAL); sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl == NULL) return (EINVAL); if (len != sdl->sdl_alen) /* don't allow length to change */ return (EINVAL); switch (ifp->if_type) { case IFT_ETHER: case IFT_XETHER: case IFT_L2VLAN: case IFT_BRIDGE: case IFT_IEEE8023ADLAG: bcopy(lladdr, LLADDR(sdl), len); break; default: return (ENODEV); } /* * If the interface is already up, we need * to re-init it in order to reprogram its * address filter. */ if ((ifp->if_flags & IFF_UP) != 0) { if (ifp->if_ioctl) { ifp->if_flags &= ~IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); ifp->if_flags |= IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); } } EVENTHANDLER_INVOKE(iflladdr_event, ifp); return (0); } /* * Compat function for handling basic encapsulation requests. * Not converted stacks (FDDI, IB, ..) supports traditional * output model: ARP (and other similar L2 protocols) are handled * inside output routine, arpresolve/nd6_resolve() returns MAC * address instead of full prepend. * * This function creates calculated header==MAC for IPv4/IPv6 and * returns EAFNOSUPPORT (which is then handled in ARP code) for other * address families. */ static int if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req) { if (req->rtype != IFENCAP_LL) return (EOPNOTSUPP); if (req->bufsize < req->lladdr_len) return (ENOMEM); switch (req->family) { case AF_INET: case AF_INET6: break; default: return (EAFNOSUPPORT); } /* Copy lladdr to storage as is */ memmove(req->buf, req->lladdr, req->lladdr_len); req->bufsize = req->lladdr_len; req->lladdr_off = 0; return (0); } /* * Tunnel interfaces can nest, also they may cause infinite recursion * calls when misconfigured. We'll prevent this by detecting loops. * High nesting level may cause stack exhaustion. We'll prevent this * by introducing upper limit. * * Return 0, if tunnel nesting count is equal or less than limit. */ int if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie, int limit) { struct m_tag *mtag; int count; count = 1; mtag = NULL; while ((mtag = m_tag_locate(m, cookie, 0, mtag)) != NULL) { if (*(struct ifnet **)(mtag + 1) == ifp) { log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp)); return (EIO); } count++; } if (count > limit) { log(LOG_NOTICE, "%s: if_output recursively called too many times(%d)\n", if_name(ifp), count); return (EIO); } mtag = m_tag_alloc(cookie, 0, sizeof(struct ifnet *), M_NOWAIT); if (mtag == NULL) return (ENOMEM); *(struct ifnet **)(mtag + 1) = ifp; m_tag_prepend(m, mtag); return (0); } /* * Get the link layer address that was read from the hardware at attach. * * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type * their component interfaces as IFT_IEEE8023ADLAG. */ int if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr) { if (ifp->if_hw_addr == NULL) return (ENODEV); switch (ifp->if_type) { case IFT_ETHER: case IFT_IEEE8023ADLAG: bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen); return (0); default: return (ENODEV); } } /* * The name argument must be a pointer to storage which will last as * long as the interface does. 
For physical devices, the result of * device_get_name(dev) is a good choice and for pseudo-devices a * static string works well. */ void if_initname(struct ifnet *ifp, const char *name, int unit) { ifp->if_dname = name; ifp->if_dunit = unit; if (unit != IF_DUNIT_NONE) snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); else strlcpy(ifp->if_xname, name, IFNAMSIZ); } int if_printf(struct ifnet *ifp, const char *fmt, ...) { char if_fmt[256]; va_list ap; snprintf(if_fmt, sizeof(if_fmt), "%s: %s", ifp->if_xname, fmt); va_start(ap, fmt); vlog(LOG_INFO, if_fmt, ap); va_end(ap); return (0); } void if_start(struct ifnet *ifp) { (*(ifp)->if_start)(ifp); } /* * Backwards compatibility interface for drivers * that have not implemented it */ static int if_transmit(struct ifnet *ifp, struct mbuf *m) { int error; IFQ_HANDOFF(ifp, m, error); return (error); } static void if_input_default(struct ifnet *ifp __unused, struct mbuf *m) { m_freem(m); } int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust) { int active = 0; IF_LOCK(ifq); if (_IF_QFULL(ifq)) { IF_UNLOCK(ifq); if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); m_freem(m); return (0); } if (ifp != NULL) { if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust); if (m->m_flags & (M_BCAST|M_MCAST)) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); active = ifp->if_drv_flags & IFF_DRV_OACTIVE; } _IF_ENQUEUE(ifq, m); IF_UNLOCK(ifq); if (ifp != NULL && !active) (*(ifp)->if_start)(ifp); return (1); } void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f) { KASSERT(if_com_alloc[type] == NULL, ("if_register_com_alloc: %d already registered", type)); KASSERT(if_com_free[type] == NULL, ("if_register_com_alloc: %d free already registered", type)); if_com_alloc[type] = a; if_com_free[type] = f; } void if_deregister_com_alloc(u_char type) { KASSERT(if_com_alloc[type] != NULL, ("if_deregister_com_alloc: %d not registered", type)); KASSERT(if_com_free[type] != NULL, ("if_deregister_com_alloc: %d free not registered", type)); if_com_alloc[type] = NULL; if_com_free[type] = NULL; } /* API for driver access to network stack owned ifnet.*/ uint64_t if_setbaudrate(struct ifnet *ifp, uint64_t baudrate) { uint64_t oldbrate; oldbrate = ifp->if_baudrate; ifp->if_baudrate = baudrate; return (oldbrate); } uint64_t if_getbaudrate(if_t ifp) { return (((struct ifnet *)ifp)->if_baudrate); } int if_setcapabilities(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capabilities = capabilities; return (0); } int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit) { ((struct ifnet *)ifp)->if_capabilities |= setbit; ((struct ifnet *)ifp)->if_capabilities &= ~clearbit; return (0); } int if_getcapabilities(if_t ifp) { return ((struct ifnet *)ifp)->if_capabilities; } int if_setcapenable(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capenable = capabilities; return (0); } int if_setcapenablebit(if_t ifp, int setcap, int clearcap) { if(setcap) ((struct ifnet *)ifp)->if_capenable |= setcap; if(clearcap) ((struct ifnet *)ifp)->if_capenable &= ~clearcap; return (0); } const char * if_getdname(if_t ifp) { return ((struct ifnet *)ifp)->if_dname; } int if_togglecapenable(if_t ifp, int togglecap) { ((struct ifnet *)ifp)->if_capenable ^= togglecap; return (0); } int if_getcapenable(if_t ifp) { return ((struct ifnet *)ifp)->if_capenable; } /* * This is largely undesirable because it ties ifnet to a device, but does * provide flexiblity for an embedded product vendor. 
Should be used with
 * the understanding that it violates the interface boundaries, and should be
 * a last resort only.
 */
int
if_setdev(if_t ifp, void *dev)
{
	return (0);
}

int
if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
{
	((struct ifnet *)ifp)->if_drv_flags |= set_flags;
	((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags;
	return (0);
}

int
if_getdrvflags(if_t ifp)
{
	return ((struct ifnet *)ifp)->if_drv_flags;
}

int
if_setdrvflags(if_t ifp, int flags)
{
	((struct ifnet *)ifp)->if_drv_flags = flags;
	return (0);
}

int
if_setflags(if_t ifp, int flags)
{
-	/* XXX Temporary */
-	((struct ifnet *)ifp)->if_flags = flags | IFF_NEEDSEPOCH;
+
+	ifp->if_flags = flags;
	return (0);
}

int
if_setflagbits(if_t ifp, int set, int clear)
{
	((struct ifnet *)ifp)->if_flags |= set;
	((struct ifnet *)ifp)->if_flags &= ~clear;
	return (0);
}

int
if_getflags(if_t ifp)
{
	return ((struct ifnet *)ifp)->if_flags;
}

int
if_clearhwassist(if_t ifp)
{
	((struct ifnet *)ifp)->if_hwassist = 0;
	return (0);
}

int
if_sethwassistbits(if_t ifp, int toset, int toclear)
{
	((struct ifnet *)ifp)->if_hwassist |= toset;
	((struct ifnet *)ifp)->if_hwassist &= ~toclear;
	return (0);
}

int
if_sethwassist(if_t ifp, int hwassist_bit)
{
	((struct ifnet *)ifp)->if_hwassist = hwassist_bit;
	return (0);
}

int
if_gethwassist(if_t ifp)
{
	return ((struct ifnet *)ifp)->if_hwassist;
}

int
if_setmtu(if_t ifp, int mtu)
{
	((struct ifnet *)ifp)->if_mtu = mtu;
	return (0);
}

int
if_getmtu(if_t ifp)
{
	return ((struct ifnet *)ifp)->if_mtu;
}

int
if_getmtu_family(if_t ifp, int family)
{
	struct domain *dp;

	for (dp = domains; dp; dp = dp->dom_next) {
		if (dp->dom_family == family && dp->dom_ifmtu != NULL)
			return (dp->dom_ifmtu((struct ifnet *)ifp));
	}

	return (((struct ifnet *)ifp)->if_mtu);
}

/*
 * Methods for drivers to access interface unicast and multicast
 * link level addresses.  Driver shall not know 'struct ifaddr' neither
 * 'struct ifmultiaddr'.
*/ u_int if_lladdr_count(if_t ifp) { struct epoch_tracker et; struct ifaddr *ifa; u_int count; count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_LINK) count++; NET_EPOCH_EXIT(et); return (count); } u_int if_foreach_lladdr(if_t ifp, iflladdr_cb_t cb, void *cb_arg) { struct epoch_tracker et; struct ifaddr *ifa; u_int count; MPASS(cb); count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; count += (*cb)(cb_arg, (struct sockaddr_dl *)ifa->ifa_addr, count); } NET_EPOCH_EXIT(et); return (count); } u_int if_llmaddr_count(if_t ifp) { struct epoch_tracker et; struct ifmultiaddr *ifma; int count; count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) if (ifma->ifma_addr->sa_family == AF_LINK) count++; NET_EPOCH_EXIT(et); return (count); } u_int if_foreach_llmaddr(if_t ifp, iflladdr_cb_t cb, void *cb_arg) { struct epoch_tracker et; struct ifmultiaddr *ifma; u_int count; MPASS(cb); count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; count += (*cb)(cb_arg, (struct sockaddr_dl *)ifma->ifma_addr, count); } NET_EPOCH_EXIT(et); return (count); } int if_setsoftc(if_t ifp, void *softc) { ((struct ifnet *)ifp)->if_softc = softc; return (0); } void * if_getsoftc(if_t ifp) { return ((struct ifnet *)ifp)->if_softc; } void if_setrcvif(struct mbuf *m, if_t ifp) { MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); m->m_pkthdr.rcvif = (struct ifnet *)ifp; } void if_setvtag(struct mbuf *m, uint16_t tag) { m->m_pkthdr.ether_vtag = tag; } uint16_t if_getvtag(struct mbuf *m) { return (m->m_pkthdr.ether_vtag); } int if_sendq_empty(if_t ifp) { return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd); } struct ifaddr * if_getifaddr(if_t ifp) { return ((struct ifnet *)ifp)->if_addr; } int if_getamcount(if_t ifp) { return ((struct ifnet *)ifp)->if_amcount; } int if_setsendqready(if_t ifp) { IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd); return (0); } int if_setsendqlen(if_t ifp, int tx_desc_count) { IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count); ((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count; return (0); } int if_vlantrunkinuse(if_t ifp) { return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0; } int if_input(if_t ifp, struct mbuf* sendmp) { (*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp); return (0); } struct mbuf * if_dequeue(if_t ifp) { struct mbuf *m; IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m); return (m); } int if_sendq_prepend(if_t ifp, struct mbuf *m) { IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m); return (0); } int if_setifheaderlen(if_t ifp, int len) { ((struct ifnet *)ifp)->if_hdrlen = len; return (0); } caddr_t if_getlladdr(if_t ifp) { return (IF_LLADDR((struct ifnet *)ifp)); } void * if_gethandle(u_char type) { return (if_alloc(type)); } void if_bpfmtap(if_t ifh, struct mbuf *m) { struct ifnet *ifp = (struct ifnet *)ifh; BPF_MTAP(ifp, m); } void if_etherbpfmtap(if_t ifh, struct mbuf *m) { struct ifnet *ifp = (struct ifnet *)ifh; ETHER_BPF_MTAP(ifp, m); } void if_vlancap(if_t ifh) { struct ifnet *ifp = (struct ifnet *)ifh; VLAN_CAPABILITIES(ifp); } int if_sethwtsomax(if_t ifp, u_int if_hw_tsomax) { ((struct ifnet *)ifp)->if_hw_tsomax = if_hw_tsomax; return (0); } int if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount) { ((struct ifnet *)ifp)->if_hw_tsomaxsegcount = 
	    if_hw_tsomaxsegcount;
	return (0);
}

int
if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize)
{
	((struct ifnet *)ifp)->if_hw_tsomaxsegsize = if_hw_tsomaxsegsize;
	return (0);
}

u_int
if_gethwtsomax(if_t ifp)
{
	return (((struct ifnet *)ifp)->if_hw_tsomax);
}

u_int
if_gethwtsomaxsegcount(if_t ifp)
{
	return (((struct ifnet *)ifp)->if_hw_tsomaxsegcount);
}

u_int
if_gethwtsomaxsegsize(if_t ifp)
{
	return (((struct ifnet *)ifp)->if_hw_tsomaxsegsize);
}

void
if_setinitfn(if_t ifp, void (*init_fn)(void *))
{
	((struct ifnet *)ifp)->if_init = init_fn;
}

void
if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t))
{
	((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn;
}

void
if_setstartfn(if_t ifp, void (*start_fn)(if_t))
{
	((struct ifnet *)ifp)->if_start = (void *)start_fn;
}

void
if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn)
{
	((struct ifnet *)ifp)->if_transmit = start_fn;
}

void
if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn)
{
	((struct ifnet *)ifp)->if_qflush = flush_fn;
}

void
if_setgetcounterfn(if_t ifp, if_get_counter_t fn)
{
	ifp->if_get_counter = fn;
}

/* Revisit these - These are inline functions originally. */
int
drbr_inuse_drv(if_t ifh, struct buf_ring *br)
{
	return drbr_inuse(ifh, br);
}

struct mbuf*
drbr_dequeue_drv(if_t ifh, struct buf_ring *br)
{
	return drbr_dequeue(ifh, br);
}

int
drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br)
{
	return drbr_needs_enqueue(ifh, br);
}

int
drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m)
{
	return drbr_enqueue(ifh, br, m);
}

diff --git a/sys/net/if.h b/sys/net/if.h
index 974074473e1b..1e7430263fc3 100644
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -1,620 +1,620 @@
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)if.h	8.1 (Berkeley) 6/10/93
 * $FreeBSD$
 */

#ifndef _NET_IF_H_
#define	_NET_IF_H_

#include <sys/cdefs.h>

#if __BSD_VISIBLE
/*
 * <net/if.h> does not depend on <sys/time.h> on most other systems.  This
 * helps userland compatibility.  (struct timeval ifi_lastchange)
 * The same holds for <sys/socket.h>.
(struct sockaddr ifru_addr) */ #ifndef _KERNEL #include #include #endif #endif /* * Length of interface external name, including terminating '\0'. * Note: this is the same size as a generic device's external name. */ #define IF_NAMESIZE 16 #if __BSD_VISIBLE #define IFNAMSIZ IF_NAMESIZE #define IF_MAXUNIT 0x7fff /* historical value */ #endif #if __BSD_VISIBLE /* * Structure used to query names of interface cloners. */ struct if_clonereq { int ifcr_total; /* total cloners (out) */ int ifcr_count; /* room for this many in user buffer */ char *ifcr_buffer; /* buffer for cloner names */ }; /* * Structure describing information about an interface * which may be of interest to management entities. */ struct if_data { /* generic interface information */ uint8_t ifi_type; /* ethernet, tokenring, etc */ uint8_t ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */ uint8_t ifi_addrlen; /* media address length */ uint8_t ifi_hdrlen; /* media header length */ uint8_t ifi_link_state; /* current link state */ uint8_t ifi_vhid; /* carp vhid */ uint16_t ifi_datalen; /* length of this data struct */ uint32_t ifi_mtu; /* maximum transmission unit */ uint32_t ifi_metric; /* routing metric (external only) */ uint64_t ifi_baudrate; /* linespeed */ /* volatile statistics */ uint64_t ifi_ipackets; /* packets received on interface */ uint64_t ifi_ierrors; /* input errors on interface */ uint64_t ifi_opackets; /* packets sent on interface */ uint64_t ifi_oerrors; /* output errors on interface */ uint64_t ifi_collisions; /* collisions on csma interfaces */ uint64_t ifi_ibytes; /* total number of octets received */ uint64_t ifi_obytes; /* total number of octets sent */ uint64_t ifi_imcasts; /* packets received via multicast */ uint64_t ifi_omcasts; /* packets sent via multicast */ uint64_t ifi_iqdrops; /* dropped on input */ uint64_t ifi_oqdrops; /* dropped on output */ uint64_t ifi_noproto; /* destined for unsupported protocol */ uint64_t ifi_hwassist; /* HW offload capabilities, see IFCAP */ /* Unions are here to make sizes MI. */ union { /* uptime at attach or stat reset */ time_t tt; uint64_t ph; } __ifi_epoch; #define ifi_epoch __ifi_epoch.tt union { /* time of last administrative change */ struct timeval tv; struct { uint64_t ph1; uint64_t ph2; } ph; } __ifi_lastchange; #define ifi_lastchange __ifi_lastchange.tv }; /*- * Interface flags are of two types: network stack owned flags, and driver * owned flags. Historically, these values were stored in the same ifnet * flags field, but with the advent of fine-grained locking, they have been * broken out such that the network stack is responsible for synchronizing * the stack-owned fields, and the device driver the device-owned fields. * Both halves can perform lockless reads of the other half's field, subject * to accepting the involved races. * * Both sets of flags come from the same number space, and should not be * permitted to conflict, as they are exposed to user space via a single * field. * * The following symbols identify read and write requirements for fields: * * (i) if_flags field set by device driver before attach, read-only there * after. * (n) if_flags field written only by the network stack, read by either the * stack or driver. * (d) if_drv_flags field written only by the device driver, read by either * the stack or driver. 
 */
#define	IFF_UP		0x1		/* (n) interface is up */
#define	IFF_BROADCAST	0x2		/* (i) broadcast address valid */
#define	IFF_DEBUG	0x4		/* (n) turn on debugging */
#define	IFF_LOOPBACK	0x8		/* (i) is a loopback net */
#define	IFF_POINTOPOINT	0x10		/* (i) is a point-to-point link */
-#define	IFF_NEEDSEPOCH	0x20		/* (i) calls if_input w/o epoch */
+#define	IFF_KNOWSEPOCH	0x20		/* (i) calls if_input in net epoch */
#define	IFF_DRV_RUNNING	0x40		/* (d) resources allocated */
#define	IFF_NOARP	0x80		/* (n) no address resolution protocol */
#define	IFF_PROMISC	0x100		/* (n) receive all packets */
#define	IFF_ALLMULTI	0x200		/* (n) receive all multicast packets */
#define	IFF_DRV_OACTIVE	0x400		/* (d) tx hardware queue is full */
#define	IFF_SIMPLEX	0x800		/* (i) can't hear own transmissions */
#define	IFF_LINK0	0x1000		/* per link layer defined bit */
#define	IFF_LINK1	0x2000		/* per link layer defined bit */
#define	IFF_LINK2	0x4000		/* per link layer defined bit */
#define	IFF_ALTPHYS	IFF_LINK2	/* use alternate physical connection */
#define	IFF_MULTICAST	0x8000		/* (i) supports multicast */
#define	IFF_CANTCONFIG	0x10000		/* (i) unconfigurable using ioctl(2) */
#define	IFF_PPROMISC	0x20000		/* (n) user-requested promisc mode */
#define	IFF_MONITOR	0x40000		/* (n) user-requested monitor mode */
#define	IFF_STATICARP	0x80000		/* (n) static ARP */
#define	IFF_DYING	0x200000	/* (n) interface is winding down */
#define	IFF_RENAMING	0x400000	/* (n) interface is being renamed */
#define	IFF_NOGROUP	0x800000	/* (n) interface is not part of any groups */

/*
 * Old names for driver flags so that user space tools can continue to use
 * the old (portable) names.
 */
#ifndef _KERNEL
#define	IFF_RUNNING	IFF_DRV_RUNNING
#define	IFF_OACTIVE	IFF_DRV_OACTIVE
#endif

/* flags set internally only: */
#define	IFF_CANTCHANGE \
	(IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\
	    IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_PROMISC|\
-	    IFF_DYING|IFF_CANTCONFIG)
+	    IFF_DYING|IFF_CANTCONFIG|IFF_KNOWSEPOCH)

/*
 * Values for if_link_state.
 */
#define	LINK_STATE_UNKNOWN	0	/* link invalid/unknown */
#define	LINK_STATE_DOWN		1	/* link is down */
#define	LINK_STATE_UP		2	/* link is up */

/*
 * Some convenience macros used for setting ifi_baudrate.
 * XXX 1000 vs. 1024? --thorpej@netbsd.org
 */
#define	IF_Kbps(x)	((uintmax_t)(x) * 1000)	/* kilobits/sec. */
#define	IF_Mbps(x)	(IF_Kbps((x) * 1000))	/* megabits/sec. */
#define	IF_Gbps(x)	(IF_Mbps((x) * 1000))	/* gigabits/sec. */

/*
 * Capabilities that interfaces can advertise.
 *
 * struct ifnet.if_capabilities
 *   contains the optional features & capabilities a particular interface
 *   supports (not only the driver but also the detected hw revision).
 *   Capabilities are defined by IFCAP_* below.
 * struct ifnet.if_capenable
 *   contains the enabled (either by default or through ifconfig) optional
 *   features & capabilities on this interface.
 *   Capabilities are defined by IFCAP_* below.
 * struct if_data.ifi_hwassist in mbuf CSUM_ flag form, controlled by above
 *   contains the enabled optional feature & capabilites that can be used
 *   individually per packet and are specified in the mbuf pkthdr.csum_flags
 *   field.  IFCAP_* and CSUM_* do not match one to one and CSUM_* may be
 *   more detailed or differenciated than IFCAP_*.
 *   Hwassist features are defined CSUM_* in sys/mbuf.h
 *
 * Capabilities that cannot be arbitrarily changed with ifconfig/ioctl
 * are listed in IFCAP_CANTCHANGE, similar to IFF_CANTCHANGE.
* This is not strictly necessary because the common code never * changes capabilities, and it is left to the individual driver * to do the right thing. However, having the filter here * avoids replication of the same code in all individual drivers. */ #define IFCAP_RXCSUM 0x00001 /* can offload checksum on RX */ #define IFCAP_TXCSUM 0x00002 /* can offload checksum on TX */ #define IFCAP_NETCONS 0x00004 /* can be a network console */ #define IFCAP_VLAN_MTU 0x00008 /* VLAN-compatible MTU */ #define IFCAP_VLAN_HWTAGGING 0x00010 /* hardware VLAN tag support */ #define IFCAP_JUMBO_MTU 0x00020 /* 9000 byte MTU supported */ #define IFCAP_POLLING 0x00040 /* driver supports polling */ #define IFCAP_VLAN_HWCSUM 0x00080 /* can do IFCAP_HWCSUM on VLANs */ #define IFCAP_TSO4 0x00100 /* can do TCP Segmentation Offload */ #define IFCAP_TSO6 0x00200 /* can do TCP6 Segmentation Offload */ #define IFCAP_LRO 0x00400 /* can do Large Receive Offload */ #define IFCAP_WOL_UCAST 0x00800 /* wake on any unicast frame */ #define IFCAP_WOL_MCAST 0x01000 /* wake on any multicast frame */ #define IFCAP_WOL_MAGIC 0x02000 /* wake on any Magic Packet */ #define IFCAP_TOE4 0x04000 /* interface can offload TCP */ #define IFCAP_TOE6 0x08000 /* interface can offload TCP6 */ #define IFCAP_VLAN_HWFILTER 0x10000 /* interface hw can filter vlan tag */ /* available 0x20000 */ #define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */ #define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */ #define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */ #define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */ #define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */ #define IFCAP_HWSTATS 0x800000 /* manages counters internally */ #define IFCAP_TXRTLMT 0x1000000 /* hardware supports TX rate limiting */ #define IFCAP_HWRXTSTMP 0x2000000 /* hardware rx timestamping */ #define IFCAP_NOMAP 0x4000000 /* can TX unmapped mbufs */ #define IFCAP_TXTLS4 0x8000000 /* can do TLS encryption and segmentation for TCP */ #define IFCAP_TXTLS6 0x10000000 /* can do TLS encryption and segmentation for TCP6 */ #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) #define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) #define IFCAP_WOL (IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC) #define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6) #define IFCAP_TXTLS (IFCAP_TXTLS4 | IFCAP_TXTLS6) #define IFCAP_CANTCHANGE (IFCAP_NETMAP) #define IFQ_MAXLEN 50 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ /* * Message format for use in obtaining information about interfaces * from getkerninfo and the routing socket * For the new, extensible interface see struct if_msghdrl below. */ struct if_msghdr { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ u_short _ifm_spare1; struct if_data ifm_data;/* statistics and other data about if */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifm_data_off or within ifm_data. Both the if_msghdr and * if_data now have a member field detailing the struct length in addition to * the routing message length. 
Macros are provided to find the start of * ifm_data and the start of the socket address structures immediately following * struct if_msghdrl given a pointer to struct if_msghdrl. */ #define IF_MSGHDRL_IFM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifm_data_off) #define IF_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifm_len) struct if_msghdrl { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ u_short _ifm_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifm_len; /* length of if_msghdrl incl. if_data */ u_short ifm_data_off; /* offset of if_data from beginning */ int _ifm_spare2; struct if_data ifm_data;/* statistics and other data about if */ }; /* * Message format for use in obtaining information about interface addresses * from getkerninfo and the routing socket * For the new, extensible interface see struct ifa_msghdrl below. */ struct ifa_msghdr { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ u_short _ifam_spare1; int ifam_metric; /* value of ifa_ifp->if_metric */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifam_metric or within ifam_data. Both the ifa_msghdrl and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifm_data and the start of the socket address structures immediately following * struct ifa_msghdrl given a pointer to struct ifa_msghdrl. */ #define IFA_MSGHDRL_IFAM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifam_data_off) #define IFA_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifam_len) struct ifa_msghdrl { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ u_short _ifam_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifam_len; /* length of ifa_msghdrl incl. if_data */ u_short ifam_data_off; /* offset of if_data from beginning */ int ifam_metric; /* value of ifa_ifp->if_metric */ struct if_data ifam_data;/* statistics and other data about if or * address */ }; /* * Message format for use in obtaining information about multicast addresses * from the routing socket */ struct ifma_msghdr { u_short ifmam_msglen; /* to skip over non-understood messages */ u_char ifmam_version; /* future binary compatibility */ u_char ifmam_type; /* message type */ int ifmam_addrs; /* like rtm_addrs */ int ifmam_flags; /* value of ifa_flags */ u_short ifmam_index; /* index for associated ifp */ u_short _ifmam_spare1; }; /* * Message format announcing the arrival or departure of a network interface. */ struct if_announcemsghdr { u_short ifan_msglen; /* to skip over non-understood messages */ u_char ifan_version; /* future binary compatibility */ u_char ifan_type; /* message type */ u_short ifan_index; /* index for associated ifp */ char ifan_name[IFNAMSIZ]; /* if name, e.g.
"en0" */ u_short ifan_what; /* what type of announcement */ }; #define IFAN_ARRIVAL 0 /* interface arrival */ #define IFAN_DEPARTURE 1 /* interface departure */ /* * Buffer with length to be used in SIOCGIFDESCR/SIOCSIFDESCR requests */ struct ifreq_buffer { size_t length; void *buffer; }; /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq { char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; caddr_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ #define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ #ifndef _KERNEL #define ifr_buffer ifr_ifru.ifru_buffer /* user supplied buffer with its length */ #endif #define ifr_flags ifr_ifru.ifru_flags[0] /* flags (low 16 bits) */ #define ifr_flagshigh ifr_ifru.ifru_flags[1] /* flags (high 16 bits) */ #define ifr_jid ifr_ifru.ifru_jid /* jail/vnet */ #define ifr_metric ifr_ifru.ifru_metric /* metric */ #define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ #define ifr_phys ifr_ifru.ifru_phys /* physical wire */ #define ifr_media ifr_ifru.ifru_media /* physical media */ #ifndef _KERNEL #define ifr_data ifr_ifru.ifru_data /* for use by interface */ #endif #define ifr_reqcap ifr_ifru.ifru_cap[0] /* requested capabilities */ #define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ #define ifr_index ifr_ifru.ifru_index /* interface index */ #define ifr_fib ifr_ifru.ifru_fib /* interface fib */ #define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ #define ifr_lan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ ((ifr).ifr_addr.sa_len > sizeof(struct sockaddr) ? \ (sizeof(struct ifreq) - sizeof(struct sockaddr) + \ (ifr).ifr_addr.sa_len) : sizeof(struct ifreq)) struct ifaliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; int ifra_vhid; }; /* 9.x compat */ struct oifaliasreq { char ifra_name[IFNAMSIZ]; struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; }; struct ifmediareq { char ifm_name[IFNAMSIZ]; /* if name, e.g. "en0" */ int ifm_current; /* current media options */ int ifm_mask; /* don't care mask */ int ifm_status; /* media status */ int ifm_active; /* active options */ int ifm_count; /* # entries in ifm_ulist array */ int *ifm_ulist; /* media words */ }; struct ifdrv { char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ unsigned long ifd_cmd; size_t ifd_len; void *ifd_data; }; /* * Structure used to retrieve aux status data from interfaces. * Kernel suppliers to this interface should respect the formatting * needed by ifconfig(8): each line starts with a TAB and ends with * a newline. The canonical example to copy and paste is in if_tun.c. */ #define IFSTATMAX 800 /* 10 lines of text */ struct ifstat { char ifs_name[IFNAMSIZ]; /* if name, e.g. "en0" */ char ascii[IFSTATMAX + 1]; }; /* * Structure used in SIOCGIFCONF request. 
* Used to retrieve interface configuration * for machine (useful for programs which * must know all networks accessible). */ struct ifconf { int ifc_len; /* size of associated buffer */ union { caddr_t ifcu_buf; struct ifreq *ifcu_req; } ifc_ifcu; #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */ }; /* * interface groups */ #define IFG_ALL "all" /* group contains all interfaces */ /* XXX: will we implement this? */ #define IFG_EGRESS "egress" /* if(s) default route(s) point to */ struct ifg_req { union { char ifgrqu_group[IFNAMSIZ]; char ifgrqu_member[IFNAMSIZ]; } ifgrq_ifgrqu; #define ifgrq_group ifgrq_ifgrqu.ifgrqu_group #define ifgrq_member ifgrq_ifgrqu.ifgrqu_member }; /* * Used to lookup groups for an interface */ struct ifgroupreq { char ifgr_name[IFNAMSIZ]; u_int ifgr_len; union { char ifgru_group[IFNAMSIZ]; struct ifg_req *ifgru_groups; } ifgr_ifgru; #ifndef _KERNEL #define ifgr_group ifgr_ifgru.ifgru_group #define ifgr_groups ifgr_ifgru.ifgru_groups #endif }; /* * Structure used to request i2c data * from interface transceivers. */ struct ifi2creq { uint8_t dev_addr; /* i2c address (0xA0, 0xA2) */ uint8_t offset; /* read offset */ uint8_t len; /* read length */ uint8_t spare0; uint32_t spare1; uint8_t data[8]; /* read buffer */ }; /* * RSS hash. */ #define RSS_FUNC_NONE 0 /* RSS disabled */ #define RSS_FUNC_PRIVATE 1 /* non-standard */ #define RSS_FUNC_TOEPLITZ 2 #define RSS_TYPE_IPV4 0x00000001 #define RSS_TYPE_TCP_IPV4 0x00000002 #define RSS_TYPE_IPV6 0x00000004 #define RSS_TYPE_IPV6_EX 0x00000008 #define RSS_TYPE_TCP_IPV6 0x00000010 #define RSS_TYPE_TCP_IPV6_EX 0x00000020 #define RSS_TYPE_UDP_IPV4 0x00000040 #define RSS_TYPE_UDP_IPV6 0x00000080 #define RSS_TYPE_UDP_IPV6_EX 0x00000100 #define RSS_KEYLEN 128 struct ifrsskey { char ifrk_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint8_t ifrk_func; /* RSS_FUNC_ */ uint8_t ifrk_spare0; uint16_t ifrk_keylen; uint8_t ifrk_key[RSS_KEYLEN]; }; struct ifrsshash { char ifrh_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint8_t ifrh_func; /* RSS_FUNC_ */ uint8_t ifrh_spare0; uint16_t ifrh_spare1; uint32_t ifrh_types; /* RSS_TYPE_ */ }; #define IFNET_PCP_NONE 0xff /* PCP disabled */ #define IFDR_MSG_SIZE 64 #define IFDR_REASON_MSG 1 #define IFDR_REASON_VENDOR 2 struct ifdownreason { char ifdr_name[IFNAMSIZ]; uint32_t ifdr_reason; uint32_t ifdr_vendor; char ifdr_msg[IFDR_MSG_SIZE]; }; #endif /* __BSD_VISIBLE */ #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IFADDR); MALLOC_DECLARE(M_IFMADDR); #endif #endif #ifndef _KERNEL struct if_nameindex { unsigned int if_index; /* 1, 2, ... */ char *if_name; /* null terminated name: "le0", ... */ }; __BEGIN_DECLS void if_freenameindex(struct if_nameindex *); char *if_indextoname(unsigned int, char *); struct if_nameindex *if_nameindex(void); unsigned int if_nametoindex(const char *); __END_DECLS #endif #endif /* !_NET_IF_H_ */ diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 7d918216b300..6d33173462cb 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -1,1449 +1,1452 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 
* 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_netgraph.h" #include "opt_mbuf_profiling.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #endif #ifdef INET6 #include #endif #include #include #ifdef CTASSERT CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2); CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN); #endif VNET_DEFINE(pfil_head_t, link_pfil_head); /* Packet filter hooks */ /* netgraph node hooks for ng_ether(4) */ void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m); int (*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_attach_p)(struct ifnet *ifp); void (*ng_ether_detach_p)(struct ifnet *ifp); void (*vlan_input_p)(struct ifnet *, struct mbuf *); /* if_bridge(4) support */ void (*bridge_dn_p)(struct mbuf *, struct ifnet *); /* if_lagg(4) support */ struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static int ether_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); #ifdef VIMAGE static void ether_reassign(struct ifnet *, struct vnet *, char *); #endif static int ether_requestencap(struct ifnet *, struct if_encap_req *); #define senderr(e) do { error = (e); goto bad;} while (0) static void update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst) { int csum_flags = 0; if (src->m_pkthdr.csum_flags & CSUM_IP) csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID); if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA) csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); if (src->m_pkthdr.csum_flags & CSUM_SCTP) csum_flags |= CSUM_SCTP_VALID; dst->m_pkthdr.csum_flags |= csum_flags; if (csum_flags & CSUM_DATA_VALID) dst->m_pkthdr.csum_data = 0xffff; } /* * Handle link-layer encapsulation requests. 
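 *
 * Minimal sketch of a caller, for illustration only: "dst_mac" stands for
 * an already-resolved destination MAC address and "error"/"ifp" for locals
 * of the caller; the field names are the ones consumed by the handler
 * below.
 *
 *	struct if_encap_req ereq;
 *	char linkhdr[ETHER_HDR_LEN];
 *
 *	bzero(&ereq, sizeof(ereq));
 *	ereq.rtype = IFENCAP_LL;
 *	ereq.family = AF_INET;
 *	ereq.buf = linkhdr;
 *	ereq.bufsize = sizeof(linkhdr);
 *	ereq.lladdr = dst_mac;
 *	error = ifp->if_requestencap(ifp, &ereq);
 *
 * On success the prepared Ethernet header is left in linkhdr and
 * ereq.bufsize is set to its length.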
*/ static int ether_requestencap(struct ifnet *ifp, struct if_encap_req *req) { struct ether_header *eh; struct arphdr *ah; uint16_t etype; const u_char *lladdr; if (req->rtype != IFENCAP_LL) return (EOPNOTSUPP); if (req->bufsize < ETHER_HDR_LEN) return (ENOMEM); eh = (struct ether_header *)req->buf; lladdr = req->lladdr; req->lladdr_off = 0; switch (req->family) { case AF_INET: etype = htons(ETHERTYPE_IP); break; case AF_INET6: etype = htons(ETHERTYPE_IPV6); break; case AF_ARP: ah = (struct arphdr *)req->hdata; ah->ar_hrd = htons(ARPHRD_ETHER); switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: etype = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: etype = htons(ETHERTYPE_ARP); break; } if (req->flags & IFENCAP_FLAG_BROADCAST) lladdr = ifp->if_broadcastaddr; break; default: return (EAFNOSUPPORT); } memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type)); memcpy(eh->ether_dhost, lladdr, ETHER_ADDR_LEN); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); req->bufsize = sizeof(struct ether_header); return (0); } static int ether_resolve_addr(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro, u_char *phdr, uint32_t *pflags, struct llentry **plle) { struct ether_header *eh; uint32_t lleflags = 0; int error = 0; #if defined(INET) || defined(INET6) uint16_t etype; #endif if (plle) *plle = NULL; eh = (struct ether_header *)phdr; switch (dst->sa_family) { #ifdef INET case AF_INET: if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) error = arpresolve(ifp, 0, m, dst, phdr, &lleflags, plle); else { if (m->m_flags & M_BCAST) memcpy(eh->ether_dhost, ifp->if_broadcastaddr, ETHER_ADDR_LEN); else { const struct in_addr *a; a = &(((const struct sockaddr_in *)dst)->sin_addr); ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost); } etype = htons(ETHERTYPE_IP); memcpy(&eh->ether_type, &etype, sizeof(etype)); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); } break; #endif #ifdef INET6 case AF_INET6: if ((m->m_flags & M_MCAST) == 0) error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags, plle); else { const struct in6_addr *a6; a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr); ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost); etype = htons(ETHERTYPE_IPV6); memcpy(&eh->ether_type, &etype, sizeof(etype)); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); } break; #endif default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); if (m != NULL) m_freem(m); return (EAFNOSUPPORT); } if (error == EHOSTDOWN) { if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0) error = EHOSTUNREACH; } if (error != 0) return (error); *pflags = RT_MAY_LOOP; if (lleflags & LLE_IFADDR) *pflags |= RT_L2_ME; return (0); } /* * Ethernet output routine. * Encapsulate a packet of type family for the local net. * Use trailer local net encapsulation if enough data in first * packet leaves a multiple of 512 bytes of data in remainder. 
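 *
 * In outline, the function below obtains the link-layer header either from
 * ro->ro_prepend, from a cached llentry, or by resolving the destination
 * with ether_resolve_addr(); loops frames addressed to ourselves back with
 * if_simloop(); prepends the header; hands the frame to if_bridge(4),
 * carp(4) or ng_ether(4) when those are attached; and finally passes it to
 * ether_output_frame().  (The trailer encapsulation note above is
 * historical; no trailers are generated here.)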
*/ int ether_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { int error = 0; char linkhdr[ETHER_HDR_LEN], *phdr; struct ether_header *eh; struct pf_mtag *t; int loop_copy = 1; int hlen; /* link layer header length */ uint32_t pflags; struct llentry *lle = NULL; int addref = 0; phdr = NULL; pflags = 0; if (ro != NULL) { /* XXX BPF uses ro_prepend */ if (ro->ro_prepend != NULL) { phdr = ro->ro_prepend; hlen = ro->ro_plen; } else if (!(m->m_flags & (M_BCAST | M_MCAST))) { if ((ro->ro_flags & RT_LLE_CACHE) != 0) { lle = ro->ro_lle; if (lle != NULL && (lle->la_flags & LLE_VALID) == 0) { LLE_FREE(lle); lle = NULL; /* redundant */ ro->ro_lle = NULL; } if (lle == NULL) { /* if we lookup, keep cache */ addref = 1; } else /* * Notify LLE code that * the entry was used * by datapath. */ llentry_mark_used(lle); } if (lle != NULL) { phdr = lle->r_linkdata; hlen = lle->r_hdrlen; pflags = lle->r_flags; } } } #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif M_PROFILE(m); if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); if (phdr == NULL) { /* No prepend data supplied. Try to calculate ourselves. */ phdr = linkhdr; hlen = ETHER_HDR_LEN; error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags, addref ? &lle : NULL); if (addref && lle != NULL) ro->ro_lle = lle; if (error != 0) return (error == EWOULDBLOCK ? 0 : error); } if ((pflags & RT_L2_ME) != 0) { update_mbuf_csumflags(m, m); return (if_simloop(ifp, m, dst->sa_family, 0)); } loop_copy = pflags & RT_MAY_LOOP; /* * Add local net header. If no space in first mbuf, * allocate another. * * Note that we do prepend regardless of RT_HAS_HEADER flag. * This is done because BPF code shifts m_data pointer * to the end of ethernet header prior to calling if_output(). */ M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); if ((pflags & RT_HAS_HEADER) == 0) { eh = mtod(m, struct ether_header *); memcpy(eh, phdr, hlen); } /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) && ((t = pf_find_mtag(m)) == NULL || !t->routed)) { struct mbuf *n; /* * Because if_simloop() modifies the packet, we need a * writable copy through m_dup() instead of a readonly * one as m_copy[m] would give us. The alternative would * be to modify if_simloop() to handle the readonly mbuf, * but performancewise it is mostly equivalent (trading * extra data copying vs. extra locking). * * XXX This is a local workaround. A number of less * often used kernel parts suffer from the same bug. * See PR kern/105943 for a proposed general solution. */ if ((n = m_dup(m, M_NOWAIT)) != NULL) { update_mbuf_csumflags(m, n); (void)if_simloop(ifp, n, dst->sa_family, hlen); } else if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); } /* * Bridges require special output handling. 
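 * BRIDGE_OUTPUT() takes ownership of the mbuf when the interface is a
 * bridge member; whatever error it reports is simply returned.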
*/ if (ifp->if_bridge) { BRIDGE_OUTPUT(ifp, m, error); return (error); } #if defined(INET) || defined(INET6) if (ifp->if_carp && (error = (*carp_output_p)(ifp, m, dst))) goto bad; #endif /* Handle ng_ether(4) processing, if any */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_output_p != NULL, ("ng_ether_output_p is NULL")); if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) { bad: if (m != NULL) m_freem(m); return (error); } if (m == NULL) return (0); } /* Continue with link-layer output */ return ether_output_frame(ifp, m); } static bool ether_set_pcp(struct mbuf **mp, struct ifnet *ifp, uint8_t pcp) { struct ether_header *eh; eh = mtod(*mp, struct ether_header *); if (ntohs(eh->ether_type) == ETHERTYPE_VLAN || ether_8021q_frame(mp, ifp, ifp, 0, pcp)) return (true); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (false); } /* * Ethernet link layer output routine to send a raw frame to the device. * * This assumes that the 14 byte Ethernet header is present and contiguous * in the first mbuf (if BRIDGE'ing). */ int ether_output_frame(struct ifnet *ifp, struct mbuf *m) { uint8_t pcp; pcp = ifp->if_pcp; if (pcp != IFNET_PCP_NONE && ifp->if_type != IFT_L2VLAN && !ether_set_pcp(&m, ifp, pcp)) return (0); if (PFIL_HOOKED_OUT(V_link_pfil_head)) switch (pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_OUT, NULL)) { case PFIL_DROPPED: return (EACCES); case PFIL_CONSUMED: return (0); } #ifdef EXPERIMENTAL #if defined(INET6) && defined(INET) /* draft-ietf-6man-ipv6only-flag */ /* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) { struct ether_header *eh; eh = mtod(m, struct ether_header *); switch (ntohs(eh->ether_type)) { case ETHERTYPE_IP: case ETHERTYPE_ARP: case ETHERTYPE_REVARP: m_freem(m); return (EAFNOSUPPORT); /* NOTREACHED */ break; }; } #endif #endif /* * Queue message on interface, update output statistics if * successful, and start output if interface not yet active. */ return ((ifp->if_transmit)(ifp, m)); } /* * Process a received Ethernet packet; the packet is in the * mbuf chain m with the ethernet header at the front. */ static void ether_input_internal(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; u_short etype; if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } #ifdef DIAGNOSTIC if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n"); m_freem(m); return; } #endif if (m->m_len < ETHER_HDR_LEN) { /* XXX maybe should pullup? */ if_printf(ifp, "discard frame w/o leading ethernet " "header (len %u pkt len %u)\n", m->m_len, m->m_pkthdr.len); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); random_harvest_queue_ether(m, sizeof(*m)); #ifdef EXPERIMENTAL #if defined(INET6) && defined(INET) /* draft-ietf-6man-ipv6only-flag */ /* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) { switch (etype) { case ETHERTYPE_IP: case ETHERTYPE_ARP: case ETHERTYPE_REVARP: m_freem(m); return; /* NOTREACHED */ break; }; } #endif #endif CURVNET_SET_QUIET(ifp->if_vnet); if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (ETHER_IS_BROADCAST(eh->ether_dhost)) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } #ifdef MAC /* * Tag the mbuf with an appropriate MAC label before any other * consumers can get to it. 
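 * (Compiled only with "options MAC"; the label has to be attached before
 * bpf(4), lagg(4), vlan(4) or if_bridge(4) below can see the mbuf.)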
*/ mac_ifnet_create_mbuf(ifp, m); #endif /* * Give bpf a chance at the packet. */ ETHER_BPF_MTAP(ifp, m); /* * If the CRC is still on the packet, trim it off. We do this once * and once only in case we are re-entered. Nothing else on the * Ethernet receive path expects to see the FCS. */ if (m->m_flags & M_HASFCS) { m_adj(m, -ETHER_CRC_LEN); m->m_flags &= ~M_HASFCS; } if (!(ifp->if_capenable & IFCAP_HWSTATS)) if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* Allow monitor mode to claim this frame, after stats are updated. */ if (ifp->if_flags & IFF_MONITOR) { m_freem(m); CURVNET_RESTORE(); return; } /* Handle input from a lagg(4) port */ if (ifp->if_type == IFT_IEEE8023ADLAG) { KASSERT(lagg_input_p != NULL, ("%s: if_lagg not loaded!", __func__)); m = (*lagg_input_p)(ifp, m); if (m != NULL) ifp = m->m_pkthdr.rcvif; else { CURVNET_RESTORE(); return; } } /* * If the hardware did not process an 802.1Q tag, do this now, * to allow 802.1P priority frames to be passed to the main input * path correctly. * TODO: Deal with Q-in-Q frames, but not arbitrary nesting levels. */ if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_VLAN) { struct ether_vlan_header *evl; if (m->m_len < sizeof(*evl) && (m = m_pullup(m, sizeof(*evl))) == NULL) { #ifdef DIAGNOSTIC if_printf(ifp, "cannot pullup VLAN header\n"); #endif if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); CURVNET_RESTORE(); return; } evl = mtod(m, struct ether_vlan_header *); m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); m->m_flags |= M_VLANTAG; bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); eh = mtod(m, struct ether_header *); } M_SETFIB(m, ifp->if_fib); /* Allow ng_ether(4) to claim this frame. */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_input_p != NULL, ("%s: ng_ether_input_p is NULL", __func__)); m->m_flags &= ~M_PROMISC; (*ng_ether_input_p)(ifp, &m); if (m == NULL) { CURVNET_RESTORE(); return; } eh = mtod(m, struct ether_header *); } /* * Allow if_bridge(4) to claim this frame. * The BRIDGE_INPUT() macro will update ifp if the bridge changed it * and the frame should be delivered locally. */ if (ifp->if_bridge != NULL) { m->m_flags &= ~M_PROMISC; BRIDGE_INPUT(ifp, m); if (m == NULL) { CURVNET_RESTORE(); return; } eh = mtod(m, struct ether_header *); } #if defined(INET) || defined(INET6) /* * Clear M_PROMISC on frame so that carp(4) will see it when the * mbuf flows up to Layer 3. * FreeBSD's implementation of carp(4) uses the inprotosw * to dispatch IPPROTO_CARP. carp(4) also allocates its own * Ethernet addresses of the form 00:00:5e:00:01:xx, which * is outside the scope of the M_PROMISC test below. * TODO: Maintain a hash table of ethernet addresses other than * ether_dhost which may be active on this ifp. */ if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) { m->m_flags &= ~M_PROMISC; } else #endif { /* * If the frame received was not for our MAC address, set the * M_PROMISC flag on the mbuf chain. The frame may need to * be seen by the rest of the Ethernet input path in case of * re-entry (e.g. bridge, vlan, netgraph) but should not be * seen by upper protocol layers. */ if (!ETHER_IS_MULTICAST(eh->ether_dhost) && bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0) m->m_flags |= M_PROMISC; } ether_demux(ifp, m); CURVNET_RESTORE(); } /* * Ethernet input dispatch; by default, direct dispatch here regardless of * global configuration. 
However, if RSS is enabled, hook up RSS affinity * so that when deferred or hybrid dispatch is enabled, we can redistribute * load based on RSS. * * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or * not it had already done work distribution via multi-queue. Then we could * direct dispatch in the event load balancing was already complete and * handle the case of interfaces with different capabilities better. * * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions * at multiple layers? * * XXXRW: For now, enable all this only if RSS is compiled in, although it * works fine without RSS. Need to characterise the performance overhead * of the detour through the netisr code in the event the result is always * direct dispatch. */ static void ether_nh_input(struct mbuf *m) { M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: NULL interface pointer", __func__)); ether_input_internal(m->m_pkthdr.rcvif, m); } static struct netisr_handler ether_nh = { .nh_name = "ether", .nh_handler = ether_nh_input, .nh_proto = NETISR_ETHER, #ifdef RSS .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_DIRECT, .nh_m2cpuid = rss_m2cpuid, #else .nh_policy = NETISR_POLICY_SOURCE, .nh_dispatch = NETISR_DISPATCH_DIRECT, #endif }; static void ether_init(__unused void *arg) { netisr_register(ðer_nh); } SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL); static void vnet_ether_init(__unused void *arg) { struct pfil_head_args args; args.pa_version = PFIL_VERSION; args.pa_flags = PFIL_IN | PFIL_OUT; args.pa_type = PFIL_TYPE_ETHERNET; args.pa_headname = PFIL_ETHER_NAME; V_link_pfil_head = pfil_head_register(&args); #ifdef VIMAGE netisr_register_vnet(ðer_nh); #endif } VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_ether_init, NULL); #ifdef VIMAGE static void vnet_ether_pfil_destroy(__unused void *arg) { pfil_head_unregister(V_link_pfil_head); } VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY, vnet_ether_pfil_destroy, NULL); static void vnet_ether_destroy(__unused void *arg) { netisr_unregister_vnet(ðer_nh); } VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_ether_destroy, NULL); #endif static void ether_input(struct ifnet *ifp, struct mbuf *m) { struct epoch_tracker et; struct mbuf *mn; + bool needs_epoch; + + needs_epoch = !(ifp->if_flags & IFF_KNOWSEPOCH); /* * The drivers are allowed to pass in a chain of packets linked with * m_nextpkt. We split them up into separate packets here and pass * them up. This allows the drivers to amortize the receive lock. */ CURVNET_SET_QUIET(ifp->if_vnet); - if (__predict_false(ifp->if_flags & IFF_NEEDSEPOCH)) + if (__predict_false(needs_epoch)) NET_EPOCH_ENTER(et); while (m) { mn = m->m_nextpkt; m->m_nextpkt = NULL; /* * We will rely on rcvif being set properly in the deferred * context, so assert it is correct here. */ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p " "rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp)); netisr_dispatch(NETISR_ETHER, m); m = mn; } - if (__predict_false(ifp->if_flags & IFF_NEEDSEPOCH)) + if (__predict_false(needs_epoch)) NET_EPOCH_EXIT(et); CURVNET_RESTORE(); } /* * Upper layer processing for a received Ethernet packet. 
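 *
 * ether_demux() runs the link-layer pfil(9) input hooks (skipping
 * M_PROMISC frames), hands 802.1Q-tagged frames to vlan(4), drops
 * promiscuously received frames unless IFF_PPROMISC is set, strips the
 * Ethernet header and dispatches the payload to the IP, ARP or IPv6
 * netisr; unrecognized ethertypes are offered to ng_ether(4) or freed.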
*/ void ether_demux(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; int i, isr; u_short ether_type; NET_EPOCH_ASSERT(); KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__)); /* Do not grab PROMISC frames in case we are re-entered. */ if (PFIL_HOOKED_IN(V_link_pfil_head) && !(m->m_flags & M_PROMISC)) { i = pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_IN, NULL); if (i != 0 || m == NULL) return; } eh = mtod(m, struct ether_header *); ether_type = ntohs(eh->ether_type); /* * If this frame has a VLAN tag other than 0, call vlan_input() * if its module is loaded. Otherwise, drop. */ if ((m->m_flags & M_VLANTAG) && EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) { if (ifp->if_vlantrunk == NULL) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); m_freem(m); return; } KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!", __func__)); /* Clear before possibly re-entering ether_input(). */ m->m_flags &= ~M_PROMISC; (*vlan_input_p)(ifp, m); return; } /* * Pass promiscuously received frames to the upper layer if the user * requested this by setting IFF_PPROMISC. Otherwise, drop them. */ if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) { m_freem(m); return; } /* * Reset layer specific mbuf flags to avoid confusing upper layers. * Strip off Ethernet header. */ m->m_flags &= ~M_VLANTAG; m_clrprotoflags(m); m_adj(m, ETHER_HDR_LEN); /* * Dispatch frame to upper layer. */ switch (ether_type) { #ifdef INET case ETHERTYPE_IP: isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } isr = NETISR_ARP; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif default: goto discard; } netisr_dispatch(isr, m); return; discard: /* * Packet is to be discarded. If netgraph is present, * hand the packet to it for last chance processing; * otherwise dispose of it. */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_input_orphan_p != NULL, ("ng_ether_input_orphan_p is NULL")); /* * Put back the ethernet header so netgraph has a * consistent view of inbound packets. */ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); (*ng_ether_input_orphan_p)(ifp, m); return; } m_freem(m); } /* * Convert Ethernet address to printable (loggable) representation. * This routine is for compatibility; it's better to just use * * printf("%6D", , ":"); * * since there's no static buffer involved. 
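 *
 * Illustrative call only ("eaddr" stands for any six-byte address; note
 * that the returned buffer is static and is overwritten by the next call):
 *
 *	if_printf(ifp, "unknown station %s\n", ether_sprintf(eaddr));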
*/ char * ether_sprintf(const u_char *ap) { static char etherbuf[18]; snprintf(etherbuf, sizeof (etherbuf), "%6D", ap, ":"); return (etherbuf); } /* * Perform common duties while attaching to interface list */ void ether_ifattach(struct ifnet *ifp, const u_int8_t *lla) { int i; struct ifaddr *ifa; struct sockaddr_dl *sdl; ifp->if_addrlen = ETHER_ADDR_LEN; ifp->if_hdrlen = ETHER_HDR_LEN; if_attach(ifp); ifp->if_mtu = ETHERMTU; ifp->if_output = ether_output; ifp->if_input = ether_input; ifp->if_resolvemulti = ether_resolvemulti; ifp->if_requestencap = ether_requestencap; #ifdef VIMAGE ifp->if_reassign = ether_reassign; #endif if (ifp->if_baudrate == 0) ifp->if_baudrate = IF_Mbps(10); /* just a default */ ifp->if_broadcastaddr = etherbroadcastaddr; ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_ETHER; sdl->sdl_alen = ifp->if_addrlen; bcopy(lla, LLADDR(sdl), ifp->if_addrlen); if (ifp->if_hw_addr != NULL) bcopy(lla, ifp->if_hw_addr, ifp->if_addrlen); bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN); if (ng_ether_attach_p != NULL) (*ng_ether_attach_p)(ifp); /* Announce Ethernet MAC address if non-zero. */ for (i = 0; i < ifp->if_addrlen; i++) if (lla[i] != 0) break; if (i != ifp->if_addrlen) if_printf(ifp, "Ethernet address: %6D\n", lla, ":"); uuid_ether_add(LLADDR(sdl)); /* Add necessary bits are setup; announce it now. */ EVENTHANDLER_INVOKE(ether_ifattach_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("ETHERNET", ifp->if_xname, "IFATTACH", NULL); } /* * Perform common duties while detaching an Ethernet interface */ void ether_ifdetach(struct ifnet *ifp) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr); uuid_ether_del(LLADDR(sdl)); if (ifp->if_l2com != NULL) { KASSERT(ng_ether_detach_p != NULL, ("ng_ether_detach_p is NULL")); (*ng_ether_detach_p)(ifp); } bpfdetach(ifp); if_detach(ifp); } #ifdef VIMAGE void ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused) { if (ifp->if_l2com != NULL) { KASSERT(ng_ether_detach_p != NULL, ("ng_ether_detach_p is NULL")); (*ng_ether_detach_p)(ifp); } if (ng_ether_attach_p != NULL) { CURVNET_SET_QUIET(new_vnet); (*ng_ether_attach_p)(ifp); CURVNET_RESTORE(); } } #endif SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet"); #if 0 /* * This is for reference. We have a table-driven version * of the little-endian crc32 generator, which is faster * than the double-loop. 
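 *
 * The compiled version in the #else branch consumes the data four bits at
 * a time: crctab[n] is the CRC-32 of the nibble n under the reflected
 * polynomial 0xedb88320 (ETHER_CRC_POLY_LE), so each input byte is folded
 * with two "(crc >> 4) ^ crctab[crc & 0xf]" steps.  The reference
 * double-loop kept here is never compiled (as written it does not even
 * declare "carry").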
*/ uint32_t ether_crc32_le(const uint8_t *buf, size_t len) { size_t i; uint32_t crc; int bit; uint8_t data; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) { carry = (crc ^ data) & 1; crc >>= 1; if (carry) crc = (crc ^ ETHER_CRC_POLY_LE); } } return (crc); } #else uint32_t ether_crc32_le(const uint8_t *buf, size_t len) { static const uint32_t crctab[] = { 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; size_t i; uint32_t crc; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { crc ^= buf[i]; crc = (crc >> 4) ^ crctab[crc & 0xf]; crc = (crc >> 4) ^ crctab[crc & 0xf]; } return (crc); } #endif uint32_t ether_crc32_be(const uint8_t *buf, size_t len) { size_t i; uint32_t crc, carry; int bit; uint8_t data; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) { carry = ((crc & 0x80000000) ? 1 : 0) ^ (data & 0x01); crc <<= 1; if (carry) crc = (crc ^ ETHER_CRC_POLY_BE) | carry; } } return (crc); } int ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0], ETHER_ADDR_LEN); break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > ETHERMTU) { error = EINVAL; } else { ifp->if_mtu = ifr->ifr_mtu; } break; case SIOCSLANPCP: error = priv_check(curthread, PRIV_NET_SETLANPCP); if (error != 0) break; if (ifr->ifr_lan_pcp > 7 && ifr->ifr_lan_pcp != IFNET_PCP_NONE) { error = EINVAL; } else { ifp->if_pcp = ifr->ifr_lan_pcp; /* broadcast event about PCP change */ EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP); } break; case SIOCGLANPCP: ifr->ifr_lan_pcp = ifp->if_pcp; break; default: error = EINVAL; /* XXX netbsd has ENOTTY??? */ break; } return (error); } static int ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if (!ETHER_IS_MULTICAST(e_addr)) return EADDRNOTAVAIL; *llsa = NULL; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ETHER_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) 
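 * No link-level address is synthesized in that case: the code below just
 * sets IFF_ALLMULTI on the interface and returns *llsa as NULL.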
*/ ifp->if_flags |= IFF_ALLMULTI; *llsa = NULL; return 0; } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ETHER_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return EAFNOSUPPORT; } } static moduledata_t ether_mod = { .name = "ether", }; void ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen) { struct ether_vlan_header vlan; struct mbuf mv, mb; KASSERT((m->m_flags & M_VLANTAG) != 0, ("%s: vlan information not present", __func__)); KASSERT(m->m_len >= sizeof(struct ether_header), ("%s: mbuf not large enough for header", __func__)); bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header)); vlan.evl_proto = vlan.evl_encap_proto; vlan.evl_encap_proto = htons(ETHERTYPE_VLAN); vlan.evl_tag = htons(m->m_pkthdr.ether_vtag); m->m_len -= sizeof(struct ether_header); m->m_data += sizeof(struct ether_header); /* * If a data link has been supplied by the caller, then we will need to * re-create a stack allocated mbuf chain with the following structure: * * (1) mbuf #1 will contain the supplied data link * (2) mbuf #2 will contain the vlan header * (3) mbuf #3 will contain the original mbuf's packet data * * Otherwise, submit the packet and vlan header via bpf_mtap2(). */ if (data != NULL) { mv.m_next = m; mv.m_data = (caddr_t)&vlan; mv.m_len = sizeof(vlan); mb.m_next = &mv; mb.m_data = data; mb.m_len = dlen; bpf_mtap(bp, &mb); } else bpf_mtap2(bp, &vlan, sizeof(vlan), m); m->m_len += sizeof(struct ether_header); m->m_data -= sizeof(struct ether_header); } struct mbuf * ether_vlanencap(struct mbuf *m, uint16_t tag) { struct ether_vlan_header *evl; M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); if (m == NULL) return (NULL); /* M_PREPEND takes care of m_len, m_pkthdr.len for us */ if (m->m_len < sizeof(*evl)) { m = m_pullup(m, sizeof(*evl)); if (m == NULL) return (NULL); } /* * Transform the Ethernet header into an Ethernet header * with 802.1Q encapsulation. */ evl = mtod(m, struct ether_vlan_header *); bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); evl->evl_encap_proto = htons(ETHERTYPE_VLAN); evl->evl_tag = htons(tag); return (m); } static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, "IEEE 802.1Q VLAN"); static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency"); VNET_DEFINE_STATIC(int, soft_pad); #define V_soft_pad VNET(soft_pad) SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(soft_pad), 0, "pad short frames before tagging"); /* * For now, make preserving PCP via an mbuf tag optional, as it increases * per-packet memory allocations and frees. In the future, it would be * preferable to reuse ether_vtag for this, or similar. */ int vlan_mtag_pcp = 0; SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW, &vlan_mtag_pcp, 0, "Retain VLAN PCP information as packets are passed up the stack"); bool ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p, uint16_t vid, uint8_t pcp) { struct m_tag *mtag; int n; uint16_t tag; static const char pad[8]; /* just zeros */ /* * Pad the frame to the minimum size allowed if told to. * This option is in accord with IEEE Std 802.1Q, 2003 Ed., * paragraph C.4.4.3.b. 
It can help to work around buggy * bridges that violate paragraph C.4.4.3.a from the same * document, i.e., fail to pad short frames after untagging. * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but * untagging it will produce a 62-byte frame, which is a runt * and requires padding. There are VLAN-enabled network * devices that just discard such runts instead or mishandle * them somehow. */ if (V_soft_pad && p->if_type == IFT_ETHER) { for (n = ETHERMIN + ETHER_HDR_LEN - (*mp)->m_pkthdr.len; n > 0; n -= sizeof(pad)) { if (!m_append(*mp, min(n, sizeof(pad)), pad)) break; } if (n > 0) { m_freem(*mp); *mp = NULL; if_printf(ife, "cannot pad short frame"); return (false); } } /* * If underlying interface can do VLAN tag insertion itself, * just pass the packet along. However, we need some way to * tell the interface where the packet came from so that it * knows how to find the VLAN tag to use, so we attach a * packet tag that holds it. */ if (vlan_mtag_pcp && (mtag = m_tag_locate(*mp, MTAG_8021Q, MTAG_8021Q_PCP_OUT, NULL)) != NULL) tag = EVL_MAKETAG(vid, *(uint8_t *)(mtag + 1), 0); else tag = EVL_MAKETAG(vid, pcp, 0); if (p->if_capenable & IFCAP_VLAN_HWTAGGING) { (*mp)->m_pkthdr.ether_vtag = tag; (*mp)->m_flags |= M_VLANTAG; } else { *mp = ether_vlanencap(*mp, tag); if (*mp == NULL) { if_printf(ife, "unable to prepend 802.1Q header"); return (false); } } return (true); } /* * Allocate an address from the FreeBSD Foundation OUI. This uses a * cryptographic hash function on the containing jail's UUID and the interface * name to attempt to provide a unique but stable address. Pseudo-interfaces * which require a MAC address should use this function to allocate * non-locally-administered addresses. */ void ether_gen_addr(struct ifnet *ifp, struct ether_addr *hwaddr) { #define ETHER_GEN_ADDR_BUFSIZ HOSTUUIDLEN + IFNAMSIZ + 2 SHA1_CTX ctx; char buf[ETHER_GEN_ADDR_BUFSIZ]; char uuid[HOSTUUIDLEN + 1]; uint64_t addr; int i, sz; char digest[SHA1_RESULTLEN]; getcredhostuuid(curthread->td_ucred, uuid, sizeof(uuid)); sz = snprintf(buf, ETHER_GEN_ADDR_BUFSIZ, "%s-%s", uuid, ifp->if_xname); SHA1Init(&ctx); SHA1Update(&ctx, buf, sz); SHA1Final(digest, &ctx); addr = ((digest[0] << 16) | (digest[1] << 8) | digest[2]) & OUI_FREEBSD_GENERATED_MASK; addr = OUI_FREEBSD(addr); for (i = 0; i < ETHER_ADDR_LEN; ++i) { hwaddr->octet[i] = addr >> ((ETHER_ADDR_LEN - i - 1) * 8) & 0xFF; } } DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(ether, 1); diff --git a/sys/net/iflib.c b/sys/net/iflib.c index d06fbf6ec01b..95fbdbf7c446 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -1,6863 +1,6864 @@ /*- * Copyright (c) 2014-2018, Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_acpi.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #ifdef PCI_IOV #include #endif #include /* * enable accounting of every mbuf as it comes in to and goes out of * iflib's software descriptor references */ #define MEMORY_LOGGING 0 /* * Enable mbuf vectors for compressing long mbuf chains */ /* * NB: * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead * we prefetch needs to be determined by the time spent in m_free vis a vis * the cost of a prefetch. This will of course vary based on the workload: * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which * is quite expensive, thus suggesting very little prefetch. * - small packet forwarding which is just returning a single mbuf to * UMA will typically be very fast vis a vis the cost of a memory * access. 
*/ /* * File organization: * - private structures * - iflib private utility functions * - ifnet functions * - vlan registry and other exported functions * - iflib public core functions * * */ MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); #define IFLIB_RXEOF_MORE (1U << 0) #define IFLIB_RXEOF_EMPTY (2U << 0) struct iflib_txq; typedef struct iflib_txq *iflib_txq_t; struct iflib_rxq; typedef struct iflib_rxq *iflib_rxq_t; struct iflib_fl; typedef struct iflib_fl *iflib_fl_t; struct iflib_ctx; static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); static void iflib_timer(void *arg); typedef struct iflib_filter_info { driver_filter_t *ifi_filter; void *ifi_filter_arg; struct grouptask *ifi_task; void *ifi_ctx; } *iflib_filter_info_t; struct iflib_ctx { KOBJ_FIELDS; /* * Pointer to hardware driver's softc */ void *ifc_softc; device_t ifc_dev; if_t ifc_ifp; cpuset_t ifc_cpus; if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; struct sx ifc_ctx_sx; struct mtx ifc_state_mtx; iflib_txq_t ifc_txqs; iflib_rxq_t ifc_rxqs; uint32_t ifc_if_flags; uint32_t ifc_flags; uint32_t ifc_max_fl_buf_size; uint32_t ifc_rx_mbuf_sz; int ifc_link_state; int ifc_watchdog_events; struct cdev *ifc_led_dev; struct resource *ifc_msix_mem; struct if_irq ifc_legacy_irq; struct grouptask ifc_admin_task; struct grouptask ifc_vflr_task; struct iflib_filter_info ifc_filter_info; struct ifmedia ifc_media; struct ifmedia *ifc_mediap; struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; uint16_t ifc_sysctl_rx_budget; uint16_t ifc_sysctl_tx_abdicate; uint16_t ifc_sysctl_core_offset; #define CORE_OFFSET_UNSPECIFIED 0xffff uint8_t ifc_sysctl_separate_txrx; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; struct if_txrx ifc_txrx; #define isc_txd_encap ifc_txrx.ift_txd_encap #define isc_txd_flush ifc_txrx.ift_txd_flush #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update #define isc_rxd_available ifc_txrx.ift_rxd_available #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_flush ifc_txrx.ift_rxd_flush #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_legacy_intr ifc_txrx.ift_legacy_intr eventhandler_tag ifc_vlan_attach_event; eventhandler_tag ifc_vlan_detach_event; struct ether_addr ifc_mac; }; void * iflib_get_softc(if_ctx_t ctx) { return (ctx->ifc_softc); } device_t iflib_get_dev(if_ctx_t ctx) { return (ctx->ifc_dev); } if_t iflib_get_ifp(if_ctx_t ctx) { return (ctx->ifc_ifp); } struct ifmedia * iflib_get_media(if_ctx_t ctx) { return (ctx->ifc_mediap); } uint32_t iflib_get_flags(if_ctx_t ctx) { return (ctx->ifc_flags); } void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) { bcopy(mac, ctx->ifc_mac.octet, ETHER_ADDR_LEN); } if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx) { return (&ctx->ifc_softc_ctx); } if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx) { return (ctx->ifc_sctx); } #define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2) #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) #define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & (CACHE_LINE_SIZE-1))) #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) typedef struct iflib_sw_rx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ caddr_t *ifsd_cl; /* direct 
cluster pointer for rx */ bus_addr_t *ifsd_ba; /* bus addr of cluster for rx */ } iflib_rxsd_array_t; typedef struct iflib_sw_tx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ bus_dmamap_t *ifsd_tso_map; /* bus_dma maps for TSO packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ } if_txsd_vec_t; /* magic number that should be high enough for any hardware */ #define IFLIB_MAX_TX_SEGS 128 #define IFLIB_RX_COPY_THRESH 128 #define IFLIB_MAX_RX_REFRESH 32 /* The minimum descriptors per second before we start coalescing */ #define IFLIB_MIN_DESC_SEC 16384 #define IFLIB_DEFAULT_TX_UPDATE_FREQ 16 #define IFLIB_QUEUE_IDLE 0 #define IFLIB_QUEUE_HUNG 1 #define IFLIB_QUEUE_WORKING 2 /* maximum number of txqs that can share an rx interrupt */ #define IFLIB_MAX_TX_SHARED_INTR 4 /* this should really scale with ring size - this is a fairly arbitrary value */ #define TX_BATCH_SIZE 32 #define IFLIB_RESTART_BUDGET 8 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) struct iflib_txq { qidx_t ift_in_use; qidx_t ift_cidx; qidx_t ift_cidx_processed; qidx_t ift_pidx; uint8_t ift_gen; uint8_t ift_br_offset; uint16_t ift_npending; uint16_t ift_db_pending; uint16_t ift_rs_pending; /* implicit pad */ uint8_t ift_txd_size[8]; uint64_t ift_processed; uint64_t ift_cleaned; uint64_t ift_cleaned_prev; #if MEMORY_LOGGING uint64_t ift_enqueued; uint64_t ift_dequeued; #endif uint64_t ift_no_tx_dma_setup; uint64_t ift_no_desc_avail; uint64_t ift_mbuf_defrag_failed; uint64_t ift_mbuf_defrag; uint64_t ift_map_failed; uint64_t ift_txd_encap_efbig; uint64_t ift_pullups; uint64_t ift_last_timer_tick; struct mtx ift_mtx; struct mtx ift_db_mtx; /* constant values */ if_ctx_t ift_ctx; struct ifmp_ring *ift_br; struct grouptask ift_task; qidx_t ift_size; uint16_t ift_id; struct callout ift_timer; if_txsd_vec_t ift_sds; uint8_t ift_qstatus; uint8_t ift_closed; uint8_t ift_update_freq; struct iflib_filter_info ift_filter_info; bus_dma_tag_t ift_buf_tag; bus_dma_tag_t ift_tso_buf_tag; iflib_dma_info_t ift_ifdi; #define MTX_NAME_LEN 16 char ift_mtx_name[MTX_NAME_LEN]; bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ift_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); struct iflib_fl { qidx_t ifl_cidx; qidx_t ifl_pidx; qidx_t ifl_credits; uint8_t ifl_gen; uint8_t ifl_rxd_size; #if MEMORY_LOGGING uint64_t ifl_m_enqueued; uint64_t ifl_m_dequeued; uint64_t ifl_cl_enqueued; uint64_t ifl_cl_dequeued; #endif /* implicit pad */ bitstr_t *ifl_rx_bitmap; qidx_t ifl_fragidx; /* constant */ qidx_t ifl_size; uint16_t ifl_buf_size; uint16_t ifl_cltype; uma_zone_t ifl_zone; iflib_rxsd_array_t ifl_sds; iflib_rxq_t ifl_rxq; uint8_t ifl_id; bus_dma_tag_t ifl_buf_tag; iflib_dma_info_t ifl_ifdi; uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE); caddr_t ifl_vm_addrs[IFLIB_MAX_RX_REFRESH]; qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH]; } __aligned(CACHE_LINE_SIZE); static inline qidx_t get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen) { qidx_t used; if (pidx > cidx) used = pidx - cidx; else if (pidx < cidx) used = size - cidx + pidx; else if (gen == 0 && pidx == cidx) used = 0; else if (gen == 1 && pidx == cidx) used = size; else panic("bad state"); return (used); } #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen)) #define IDXDIFF(head, tail, wrap) \ ((head) >= (tail) ? 
(head) - (tail) : (wrap) - (tail) + (head)) struct iflib_rxq { if_ctx_t ifr_ctx; iflib_fl_t ifr_fl; uint64_t ifr_rx_irq; struct pfil_head *pfil; /* * If there is a separate completion queue (IFLIB_HAS_RXCQ), this is * the command queue consumer index. Otherwise it's unused. */ qidx_t ifr_cq_cidx; uint16_t ifr_id; uint8_t ifr_nfl; uint8_t ifr_ntxqirq; uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR]; uint8_t ifr_fl_offset; struct lro_ctrl ifr_lc; struct grouptask ifr_task; struct callout ifr_watchdog; struct iflib_filter_info ifr_filter_info; iflib_dma_info_t ifr_ifdi; /* dynamically allocate if any drivers need a value substantially larger than this */ struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ifr_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); typedef struct if_rxsd { caddr_t *ifsd_cl; iflib_fl_t ifsd_fl; qidx_t ifsd_cidx; } *if_rxsd_t; /* multiple of word size */ #ifdef __LP64__ #define PKT_INFO_SIZE 6 #define RXD_INFO_SIZE 5 #define PKT_TYPE uint64_t #else #define PKT_INFO_SIZE 11 #define RXD_INFO_SIZE 8 #define PKT_TYPE uint32_t #endif #define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3) #define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4) typedef struct if_pkt_info_pad { PKT_TYPE pkt_val[PKT_INFO_SIZE]; } *if_pkt_info_pad_t; typedef struct if_rxd_info_pad { PKT_TYPE rxd_val[RXD_INFO_SIZE]; } *if_rxd_info_pad_t; CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info)); CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info)); static inline void pkt_info_zero(if_pkt_info_t pi) { if_pkt_info_pad_t pi_pad; pi_pad = (if_pkt_info_pad_t)pi; pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; #ifndef __LP64__ pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; #endif } static device_method_t iflib_pseudo_methods[] = { DEVMETHOD(device_attach, noop_attach), DEVMETHOD(device_detach, iflib_pseudo_detach), DEVMETHOD_END }; driver_t iflib_pseudodriver = { "iflib_pseudo", iflib_pseudo_methods, sizeof(struct iflib_ctx), }; static inline void rxd_info_zero(if_rxd_info_t ri) { if_rxd_info_pad_t ri_pad; int i; ri_pad = (if_rxd_info_pad_t)ri; for (i = 0; i < RXD_LOOP_BOUND; i += 4) { ri_pad->rxd_val[i] = 0; ri_pad->rxd_val[i+1] = 0; ri_pad->rxd_val[i+2] = 0; ri_pad->rxd_val[i+3] = 0; } #ifdef __LP64__ ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0; #endif } /* * Only allow a single packet to take up most 1/nth of the tx ring */ #define MAX_SINGLE_PACKET_FRACTION 12 #define IF_BAD_DMA (bus_addr_t)-1 #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) #define CTX_LOCK_INIT(_sc) sx_init(&(_sc)->ifc_ctx_sx, "iflib ctx lock") #define CTX_LOCK(ctx) sx_xlock(&(ctx)->ifc_ctx_sx) #define CTX_UNLOCK(ctx) sx_xunlock(&(ctx)->ifc_ctx_sx) #define CTX_LOCK_DESTROY(ctx) sx_destroy(&(ctx)->ifc_ctx_sx) #define STATE_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_state_mtx, _name, "iflib state lock", MTX_DEF) #define STATE_LOCK(ctx) mtx_lock(&(ctx)->ifc_state_mtx) #define STATE_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_state_mtx) #define STATE_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_state_mtx) #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) void iflib_set_detach(if_ctx_t ctx) { STATE_LOCK(ctx); ctx->ifc_flags |= IFC_IN_DETACH; STATE_UNLOCK(ctx); } /* Our boot-time initialization hook */ static int 
iflib_module_event_handler(module_t, int, void *); static moduledata_t iflib_moduledata = { "iflib", iflib_module_event_handler, NULL }; DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(iflib, 1); MODULE_DEPEND(iflib, pci, 1, 1, 1); MODULE_DEPEND(iflib, ether, 1, 1, 1); TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); TASKQGROUP_DEFINE(if_config_tqg, 1, 1); #ifndef IFLIB_DEBUG_COUNTERS #ifdef INVARIANTS #define IFLIB_DEBUG_COUNTERS 1 #else #define IFLIB_DEBUG_COUNTERS 0 #endif /* !INVARIANTS */ #endif static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, "iflib driver parameters"); /* * XXX need to ensure that this can't accidentally cause the head to be moved backwards */ static int iflib_min_tx_latency = 0; SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput"); static int iflib_no_tx_batch = 0; SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW, &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput"); #if IFLIB_DEBUG_COUNTERS static int iflib_tx_seen; static int iflib_tx_sent; static int iflib_tx_encap; static int iflib_rx_allocs; static int iflib_fl_refills; static int iflib_fl_refills_large; static int iflib_tx_frees; SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, &iflib_tx_seen, 0, "# TX mbufs seen"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, &iflib_tx_sent, 0, "# TX mbufs sent"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, &iflib_tx_encap, 0, "# TX mbufs encapped"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, &iflib_tx_frees, 0, "# TX frees"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, &iflib_rx_allocs, 0, "# RX allocations"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, &iflib_fl_refills, 0, "# refills"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, &iflib_fl_refills_large, 0, "# large refills"); static int iflib_txq_drain_flushing; static int iflib_txq_drain_oactive; static int iflib_txq_drain_notready; SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD, &iflib_txq_drain_flushing, 0, "# drain flushes"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD, &iflib_txq_drain_oactive, 0, "# drain oactives"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD, &iflib_txq_drain_notready, 0, "# drain notready"); static int iflib_encap_load_mbuf_fail; static int iflib_encap_pad_mbuf_fail; static int iflib_encap_txq_avail_fail; static int iflib_encap_txd_encap_fail; SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD, &iflib_encap_load_mbuf_fail, 0, "# busdma load failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_pad_mbuf_fail, CTLFLAG_RD, &iflib_encap_pad_mbuf_fail, 0, "# runt frame pad failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD, &iflib_encap_txq_avail_fail, 0, "# txq avail failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD, &iflib_encap_txd_encap_fail, 0, "# driver encap failures"); static int iflib_task_fn_rxs; static int iflib_rx_intr_enables; static int iflib_fast_intrs; static int iflib_rx_unavail; static int iflib_rx_ctx_inactive; static int iflib_rx_if_input; static int iflib_rxd_flush; static int iflib_verbose_debug; SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, &iflib_rx_intr_enables, 0, "# RX intr 
enables"); SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, &iflib_fast_intrs, 0, "# fast_intr calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, &iflib_rx_unavail, 0, "# times rxeof called with no available data"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, &iflib_rx_if_input, 0, "# times rxeof called if_input"); SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, &iflib_rxd_flush, 0, "# times rxd_flush called"); SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, &iflib_verbose_debug, 0, "enable verbose debugging"); #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1) static void iflib_debug_reset(void) { iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs = iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees = iflib_txq_drain_flushing = iflib_txq_drain_oactive = iflib_txq_drain_notready = iflib_encap_load_mbuf_fail = iflib_encap_pad_mbuf_fail = iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail = iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs = iflib_rx_unavail = iflib_rx_ctx_inactive = iflib_rx_if_input = iflib_rxd_flush = 0; } #else #define DBG_COUNTER_INC(name) static void iflib_debug_reset(void) {} #endif #define IFLIB_DEBUG 0 static void iflib_tx_structures_free(if_ctx_t ctx); static void iflib_rx_structures_free(if_ctx_t ctx); static int iflib_queues_alloc(if_ctx_t ctx); static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget); static int iflib_qset_structures_setup(if_ctx_t ctx); static int iflib_msix_init(if_ctx_t ctx); static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, const char *str); static void iflib_txq_check_drain(iflib_txq_t txq, int budget); static uint32_t iflib_txq_can_drain(struct ifmp_ring *); #ifdef ALTQ static void iflib_altq_if_start(if_t ifp); static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m); #endif static int iflib_register(if_ctx_t); static void iflib_deregister(if_ctx_t); static void iflib_unregister_vlan_handlers(if_ctx_t ctx); static void iflib_init_locked(if_ctx_t ctx); static void iflib_add_device_sysctl_pre(if_ctx_t ctx); static void iflib_add_device_sysctl_post(if_ctx_t ctx); static void iflib_ifmp_purge(iflib_txq_t txq); static void _iflib_pre_assert(if_softc_ctx_t scctx); static void iflib_if_init_locked(if_ctx_t ctx); static void iflib_free_intr_mem(if_ctx_t ctx); #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m); #endif static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets = SLIST_HEAD_INITIALIZER(cpu_offsets); struct cpu_offset { SLIST_ENTRY(cpu_offset) entries; cpuset_t set; unsigned int refcount; uint16_t offset; }; static struct mtx cpu_offset_mtx; MTX_SYSINIT(iflib_cpu_offset, &cpu_offset_mtx, "iflib_cpu_offset lock", MTX_DEF); DEBUGNET_DEFINE(iflib); #ifdef DEV_NETMAP #include #include #include MODULE_DEPEND(iflib, netmap, 1, 1, 1); static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init); /* * device-specific sysctl variables: * * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. * During regular operations the CRC is stripped, but on some * hardware reception of frames not multiple of 64 is slower, * so using crcstrip=0 helps in benchmarks. 
* * iflib_rx_miss, iflib_rx_miss_bufs: * count packets that might be missed due to lost interrupts. */ SYSCTL_DECL(_dev_netmap); /* * The xl driver by default strips CRCs and we do not override it. */ int iflib_crcstrip = 1; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on RX frames"); int iflib_rx_miss, iflib_rx_miss_bufs; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed RX intr"); SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed RX intr bufs"); /* * Register/unregister. We are already under netmap lock. * Only called on the first register or the last unregister. */ static int iflib_netmap_register(struct netmap_adapter *na, int onoff) { if_t ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; int status; CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if (!CTX_IS_VF(ctx)) IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); /* enable or disable flags and callbacks in na and ifp */ if (onoff) { nm_set_native_flags(na); } else { nm_clear_native_flags(na); } iflib_stop(ctx); iflib_init_locked(ctx); IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ? status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1; if (status) nm_clear_native_flags(na); CTX_UNLOCK(ctx); return (status); } static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init) { struct netmap_adapter *na = kring->na; u_int const lim = kring->nkr_num_slots - 1; u_int head = kring->rhead; struct netmap_ring *ring = kring->ring; bus_dmamap_t *map; struct if_rxd_update iru; if_ctx_t ctx = rxq->ifr_ctx; iflib_fl_t fl = &rxq->ifr_fl[0]; uint32_t refill_pidx, nic_i; #if IFLIB_DEBUG_COUNTERS int rf_count = 0; #endif if (nm_i == head && __predict_true(!init)) return 0; iru_init(&iru, rxq, 0 /* flid */); map = fl->ifl_sds.ifsd_map; refill_pidx = netmap_idx_k2n(kring, nm_i); /* * IMPORTANT: we must leave one free slot in the ring, * so move head back by one unit */ head = nm_prev(head, lim); nic_i = UINT_MAX; DBG_COUNTER_INC(fl_refills); while (nm_i != head) { #if IFLIB_DEBUG_COUNTERS if (++rf_count == 9) DBG_COUNTER_INC(fl_refills_large); #endif for (int tmp_pidx = 0; tmp_pidx < IFLIB_MAX_RX_REFRESH && nm_i != head; tmp_pidx++) { struct netmap_slot *slot = &ring->slot[nm_i]; void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]); uint32_t nic_i_dma = refill_pidx; nic_i = netmap_idx_k2n(kring, nm_i); MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH); if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ return netmap_ring_reinit(kring); fl->ifl_vm_addrs[tmp_pidx] = addr; if (__predict_false(init)) { netmap_load_map(na, fl->ifl_buf_tag, map[nic_i], addr); } else if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, fl->ifl_buf_tag, map[nic_i], addr); } slot->flags &= ~NS_BUF_CHANGED; nm_i = nm_next(nm_i, lim); fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim); if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1) continue; iru.iru_pidx = refill_pidx; iru.iru_count = tmp_pidx+1; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); refill_pidx = nic_i; for (int n = 0; n < iru.iru_count; n++) { bus_dmamap_sync(fl->ifl_buf_tag, map[nic_i_dma], BUS_DMASYNC_PREREAD); /* XXX - change this to not use the netmap func*/ nic_i_dma = nm_next(nic_i_dma, lim); } } } kring->nr_hwcur = head; bus_dmamap_sync(fl->ifl_ifdi->idi_tag, 
fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (__predict_true(nic_i != UINT_MAX)) { ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); DBG_COUNTER_INC(rxd_flush); } return (0); } /* * Reconcile kernel and user view of the transmit ring. * * All information is in the kring. * Userspace wants to send packets up to the one before kring->rhead, * kernel knows kring->nr_hwcur is the first unsent packet. * * Here we push packets out (as many as possible), and possibly * reclaim buffers from previously completed transmission. * * The caller (netmap) guarantees that there is only one instance * running at any time. Any interference with other driver * methods should be handled by the individual drivers. */ static int iflib_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; if_t ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap kring */ u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; struct if_pkt_info pi; /* * interrupts on every tx packet are expensive so request * them every half ring, or where NS_REPORT is set */ u_int report_frequency = kring->nkr_num_slots >> 1; /* device-specific */ if_ctx_t ctx = ifp->if_softc; iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: process new packets to send. * nm_i is the current index in the netmap kring, * nic_i is the corresponding index in the NIC ring. * * If we have packets to send (nm_i != head) * iterate over the netmap ring, fetch length and update * the corresponding slot in the NIC ring. Some drivers also * need to update the buffer's physical address in the NIC slot * even NS_BUF_CHANGED is not set (PNMB computes the addresses). * * The netmap_reload_map() calls is especially expensive, * even when (as in this case) the tag is 0, so do only * when the buffer has actually changed. * * If possible do not set the report/intr bit on all slots, * but only a few times per ring or when NS_REPORT is set. * * Finally, on 10G and faster drivers, it might be useful * to prefetch the next slot and txr entry. */ nm_i = kring->nr_hwcur; if (nm_i != head) { /* we have new packets to send */ pkt_info_zero(&pi); pi.ipi_segs = txq->ift_segs; pi.ipi_qsidx = kring->ring_id; nic_i = netmap_idx_k2n(kring, nm_i); __builtin_prefetch(&ring->slot[nm_i]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]); __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]); for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; void *addr = PNMB(na, slot, &paddr); int flags = (slot->flags & NS_REPORT || nic_i == 0 || nic_i == report_frequency) ? IPI_TX_INTR : 0; /* device-specific */ pi.ipi_len = len; pi.ipi_segs[0].ds_addr = paddr; pi.ipi_segs[0].ds_len = len; pi.ipi_nsegs = 1; pi.ipi_ndescs = 0; pi.ipi_pidx = nic_i; pi.ipi_flags = flags; /* Fill the slot in the NIC ring. 
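			 * The driver's txd_encap callback is handed a single
			 * contiguous segment (ds_addr/ds_len) and is expected
			 * to write its descriptor(s) at index nic_i;
			 * IPI_TX_INTR in ipi_flags requests a completion
			 * interrupt for this slot.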
*/ ctx->isc_txd_encap(ctx->ifc_softc, &pi); DBG_COUNTER_INC(tx_encap); /* prefetch for next round */ __builtin_prefetch(&ring->slot[nm_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]); NM_CHECK_ADDR_LEN(na, addr, len); if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, txq->ift_buf_tag, txq->ift_sds.ifsd_map[nic_i], addr); } /* make sure changes to the buffer are synced */ bus_dmamap_sync(txq->ift_buf_tag, txq->ift_sds.ifsd_map[nic_i], BUS_DMASYNC_PREWRITE); slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } kring->nr_hwcur = nm_i; /* synchronize the NIC ring */ bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* (re)start the tx unit up to slot nic_i (excluded) */ ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i); } /* * Second part: reclaim buffers for completed transmissions. * * If there are unclaimed buffers, attempt to reclaim them. * If none are reclaimed, and TX IRQs are not in use, do an initial * minimal delay, then trigger the tx handler which will spin in the * group task queue. */ if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) { if (iflib_tx_credits_update(ctx, txq)) { /* some tx completed, increment avail */ nic_i = txq->ift_cidx_processed; kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } } if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ)) if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) { callout_reset_on(&txq->ift_timer, hz < 2000 ? 1 : hz / 1000, iflib_timer, txq, txq->ift_timer.c_cpu); } return (0); } /* * Reconcile kernel and user view of the receive ring. * Same as for the txsync, this routine must be efficient. * The caller guarantees a single invocations, but races against * the rest of the driver should be handled here. * * On call, kring->rhead is the first packet that userspace wants * to keep, and kring->rcur is the wakeup point. * The kernel has previously reported packets up to kring->rtail. * * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective * of whether or not we received an interrupt. */ static int iflib_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; if_t ifp = na->ifp; iflib_fl_t fl; uint32_t nm_i; /* index into the netmap ring */ uint32_t nic_i; /* index into the NIC ring */ u_int i, n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; struct if_rxd_info ri; if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; if (head > lim) return netmap_ring_reinit(kring); /* * XXX netmap_fl_refill() only ever (re)fills free list 0 so far. */ for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) { bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); } /* * First part: import newly received packets. * * nm_i is the index of the next free slot in the netmap ring, * nic_i is the index of the next received packet in the NIC ring, * and they may differ in case if_init() has been called while * in netmap mode. 
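	 * The difference is just the constant ring offset nkr_hwofs: with,
	 * say, 1024 slots and nkr_hwofs == 3, NIC index 1021 corresponds to
	 * netmap index 0 under the relation below.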
For the receive ring we have * * nic_i = rxr->next_check; * nm_i = kring->nr_hwtail (previous) * and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size * * rxr->next_check is set to 0 on a ring reinit */ if (netmap_no_pendintr || force_update) { int crclen = iflib_crcstrip ? 0 : 4; int error, avail; for (i = 0; i < rxq->ifr_nfl; i++) { fl = &rxq->ifr_fl[i]; nic_i = fl->ifl_cidx; nm_i = netmap_idx_n2k(kring, nic_i); avail = ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, nic_i, USHRT_MAX); for (n = 0; avail > 0; n++, avail--) { rxd_info_zero(&ri); ri.iri_frags = rxq->ifr_frags; ri.iri_qsidx = kring->ring_id; ri.iri_ifp = ctx->ifc_ifp; ri.iri_cidx = nic_i; error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen; ring->slot[nm_i].flags = 0; bus_dmamap_sync(fl->ifl_buf_tag, fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } if (n) { /* update the state variables */ if (netmap_no_pendintr && !force_update) { /* diagnostics */ iflib_rx_miss ++; iflib_rx_miss_bufs += n; } fl->ifl_cidx = nic_i; kring->nr_hwtail = nm_i; } kring->nr_kflags &= ~NKR_PENDINTR; } } /* * Second part: skip past packets that userspace has released. * (kring->nr_hwcur to head excluded), * and make the buffers available for reception. * As usual nm_i is the index in the netmap ring, * nic_i is the index in the NIC ring, and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size */ /* XXX not sure how this will work with multiple free lists */ nm_i = kring->nr_hwcur; return (netmap_fl_refill(rxq, kring, nm_i, false)); } static void iflib_netmap_intr(struct netmap_adapter *na, int onoff) { if_ctx_t ctx = na->ifp->if_softc; CTX_LOCK(ctx); if (onoff) { IFDI_INTR_ENABLE(ctx); } else { IFDI_INTR_DISABLE(ctx); } CTX_UNLOCK(ctx); } static int iflib_netmap_attach(if_ctx_t ctx) { struct netmap_adapter na; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; bzero(&na, sizeof(na)); na.ifp = ctx->ifc_ifp; na.na_flags = NAF_BDG_MAYSLEEP; MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); na.num_tx_desc = scctx->isc_ntxd[0]; na.num_rx_desc = scctx->isc_nrxd[0]; na.nm_txsync = iflib_netmap_txsync; na.nm_rxsync = iflib_netmap_rxsync; na.nm_register = iflib_netmap_register; na.nm_intr = iflib_netmap_intr; na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; return (netmap_attach(&na)); } static void iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; slot = netmap_reset(na, NR_TX, txq->ift_id, 0); if (slot == NULL) return; for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { /* * In netmap mode, set the map for the packet buffer. * NOTE: Some drivers (not this one) also need to set * the physical buffer address in the NIC ring. 
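		 * Only the bus_dma map is preloaded for each slot here;
		 * txsync later reloads a map only when the slot is flagged
		 * NS_BUF_CHANGED.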
* netmap_idx_n2k() maps a nic index, i, into the corresponding * netmap slot index, si */ int si = netmap_idx_n2k(na->tx_rings[txq->ift_id], i); netmap_load_map(na, txq->ift_buf_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si)); } } static void iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_kring *kring = na->rx_rings[rxq->ifr_id]; struct netmap_slot *slot; uint32_t nm_i; slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); if (slot == NULL) return; nm_i = netmap_idx_n2k(kring, 0); netmap_fl_refill(rxq, kring, nm_i, true); } static void iflib_netmap_timer_adjust(if_ctx_t ctx, iflib_txq_t txq, uint32_t *reset_on) { struct netmap_kring *kring; uint16_t txqid; txqid = txq->ift_id; kring = NA(ctx->ifc_ifp)->tx_rings[txqid]; if (kring->nr_hwcur != nm_next(kring->nr_hwtail, kring->nkr_num_slots - 1)) { bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if (ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) netmap_tx_irq(ctx->ifc_ifp, txqid); if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ)) { if (hz < 2000) *reset_on = 1; else *reset_on = hz / 1000; } } } #define iflib_netmap_detach(ifp) netmap_detach(ifp) #else #define iflib_netmap_txq_init(ctx, txq) #define iflib_netmap_rxq_init(ctx, rxq) #define iflib_netmap_detach(ifp) #define iflib_netmap_attach(ctx) (0) #define netmap_rx_irq(ifp, qid, budget) (0) #define netmap_tx_irq(ifp, qid) do {} while (0) #define iflib_netmap_timer_adjust(ctx, txq, reset_on) #endif #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } static __inline void prefetch2cachelines(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); #if (CACHE_LINE_SIZE < 128) __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); #endif } #else #define prefetch(x) #define prefetch2cachelines(x) #endif static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) { iflib_fl_t fl; fl = &rxq->ifr_fl[flid]; iru->iru_paddrs = fl->ifl_bus_addrs; iru->iru_vaddrs = &fl->ifl_vm_addrs[0]; iru->iru_idxs = fl->ifl_rxd_idxs; iru->iru_qsidx = rxq->ifr_id; iru->iru_buf_size = fl->ifl_buf_size; iru->iru_flidx = fl->ifl_id; } static void _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { if (err) return; *(bus_addr_t *) arg = segs[0].ds_addr; } int iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags) { int err; device_t dev = ctx->ifc_dev; err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ align, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->idi_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed: %d\n", __func__, err); goto fail_0; } err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); if (err) { device_printf(dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, err); goto fail_1; } dma->idi_paddr = IF_BAD_DMA; err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); if (err || dma->idi_paddr == IF_BAD_DMA) { device_printf(dev, "%s: bus_dmamap_load failed: 
%d\n", __func__, err); goto fail_2; } dma->idi_size = size; return (0); fail_2: bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); fail_1: bus_dma_tag_destroy(dma->idi_tag); fail_0: dma->idi_tag = NULL; return (err); } int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) { if_shared_ctx_t sctx = ctx->ifc_sctx; KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); return (iflib_dma_alloc_align(ctx, size, sctx->isc_q_align, dma, mapflags)); } int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count) { int i, err; iflib_dma_info_t *dmaiter; dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) { if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0) break; } if (err) iflib_dma_free_multi(dmalist, i); return (err); } void iflib_dma_free(iflib_dma_info_t dma) { if (dma->idi_tag == NULL) return; if (dma->idi_paddr != IF_BAD_DMA) { bus_dmamap_sync(dma->idi_tag, dma->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->idi_tag, dma->idi_map); dma->idi_paddr = IF_BAD_DMA; } if (dma->idi_vaddr != NULL) { bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); dma->idi_vaddr = NULL; } bus_dma_tag_destroy(dma->idi_tag); dma->idi_tag = NULL; } void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) { int i; iflib_dma_info_t *dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) iflib_dma_free(*dmaiter); } #ifdef EARLY_AP_STARTUP static const int iflib_started = 1; #else /* * We used to abuse the smp_started flag to decide if the queues have been * fully initialized (by late taskqgroup_adjust() calls in a SYSINIT()). * That gave bad races, since the SYSINIT() runs strictly after smp_started * is set. Run a SYSINIT() strictly after that to just set a usable * completion flag. 
*/ static int iflib_started; static void iflib_record_started(void *arg) { iflib_started = 1; } SYSINIT(iflib_record_started, SI_SUB_SMP + 1, SI_ORDER_FIRST, iflib_record_started, NULL); #endif static int iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; int result; if (!iflib_started) return (FILTER_STRAY); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL) { result = info->ifi_filter(info->ifi_filter_arg); if ((result & FILTER_SCHEDULE_THREAD) == 0) return (result); } GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int iflib_fast_intr_rxtx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; if_ctx_t ctx; iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; iflib_txq_t txq; void *sc; int i, cidx, result; qidx_t txqid; bool intr_enable, intr_legacy; if (!iflib_started) return (FILTER_STRAY); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL) { result = info->ifi_filter(info->ifi_filter_arg); if ((result & FILTER_SCHEDULE_THREAD) == 0) return (result); } ctx = rxq->ifr_ctx; sc = ctx->ifc_softc; intr_enable = false; intr_legacy = !!(ctx->ifc_flags & IFC_LEGACY); MPASS(rxq->ifr_ntxqirq); for (i = 0; i < rxq->ifr_ntxqirq; i++) { txqid = rxq->ifr_txqid[i]; txq = &ctx->ifc_txqs[txqid]; bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if (!ctx->isc_txd_credits_update(sc, txqid, false)) { if (intr_legacy) intr_enable = true; else IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; } GROUPTASK_ENQUEUE(&txq->ift_task); } if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) cidx = rxq->ifr_cq_cidx; else cidx = rxq->ifr_fl[0].ifl_cidx; if (iflib_rxd_avail(ctx, rxq, cidx, 1)) GROUPTASK_ENQUEUE(gtask); else { if (intr_legacy) intr_enable = true; else IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); DBG_COUNTER_INC(rx_intr_enables); } if (intr_enable) IFDI_INTR_ENABLE(ctx); return (FILTER_HANDLED); } static int iflib_fast_intr_ctx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; int result; if (!iflib_started) return (FILTER_STRAY); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL) { result = info->ifi_filter(info->ifi_filter_arg); if ((result & FILTER_SCHEDULE_THREAD) == 0) return (result); } GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, driver_intr_t handler, void *arg, const char *name) { struct resource *res; void *tag = NULL; device_t dev = ctx->ifc_dev; int flags, i, rc; flags = RF_ACTIVE; if (ctx->ifc_flags & IFC_LEGACY) flags |= RF_SHAREABLE; MPASS(rid < 512); i = rid; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, flags); if (res == NULL) { device_printf(dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } irq->ii_res = res; KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL")); rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET, filter, handler, arg, &tag); if (rc != 0) { device_printf(dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name ? name : "unknown", rc); return (rc); } else if (name) bus_describe_intr(dev, res, tag, "%s", name); irq->ii_tag = tag; return (0); } /********************************************************************* * * Allocate DMA resources for TX buffers as well as memory for the TX * mbuf map. 
TX DMA maps (non-TSO/TSO) and TX mbuf map are kept in a * iflib_sw_tx_desc_array structure, storing all the information that * is needed to transmit a packet on the wire. This is called only * once at attach, setup is done every reset. * **********************************************************************/ static int iflib_txsd_alloc(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; bus_size_t tsomaxsize; int err, nsegments, ntsosegments; bool tso; nsegments = scctx->isc_tx_nsegments; ntsosegments = scctx->isc_tx_tso_segments_max; tsomaxsize = scctx->isc_tx_tso_size_max; if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_VLAN_MTU) tsomaxsize += sizeof(struct ether_vlan_header); MPASS(scctx->isc_ntxd[0] > 0); MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); MPASS(nsegments > 0); if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) { MPASS(ntsosegments > 0); MPASS(sctx->isc_tso_maxsize >= tsomaxsize); } /* * Set up DMA tags for TX buffers. */ if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_tx_maxsize, /* maxsize */ nsegments, /* nsegments */ sctx->isc_tx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_buf_tag))) { device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n", (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize); goto fail; } tso = (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) != 0; if (tso && (err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ tsomaxsize, /* maxsize */ ntsosegments, /* nsegments */ sctx->isc_tso_maxsegsize,/* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_tso_buf_tag))) { device_printf(dev, "Unable to allocate TSO TX DMA tag: %d\n", err); goto fail; } /* Allocate memory for the TX mbuf map. */ if (!(txq->ift_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX mbuf map memory\n"); err = ENOMEM; goto fail; } /* * Create the DMA maps for TX buffers. 
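	 * One map is created per TX descriptor; the TSO maps are only
	 * created when the interface advertises IFCAP_TSO.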
*/ if ((txq->ift_sds.ifsd_map = (bus_dmamap_t *)malloc( sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate TX buffer DMA map memory\n"); err = ENOMEM; goto fail; } if (tso && (txq->ift_sds.ifsd_tso_map = (bus_dmamap_t *)malloc( sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate TSO TX buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { err = bus_dmamap_create(txq->ift_buf_tag, 0, &txq->ift_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } if (!tso) continue; err = bus_dmamap_create(txq->ift_tso_buf_tag, 0, &txq->ift_sds.ifsd_tso_map[i]); if (err != 0) { device_printf(dev, "Unable to create TSO TX DMA map\n"); goto fail; } } return (0); fail: /* We free all, it handles case where we are in the middle */ iflib_tx_structures_free(ctx); return (err); } static void iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i) { bus_dmamap_t map; if (txq->ift_sds.ifsd_map != NULL) { map = txq->ift_sds.ifsd_map[i]; bus_dmamap_sync(txq->ift_buf_tag, map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_buf_tag, map); bus_dmamap_destroy(txq->ift_buf_tag, map); txq->ift_sds.ifsd_map[i] = NULL; } if (txq->ift_sds.ifsd_tso_map != NULL) { map = txq->ift_sds.ifsd_tso_map[i]; bus_dmamap_sync(txq->ift_tso_buf_tag, map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_tso_buf_tag, map); bus_dmamap_destroy(txq->ift_tso_buf_tag, map); txq->ift_sds.ifsd_tso_map[i] = NULL; } } static void iflib_txq_destroy(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; for (int i = 0; i < txq->ift_size; i++) iflib_txsd_destroy(ctx, txq, i); if (txq->ift_br != NULL) { ifmp_ring_free(txq->ift_br); txq->ift_br = NULL; } mtx_destroy(&txq->ift_mtx); if (txq->ift_sds.ifsd_map != NULL) { free(txq->ift_sds.ifsd_map, M_IFLIB); txq->ift_sds.ifsd_map = NULL; } if (txq->ift_sds.ifsd_tso_map != NULL) { free(txq->ift_sds.ifsd_tso_map, M_IFLIB); txq->ift_sds.ifsd_tso_map = NULL; } if (txq->ift_sds.ifsd_m != NULL) { free(txq->ift_sds.ifsd_m, M_IFLIB); txq->ift_sds.ifsd_m = NULL; } if (txq->ift_buf_tag != NULL) { bus_dma_tag_destroy(txq->ift_buf_tag); txq->ift_buf_tag = NULL; } if (txq->ift_tso_buf_tag != NULL) { bus_dma_tag_destroy(txq->ift_tso_buf_tag); txq->ift_tso_buf_tag = NULL; } if (txq->ift_ifdi != NULL) { free(txq->ift_ifdi, M_IFLIB); } } static void iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) { struct mbuf **mp; mp = &txq->ift_sds.ifsd_m[i]; if (*mp == NULL) return; if (txq->ift_sds.ifsd_map != NULL) { bus_dmamap_sync(txq->ift_buf_tag, txq->ift_sds.ifsd_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[i]); } if (txq->ift_sds.ifsd_tso_map != NULL) { bus_dmamap_sync(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[i]); } m_free(*mp); DBG_COUNTER_INC(tx_frees); *mp = NULL; } static int iflib_txq_setup(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; iflib_dma_info_t di; int i; /* Set number of descriptors available */ txq->ift_qstatus = IFLIB_QUEUE_IDLE; /* XXX make configurable */ txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ; /* Reset indices */ txq->ift_cidx_processed = 0; txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 
0; txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++) bzero((void *)di->idi_vaddr, di->idi_size); IFDI_TXQ_SETUP(ctx, txq->ift_id); for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++) bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Allocate DMA resources for RX buffers as well as memory for the RX * mbuf map, direct RX cluster pointer map and RX cluster bus address * map. RX DMA map, RX mbuf map, direct RX cluster pointer map and * RX cluster map are kept in a iflib_sw_rx_desc_array structure. * Since we use use one entry in iflib_sw_rx_desc_array per received * packet, the maximum number of entries we'll need is equal to the * number of hardware receive descriptors that we've allocated. * **********************************************************************/ static int iflib_rxsd_alloc(iflib_rxq_t rxq) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; iflib_fl_t fl; int err; MPASS(scctx->isc_nrxd[0] > 0); MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); fl = rxq->ifr_fl; for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ /* Set up DMA tag for RX buffers. */ err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_rx_maxsize, /* maxsize */ sctx->isc_rx_nsegments, /* nsegments */ sctx->isc_rx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &fl->ifl_buf_tag); if (err) { device_printf(dev, "Unable to allocate RX DMA tag: %d\n", err); goto fail; } /* Allocate memory for the RX mbuf map. */ if (!(fl->ifl_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX mbuf map memory\n"); err = ENOMEM; goto fail; } /* Allocate memory for the direct RX cluster pointer map. */ if (!(fl->ifl_sds.ifsd_cl = (caddr_t *) malloc(sizeof(caddr_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX cluster map memory\n"); err = ENOMEM; goto fail; } /* Allocate memory for the RX cluster bus address map. */ if (!(fl->ifl_sds.ifsd_ba = (bus_addr_t *) malloc(sizeof(bus_addr_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX bus address map memory\n"); err = ENOMEM; goto fail; } /* * Create the DMA maps for RX buffers. 
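		 * One map per RX descriptor in each free list; any failure
		 * below unwinds through iflib_rx_structures_free().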
*/ if (!(fl->ifl_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX buffer DMA map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) { err = bus_dmamap_create(fl->ifl_buf_tag, 0, &fl->ifl_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create RX buffer DMA map\n"); goto fail; } } } return (0); fail: iflib_rx_structures_free(ctx); return (err); } /* * Internal service routines */ struct rxq_refill_cb_arg { int error; bus_dma_segment_t seg; int nseg; }; static void _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct rxq_refill_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } /** * _iflib_fl_refill - refill an rxq free-buffer list * @ctx: the iflib context * @fl: the free list to refill * @count: the number of new buffers to allocate * * (Re)populate an rxq free-buffer list with up to @count new packet buffers. * The caller must assure that @count does not exceed the queue's capacity. */ static uint8_t _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) { struct if_rxd_update iru; struct rxq_refill_cb_arg cb_arg; struct mbuf *m; caddr_t cl, *sd_cl; struct mbuf **sd_m; bus_dmamap_t *sd_map; bus_addr_t bus_addr, *sd_ba; int err, frag_idx, i, idx, n, pidx; qidx_t credits; sd_m = fl->ifl_sds.ifsd_m; sd_map = fl->ifl_sds.ifsd_map; sd_cl = fl->ifl_sds.ifsd_cl; sd_ba = fl->ifl_sds.ifsd_ba; pidx = fl->ifl_pidx; idx = pidx; frag_idx = fl->ifl_fragidx; credits = fl->ifl_credits; i = 0; n = count; MPASS(n > 0); MPASS(credits + n <= fl->ifl_size); if (pidx < fl->ifl_cidx) MPASS(pidx + n <= fl->ifl_cidx); if (pidx == fl->ifl_cidx && (credits < fl->ifl_size)) MPASS(fl->ifl_gen == 0); if (pidx > fl->ifl_cidx) MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); DBG_COUNTER_INC(fl_refills); if (n > 8) DBG_COUNTER_INC(fl_refills_large); iru_init(&iru, fl->ifl_rxq, fl->ifl_id); while (n--) { /* * We allocate an uninitialized mbuf + cluster, mbuf is * initialized after rx. * * If the cluster is still set then we know a minimum sized packet was received */ bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, &frag_idx); if (frag_idx < 0) bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx); MPASS(frag_idx >= 0); if ((cl = sd_cl[frag_idx]) == NULL) { if ((cl = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL) break; cb_arg.error = 0; MPASS(sd_map != NULL); err = bus_dmamap_load(fl->ifl_buf_tag, sd_map[frag_idx], cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, BUS_DMA_NOWAIT); if (err != 0 || cb_arg.error) { /* * !zone_pack ? 
*/ if (fl->ifl_zone == zone_pack) uma_zfree(fl->ifl_zone, cl); break; } sd_ba[frag_idx] = bus_addr = cb_arg.seg.ds_addr; sd_cl[frag_idx] = cl; #if MEMORY_LOGGING fl->ifl_cl_enqueued++; #endif } else { bus_addr = sd_ba[frag_idx]; } bus_dmamap_sync(fl->ifl_buf_tag, sd_map[frag_idx], BUS_DMASYNC_PREREAD); if (sd_m[frag_idx] == NULL) { if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { break; } sd_m[frag_idx] = m; } bit_set(fl->ifl_rx_bitmap, frag_idx); #if MEMORY_LOGGING fl->ifl_m_enqueued++; #endif DBG_COUNTER_INC(rx_allocs); fl->ifl_rxd_idxs[i] = frag_idx; fl->ifl_bus_addrs[i] = bus_addr; fl->ifl_vm_addrs[i] = cl; credits++; i++; MPASS(credits <= fl->ifl_size); if (++idx == fl->ifl_size) { fl->ifl_gen = 1; idx = 0; } if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); i = 0; pidx = idx; fl->ifl_pidx = idx; fl->ifl_credits = credits; } } if (i) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); fl->ifl_pidx = idx; fl->ifl_credits = credits; } DBG_COUNTER_INC(rxd_flush); if (fl->ifl_pidx == 0) pidx = fl->ifl_size - 1; else pidx = fl->ifl_pidx - 1; bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx); fl->ifl_fragidx = frag_idx; return (n == -1 ? 0 : IFLIB_RXEOF_EMPTY); } static __inline uint8_t __iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max) { /* we avoid allowing pidx to catch up with cidx as it confuses ixl */ int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; #ifdef INVARIANTS int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; #endif MPASS(fl->ifl_credits <= fl->ifl_size); MPASS(reclaimable == delta); if (reclaimable > 0) return (_iflib_fl_refill(ctx, fl, min(max, reclaimable))); return (0); } uint8_t iflib_in_detach(if_ctx_t ctx) { bool in_detach; STATE_LOCK(ctx); in_detach = !!(ctx->ifc_flags & IFC_IN_DETACH); STATE_UNLOCK(ctx); return (in_detach); } static void iflib_fl_bufs_free(iflib_fl_t fl) { iflib_dma_info_t idi = fl->ifl_ifdi; bus_dmamap_t sd_map; uint32_t i; for (i = 0; i < fl->ifl_size; i++) { struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i]; caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i]; if (*sd_cl != NULL) { sd_map = fl->ifl_sds.ifsd_map[i]; bus_dmamap_sync(fl->ifl_buf_tag, sd_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fl->ifl_buf_tag, sd_map); if (*sd_cl != NULL) uma_zfree(fl->ifl_zone, *sd_cl); if (*sd_m != NULL) { m_init(*sd_m, M_NOWAIT, MT_DATA, 0); uma_zfree(zone_mbuf, *sd_m); } } else { MPASS(*sd_cl == NULL); MPASS(*sd_m == NULL); } #if MEMORY_LOGGING fl->ifl_m_dequeued++; fl->ifl_cl_dequeued++; #endif *sd_cl = NULL; *sd_m = NULL; } #ifdef INVARIANTS for (i = 0; i < fl->ifl_size; i++) { MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); MPASS(fl->ifl_sds.ifsd_m[i] == NULL); } #endif /* * Reset free list values */ fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0; bzero(idi->idi_vaddr, idi->idi_size); } /********************************************************************* * * Initialize a free list and its buffers. 
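 *  This frees whatever is currently on the list and then pre-fills at
 *  most 128 descriptors, so an idle interface does not pin a full ring
 *  of clusters.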
* **********************************************************************/ static int iflib_fl_setup(iflib_fl_t fl) { iflib_rxq_t rxq = fl->ifl_rxq; if_ctx_t ctx = rxq->ifr_ctx; bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1); /* ** Free current RX buffer structs and their mbufs */ iflib_fl_bufs_free(fl); /* Now replenish the mbufs */ MPASS(fl->ifl_credits == 0); fl->ifl_buf_size = ctx->ifc_rx_mbuf_sz; if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; fl->ifl_cltype = m_gettype(fl->ifl_buf_size); fl->ifl_zone = m_getzone(fl->ifl_buf_size); /* avoid pre-allocating zillions of clusters to an idle card * potentially speeding up attach */ (void) _iflib_fl_refill(ctx, fl, min(128, fl->ifl_size)); MPASS(min(128, fl->ifl_size) == fl->ifl_credits); if (min(128, fl->ifl_size) != fl->ifl_credits) return (ENOBUFS); /* * handle failure */ MPASS(rxq != NULL); MPASS(fl->ifl_ifdi != NULL); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void iflib_rx_sds_free(iflib_rxq_t rxq) { iflib_fl_t fl; int i, j; if (rxq->ifr_fl != NULL) { for (i = 0; i < rxq->ifr_nfl; i++) { fl = &rxq->ifr_fl[i]; if (fl->ifl_buf_tag != NULL) { if (fl->ifl_sds.ifsd_map != NULL) { for (j = 0; j < fl->ifl_size; j++) { bus_dmamap_sync( fl->ifl_buf_tag, fl->ifl_sds.ifsd_map[j], BUS_DMASYNC_POSTREAD); bus_dmamap_unload( fl->ifl_buf_tag, fl->ifl_sds.ifsd_map[j]); bus_dmamap_destroy( fl->ifl_buf_tag, fl->ifl_sds.ifsd_map[j]); } } bus_dma_tag_destroy(fl->ifl_buf_tag); fl->ifl_buf_tag = NULL; } free(fl->ifl_sds.ifsd_m, M_IFLIB); free(fl->ifl_sds.ifsd_cl, M_IFLIB); free(fl->ifl_sds.ifsd_ba, M_IFLIB); free(fl->ifl_sds.ifsd_map, M_IFLIB); fl->ifl_sds.ifsd_m = NULL; fl->ifl_sds.ifsd_cl = NULL; fl->ifl_sds.ifsd_ba = NULL; fl->ifl_sds.ifsd_map = NULL; } free(rxq->ifr_fl, M_IFLIB); rxq->ifr_fl = NULL; free(rxq->ifr_ifdi, M_IFLIB); rxq->ifr_ifdi = NULL; rxq->ifr_cq_cidx = 0; } } /* * Timer routine */ static void iflib_timer(void *arg) { iflib_txq_t txq = arg; if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; uint64_t this_tick = ticks; uint32_t reset_on = hz / 2; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. 
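	** A queue is marked HUNG once its mp ring stalls while not idle; if
	** it is still HUNG on a later pass and either no descriptors were
	** cleaned since then or no pause frames were seen, the watchdog
	** path below resets the interface.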
*/ if (this_tick - txq->ift_last_timer_tick >= hz / 2) { txq->ift_last_timer_tick = this_tick; IFDI_TIMER(ctx, txq->ift_id); if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && ((txq->ift_cleaned_prev == txq->ift_cleaned) || (sctx->isc_pause_frames == 0))) goto hung; if (txq->ift_qstatus != IFLIB_QUEUE_IDLE && ifmp_ring_is_stalled(txq->ift_br)) { KASSERT(ctx->ifc_link_state == LINK_STATE_UP, ("queue can't be marked as hung if interface is down")); txq->ift_qstatus = IFLIB_QUEUE_HUNG; } txq->ift_cleaned_prev = txq->ift_cleaned; } #ifdef DEV_NETMAP if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) iflib_netmap_timer_adjust(ctx, txq, &reset_on); #endif /* handle any laggards */ if (txq->ift_db_pending) GROUPTASK_ENQUEUE(&txq->ift_task); sctx->isc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) callout_reset_on(&txq->ift_timer, reset_on, iflib_timer, txq, txq->ift_timer.c_cpu); return; hung: device_printf(ctx->ifc_dev, "Watchdog timeout (TX: %d desc avail: %d pidx: %d) -- resetting\n", txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); STATE_LOCK(ctx); if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); ctx->ifc_flags |= (IFC_DO_WATCHDOG|IFC_DO_RESET); iflib_admin_intr_deferred(ctx); STATE_UNLOCK(ctx); } static void iflib_calc_rx_mbuf_sz(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; /* * XXX don't set the max_frame_size to larger * than the hardware can handle */ if (sctx->isc_max_frame_size <= MCLBYTES) ctx->ifc_rx_mbuf_sz = MCLBYTES; else ctx->ifc_rx_mbuf_sz = MJUMPAGESIZE; } uint32_t iflib_get_rx_mbuf_sz(if_ctx_t ctx) { return (ctx->ifc_rx_mbuf_sz); } static void iflib_init_locked(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_t ifp = ctx->ifc_ifp; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, tx_ip_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) if_sethwassistbits(ifp, tx_ip6_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TSO4) if_sethwassistbits(ifp, CSUM_IP_TSO, 0); if (if_getcapenable(ifp) & IFCAP_TSO6) if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } /* * Calculate a suitable Rx mbuf size prior to calling IFDI_INIT, so * that drivers can use the value when setting up the hardware receive * buffers. 
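	 * iflib_calc_rx_mbuf_sz() simply picks MCLBYTES when the maximum
	 * frame fits in a 2k cluster and MJUMPAGESIZE otherwise.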
*/ iflib_calc_rx_mbuf_sz(ctx); #ifdef INVARIANTS i = if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { /* XXX this should really be done on a per-queue basis */ if (if_getcapenable(ifp) & IFCAP_NETMAP) { MPASS(rxq->ifr_id == i); iflib_netmap_rxq_init(ctx, rxq); continue; } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "setting up free list %d failed - " "check cluster settings\n", j); goto done; } } } done: if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); } static int iflib_media_change(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); int err; CTX_LOCK(ctx); if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) iflib_init_locked(ctx); CTX_UNLOCK(ctx); return (err); } static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); CTX_LOCK(ctx); IFDI_UPDATE_ADMIN_STATUS(ctx); IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } void iflib_stop(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; iflib_dma_info_t di; iflib_fl_t fl; int i, j; /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); DELAY(1000); IFDI_STOP(ctx); DELAY(1000); iflib_debug_reset(); /* Wait for current tx queue users to exit to disarm watchdog timer. */ for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) { /* make sure all transmitters have completed before proceeding XXX */ CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); /* clean any enqueued buffers */ iflib_ifmp_purge(txq); /* Free any existing tx buffers. */ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; txq->ift_pullups = 0; ifmp_ring_reset_stats(txq->ift_br); for (j = 0, di = txq->ift_ifdi; j < sctx->isc_ntxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); } for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { /* make sure all transmitters have completed before proceeding XXX */ rxq->ifr_cq_cidx = 0; for (j = 0, di = rxq->ifr_ifdi; j < sctx->isc_nrxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); /* also resets the free lists pidx/cidx */ for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) iflib_fl_bufs_free(fl); } } static inline caddr_t calc_next_rxd(iflib_fl_t fl, int cidx) { qidx_t size; int nrxd; caddr_t start, end, cur, next; nrxd = fl->ifl_size; size = fl->ifl_rxd_size; start = fl->ifl_ifdi->idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*nrxd; next = CACHE_PTR_NEXT(cur); return (next < end ? 
next : start); } static inline void prefetch_pkts(iflib_fl_t fl, int cidx) { int nextptr; int nrxd = fl->ifl_size; caddr_t next_rxd; nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1); prefetch(&fl->ifl_sds.ifsd_m[nextptr]); prefetch(&fl->ifl_sds.ifsd_cl[nextptr]); next_rxd = calc_next_rxd(fl, cidx); prefetch(next_rxd); prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); } static struct mbuf * rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, bool unload, if_rxsd_t sd, int *pf_rv, if_rxd_info_t ri) { bus_dmamap_t map; iflib_fl_t fl; caddr_t payload; struct mbuf *m; int flid, cidx, len, next; map = NULL; flid = irf->irf_flid; cidx = irf->irf_idx; fl = &rxq->ifr_fl[flid]; sd->ifsd_fl = fl; sd->ifsd_cidx = cidx; m = fl->ifl_sds.ifsd_m[cidx]; sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx]; fl->ifl_credits--; #if MEMORY_LOGGING fl->ifl_m_dequeued++; #endif if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH) prefetch_pkts(fl, cidx); next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_map[next]); map = fl->ifl_sds.ifsd_map[cidx]; next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1); /* not valid assert if bxe really does SGE from non-contiguous elements */ MPASS(fl->ifl_cidx == cidx); bus_dmamap_sync(fl->ifl_buf_tag, map, BUS_DMASYNC_POSTREAD); if (rxq->pfil != NULL && PFIL_HOOKED_IN(rxq->pfil) && pf_rv != NULL) { payload = *sd->ifsd_cl; payload += ri->iri_pad; len = ri->iri_len - ri->iri_pad; *pf_rv = pfil_run_hooks(rxq->pfil, payload, ri->iri_ifp, len | PFIL_MEMPTR | PFIL_IN, NULL); switch (*pf_rv) { case PFIL_DROPPED: case PFIL_CONSUMED: /* * The filter ate it. Everything is recycled. */ m = NULL; unload = 0; break; case PFIL_REALLOCED: /* * The filter copied it. Everything is recycled. 
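			 * The mbuf returned below carries the filter's copy;
			 * the original mbuf, cluster, and loaded DMA map stay
			 * in the free-list arrays for the refill path to
			 * reuse, hence unload is cleared.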
*/ m = pfil_mem2mbuf(payload); unload = 0; break; case PFIL_PASS: /* * Filter said it was OK, so receive like * normal */ fl->ifl_sds.ifsd_m[cidx] = NULL; break; default: MPASS(0); } } else { fl->ifl_sds.ifsd_m[cidx] = NULL; *pf_rv = PFIL_PASS; } if (unload) bus_dmamap_unload(fl->ifl_buf_tag, map); fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); if (__predict_false(fl->ifl_cidx == 0)) fl->ifl_gen = 0; bit_clear(fl->ifl_rx_bitmap, cidx); return (m); } static struct mbuf * assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd, int *pf_rv) { struct mbuf *m, *mh, *mt; caddr_t cl; int *pf_rv_ptr, flags, i, padlen; bool consumed; i = 0; mh = NULL; consumed = false; *pf_rv = PFIL_PASS; pf_rv_ptr = pf_rv; do { m = rxd_frag_to_sd(rxq, &ri->iri_frags[i], !consumed, sd, pf_rv_ptr, ri); MPASS(*sd->ifsd_cl != NULL); /* * Exclude zero-length frags & frags from * packets the filter has consumed or dropped */ if (ri->iri_frags[i].irf_len == 0 || consumed || *pf_rv == PFIL_CONSUMED || *pf_rv == PFIL_DROPPED) { if (mh == NULL) { /* everything saved here */ consumed = true; pf_rv_ptr = NULL; continue; } /* XXX we can save the cluster here, but not the mbuf */ m_init(m, M_NOWAIT, MT_DATA, 0); m_free(m); continue; } if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = mt = m; padlen = ri->iri_pad; } else { flags = M_EXT; mt->m_next = m; mt = m; /* assuming padding is only on the first fragment */ padlen = 0; } cl = *sd->ifsd_cl; *sd->ifsd_cl = NULL; /* Can these two be made one ? */ m_init(m, M_NOWAIT, MT_DATA, flags); m_cljset(m, cl, sd->ifsd_fl->ifl_cltype); /* * These must follow m_init and m_cljset */ m->m_data += padlen; ri->iri_len -= padlen; m->m_len = ri->iri_frags[i].irf_len; } while (++i < ri->iri_nfrags); return (mh); } /* * Process one software descriptor */ static struct mbuf * iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) { struct if_rxsd sd; struct mbuf *m; int pf_rv; /* should I merge this back in now that the two paths are basically duplicated? */ if (ri->iri_nfrags == 1 && ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) { m = rxd_frag_to_sd(rxq, &ri->iri_frags[0], false, &sd, &pf_rv, ri); if (pf_rv != PFIL_PASS && pf_rv != PFIL_REALLOCED) return (m); if (pf_rv == PFIL_PASS) { m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m)) m->m_data += 2; #endif memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); m->m_len = ri->iri_frags[0].irf_len; } } else { m = assemble_segments(rxq, ri, &sd, &pf_rv); if (pf_rv != PFIL_PASS && pf_rv != PFIL_REALLOCED) return (m); } m->m_pkthdr.len = ri->iri_len; m->m_pkthdr.rcvif = ri->iri_ifp; m->m_flags |= ri->iri_flags; m->m_pkthdr.ether_vtag = ri->iri_vtag; m->m_pkthdr.flowid = ri->iri_flowid; M_HASHTYPE_SET(m, ri->iri_rsstype); m->m_pkthdr.csum_flags = ri->iri_csum_flags; m->m_pkthdr.csum_data = ri->iri_csum_data; return (m); } #if defined(INET6) || defined(INET) static void iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6) { CURVNET_SET(lc->ifp->if_vnet); #if defined(INET6) *v6 = V_ip6_forwarding; #endif #if defined(INET) *v4 = V_ipforwarding; #endif CURVNET_RESTORE(); } /* * Returns true if it's possible this packet could be LROed. * if it returns false, it is guaranteed that tcp_lro_rx() * would not return zero. 
*/ static bool iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding) { struct ether_header *eh; eh = mtod(m, struct ether_header *); switch (eh->ether_type) { #if defined(INET6) case htons(ETHERTYPE_IPV6): return (!v6_forwarding); #endif #if defined (INET) case htons(ETHERTYPE_IP): return (!v4_forwarding); #endif } return false; } #else static void iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused) { } #endif static void _task_fn_rx_watchdog(void *context) { iflib_rxq_t rxq = context; GROUPTASK_ENQUEUE(&rxq->ifr_task); } static uint8_t iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) { if_t ifp; if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int avail, i; qidx_t *cidxp; struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; int lro_enabled; bool v4_forwarding, v6_forwarding, lro_possible; uint8_t retval = 0; /* * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ struct mbuf *m, *mh, *mt, *mf; NET_EPOCH_ASSERT(); lro_possible = v4_forwarding = v6_forwarding = false; ifp = ctx->ifc_ifp; mh = mt = NULL; MPASS(budget > 0); rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else cidxp = &rxq->ifr_fl[0].ifl_cidx; if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) retval |= __iflib_fl_refill_lt(ctx, fl, budget + 8); DBG_COUNTER_INC(rx_unavail); return (retval); } /* pfil needs the vnet to be set */ CURVNET_SET_QUIET(ifp->if_vnet); for (budget_left = budget; budget_left > 0 && avail > 0;) { if (__predict_false(!CTX_ACTIVE(ctx))) { DBG_COUNTER_INC(rx_ctx_inactive); break; } /* * Reset client set fields to their default values */ rxd_info_zero(&ri); ri.iri_qsidx = rxq->ifr_id; ri.iri_cidx = *cidxp; ri.iri_ifp = ifp; ri.iri_frags = rxq->ifr_frags; err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); if (err) goto err; rx_pkts += 1; rx_bytes += ri.iri_len; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { *cidxp = ri.iri_cidx; /* Update our consumer index */ /* XXX NB: shurd - check if this is still safe */ while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; /* was this only a completion queue message? 
*/ if (__predict_false(ri.iri_nfrags == 0)) continue; } MPASS(ri.iri_nfrags != 0); MPASS(ri.iri_len != 0); /* will advance the cidx on the corresponding free lists */ m = iflib_rxd_pkt_get(rxq, &ri); avail--; budget_left--; if (avail == 0 && budget_left) avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); if (__predict_false(m == NULL)) continue; /* imm_pkt: -- cxgb */ if (mh == NULL) mh = mt = m; else { mt->m_nextpkt = m; mt = m; } } CURVNET_RESTORE(); /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) retval |= __iflib_fl_refill_lt(ctx, fl, budget + 8); lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); if (lro_enabled) iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding); mt = mf = NULL; while (mh != NULL) { m = mh; mh = mh->m_nextpkt; m->m_nextpkt = NULL; #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) continue; #endif rx_bytes += m->m_pkthdr.len; rx_pkts++; #if defined(INET6) || defined(INET) if (lro_enabled) { if (!lro_possible) { lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding); if (lro_possible && mf != NULL) { ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); mt = mf = NULL; } } if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) == (CSUM_L4_CALC|CSUM_L4_VALID)) { if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) continue; } } #endif if (lro_possible) { ifp->if_input(ifp, m); DBG_COUNTER_INC(rx_if_input); continue; } if (mf == NULL) mf = m; if (mt != NULL) mt->m_nextpkt = m; mt = m; } if (mf != NULL) { ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); } if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); /* * Flush any outstanding LRO work */ #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); #endif if (avail != 0 || iflib_rxd_avail(ctx, rxq, *cidxp, 1) != 0) retval |= IFLIB_RXEOF_MORE; return (retval); err: STATE_LOCK(ctx); ctx->ifc_flags |= IFC_DO_RESET; iflib_admin_intr_deferred(ctx); STATE_UNLOCK(ctx); return (0); } #define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1) static inline qidx_t txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (in_use > 4*minthresh) return (notify_count); if (in_use > 2*minthresh) return (notify_count >> 1); if (in_use > minthresh) return (notify_count >> 3); return (0); } static inline qidx_t txq_max_rs_deferred(iflib_txq_t txq) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (txq->ift_in_use > 4*minthresh) return (notify_count); if (txq->ift_in_use > 2*minthresh) return (notify_count >> 1); if (txq->ift_in_use > minthresh) return (notify_count >> 2); return (2); } #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) #define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use)) #define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq) #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) /* forward compatibility for cxgb */ #define FIRST_QSET(ctx) 0 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) /* XXX we 
should be setting this to something other than zero */ #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) #define MAX_TX_DESC(ctx) max((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max, \ (ctx)->ifc_softc_ctx.isc_tx_nsegments) static inline bool iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring, qidx_t in_use) { qidx_t dbval, max; bool rang; rang = false; max = TXQ_MAX_DB_DEFERRED(txq, in_use); if (ring || txq->ift_db_pending >= max) { dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); txq->ift_db_pending = txq->ift_npending = 0; rang = true; } return (rang); } #ifdef PKT_DEBUG static void print_pkt(if_pkt_info_t pi) { printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); } #endif #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) #define IS_TX_OFFLOAD4(pi) ((pi)->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP_TSO)) #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) #define IS_TX_OFFLOAD6(pi) ((pi)->ipi_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO)) static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; struct ether_vlan_header *eh; struct mbuf *m; m = *mp; if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && M_WRITABLE(m) == 0) { if ((m = m_dup(m, M_NOWAIT)) == NULL) { return (ENOMEM); } else { m_freem(*mp); DBG_COUNTER_INC(tx_frees); *mp = m; } } /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. 
*/ if (__predict_false(m->m_len < sizeof(*eh))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) return (ENOMEM); } eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { pi->ipi_etype = ntohs(eh->evl_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { pi->ipi_etype = ntohs(eh->evl_encap_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN; } switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: { struct mbuf *n; struct ip *ip = NULL; struct tcphdr *th = NULL; int minthlen; minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); if (__predict_false(m->m_len < minthlen)) { /* * if this code bloat is causing too much of a hit * move it to a separate function and mark it noinline */ if (m->m_len == pi->ipi_ehdrlen) { n = m->m_next; MPASS(n); if (n->m_len >= sizeof(*ip)) { ip = (struct ip *)n->m_data; if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); } } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } } else { ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } pi->ipi_ip_hlen = ip->ip_hl << 2; pi->ipi_ipproto = ip->ip_p; pi->ipi_flags |= IPI_TX_IPV4; /* TCP checksum offload may require TCP header length */ if (IS_TX_OFFLOAD4(pi)) { if (__predict_true(pi->ipi_ipproto == IPPROTO_TCP)) { if (__predict_false(th == NULL)) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) return (ENOMEM); th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; } if (IS_TSO4(pi)) { if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); /* * TSO always requires hardware checksum offload. */ pi->ipi_csum_flags |= (CSUM_IP_TCP | CSUM_IP); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; if (sctx->isc_flags & IFLIB_TSO_INIT_IP) { ip->ip_sum = 0; ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz); } } } if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) ip->ip_sum = 0; break; } #endif #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); struct tcphdr *th; pi->ipi_ip_hlen = sizeof(struct ip6_hdr); if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) return (ENOMEM); } th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); /* XXX-BZ this will go badly in case of ext hdrs. 
*/ pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_flags |= IPI_TX_IPV6; /* TCP checksum offload may require TCP header length */ if (IS_TX_OFFLOAD6(pi)) { if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) return (ENOMEM); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; } if (IS_TSO6(pi)) { if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); /* * TSO always requires hardware checksum offload. */ pi->ipi_csum_flags |= CSUM_IP6_TCP; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; } } break; } #endif default: pi->ipi_csum_flags &= ~CSUM_OFFLOAD; pi->ipi_ip_hlen = 0; break; } *mp = m; return (0); } /* * If dodgy hardware rejects the scatter gather chain we've handed it * we'll need to remove the mbuf chain from ifsg_m[] before we can add the * m_defrag'd mbufs */ static __noinline struct mbuf * iflib_remove_mbuf(iflib_txq_t txq) { int ntxd, pidx; struct mbuf *m, **ifsd_m; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; pidx = txq->ift_pidx & (ntxd - 1); ifsd_m = txq->ift_sds.ifsd_m; m = ifsd_m[pidx]; ifsd_m[pidx] = NULL; bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[pidx]); if (txq->ift_sds.ifsd_tso_map != NULL) bus_dmamap_unload(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[pidx]); #if MEMORY_LOGGING txq->ift_dequeued++; #endif return (m); } static inline caddr_t calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid) { qidx_t size; int ntxd; caddr_t start, end, cur, next; ntxd = txq->ift_size; size = txq->ift_txd_size[qid]; start = txq->ift_ifdi[qid].idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*ntxd; next = CACHE_PTR_NEXT(cur); return (next < end ? next : start); } /* * Pad an mbuf to ensure a minimum ethernet frame size. 
 * min_frame_size is the frame size (less CRC) to pad the mbuf to
 */
static __noinline int
iflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size)
{
	/*
	 * 18 is enough bytes to pad an ARP packet to 46 bytes, and
	 * an ARP message is the smallest common payload I can think of
	 */
	static char pad[18];	/* just zeros */
	int n;
	struct mbuf *new_head;

	if (!M_WRITABLE(*m_head)) {
		new_head = m_dup(*m_head, M_NOWAIT);
		if (new_head == NULL) {
			m_freem(*m_head);
			device_printf(dev, "cannot pad short frame, m_dup() failed");
			DBG_COUNTER_INC(encap_pad_mbuf_fail);
			DBG_COUNTER_INC(tx_frees);
			return ENOMEM;
		}
		m_freem(*m_head);
		*m_head = new_head;
	}

	for (n = min_frame_size - (*m_head)->m_pkthdr.len;
	     n > 0; n -= sizeof(pad))
		if (!m_append(*m_head, min(n, sizeof(pad)), pad))
			break;

	if (n > 0) {
		m_freem(*m_head);
		device_printf(dev, "cannot pad short frame\n");
		DBG_COUNTER_INC(encap_pad_mbuf_fail);
		DBG_COUNTER_INC(tx_frees);
		return (ENOBUFS);
	}

	return 0;
}

static int
iflib_encap(iflib_txq_t txq, struct mbuf **m_headp)
{
	if_ctx_t ctx;
	if_shared_ctx_t sctx;
	if_softc_ctx_t scctx;
	bus_dma_tag_t buf_tag;
	bus_dma_segment_t *segs;
	struct mbuf *m_head, **ifsd_m;
	void *next_txd;
	bus_dmamap_t map;
	struct if_pkt_info pi;
	int remap = 0;
	int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd;

	ctx = txq->ift_ctx;
	sctx = ctx->ifc_sctx;
	scctx = &ctx->ifc_softc_ctx;
	segs = txq->ift_segs;
	ntxd = txq->ift_size;
	m_head = *m_headp;
	map = NULL;

	/*
	 * If we're doing TSO the next descriptor to clean may be quite far ahead
	 */
	cidx = txq->ift_cidx;
	pidx = txq->ift_pidx;
	if (ctx->ifc_flags & IFC_PREFETCH) {
		next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1);
		if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) {
			next_txd = calc_next_txd(txq, cidx, 0);
			prefetch(next_txd);
		}

		/* prefetch the next cache line of mbuf pointers and flags */
		prefetch(&txq->ift_sds.ifsd_m[next]);
		prefetch(&txq->ift_sds.ifsd_map[next]);
		next = (cidx + CACHE_LINE_SIZE) & (ntxd-1);
	}
	map = txq->ift_sds.ifsd_map[pidx];
	ifsd_m = txq->ift_sds.ifsd_m;

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		buf_tag = txq->ift_tso_buf_tag;
		max_segs = scctx->isc_tx_tso_segments_max;
		map = txq->ift_sds.ifsd_tso_map[pidx];
		MPASS(buf_tag != NULL);
		MPASS(max_segs > 0);
	} else {
		buf_tag = txq->ift_buf_tag;
		max_segs = scctx->isc_tx_nsegments;
		map = txq->ift_sds.ifsd_map[pidx];
	}
	if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) &&
	    __predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) {
		err = iflib_ether_pad(ctx->ifc_dev, m_headp, scctx->isc_min_frame_size);
		if (err) {
			DBG_COUNTER_INC(encap_txd_encap_fail);
			return err;
		}
	}
	m_head = *m_headp;

	pkt_info_zero(&pi);
	pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST));
	pi.ipi_pidx = pidx;
	pi.ipi_qsidx = txq->ift_id;
	pi.ipi_len = m_head->m_pkthdr.len;
	pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags;
	pi.ipi_vtag = M_HAS_VLANTAG(m_head) ?
	    m_head->m_pkthdr.ether_vtag : 0;

	/* deliberate bitwise OR to make one condition */
	if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) {
		if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) {
			DBG_COUNTER_INC(encap_txd_encap_fail);
			return (err);
		}
		m_head = *m_headp;
	}

retry:
	err = bus_dmamap_load_mbuf_sg(buf_tag, map, m_head, segs, &nsegs,
	    BUS_DMA_NOWAIT);
defrag:
	if (__predict_false(err)) {
		switch (err) {
		case EFBIG:
			/* try collapse once and defrag once */
			if (remap == 0) {
				m_head = m_collapse(*m_headp, M_NOWAIT, max_segs);
				/* try defrag if collapsing fails */
				if (m_head == NULL)
					remap++;
			}
			if (remap == 1) {
				txq->ift_mbuf_defrag++;
				m_head = m_defrag(*m_headp, M_NOWAIT);
			}
			/*
			 * remap should never be >1 unless bus_dmamap_load_mbuf_sg
			 * failed to map an mbuf that was run through m_defrag
			 */
			MPASS(remap <= 1);
			if (__predict_false(m_head == NULL || remap > 1))
				goto defrag_failed;
			remap++;
			*m_headp = m_head;
			goto retry;
			break;
		case ENOMEM:
			txq->ift_no_tx_dma_setup++;
			break;
		default:
			txq->ift_no_tx_dma_setup++;
			m_freem(*m_headp);
			DBG_COUNTER_INC(tx_frees);
			*m_headp = NULL;
			break;
		}
		txq->ift_map_failed++;
		DBG_COUNTER_INC(encap_load_mbuf_fail);
		DBG_COUNTER_INC(encap_txd_encap_fail);
		return (err);
	}
	ifsd_m[pidx] = m_head;
	/*
	 * XXX assumes a 1 to 1 relationship between segments and
	 * descriptors - this does not hold true on all drivers, e.g.
	 * cxgb
	 */
	if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) {
		txq->ift_no_desc_avail++;
		bus_dmamap_unload(buf_tag, map);
		DBG_COUNTER_INC(encap_txq_avail_fail);
		DBG_COUNTER_INC(encap_txd_encap_fail);
		if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0)
			GROUPTASK_ENQUEUE(&txq->ift_task);
		return (ENOBUFS);
	}
	/*
	 * On Intel cards we can greatly reduce the number of TX interrupts
	 * we see by only setting report status on every Nth descriptor.
	 * However, this also means that the driver will need to keep track
	 * of the descriptors that RS was set on to check them for the DD bit.
	 */
	txq->ift_rs_pending += nsegs + 1;
	if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) ||
	     iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs) <= MAX_TX_DESC(ctx) + 2) {
		pi.ipi_flags |= IPI_TX_INTR;
		txq->ift_rs_pending = 0;
	}

	pi.ipi_segs = segs;
	pi.ipi_nsegs = nsegs;

	MPASS(pidx >= 0 && pidx < txq->ift_size);
#ifdef PKT_DEBUG
	print_pkt(&pi);
#endif
	if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) {
		bus_dmamap_sync(buf_tag, map, BUS_DMASYNC_PREWRITE);
		DBG_COUNTER_INC(tx_encap);
		MPASS(pi.ipi_new_pidx < txq->ift_size);

		ndesc = pi.ipi_new_pidx - pi.ipi_pidx;
		if (pi.ipi_new_pidx < pi.ipi_pidx) {
			ndesc += txq->ift_size;
			txq->ift_gen = 1;
		}
		/*
		 * drivers can need as many as
		 * two sentinels
		 */
		MPASS(ndesc <= pi.ipi_nsegs + 2);
		MPASS(pi.ipi_new_pidx != pidx);
		MPASS(ndesc > 0);
		txq->ift_in_use += ndesc;

		/*
		 * We update the last software descriptor again here because there may
		 * be a sentinel and/or there may be more mbufs than segments
		 */
		txq->ift_pidx = pi.ipi_new_pidx;
		txq->ift_npending += pi.ipi_ndescs;
	} else {
		*m_headp = m_head = iflib_remove_mbuf(txq);
		if (err == EFBIG) {
			txq->ift_txd_encap_efbig++;
			if (remap < 2) {
				remap = 1;
				goto defrag;
			}
		}
		goto defrag_failed;
	}
	/*
	 * err can't possibly be non-zero here, so we don't need to test it
	 * to see if we need to DBG_COUNTER_INC(encap_txd_encap_fail).
*/ return (err); defrag_failed: txq->ift_mbuf_defrag_failed++; txq->ift_map_failed++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; DBG_COUNTER_INC(encap_txd_encap_fail); return (ENOMEM); } static void iflib_tx_desc_free(iflib_txq_t txq, int n) { uint32_t qsize, cidx, mask, gen; struct mbuf *m, **ifsd_m; bool do_prefetch; cidx = txq->ift_cidx; gen = txq->ift_gen; qsize = txq->ift_size; mask = qsize-1; ifsd_m = txq->ift_sds.ifsd_m; do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH); while (n-- > 0) { if (do_prefetch) { prefetch(ifsd_m[(cidx + 3) & mask]); prefetch(ifsd_m[(cidx + 4) & mask]); } if ((m = ifsd_m[cidx]) != NULL) { prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); if (m->m_pkthdr.csum_flags & CSUM_TSO) { bus_dmamap_sync(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[cidx], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[cidx]); } else { bus_dmamap_sync(txq->ift_buf_tag, txq->ift_sds.ifsd_map[cidx], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[cidx]); } /* XXX we don't support any drivers that batch packets yet */ MPASS(m->m_nextpkt == NULL); m_freem(m); ifsd_m[cidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif DBG_COUNTER_INC(tx_frees); } if (__predict_false(++cidx == qsize)) { cidx = 0; gen = 0; } } txq->ift_cidx = cidx; txq->ift_gen = gen; } static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) { int reclaim; if_ctx_t ctx = txq->ift_ctx; KASSERT(thresh >= 0, ("invalid threshold to reclaim")); MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); /* * Need a rate-limiting check so that this isn't called every time */ iflib_tx_credits_update(ctx, txq); reclaim = DESC_RECLAIMABLE(txq); if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { #ifdef INVARIANTS if (iflib_verbose_debug) { printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, reclaim, thresh); } #endif return (0); } iflib_tx_desc_free(txq, reclaim); txq->ift_cleaned += reclaim; txq->ift_in_use -= reclaim; return (reclaim); } static struct mbuf ** _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) { int next, size; struct mbuf **items; size = r->size; next = (cidx + CACHE_PTR_INCREMENT) & (size-1); items = __DEVOLATILE(struct mbuf **, &r->items[0]); prefetch(items[(cidx + offset) & (size-1)]); if (remaining > 1) { prefetch2cachelines(&items[next]); prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]); prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]); prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]); } return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); } static void iflib_txq_check_drain(iflib_txq_t txq, int budget) { ifmp_ring_check_drainage(txq->ift_br, budget); } static uint32_t iflib_txq_can_drain(struct ifmp_ring *r) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; if (TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) return (1); bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); return (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)); } static uint32_t iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; if_t ifp = ctx->ifc_ifp; struct mbuf *m, **mp; int avail, bytes_sent, consumed, count, err, i, in_use_prev; int mcast_sent, pkt_sent, reclaimed, 
txq_avail; bool do_prefetch, rang, ring; if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(txq_drain_notready); return (0); } reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); rang = iflib_txd_db_check(ctx, txq, reclaimed, txq->ift_in_use); avail = IDXDIFF(pidx, cidx, r->size); if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { if (__predict_true(r->items[(cidx + i) & (r->size-1)] != (void *)txq)) m_free(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } return (avail); } if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); DBG_COUNTER_INC(txq_drain_oactive); return (0); } if (reclaimed) txq->ift_qstatus = IFLIB_QUEUE_IDLE; consumed = mcast_sent = bytes_sent = pkt_sent = 0; count = MIN(avail, TX_BATCH_SIZE); #ifdef INVARIANTS if (iflib_verbose_debug) printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__, avail, ctx->ifc_flags, TXQ_AVAIL(txq)); #endif do_prefetch = (ctx->ifc_flags & IFC_PREFETCH); txq_avail = TXQ_AVAIL(txq); err = 0; for (i = 0; i < count && txq_avail > MAX_TX_DESC(ctx) + 2; i++) { int rem = do_prefetch ? count - i : 0; mp = _ring_peek_one(r, cidx, i, rem); MPASS(mp != NULL && *mp != NULL); if (__predict_false(*mp == (struct mbuf *)txq)) { consumed++; continue; } in_use_prev = txq->ift_in_use; err = iflib_encap(txq, mp); if (__predict_false(err)) { /* no room - bail out */ if (err == ENOBUFS) break; consumed++; /* we can't send this packet - skip it */ continue; } consumed++; pkt_sent++; m = *mp; DBG_COUNTER_INC(tx_sent); bytes_sent += m->m_pkthdr.len; mcast_sent += !!(m->m_flags & M_MCAST); txq_avail = TXQ_AVAIL(txq); txq->ift_db_pending += (txq->ift_in_use - in_use_prev); ETHER_BPF_MTAP(ifp, m); if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) break; rang = iflib_txd_db_check(ctx, txq, false, in_use_prev); } /* deliberate use of bitwise or to avoid gratuitous short-circuit */ ring = rang ? 
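	    /* if the doorbell was already rung for the final packet above,
	     * don't force a second ring; otherwise force one when tx latency
	     * is being minimized, the last encap attempt failed, or the ring
	     * is nearly out of descriptors */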
false : (iflib_min_tx_latency | err) || (TXQ_AVAIL(txq) < MAX_TX_DESC(ctx)); iflib_txd_db_check(ctx, txq, ring, txq->ift_in_use); if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); if (mcast_sent) if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); #ifdef INVARIANTS if (iflib_verbose_debug) printf("consumed=%d\n", consumed); #endif return (consumed); } static uint32_t iflib_txq_drain_always(struct ifmp_ring *r) { return (1); } static uint32_t iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { int i, avail; struct mbuf **mp; iflib_txq_t txq; txq = r->cookie; txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); avail = IDXDIFF(pidx, cidx, r->size); for (i = 0; i < avail; i++) { mp = _ring_peek_one(r, cidx, i, avail - i); if (__predict_false(*mp == (struct mbuf *)txq)) continue; m_freem(*mp); DBG_COUNTER_INC(tx_frees); } MPASS(ifmp_ring_is_stalled(r) == 0); return (avail); } static void iflib_ifmp_purge(iflib_txq_t txq) { struct ifmp_ring *r; r = txq->ift_br; r->drain = iflib_txq_drain_free; r->can_drain = iflib_txq_drain_always; ifmp_ring_check_drainage(r, r->size); r->drain = iflib_txq_drain; r->can_drain = iflib_txq_can_drain; } static void _task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; #if defined(ALTQ) || defined(DEV_NETMAP) if_t ifp = ctx->ifc_ifp; #endif int abdicate = ctx->ifc_sysctl_tx_abdicate; #ifdef IFLIB_DIAGNOSTICS txq->ift_cpu_exec_count[curcpu]++; #endif if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; #ifdef DEV_NETMAP if (if_getcapenable(ifp) & IFCAP_NETMAP) { bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)) netmap_tx_irq(ifp, txq->ift_id); if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); return; } #endif #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) iflib_altq_if_start(ifp); #endif if (txq->ift_db_pending) ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE, abdicate); else if (!abdicate) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); /* * When abdicating, we always need to check drainage, not just when we don't enqueue */ if (abdicate) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); } static void _task_fn_rx(void *context) { iflib_rxq_t rxq = context; if_ctx_t ctx = rxq->ifr_ctx; uint8_t more; uint16_t budget; #ifdef IFLIB_DIAGNOSTICS rxq->ifr_cpu_exec_count[curcpu]++; #endif DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; #ifdef DEV_NETMAP if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) { u_int work = 0; if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work)) { more = 0; goto skip_rxeof; } } #endif budget = ctx->ifc_sysctl_rx_budget; if (budget == 0) budget = 16; /* XXX */ more = iflib_rxeof(rxq, budget); #ifdef DEV_NETMAP skip_rxeof: #endif if ((more & IFLIB_RXEOF_MORE) == 0) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); DBG_COUNTER_INC(rx_intr_enables); } if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; if (more & IFLIB_RXEOF_MORE) GROUPTASK_ENQUEUE(&rxq->ifr_task); else if (more & IFLIB_RXEOF_EMPTY) callout_reset_curcpu(&rxq->ifr_watchdog, 1, 
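	    /* IFLIB_RXEOF_EMPTY: arm the watchdog so _task_fn_rx_watchdog
	     * re-enqueues the RX group task one tick from now */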
&_task_fn_rx_watchdog, rxq); } static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; int i; bool oactive, running, do_reset, do_watchdog, in_detach; uint32_t reset_on = hz / 2; STATE_LOCK(ctx); running = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); oactive = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE); do_reset = (ctx->ifc_flags & IFC_DO_RESET); do_watchdog = (ctx->ifc_flags & IFC_DO_WATCHDOG); in_detach = (ctx->ifc_flags & IFC_IN_DETACH); ctx->ifc_flags &= ~(IFC_DO_RESET|IFC_DO_WATCHDOG); STATE_UNLOCK(ctx); if ((!running && !oactive) && !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN)) return; if (in_detach) return; CTX_LOCK(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } if (do_watchdog) { ctx->ifc_watchdog_events++; IFDI_WATCHDOG_RESET(ctx); } IFDI_UPDATE_ADMIN_STATUS(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { #ifdef DEV_NETMAP reset_on = hz / 2; if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) iflib_netmap_timer_adjust(ctx, txq, &reset_on); #endif callout_reset_on(&txq->ift_timer, reset_on, iflib_timer, txq, txq->ift_timer.c_cpu); } IFDI_LINK_INTR_ENABLE(ctx); if (do_reset) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); if (LINK_ACTIVE(ctx) == 0) return; for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } static void _task_fn_iov(void *context) { if_ctx_t ctx = context; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) && !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN)) return; CTX_LOCK(ctx); IFDI_VFLR_HANDLE(ctx); CTX_UNLOCK(ctx); } static int iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { int err; if_int_delay_info_t info; if_ctx_t ctx; info = (if_int_delay_info_t)arg1; ctx = info->iidi_ctx; info->iidi_req = req; info->iidi_oidp = oidp; CTX_LOCK(ctx); err = IFDI_SYSCTL_INT_DELAY(ctx, info); CTX_UNLOCK(ctx); return (err); } /********************************************************************* * * IFNET FUNCTIONS * **********************************************************************/ static void iflib_if_init_locked(if_ctx_t ctx) { iflib_stop(ctx); iflib_init_locked(ctx); } static void iflib_if_init(void *arg) { if_ctx_t ctx = arg; CTX_LOCK(ctx); iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static int iflib_if_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq; int err, qidx; int abdicate = ctx->ifc_sysctl_tx_abdicate; if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(tx_frees); m_freem(m); return (ENETDOWN); } MPASS(m->m_nextpkt == NULL); /* ALTQ-enabled interfaces always use queue 0. */ qidx = 0; if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m) && !ALTQ_IS_ENABLED(&ifp->if_snd)) qidx = QIDX(ctx, m); /* * XXX calculate buf_ring based on flowid (divvy up bits?) 
 */
	txq = &ctx->ifc_txqs[qidx];

#ifdef DRIVER_BACKPRESSURE
	if (txq->ift_closed) {
		while (m != NULL) {
			next = m->m_nextpkt;
			m->m_nextpkt = NULL;
			m_freem(m);
			DBG_COUNTER_INC(tx_frees);
			m = next;
		}
		return (ENOBUFS);
	}
#endif
#ifdef notyet
	qidx = count = 0;
	mp = marr;
	next = m;
	do {
		count++;
		next = next->m_nextpkt;
	} while (next != NULL);

	if (count > nitems(marr))
		if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) {
			/* XXX check nextpkt */
			m_freem(m);
			/* XXX simplify for now */
			DBG_COUNTER_INC(tx_frees);
			return (ENOBUFS);
		}
	for (next = m, i = 0; next != NULL; i++) {
		mp[i] = next;
		next = next->m_nextpkt;
		mp[i]->m_nextpkt = NULL;
	}
#endif
	DBG_COUNTER_INC(tx_seen);
	err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE, abdicate);

	if (abdicate)
		GROUPTASK_ENQUEUE(&txq->ift_task);
	if (err) {
		if (!abdicate)
			GROUPTASK_ENQUEUE(&txq->ift_task);
		/* support forthcoming later */
#ifdef DRIVER_BACKPRESSURE
		txq->ift_closed = TRUE;
#endif
		ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
		m_freem(m);
		DBG_COUNTER_INC(tx_frees);
	}

	return (err);
}

#ifdef ALTQ
/*
 * The overall approach to integrating iflib with ALTQ is to continue to use
 * the iflib mp_ring machinery between the ALTQ queue(s) and the hardware
 * ring.  Technically, when using ALTQ, queueing to an intermediate mp_ring
 * is redundant/unnecessary, but doing so minimizes the amount of
 * ALTQ-specific code required in iflib.  It is assumed that the overhead of
 * redundantly queueing to an intermediate mp_ring is swamped by the
 * performance limitations inherent in using ALTQ.
 *
 * When ALTQ support is compiled in, all iflib drivers will use a transmit
 * routine, iflib_altq_if_transmit(), that checks if ALTQ is enabled for the
 * given interface.  If ALTQ is enabled for an interface, then all
 * transmitted packets for that interface will be submitted to the ALTQ
 * subsystem via IFQ_ENQUEUE().  We don't use the legacy if_transmit()
 * implementation because it uses IFQ_HANDOFF(), which will duplicatively
 * update stats that the iflib machinery handles, and which is sensitive to
 * the disused IFF_DRV_OACTIVE flag.  Additionally, iflib_altq_if_start()
 * will be installed as the start routine for use by ALTQ facilities that
 * need to trigger queue drains on a scheduled basis.
 *
 */
static void
iflib_altq_if_start(if_t ifp)
{
	struct ifaltq *ifq = &ifp->if_snd;
	struct mbuf *m;

	IFQ_LOCK(ifq);
	IFQ_DEQUEUE_NOLOCK(ifq, m);
	while (m != NULL) {
		iflib_if_transmit(ifp, m);
		IFQ_DEQUEUE_NOLOCK(ifq, m);
	}
	IFQ_UNLOCK(ifq);
}

static int
iflib_altq_if_transmit(if_t ifp, struct mbuf *m)
{
	int err;

	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
		IFQ_ENQUEUE(&ifp->if_snd, m, err);
		if (err == 0)
			iflib_altq_if_start(ifp);
	} else
		err = iflib_if_transmit(ifp, m);

	return (err);
}
#endif /* ALTQ */

static void
iflib_if_qflush(if_t ifp)
{
	if_ctx_t ctx = if_getsoftc(ifp);
	iflib_txq_t txq = ctx->ifc_txqs;
	int i;

	STATE_LOCK(ctx);
	ctx->ifc_flags |= IFC_QFLUSH;
	STATE_UNLOCK(ctx);
	for (i = 0; i < NTXQSETS(ctx); i++, txq++)
		while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br)))
			iflib_txq_check_drain(txq, 0);
	STATE_LOCK(ctx);
	ctx->ifc_flags &= ~IFC_QFLUSH;
	STATE_UNLOCK(ctx);

	/*
	 * When ALTQ is enabled, this will also take care of purging the
	 * ALTQ queue(s).
*/ if_qflush(ifp); } #define IFCAP_FLAGS (IFCAP_HWCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \ IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | \ IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM | IFCAP_NOMAP) static int iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) { if_ctx_t ctx = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = false; int err = 0, reinit = 0, bits; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = true; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = true; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { if_setflagbits(ifp, IFF_UP,0); if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) reinit = 1; #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else err = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: CTX_LOCK(ctx); if (ifr->ifr_mtu == if_getmtu(ifp)) { CTX_UNLOCK(ctx); break; } bits = if_getdrvflags(ifp); /* stop the driver and free any clusters before proceeding */ iflib_stop(ctx); if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { STATE_LOCK(ctx); if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) ctx->ifc_flags |= IFC_MULTISEG; else ctx->ifc_flags &= ~IFC_MULTISEG; STATE_UNLOCK(ctx); err = if_setmtu(ifp, ifr->ifr_mtu); } iflib_init_locked(ctx); STATE_LOCK(ctx); if_setdrvflags(ifp, bits); STATE_UNLOCK(ctx); CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: CTX_LOCK(ctx); if (if_getflags(ifp) & IFF_UP) { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); } } else reinit = 1; } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { iflib_stop(ctx); } ctx->ifc_if_flags = if_getflags(ifp); CTX_UNLOCK(ctx); break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); IFDI_MULTI_SET(ctx); IFDI_INTR_ENABLE(ctx); CTX_UNLOCK(ctx); } break; case SIOCSIFMEDIA: CTX_LOCK(ctx); IFDI_MEDIA_SET(ctx); CTX_UNLOCK(ctx); /* FALLTHROUGH */ case SIOCGIFMEDIA: case SIOCGIFXMEDIA: err = ifmedia_ioctl(ifp, ifr, ctx->ifc_mediap, command); break; case SIOCGI2C: { struct ifi2creq i2c; err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (err != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { err = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { err = EINVAL; break; } if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) err = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } case SIOCSIFCAP: { int mask, setmask, oldmask; oldmask = if_getcapenable(ifp); mask = ifr->ifr_reqcap ^ oldmask; mask &= ctx->ifc_softc_ctx.isc_capabilities | IFCAP_NOMAP; setmask = 0; #ifdef TCP_OFFLOAD setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); #endif setmask |= (mask & IFCAP_FLAGS); setmask |= (mask & IFCAP_WOL); /* * If any RX csum has changed, change all the ones that * are supported by the driver. 
*/ if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { setmask |= ctx->ifc_softc_ctx.isc_capabilities & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); } /* * want to ensure that traffic has stopped before we change any of the flags */ if (setmask) { CTX_LOCK(ctx); bits = if_getdrvflags(ifp); if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL) iflib_stop(ctx); STATE_LOCK(ctx); if_togglecapenable(ifp, setmask); STATE_UNLOCK(ctx); if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL) iflib_init_locked(ctx); STATE_LOCK(ctx); if_setdrvflags(ifp, bits); STATE_UNLOCK(ctx); CTX_UNLOCK(ctx); } if_vlancap(ifp); break; } case SIOCGPRIVATE_0: case SIOCSDRVSPEC: case SIOCGDRVSPEC: CTX_LOCK(ctx); err = IFDI_PRIV_IOCTL(ctx, command, data); CTX_UNLOCK(ctx); break; default: err = ether_ioctl(ifp, command, data); break; } if (reinit) iflib_if_init(ctx); return (err); } static uint64_t iflib_if_get_counter(if_t ifp, ift_counter cnt) { if_ctx_t ctx = if_getsoftc(ifp); return (IFDI_GET_COUNTER(ctx, cnt)); } /********************************************************************* * * OTHER FUNCTIONS EXPORTED TO THE STACK * **********************************************************************/ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; if (iflib_in_detach(ctx)) return; CTX_LOCK(ctx); IFDI_VLAN_REGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_UNREGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_led_func(void *arg, int onoff) { if_ctx_t ctx = arg; CTX_LOCK(ctx); IFDI_LED_FUNC(ctx, onoff); CTX_UNLOCK(ctx); } /********************************************************************* * * BUS FUNCTION DEFINITIONS * **********************************************************************/ int iflib_device_probe(device_t dev) { const pci_vendor_info_t *ent; if_shared_ctx_t sctx; uint16_t pci_device_id, pci_rev_id, pci_subdevice_id, pci_subvendor_id; uint16_t pci_vendor_id; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); pci_rev_id = pci_get_revid(dev); if (sctx->isc_parse_devinfo != NULL) sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); ent = sctx->isc_vendor_info; while (ent->pvi_vendor_id != 0) { if (pci_vendor_id != ent->pvi_vendor_id) { ent++; continue; } if ((pci_device_id == ent->pvi_device_id) && ((pci_subvendor_id == ent->pvi_subvendor_id) || (ent->pvi_subvendor_id == 0)) && ((pci_subdevice_id == ent->pvi_subdevice_id) || (ent->pvi_subdevice_id == 0)) && ((pci_rev_id == ent->pvi_rev_id) || (ent->pvi_rev_id == 0))) { device_set_desc_copy(dev, ent->pvi_name); /* this needs to be changed to zero if the bus probing code * ever stops re-probing on best match because the sctx * may have its values over written by register calls * in subsequent probes */ return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } int iflib_device_probe_vendor(device_t 
dev) { int probe; probe = iflib_device_probe(dev); if (probe == BUS_PROBE_DEFAULT) return (BUS_PROBE_VENDOR); else return (probe); } static void iflib_reset_qvalues(if_ctx_t ctx) { if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; device_t dev = ctx->ifc_dev; int i; if (ctx->ifc_sysctl_ntxqs != 0) scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; if (ctx->ifc_sysctl_nrxqs != 0) scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; for (i = 0; i < sctx->isc_ntxqs; i++) { if (ctx->ifc_sysctl_ntxds[i] != 0) scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; else scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (ctx->ifc_sysctl_nrxds[i] != 0) scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; else scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; } if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; } if (!powerof2(scctx->isc_nrxd[i])) { device_printf(dev, "nrxd%d: %d is not a power of 2 - using default value of %d\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_default[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; } if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } if (!powerof2(scctx->isc_ntxd[i])) { device_printf(dev, "ntxd%d: %d is not a power of 2 - using default value of %d\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_default[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; } } } static void iflib_add_pfil(if_ctx_t ctx) { struct pfil_head *pfil; struct pfil_head_args pa; iflib_rxq_t rxq; int i; pa.pa_version = PFIL_VERSION; pa.pa_flags = PFIL_IN; pa.pa_type = PFIL_TYPE_ETHERNET; pa.pa_headname = ctx->ifc_ifp->if_xname; pfil = pfil_head_register(&pa); for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { rxq->pfil = pfil; } } static void iflib_rem_pfil(if_ctx_t ctx) { struct pfil_head *pfil; iflib_rxq_t rxq; int i; rxq = ctx->ifc_rxqs; pfil = rxq->pfil; for (i = 0; i < NRXQSETS(ctx); i++, rxq++) { rxq->pfil = NULL; } pfil_head_unregister(pfil); } static uint16_t get_ctx_core_offset(if_ctx_t ctx) { if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; struct cpu_offset *op; uint16_t qc; uint16_t ret = ctx->ifc_sysctl_core_offset; if (ret != CORE_OFFSET_UNSPECIFIED) return (ret); if (ctx->ifc_sysctl_separate_txrx) qc = scctx->isc_ntxqsets + scctx->isc_nrxqsets; else qc = max(scctx->isc_ntxqsets, scctx->isc_nrxqsets); mtx_lock(&cpu_offset_mtx); SLIST_FOREACH(op, &cpu_offsets, entries) { if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { ret = op->offset; op->offset += qc; MPASS(op->refcount < UINT_MAX); op->refcount++; break; } } if (ret == CORE_OFFSET_UNSPECIFIED) { ret = 0; op = malloc(sizeof(struct cpu_offset), M_IFLIB, M_NOWAIT | M_ZERO); if (op == NULL) { device_printf(ctx->ifc_dev, 
"allocation for cpu offset failed.\n"); } else { op->offset = qc; op->refcount = 1; CPU_COPY(&ctx->ifc_cpus, &op->set); SLIST_INSERT_HEAD(&cpu_offsets, op, entries); } } mtx_unlock(&cpu_offset_mtx); return (ret); } static void unref_ctx_core_offset(if_ctx_t ctx) { struct cpu_offset *op, *top; mtx_lock(&cpu_offset_mtx); SLIST_FOREACH_SAFE(op, &cpu_offsets, entries, top) { if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { MPASS(op->refcount > 0); op->refcount--; if (op->refcount == 0) { SLIST_REMOVE(&cpu_offsets, op, cpu_offset, entries); free(op, M_IFLIB); } break; } } mtx_unlock(&cpu_offset_mtx); } int iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) { if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; kobjop_desc_t kobj_desc; kobj_method_t *kobj_method; int err, msix, rid; uint16_t main_rxq, main_txq; ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); if (sc == NULL) { sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); device_set_softc(dev, ctx); ctx->ifc_flags |= IFC_SC_ALLOCATED; } ctx->ifc_sctx = sctx; ctx->ifc_dev = dev; ctx->ifc_softc = sc; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "iflib_register failed %d\n", err); goto fail_ctx_free; } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; iflib_reset_qvalues(ctx); CTX_LOCK(ctx); if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); goto fail_unlock; } _iflib_pre_assert(scctx); ctx->ifc_txrx = *scctx->isc_txrx; if (sctx->isc_flags & IFLIB_DRIVER_MEDIA) ctx->ifc_mediap = scctx->isc_media; #ifdef INVARIANTS if (scctx->isc_capabilities & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif if_setcapabilities(ifp, scctx->isc_capabilities | IFCAP_HWSTATS | IFCAP_NOMAP); if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_NOMAP); if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; /* XXX change for per-queue sizes */ device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n", scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_tso_segments_max = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ if (if_getcapabilities(ifp) & IFCAP_TSO) { /* * The stack can't handle a TSO size larger than IP_MAXPACKET, * but some MACs do. */ if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max, IP_MAXPACKET)); /* * Take maximum number of m_pullup(9)'s in iflib_parse_header() * into account. In the worst case, each of these calls will * add another mbuf and, thus, the requirement for another DMA * segment. So for best performance, it doesn't make sense to * advertize a maximum of TSO segments that typically will * require defragmentation in iflib_encap(). 
*/ if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3); if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max); } if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, NULL, NULL, "admin"); /* Set up cpu set. If it fails, use the set of all CPUs. */ if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) { device_printf(dev, "Unable to fetch CPU list\n"); CPU_COPY(&all_cpus, &ctx->ifc_cpus); } MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0); /* ** Now set up MSI or MSI-X, should return us the number of supported ** vectors (will be 1 for a legacy interrupt and MSI). */ if (sctx->isc_flags & IFLIB_SKIP_MSIX) { msix = scctx->isc_vectors; } else if (scctx->isc_msix_bar != 0) /* * The simple fact that isc_msix_bar is not 0 does not mean we * we have a good value there that is known to work. */ msix = iflib_msix_init(ctx); else { scctx->isc_vectors = 1; scctx->isc_ntxqsets = 1; scctx->isc_nrxqsets = 1; scctx->isc_intr = IFLIB_INTR_LEGACY; msix = 0; } /* Get memory for the station queues */ if ((err = iflib_queues_alloc(ctx))) { device_printf(dev, "Unable to allocate queue memory\n"); goto fail_intr_free; } if ((err = iflib_qset_structures_setup(ctx))) goto fail_queues; /* * Now that we know how many queues there are, get the core offset. */ ctx->ifc_sysctl_core_offset = get_ctx_core_offset(ctx); /* * Group taskqueues aren't properly set up until SMP is started, * so we disable interrupts until we can handle them post * SI_SUB_SMP. * * XXX: disabling interrupts doesn't actually work, at least for * the non-MSI case. When they occur before SI_SUB_SMP completes, * we do null handling and depend on this not causing too large an * interrupt storm. */ IFDI_INTR_DISABLE(ctx); if (msix > 1) { /* * When using MSI-X, ensure that ifdi_{r,t}x_queue_intr_enable * aren't the default NULL implementation. */ kobj_desc = &ifdi_rx_queue_intr_enable_desc; kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL, kobj_desc); if (kobj_method == &kobj_desc->deflt) { device_printf(dev, "MSI-X requires ifdi_rx_queue_intr_enable method"); err = EOPNOTSUPP; goto fail_queues; } kobj_desc = &ifdi_tx_queue_intr_enable_desc; kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL, kobj_desc); if (kobj_method == &kobj_desc->deflt) { device_printf(dev, "MSI-X requires ifdi_tx_queue_intr_enable method"); err = EOPNOTSUPP; goto fail_queues; } /* * Assign the MSI-X vectors. * Note that the default NULL ifdi_msix_intr_assign method will * fail here, too. */ err = IFDI_MSIX_INTR_ASSIGN(ctx, msix); if (err != 0) { device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err); goto fail_queues; } } else if (scctx->isc_intr != IFLIB_INTR_MSIX) { rid = 0; if (scctx->isc_intr == IFLIB_INTR_MSI) { MPASS(msix == 1); rid = 1; } if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { device_printf(dev, "iflib_legacy_setup failed %d\n", err); goto fail_queues; } } else { device_printf(dev, "Cannot use iflib with only 1 MSI-X interrupt!\n"); err = ENODEV; goto fail_intr_free; } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } /* * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. 
* This must appear after the call to ether_ifattach() because * ether_ifattach() sets if_hdrlen to the default value. */ if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if ((err = iflib_netmap_attach(ctx))) { device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); goto fail_detach; } *ctxp = ctx; DEBUGNET_SET(ctx->ifc_ifp, iflib); if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); iflib_add_pfil(ctx); ctx->ifc_flags |= IFC_INIT_DONE; CTX_UNLOCK(ctx); return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_intr_free: iflib_free_intr_mem(ctx); fail_queues: iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); taskqgroup_detach(qgroup_if_config_tqg, &ctx->ifc_admin_task); IFDI_DETACH(ctx); fail_unlock: CTX_UNLOCK(ctx); iflib_deregister(ctx); fail_ctx_free: device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (err); } int iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, struct iflib_cloneattach_ctx *clctx) { int err; if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; int i; void *sc; uint16_t main_txq; uint16_t main_rxq; ctx = malloc(sizeof(*ctx), M_IFLIB, M_WAITOK|M_ZERO); sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); ctx->ifc_flags |= IFC_SC_ALLOCATED; if (sctx->isc_flags & (IFLIB_PSEUDO|IFLIB_VIRTUAL)) ctx->ifc_flags |= IFC_PSEUDO; ctx->ifc_sctx = sctx; ctx->ifc_softc = sc; ctx->ifc_dev = dev; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "%s: iflib_register failed %d\n", __func__, err); goto fail_ctx_free; } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; iflib_reset_qvalues(ctx); CTX_LOCK(ctx); if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); goto fail_unlock; } if (sctx->isc_flags & IFLIB_GEN_MAC) ether_gen_addr(ifp, &ctx->ifc_mac); if ((err = IFDI_CLONEATTACH(ctx, clctx->cc_ifc, clctx->cc_name, clctx->cc_params)) != 0) { device_printf(dev, "IFDI_CLONEATTACH failed %d\n", err); goto fail_ctx_free; } ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO); #ifdef INVARIANTS if (scctx->isc_capabilities & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif if_setcapabilities(ifp, scctx->isc_capabilities | IFCAP_HWSTATS | IFCAP_LINKSTATE); if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_LINKSTATE); ifp->if_flags |= IFF_NOGROUP; if (sctx->isc_flags & IFLIB_PSEUDO) { ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } *ctxp = ctx; /* * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. * This must appear after the call to ether_ifattach() because * ether_ifattach() sets if_hdrlen to the default value. 
*/ if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); ctx->ifc_flags |= IFC_INIT_DONE; return (0); } _iflib_pre_assert(scctx); ctx->ifc_txrx = *scctx->isc_txrx; if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; /* XXX change for per-queue sizes */ device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n", scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_tso_segments_max = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ if (if_getcapabilities(ifp) & IFCAP_TSO) { /* * The stack can't handle a TSO size larger than IP_MAXPACKET, * but some MACs do. */ if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max, IP_MAXPACKET)); /* * Take maximum number of m_pullup(9)'s in iflib_parse_header() * into account. In the worst case, each of these calls will * add another mbuf and, thus, the requirement for another DMA * segment. So for best performance, it doesn't make sense to * advertize a maximum of TSO segments that typically will * require defragmentation in iflib_encap(). */ if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3); if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max); } if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, NULL, NULL, "admin"); /* XXX --- can support > 1 -- but keep it simple for now */ scctx->isc_intr = IFLIB_INTR_LEGACY; /* Get memory for the station queues */ if ((err = iflib_queues_alloc(ctx))) { device_printf(dev, "Unable to allocate queue memory\n"); goto fail_iflib_detach; } if ((err = iflib_qset_structures_setup(ctx))) { device_printf(dev, "qset structure setup failed %d\n", err); goto fail_queues; } /* * XXX What if anything do we want to do about interrupts? */ ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } /* * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. * This must appear after the call to ether_ifattach() because * ether_ifattach() sets if_hdrlen to the default value. 
*/ if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); /* XXX handle more than one queue */ for (i = 0; i < scctx->isc_nrxqsets; i++) IFDI_RX_CLSET(ctx, 0, i, ctx->ifc_rxqs[i].ifr_fl[0].ifl_sds.ifsd_cl); *ctxp = ctx; if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); ctx->ifc_flags |= IFC_INIT_DONE; CTX_UNLOCK(ctx); return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_queues: iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); fail_iflib_detach: IFDI_DETACH(ctx); fail_unlock: CTX_UNLOCK(ctx); iflib_deregister(ctx); fail_ctx_free: free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (err); } int iflib_pseudo_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq; iflib_rxq_t rxq; int i, j; struct taskqgroup *tqg; iflib_fl_t fl; /* Unregister VLAN event handlers early */ iflib_unregister_vlan_handlers(ctx); ether_ifdetach(ifp); /* XXX drain any dependent tasks */ tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); if (txq->ift_task.gt_uniq != NULL) taskqgroup_detach(tqg, &txq->ift_task); } for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { callout_drain(&rxq->ifr_watchdog); if (rxq->ifr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &rxq->ifr_task); for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) free(fl->ifl_rx_bitmap, M_IFLIB); } tqg = qgroup_if_config_tqg; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); iflib_deregister(ctx); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (0); } int iflib_device_attach(device_t dev) { if_ctx_t ctx; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_enable_busmaster(dev); return (iflib_device_register(dev, NULL, sctx, &ctx)); } int iflib_device_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq; iflib_rxq_t rxq; device_t dev = ctx->ifc_dev; int i, j; struct taskqgroup *tqg; iflib_fl_t fl; /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev, "Vlan in use, detach first\n"); return (EBUSY); } #ifdef PCI_IOV if (!CTX_IS_VF(ctx) && pci_iov_detach(dev) != 0) { device_printf(dev, "SR-IOV in use; detach first.\n"); return (EBUSY); } #endif STATE_LOCK(ctx); ctx->ifc_flags |= IFC_IN_DETACH; STATE_UNLOCK(ctx); /* Unregister VLAN handlers before calling iflib_stop() */ iflib_unregister_vlan_handlers(ctx); iflib_netmap_detach(ifp); ether_ifdetach(ifp); CTX_LOCK(ctx); iflib_stop(ctx); CTX_UNLOCK(ctx); iflib_rem_pfil(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); if (txq->ift_task.gt_uniq != NULL) taskqgroup_detach(tqg, &txq->ift_task); } for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { if (rxq->ifr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &rxq->ifr_task); for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) free(fl->ifl_rx_bitmap, M_IFLIB); } tqg = qgroup_if_config_tqg; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) 
taskqgroup_detach(tqg, &ctx->ifc_vflr_task); CTX_LOCK(ctx); IFDI_DETACH(ctx); CTX_UNLOCK(ctx); /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ iflib_free_intr_mem(ctx); bus_generic_detach(dev); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); iflib_deregister(ctx); device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); unref_ctx_core_offset(ctx); free(ctx, M_IFLIB); return (0); } static void iflib_free_intr_mem(if_ctx_t ctx) { if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { iflib_irq_free(ctx, &ctx->ifc_legacy_irq); } if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(ctx->ifc_dev); } if (ctx->ifc_msix_mem != NULL) { bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, rman_get_rid(ctx->ifc_msix_mem), ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } } int iflib_device_detach(device_t dev) { if_ctx_t ctx = device_get_softc(dev); return (iflib_device_deregister(ctx)); } int iflib_device_suspend(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SUSPEND(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_shutdown(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SHUTDOWN(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_resume(device_t dev) { if_ctx_t ctx = device_get_softc(dev); iflib_txq_t txq = ctx->ifc_txqs; CTX_LOCK(ctx); IFDI_RESUME(ctx); iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); for (int i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); return (bus_generic_resume(dev)); } int iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_INIT(ctx, num_vfs, params); CTX_UNLOCK(ctx); return (error); } void iflib_device_iov_uninit(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_IOV_UNINIT(ctx); CTX_UNLOCK(ctx); } int iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_VF_ADD(ctx, vfnum, params); CTX_UNLOCK(ctx); return (error); } /********************************************************************* * * MODULE FUNCTION DEFINITIONS * **********************************************************************/ /* * - Start a fast taskqueue thread for each core * - Start a taskqueue for control operations */ static int iflib_module_init(void) { return (0); } static int iflib_module_event_handler(module_t mod, int what, void *arg) { int err; switch (what) { case MOD_LOAD: if ((err = iflib_module_init()) != 0) return (err); break; case MOD_UNLOAD: return (EBUSY); default: return (EOPNOTSUPP); } return (0); } /********************************************************************* * * PUBLIC FUNCTION DEFINITIONS * ordered as in iflib.h * **********************************************************************/ static void _iflib_assert(if_shared_ctx_t sctx) { int i; MPASS(sctx->isc_tx_maxsize); MPASS(sctx->isc_tx_maxsegsize); MPASS(sctx->isc_rx_maxsize); MPASS(sctx->isc_rx_nsegments); MPASS(sctx->isc_rx_maxsegsize); MPASS(sctx->isc_nrxqs >= 1 && sctx->isc_nrxqs <= 8); for (i = 0; i < sctx->isc_nrxqs; i++) { MPASS(sctx->isc_nrxd_min[i]); MPASS(powerof2(sctx->isc_nrxd_min[i])); MPASS(sctx->isc_nrxd_max[i]); MPASS(powerof2(sctx->isc_nrxd_max[i])); MPASS(sctx->isc_nrxd_default[i]); MPASS(powerof2(sctx->isc_nrxd_default[i])); } 
MPASS(sctx->isc_ntxqs >= 1 && sctx->isc_ntxqs <= 8); for (i = 0; i < sctx->isc_ntxqs; i++) { MPASS(sctx->isc_ntxd_min[i]); MPASS(powerof2(sctx->isc_ntxd_min[i])); MPASS(sctx->isc_ntxd_max[i]); MPASS(powerof2(sctx->isc_ntxd_max[i])); MPASS(sctx->isc_ntxd_default[i]); MPASS(powerof2(sctx->isc_ntxd_default[i])); } } static void _iflib_pre_assert(if_softc_ctx_t scctx) { MPASS(scctx->isc_txrx->ift_txd_encap); MPASS(scctx->isc_txrx->ift_txd_flush); MPASS(scctx->isc_txrx->ift_txd_credits_update); MPASS(scctx->isc_txrx->ift_rxd_available); MPASS(scctx->isc_txrx->ift_rxd_pkt_get); MPASS(scctx->isc_txrx->ift_rxd_refill); MPASS(scctx->isc_txrx->ift_rxd_flush); } static int iflib_register(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; driver_t *driver = sctx->isc_driver; device_t dev = ctx->ifc_dev; if_t ifp; _iflib_assert(sctx); CTX_LOCK_INIT(ctx); STATE_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); ifp = ctx->ifc_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (ENOMEM); } /* * Initialize our context's device specific methods */ kobj_init((kobj_t) ctx, (kobj_class_t) driver); kobj_class_compile((kobj_class_t) driver); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setsoftc(ifp, ctx); if_setdev(ifp, dev); if_setinitfn(ifp, iflib_if_init); if_setioctlfn(ifp, iflib_if_ioctl); #ifdef ALTQ if_setstartfn(ifp, iflib_altq_if_start); if_settransmitfn(ifp, iflib_altq_if_transmit); if_setsendqready(ifp); #else if_settransmitfn(ifp, iflib_if_transmit); #endif if_setqflushfn(ifp, iflib_if_qflush); - if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); + if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | + IFF_KNOWSEPOCH); ctx->ifc_vlan_attach_event = EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, EVENTHANDLER_PRI_FIRST); ctx->ifc_vlan_detach_event = EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx, EVENTHANDLER_PRI_FIRST); if ((sctx->isc_flags & IFLIB_DRIVER_MEDIA) == 0) { ctx->ifc_mediap = &ctx->ifc_media; ifmedia_init(ctx->ifc_mediap, IFM_IMASK, iflib_media_change, iflib_media_status); } return (0); } static void iflib_unregister_vlan_handlers(if_ctx_t ctx) { /* Unregister VLAN events */ if (ctx->ifc_vlan_attach_event != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); ctx->ifc_vlan_attach_event = NULL; } if (ctx->ifc_vlan_detach_event != NULL) { EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); ctx->ifc_vlan_detach_event = NULL; } } static void iflib_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; /* Remove all media */ ifmedia_removeall(&ctx->ifc_media); /* Ensure that VLAN event handlers are unregistered */ iflib_unregister_vlan_handlers(ctx); /* Release kobject reference */ kobj_delete((kobj_t) ctx, NULL); /* Free the ifnet structure */ if_free(ifp); STATE_LOCK_DESTROY(ctx); /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ CTX_LOCK_DESTROY(ctx); } static int iflib_queues_alloc(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int nrxqsets = scctx->isc_nrxqsets; int ntxqsets = scctx->isc_ntxqsets; iflib_txq_t txq; iflib_rxq_t rxq; iflib_fl_t fl = NULL; int i, j, cpu, err, txconf, rxconf; iflib_dma_info_t ifdip; uint32_t *rxqsizes = scctx->isc_rxqsizes; uint32_t *txqsizes = scctx->isc_txqsizes; uint8_t nrxqs = sctx->isc_nrxqs; uint8_t ntxqs = sctx->isc_ntxqs; int nfree_lists = sctx->isc_nfl ? 
sctx->isc_nfl : 1; caddr_t *vaddrs; uint64_t *paddrs; KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); /* Allocate the TX ring struct memory */ if (!(ctx->ifc_txqs = (iflib_txq_t) malloc(sizeof(struct iflib_txq) * ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); err = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(ctx->ifc_rxqs = (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); err = ENOMEM; goto rx_fail; } txq = ctx->ifc_txqs; rxq = ctx->ifc_rxqs; /* * XXX handle allocation failure */ for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate TX DMA info memory\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_ifdi = ifdip; for (j = 0; j < ntxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, 0)) { device_printf(dev, "Unable to allocate TX descriptors\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_txd_size[j] = scctx->isc_txd_size[j]; bzero((void *)ifdip->idi_vaddr, txqsizes[j]); } txq->ift_ctx = ctx; txq->ift_id = i; if (sctx->isc_flags & IFLIB_HAS_TXCQ) { txq->ift_br_offset = 1; } else { txq->ift_br_offset = 0; } /* XXX fix this */ txq->ift_timer.c_cpu = cpu; if (iflib_txsd_alloc(txq)) { device_printf(dev, "Critical Failure setting up TX buffers\n"); err = ENOMEM; goto err_tx_desc; } /* Initialize the TX lock */ snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:TX(%d):callout", device_get_nameunit(dev), txq->ift_id); mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, iflib_txq_can_drain, M_IFLIB, M_WAITOK); if (err) { /* XXX free any allocated rings */ device_printf(dev, "Unable to allocate buf_ring\n"); goto err_tx_desc; } } for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { /* Set up some basics */ callout_init(&rxq->ifr_watchdog, 1); if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate RX DMA info memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_ifdi = ifdip; /* XXX this needs to be changed if #rx queues != #tx queues */ rxq->ifr_ntxqirq = 1; rxq->ifr_txqid[0] = i; for (j = 0; j < nrxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, 0)) { device_printf(dev, "Unable to allocate RX descriptors\n"); err = ENOMEM; goto err_tx_desc; } bzero((void *)ifdip->idi_vaddr, rxqsizes[j]); } rxq->ifr_ctx = ctx; rxq->ifr_id = i; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { rxq->ifr_fl_offset = 1; } else { rxq->ifr_fl_offset = 0; } rxq->ifr_nfl = nfree_lists; if (!(fl = (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate free list memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_fl = fl; for (j = 0; j < nfree_lists; j++) { fl[j].ifl_rxq = rxq; fl[j].ifl_id = j; fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; fl[j].ifl_rxd_size = scctx->isc_rxd_size[j]; } /* Allocate receive buffers for the ring */ if (iflib_rxsd_alloc(rxq)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); err = ENOMEM; goto err_rx_desc; } 
for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK); } /* TXQs */ vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); for (i = 0; i < ntxqsets; i++) { iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi; for (j = 0; j < ntxqs; j++, di++) { vaddrs[i*ntxqs + j] = di->idi_vaddr; paddrs[i*ntxqs + j] = di->idi_paddr; } } if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) { device_printf(ctx->ifc_dev, "Unable to allocate device TX queue\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); /* RXQs */ vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); for (i = 0; i < nrxqsets; i++) { iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi; for (j = 0; j < nrxqs; j++, di++) { vaddrs[i*nrxqs + j] = di->idi_vaddr; paddrs[i*nrxqs + j] = di->idi_paddr; } } if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) { device_printf(ctx->ifc_dev, "Unable to allocate device RX queue\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); return (0); /* XXX handle allocation failure changes */ err_rx_desc: err_tx_desc: rx_fail: if (ctx->ifc_rxqs != NULL) free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; if (ctx->ifc_txqs != NULL) free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; fail: return (err); } static int iflib_tx_structures_setup(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i; for (i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_setup(txq); return (0); } static void iflib_tx_structures_free(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; if_shared_ctx_t sctx = ctx->ifc_sctx; int i, j; for (i = 0; i < NTXQSETS(ctx); i++, txq++) { for (j = 0; j < sctx->isc_ntxqs; j++) iflib_dma_free(&txq->ift_ifdi[j]); iflib_txq_destroy(txq); } free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; IFDI_QUEUES_FREE(ctx); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int iflib_rx_structures_setup(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; int q; #if defined(INET6) || defined(INET) int err, i; #endif for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { #if defined(INET6) || defined(INET) if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) { err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, TCP_LRO_ENTRIES, min(1024, ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset])); if (err != 0) { device_printf(ctx->ifc_dev, "LRO Initialization failed!\n"); goto fail; } } #endif IFDI_RXQ_SETUP(ctx, rxq->ifr_id); } return (0); #if defined(INET6) || defined(INET) fail: /* * Free LRO resources allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'q' failed, so its the terminus. */ rxq = ctx->ifc_rxqs; for (i = 0; i < q; ++i, rxq++) { if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) tcp_lro_free(&rxq->ifr_lc); } return (err); #endif } /********************************************************************* * * Free all receive rings. 
* **********************************************************************/ static void iflib_rx_structures_free(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; if_shared_ctx_t sctx = ctx->ifc_sctx; int i, j; for (i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { for (j = 0; j < sctx->isc_nrxqs; j++) iflib_dma_free(&rxq->ifr_ifdi[j]); iflib_rx_sds_free(rxq); #if defined(INET6) || defined(INET) if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) tcp_lro_free(&rxq->ifr_lc); #endif } free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; } static int iflib_qset_structures_setup(if_ctx_t ctx) { int err; /* * It is expected that the caller takes care of freeing queues if this * fails. */ if ((err = iflib_tx_structures_setup(ctx)) != 0) { device_printf(ctx->ifc_dev, "iflib_tx_structures_setup failed: %d\n", err); return (err); } if ((err = iflib_rx_structures_setup(ctx)) != 0) device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err); return (err); } int iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, const char *name) { return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } #ifdef SMP static int find_nth(if_ctx_t ctx, int qid) { cpuset_t cpus; int i, cpuid, eqid, count; CPU_COPY(&ctx->ifc_cpus, &cpus); count = CPU_COUNT(&cpus); eqid = qid % count; /* clear up to the qid'th bit */ for (i = 0; i < eqid; i++) { cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); CPU_CLR(cpuid-1, &cpus); } cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); return (cpuid-1); } #ifdef SCHED_ULE extern struct cpu_group *cpu_top; /* CPU topology */ static int find_child_with_core(int cpu, struct cpu_group *grp) { int i; if (grp->cg_children == 0) return -1; MPASS(grp->cg_child); for (i = 0; i < grp->cg_children; i++) { if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) return i; } return -1; } /* * Find the nth "close" core to the specified core * "close" is defined as the deepest level that shares * at least an L2 cache. With threads, this will be * threads on the same core. If the shared cache is L3 * or higher, simply returns the same core. */ static int find_close_core(int cpu, int core_offset) { struct cpu_group *grp; int i; int fcpu; cpuset_t cs; grp = cpu_top; if (grp == NULL) return cpu; i = 0; while ((i = find_child_with_core(cpu, grp)) != -1) { /* If the child only has one cpu, don't descend */ if (grp->cg_child[i].cg_count <= 1) break; grp = &grp->cg_child[i]; } /* If they don't share at least an L2 cache, use the same CPU */ if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) return cpu; /* Now pick one */ CPU_COPY(&grp->cg_mask, &cs); /* Add the selected CPU offset to core offset. 
*/ for (i = 0; (fcpu = CPU_FFS(&cs)) != 0; i++) { if (fcpu - 1 == cpu) break; CPU_CLR(fcpu - 1, &cs); } MPASS(fcpu); core_offset += i; CPU_COPY(&grp->cg_mask, &cs); for (i = core_offset % grp->cg_count; i > 0; i--) { MPASS(CPU_FFS(&cs)); CPU_CLR(CPU_FFS(&cs) - 1, &cs); } MPASS(CPU_FFS(&cs)); return CPU_FFS(&cs) - 1; } #else static int find_close_core(int cpu, int core_offset __unused) { return cpu; } #endif static int get_core_offset(if_ctx_t ctx, iflib_intr_type_t type, int qid) { switch (type) { case IFLIB_INTR_TX: /* TX queues get cores which share at least an L2 cache with the corresponding RX queue */ /* XXX handle multiple RX threads per core and more than two core per L2 group */ return qid / CPU_COUNT(&ctx->ifc_cpus) + 1; case IFLIB_INTR_RX: case IFLIB_INTR_RXTX: /* RX queues get the specified core */ return qid / CPU_COUNT(&ctx->ifc_cpus); default: return -1; } } #else #define get_core_offset(ctx, type, qid) CPU_FIRST() #define find_close_core(cpuid, tid) CPU_FIRST() #define find_nth(ctx, gid) CPU_FIRST() #endif /* Just to avoid copy/paste */ static inline int iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, int qid, struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, const char *name) { device_t dev; int co, cpuid, err, tid; dev = ctx->ifc_dev; co = ctx->ifc_sysctl_core_offset; if (ctx->ifc_sysctl_separate_txrx && type == IFLIB_INTR_TX) co += ctx->ifc_softc_ctx.isc_nrxqsets; cpuid = find_nth(ctx, qid + co); tid = get_core_offset(ctx, type, qid); if (tid < 0) { device_printf(dev, "get_core_offset failed\n"); return (EOPNOTSUPP); } cpuid = find_close_core(cpuid, tid); err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, dev, irq->ii_res, name); if (err) { device_printf(dev, "taskqgroup_attach_cpu failed %d\n", err); return (err); } #ifdef notyet if (cpuid > ctx->ifc_cpuid_highest) ctx->ifc_cpuid_highest = cpuid; #endif return (0); } int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, const char *name) { device_t dev; struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; gtask_fn_t *fn; int tqrid, err; driver_filter_t *intr_fast; void *q; info = &ctx->ifc_filter_info; tqrid = rid; switch (type) { /* XXX merge tx/rx for netmap? 
*/ case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); ctx->ifc_flags |= IFC_NETMAP_TX_IRQ; break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr; NET_GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RXTX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr_rxtx; NET_GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_ADMIN: q = ctx; tqrid = -1; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; tqg = qgroup_if_config_tqg; fn = _task_fn_admin; intr_fast = iflib_fast_intr_ctx; break; default: device_printf(ctx->ifc_dev, "%s: unknown net intr type\n", __func__); return (EINVAL); } info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = q; dev = ctx->ifc_dev; err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name); if (err != 0) { device_printf(dev, "_iflib_irq_alloc failed %d\n", err); return (err); } if (type == IFLIB_INTR_ADMIN) return (0); if (tqrid != -1) { err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, q, name); if (err) return (err); } else { taskqgroup_attach(tqg, gtask, q, dev, irq->ii_res, name); } return (0); } void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, const char *name) { struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; void *q; int err; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; NET_GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; tqg = qgroup_if_config_tqg; fn = _task_fn_iov; GROUPTASK_INIT(gtask, 0, fn, q); break; default: panic("unknown net intr type"); } if (irq != NULL) { err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, q, name); if (err) taskqgroup_attach(tqg, gtask, q, ctx->ifc_dev, irq->ii_res, name); } else { taskqgroup_attach(tqg, gtask, q, NULL, NULL, name); } } void iflib_irq_free(if_ctx_t ctx, if_irq_t irq) { if (irq->ii_tag) bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); if (irq->ii_res) bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, rman_get_rid(irq->ii_res), irq->ii_res); } static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, const char *name) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_irq_t irq = &ctx->ifc_legacy_irq; iflib_filter_info_t info; device_t dev; struct grouptask *gtask; struct resource *res; struct taskqgroup *tqg; void *q; int err, tqrid; bool rx_only; q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; tqg = qgroup_if_io_tqg; tqrid = *rid; rx_only = (ctx->ifc_sctx->isc_flags & IFLIB_SINGLE_IRQ_RX_ONLY) != 0; ctx->ifc_flags |= IFC_LEGACY; info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = rx_only ? 
ctx : q; dev = ctx->ifc_dev; /* We allocate a single interrupt resource */ err = _iflib_irq_alloc(ctx, irq, tqrid, rx_only ? iflib_fast_intr_ctx : iflib_fast_intr_rxtx, NULL, info, name); if (err != 0) return (err); NET_GROUPTASK_INIT(gtask, 0, _task_fn_rx, q); res = irq->ii_res; taskqgroup_attach(tqg, gtask, q, dev, res, name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, dev, res, "tx"); return (0); } void iflib_led_create(if_ctx_t ctx) { ctx->ifc_led_dev = led_create(iflib_led_func, ctx, device_get_nameunit(ctx->ifc_dev)); } void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid) { GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) { GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); } void iflib_admin_intr_deferred(if_ctx_t ctx) { #ifdef INVARIANTS struct grouptask *gtask; gtask = &ctx->ifc_admin_task; MPASS(gtask != NULL && gtask->gt_taskqueue != NULL); #endif GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } void iflib_iov_intr_deferred(if_ctx_t ctx) { GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); } void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, const char *name) { taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, NULL, NULL, name); } void iflib_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, const char *name) { GROUPTASK_INIT(gtask, 0, fn, ctx); taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, NULL, NULL, name); } void iflib_config_gtask_deinit(struct grouptask *gtask) { taskqgroup_detach(qgroup_if_config_tqg, gtask); } void iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq = ctx->ifc_txqs; if_setbaudrate(ifp, baudrate); if (baudrate >= IF_Gbps(10)) { STATE_LOCK(ctx); ctx->ifc_flags |= IFC_PREFETCH; STATE_UNLOCK(ctx); } /* If link down, disable watchdog */ if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) txq->ift_qstatus = IFLIB_QUEUE_IDLE; } ctx->ifc_link_state = link_state; if_link_state_change(ifp, link_state); } static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) { int credits; #ifdef INVARIANTS int credits_pre = txq->ift_cidx_processed; #endif bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0) return (0); txq->ift_processed += credits; txq->ift_cidx_processed += credits; MPASS(credits_pre + credits == txq->ift_cidx_processed); if (txq->ift_cidx_processed >= txq->ift_size) txq->ift_cidx_processed -= txq->ift_size; return (credits); } static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget) { iflib_fl_t fl; u_int i; for (i = 0, fl = &rxq->ifr_fl[0]; i < rxq->ifr_nfl; i++, fl++) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, budget)); } void iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, const char *description, if_int_delay_info_t info, int offset, int value) { info->iidi_ctx = ctx; info->iidi_offset = offset; info->iidi_value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, iflib_sysctl_int_delay, "I", description); } struct sx * 
iflib_ctx_lock_get(if_ctx_t ctx) { return (&ctx->ifc_ctx_sx); } static int iflib_msix_init(if_ctx_t ctx) { device_t dev = ctx->ifc_dev; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int admincnt, bar, err, iflib_num_rx_queues, iflib_num_tx_queues; int msgs, queuemsgs, queues, rx_queues, tx_queues, vectors; iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; if (bootverbose) device_printf(dev, "msix_init qsets capped at %d\n", imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); /* Override by tuneable */ if (scctx->isc_disable_msix) goto msi; /* First try MSI-X */ if ((msgs = pci_msix_count(dev)) == 0) { if (bootverbose) device_printf(dev, "MSI-X not supported or disabled\n"); goto msi; } bar = ctx->ifc_softc_ctx.isc_msix_bar; /* * bar == -1 => "trust me I know what I'm doing" * Some drivers are for hardware that is so shoddily * documented that no one knows which bars are which * so the developer has to map all bars. This hack * allows shoddy garbage to use MSI-X in this framework. */ if (bar != -1) { ctx->ifc_msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (ctx->ifc_msix_mem == NULL) { device_printf(dev, "Unable to map MSI-X table\n"); goto msi; } } admincnt = sctx->isc_admin_intrcnt; #if IFLIB_DEBUG /* use only 1 qset in debug mode */ queuemsgs = min(msgs - admincnt, 1); #else queuemsgs = msgs - admincnt; #endif #ifdef RSS queues = imin(queuemsgs, rss_getnumbuckets()); #else queues = queuemsgs; #endif queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); if (bootverbose) device_printf(dev, "intr CPUs: %d queue msgs: %d admincnt: %d\n", CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) rx_queues = iflib_num_rx_queues; else rx_queues = queues; if (rx_queues > scctx->isc_nrxqsets) rx_queues = scctx->isc_nrxqsets; /* * We want this to be all logical CPUs by default */ if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) tx_queues = iflib_num_tx_queues; else tx_queues = mp_ncpus; if (tx_queues > scctx->isc_ntxqsets) tx_queues = scctx->isc_ntxqsets; if (ctx->ifc_sysctl_qs_eq_override == 0) { #ifdef INVARIANTS if (tx_queues != rx_queues) device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", min(rx_queues, tx_queues), min(rx_queues, tx_queues)); #endif tx_queues = min(rx_queues, tx_queues); rx_queues = min(rx_queues, tx_queues); } vectors = rx_queues + admincnt; if (msgs < vectors) { device_printf(dev, "insufficient number of MSI-X vectors " "(supported %d, need %d)\n", msgs, vectors); goto msi; } device_printf(dev, "Using %d RX queues %d TX queues\n", rx_queues, tx_queues); msgs = vectors; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { if (vectors != msgs) { device_printf(dev, "Unable to allocate sufficient MSI-X vectors " "(got %d, need %d)\n", vectors, msgs); pci_release_msi(dev); if (bar != -1) { bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } goto msi; } device_printf(dev, "Using MSI-X interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; return (vectors); } else { device_printf(dev, "failed to allocate %d MSI-X vectors, err: %d\n", vectors, err); if (bar != -1) { 
bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } } msi: vectors = pci_msi_count(dev); scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets = 1; scctx->isc_vectors = vectors; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { device_printf(dev,"Using an MSI interrupt\n"); scctx->isc_intr = IFLIB_INTR_MSI; } else { scctx->isc_vectors = 1; device_printf(dev,"Using a Legacy interrupt\n"); scctx->isc_intr = IFLIB_INTR_LEGACY; } return (vectors); } static const char *ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; static int mp_ring_state_handler(SYSCTL_HANDLER_ARGS) { int rc; uint16_t *state = ((uint16_t *)oidp->oid_arg1); struct sbuf *sb; const char *ring_state = "UNKNOWN"; /* XXX needed ? */ rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); MPASS(sb != NULL); if (sb == NULL) return (ENOMEM); if (state[3] <= 3) ring_state = ring_states[state[3]]; sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", state[0], state[1], state[2], ring_state); rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } enum iflib_ndesc_handler { IFLIB_NTXD_HANDLER, IFLIB_NRXD_HANDLER, }; static int mp_ndesc_handler(SYSCTL_HANDLER_ARGS) { if_ctx_t ctx = (void *)arg1; enum iflib_ndesc_handler type = arg2; char buf[256] = {0}; qidx_t *ndesc; char *p, *next; int nqs, rc, i; nqs = 8; switch(type) { case IFLIB_NTXD_HANDLER: ndesc = ctx->ifc_sysctl_ntxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_ntxqs; break; case IFLIB_NRXD_HANDLER: ndesc = ctx->ifc_sysctl_nrxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_nrxqs; break; default: printf("%s: unhandled type\n", __func__); return (EINVAL); } if (nqs == 0) nqs = 8; for (i=0; i<8; i++) { if (i >= nqs) break; if (i) strcat(buf, ","); sprintf(strchr(buf, 0), "%d", ndesc[i]); } rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (rc || req->newptr == NULL) return rc; for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; i++, p = strsep(&next, " ,")) { ndesc[i] = strtoul(p, NULL, 10); } return(rc); } #define NAME_BUFLEN 32 static void iflib_add_device_sysctl_pre(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child, *oid_list; struct sysctl_ctx_list *ctx_list; struct sysctl_oid *node; ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", CTLFLAG_RD, NULL, "IFLIB fields"); oid_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, "driver version"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, "# of rxqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, "disable MSI-X (default 0)"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, "set the RX budget"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate", CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0, "cause TX to abdicate instead of 
running to completion"); ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED; SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset", CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0, "offset to start using cores at"); SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx", CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0, "use separate cores for TX and RX"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", "list of # of TX descriptors to use, 0 = use default #"); SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of RX descriptors to use, 0 = use default #"); } static void iflib_add_device_sysctl_post(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx_list; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j; char namebuf[NAME_BUFLEN]; char *qfmt; struct sysctl_oid *queue_node, *fl_node, *node; struct sysctl_oid_list *queue_list, *fl_list; ctx_list = device_get_sysctl_ctx(dev); node = ctx->ifc_sysctl_node; child = SYSCTL_CHILDREN(node); if (scctx->isc_ntxqsets > 100) qfmt = "txq%03d"; else if (scctx->isc_ntxqsets > 10) qfmt = "txq%02d"; else qfmt = "txq%d"; for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued", CTLFLAG_RD, &txq->ift_dequeued, "total mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued", CTLFLAG_RD, &txq->ift_enqueued, "total mbufs enqueued"); #endif SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag", CTLFLAG_RD, &txq->ift_mbuf_defrag, "# of times m_defrag was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups", CTLFLAG_RD, &txq->ift_pullups, "# of times m_pullup was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txq->ift_no_desc_avail, "# of times no descriptors were available"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", CTLFLAG_RD, &txq->ift_map_failed, "# of times DMA map failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", CTLFLAG_RD, &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup", CTLFLAG_RD, &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx", CTLFLAG_RD, &txq->ift_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx", CTLFLAG_RD, &txq->ift_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed", CTLFLAG_RD, &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use", CTLFLAG_RD, &txq->ift_in_use, 1, "descriptors in use"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed", CTLFLAG_RD, &txq->ift_processed, 
"descriptors procesed for clean"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned", CTLFLAG_RD, &txq->ift_cleaned, "total cleaned"); SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state", CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br->state), 0, mp_ring_state_handler, "A", "soft ring state"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->ift_br->enqueues, "# of enqueues to the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops", CTLFLAG_RD, &txq->ift_br->drops, "# of drops in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts", CTLFLAG_RD, &txq->ift_br->starts, "# of normal consumer starts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls", CTLFLAG_RD, &txq->ift_br->stalls, "# of consumer stalls in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts", CTLFLAG_RD, &txq->ift_br->restarts, "# of consumer restarts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications", CTLFLAG_RD, &txq->ift_br->abdications, "# of consumer abdications in the mp_ring for this queue"); } if (scctx->isc_nrxqsets > 100) qfmt = "rxq%03d"; else if (scctx->isc_nrxqsets > 10) qfmt = "rxq%02d"; else qfmt = "rxq%d"; for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); if (sctx->isc_flags & IFLIB_HAS_RXCQ) { SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", CTLFLAG_RD, &rxq->ifr_cq_cidx, 1, "Consumer Index"); } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j); fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "freelist Name"); fl_list = SYSCTL_CHILDREN(fl_node); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx", CTLFLAG_RD, &fl->ifl_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx", CTLFLAG_RD, &fl->ifl_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits", CTLFLAG_RD, &fl->ifl_credits, 1, "credits available"); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", CTLFLAG_RD, &fl->ifl_m_enqueued, "mbufs allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", CTLFLAG_RD, &fl->ifl_m_dequeued, "mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", CTLFLAG_RD, &fl->ifl_cl_enqueued, "clusters allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", CTLFLAG_RD, &fl->ifl_cl_dequeued, "clusters freed"); #endif } } } void iflib_request_reset(if_ctx_t ctx) { STATE_LOCK(ctx); ctx->ifc_flags |= IFC_DO_RESET; STATE_UNLOCK(ctx); } #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m) { struct mbuf *n; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; n = m; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return (NULL); } bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; } return (n); } #endif #ifdef DEBUGNET static void iflib_debugnet_init(if_t ifp, int 
    *nrxr, int *ncl, int *clsize)
{
        if_ctx_t ctx;

        ctx = if_getsoftc(ifp);
        CTX_LOCK(ctx);
        *nrxr = NRXQSETS(ctx);
        *ncl = ctx->ifc_rxqs[0].ifr_fl->ifl_size;
        *clsize = ctx->ifc_rxqs[0].ifr_fl->ifl_buf_size;
        CTX_UNLOCK(ctx);
}

static void
iflib_debugnet_event(if_t ifp, enum debugnet_ev event)
{
        if_ctx_t ctx;
        if_softc_ctx_t scctx;
        iflib_fl_t fl;
        iflib_rxq_t rxq;
        int i, j;

        ctx = if_getsoftc(ifp);
        scctx = &ctx->ifc_softc_ctx;

        switch (event) {
        case DEBUGNET_START:
                for (i = 0; i < scctx->isc_nrxqsets; i++) {
                        rxq = &ctx->ifc_rxqs[i];
                        for (j = 0; j < rxq->ifr_nfl; j++) {
                                fl = rxq->ifr_fl;
                                fl->ifl_zone = m_getzone(fl->ifl_buf_size);
                        }
                }
                iflib_no_tx_batch = 1;
                break;
        default:
                break;
        }
}

static int
iflib_debugnet_transmit(if_t ifp, struct mbuf *m)
{
        if_ctx_t ctx;
        iflib_txq_t txq;
        int error;

        ctx = if_getsoftc(ifp);
        if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return (EBUSY);

        txq = &ctx->ifc_txqs[0];
        error = iflib_encap(txq, &m);
        if (error == 0)
                (void)iflib_txd_db_check(ctx, txq, true, txq->ift_in_use);
        return (error);
}

static int
iflib_debugnet_poll(if_t ifp, int count)
{
        struct epoch_tracker et;
        if_ctx_t ctx;
        if_softc_ctx_t scctx;
        iflib_txq_t txq;
        int i;

        ctx = if_getsoftc(ifp);
        scctx = &ctx->ifc_softc_ctx;

        if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return (EBUSY);

        txq = &ctx->ifc_txqs[0];
        (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx));

        NET_EPOCH_ENTER(et);
        for (i = 0; i < scctx->isc_nrxqsets; i++)
                (void)iflib_rxeof(&ctx->ifc_rxqs[i], 16 /* XXX */);
        NET_EPOCH_EXIT(et);
        return (0);
}
#endif /* DEBUGNET */
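/*
 * Illustrative sketch only: the "did the driver implement the optional
 * per-queue interrupt-enable methods?" check that iflib_device_register()
 * performs above with kobj_lookup_method() on ifdi_rx_queue_intr_enable and
 * ifdi_tx_queue_intr_enable.  The kobj machinery is replaced here by a plain
 * struct of function pointers; every name below is a hypothetical stand-in,
 * not a kernel interface.
 */
#include <stdio.h>

struct model_driver_ops {
        int     (*rx_queue_intr_enable)(int qid);       /* optional method */
        int     (*tx_queue_intr_enable)(int qid);       /* optional method */
};

static int
model_msix_allowed(const struct model_driver_ops *ops)
{
        /*
         * MSI-X needs per-queue interrupt enable hooks; if the driver left
         * either one unimplemented, refuse MSI-X (the kernel code returns
         * EOPNOTSUPP and falls back through the error path).
         */
        return (ops->rx_queue_intr_enable != NULL &&
            ops->tx_queue_intr_enable != NULL);
}

static int
model_rx_enable(int qid)
{
        return (0);
}

int
main(void)
{
        struct model_driver_ops partial = { model_rx_enable, NULL };
        struct model_driver_ops full = { model_rx_enable, model_rx_enable };

        printf("partial driver: MSI-X %s\n",
            model_msix_allowed(&partial) ? "allowed" : "refused");
        printf("full driver:    MSI-X %s\n",
            model_msix_allowed(&full) ? "allowed" : "refused");
        return (0);
}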
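/*
 * Illustrative sketch only: the TSO/descriptor budgeting applied around
 * if_sethwtsomaxsegcount() above, restated as plain arithmetic.  tso_budget()
 * and its parameters are hypothetical; the "- 3" mirrors the in-code comment
 * about reserving segments for mbufs that iflib_parse_header() may prepend
 * via m_pullup(9), and 12 is only an assumed value for the single-packet
 * fraction.
 */
#include <stdio.h>

#define MODEL_IP_MAXPACKET      65535   /* stand-in for IP_MAXPACKET */

static void
tso_budget(int ntxd, int frac, int tso_segs_max, int tso_size_max)
{
        int hw_segcount, hw_tsomax, nsegments;

        /* Never let one packet consume more than 1/frac of the TX ring. */
        nsegments = tso_segs_max;
        if (nsegments > ntxd / frac)
                nsegments = (ntxd / frac > 1) ? ntxd / frac : 1;

        /* Leave headroom for up to three m_pullup()-added mbufs. */
        hw_segcount = nsegments - 3;

        /* The stack cannot exceed IP_MAXPACKET even if the MAC could. */
        hw_tsomax = (tso_size_max < MODEL_IP_MAXPACKET) ?
            tso_size_max : MODEL_IP_MAXPACKET;

        printf("tso segs=%d hw segcount=%d hw tsomax=%d\n",
            nsegments, hw_segcount, hw_tsomax);
}

int
main(void)
{
        /* 1024 TX descriptors, 1/12 ring cap, 64 advertised TSO segments. */
        tso_budget(1024, 12, 64, 262144);
        return (0);
}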
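/*
 * Illustrative sketch only: the "pick the qid'th usable CPU" walk done by
 * find_nth() above, modelled on a plain 64-bit mask instead of a cpuset_t.
 * find_nth_bit() is a hypothetical stand-in for the CPU_FFS()/CPU_CLR()
 * loop; queue ids wrap around the number of CPUs in the set, just as in
 * the kernel routine.
 */
#include <stdint.h>
#include <stdio.h>

static int
find_nth_bit(uint64_t mask, int qid)
{
        int count, eqid, i;

        count = __builtin_popcountll(mask);
        if (count == 0)
                return (-1);
        eqid = qid % count;             /* wrap queue ids around the set */

        /* Drop the first eqid set bits, then report the next one. */
        for (i = 0; i < eqid; i++)
                mask &= mask - 1;       /* clear the lowest set bit */
        return (__builtin_ffsll((long long)mask) - 1);
}

int
main(void)
{
        uint64_t cpus = 0xAA;           /* CPUs 1, 3, 5, 7 are usable */

        /* Queue 0 -> CPU 1, queue 1 -> CPU 3, queue 5 wraps back to CPU 3. */
        printf("%d %d %d\n", find_nth_bit(cpus, 0), find_nth_bit(cpus, 1),
            find_nth_bit(cpus, 5));
        return (0);
}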
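/*
 * Illustrative sketch only: a userland model of the vector-budgeting
 * arithmetic in iflib_msix_init() above.  msix_budget() and its parameters
 * are hypothetical; the real routine additionally honors RSS bucket counts,
 * sysctl overrides and the MSI-X table BAR, and falls back to MSI or a
 * legacy interrupt when the budget cannot be met.
 */
#include <stdio.h>

static int
imin2(int a, int b)
{
        return (a < b ? a : b);
}

/*
 * Model: 'msgs' MSI-X messages advertised by the device, 'admincnt' vectors
 * reserved for admin/link interrupts, 'ncpus' usable CPUs and a per-driver
 * cap on RX queue sets.  Returns the number of vectors that would be
 * requested, or 0 to indicate a fallback to MSI/legacy.
 */
static int
msix_budget(int msgs, int admincnt, int ncpus, int max_rxqsets)
{
        int queuemsgs, rx_queues, vectors;

        if (msgs <= admincnt)
                return (0);
        queuemsgs = msgs - admincnt;    /* messages left for queue vectors */
        rx_queues = imin2(ncpus, queuemsgs);
        rx_queues = imin2(rx_queues, max_rxqsets);
        vectors = rx_queues + admincnt; /* one per RX queue, plus admin */
        return (vectors <= msgs ? vectors : 0);
}

int
main(void)
{
        /* 16 messages, 1 admin vector, 8 CPUs, driver caps RX at 4 qsets. */
        printf("vectors requested: %d\n", msix_budget(16, 1, 8, 4));
        return (0);
}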
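/*
 * Illustrative sketch only: the strsep()/strtoul() parsing used by
 * mp_ndesc_handler() above for the override_ntxds/override_nrxds sysctl
 * strings.  parse_ndescs() is a hypothetical userland stand-in; the kernel
 * handler writes the values into ctx->ifc_sysctl_ntxds or
 * ctx->ifc_sysctl_nrxds instead of a caller-supplied array.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MODEL_MAX_NQS   8       /* iflib tracks at most 8 queues per qset */

static int
parse_ndescs(const char *input, unsigned long ndesc[MODEL_MAX_NQS])
{
        char buf[256], *next, *p;
        int i;

        snprintf(buf, sizeof(buf), "%s", input);
        next = buf;
        for (i = 0; i < MODEL_MAX_NQS &&
            (p = strsep(&next, " ,")) != NULL; i++)
                ndesc[i] = strtoul(p, NULL, 10);
        return (i);     /* number of per-queue values parsed */
}

int
main(void)
{
        unsigned long ndesc[MODEL_MAX_NQS] = { 0 };
        int i, n;

        /* e.g. a tunable such as dev.<driver>.<unit>.iflib.override_ntxds */
        n = parse_ndescs("1024,1024", ndesc);
        for (i = 0; i < n; i++)
                printf("queue %d: %lu descriptors\n", i, ndesc[i]);
        return (0);
}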