diff --git a/sys/dev/firewire/if_fwip.c b/sys/dev/firewire/if_fwip.c
index 5237c555d999..b698db6c9620 100644
--- a/sys/dev/firewire/if_fwip.c
+++ b/sys/dev/firewire/if_fwip.c
@@ -1,937 +1,937 @@
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (c) 2004
  *	Doug Rabson
  * Copyright (c) 2002-2003
  * 	Hidetoshi Shimokawa. All rights reserved.
  * 
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *
  *	This product includes software developed by Hidetoshi Shimokawa.
  *
  * 4. Neither the name of the author nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  * 
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
  */
 
 #ifdef HAVE_KERNEL_OPTION_HEADERS
 #include "opt_device_polling.h"
 #include "opt_inet.h"
 #endif
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 #include <machine/bus.h>
 
 #include <net/bpf.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/firewire.h>
 #include <net/if_arp.h>
 #include <net/if_types.h>
 #include <dev/firewire/firewire.h>
 #include <dev/firewire/firewirereg.h>
 #include <dev/firewire/iec13213.h>
 #include <dev/firewire/if_fwipvar.h>
 
 /*
  * We really need a mechanism for allocating regions in the FIFO
  * address space. We pick an address in the OHCI controller's 'middle'
  * address space. This means that the controller will automatically
  * send responses for us, which is fine since we don't have any
  * important information to put in the response anyway.
  */
 #define INET_FIFO	0xfffe00000000LL
 
 /*
  * Debug printf: expands to an unbraced "if (fwipdebug) if_printf", so the
  * call that follows only runs when the debug.if_fwip_debug sysctl is
  * non-zero.  Because the expansion is a bare if, do not use it directly
  * in front of an else branch.
  */
 #define FWIPDEBUG	if (fwipdebug) if_printf
 /* Send queue length and number of transmit xfers pre-allocated in fwip_init */
 #define TX_MAX_QUEUE	(FWMAXQUEUE - 1)
 
 /* network interface */
 static void fwip_start (if_t);
 static int fwip_ioctl (if_t, u_long, caddr_t);
 static void fwip_init (void *);
 
 /* bus reset hook, transmit completion and receive handlers */
 static void fwip_post_busreset (void *);
 static void fwip_output_callback (struct fw_xfer *);
 static void fwip_async_output (struct fwip_softc *, if_t);
 static void fwip_start_send (void *, int);
 static void fwip_stream_input (struct fw_xferq *);
 static void fwip_unicast_input(struct fw_xfer *);
 
 /* Non-zero enables FWIPDEBUG output (debug.if_fwip_debug) */
 static int fwipdebug = 0;
 /* chtag value used for broadcast stream packets, both on send and receive */
 static int broadcast_channel = 0xc0 | 0x1f; /*  tag | channel(XXX) */
 /*
  * Speed stamped on pre-allocated transmit xfers; fwip_attach replaces a
  * negative value with the controller's speed.
  */
 static int tx_speed = 2;
 /* Number of receive buffers and unicast receive xfers set up in fwip_init */
 static int rx_queue_len = FWMAXQUEUE;
 
 static MALLOC_DEFINE(M_FWIP, "if_fwip", "IP over FireWire interface");
 SYSCTL_INT(_debug, OID_AUTO, if_fwip_debug, CTLFLAG_RW, &fwipdebug, 0, "");
 SYSCTL_DECL(_hw_firewire);
 static SYSCTL_NODE(_hw_firewire, OID_AUTO, fwip, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
 	"Firewire ip subsystem");
 SYSCTL_INT(_hw_firewire_fwip, OID_AUTO, rx_queue_len, CTLFLAG_RWTUN, &rx_queue_len,
 	0, "Length of the receive queue");
 
 #ifdef DEVICE_POLLING
 static poll_handler_t fwip_poll;
 
 /*
  * Polling handler: forward the poll request to the FireWire controller
  * behind this interface.  Always returns 0.
  */
 static int
 fwip_poll(if_t ifp, enum poll_cmd cmd, int count)
 {
 	struct fwip_eth_softc *esc;
 	struct firewire_comm *fc;
 	int mode;
 
 	/* Nothing to do unless the interface is marked running. */
 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
 		return (0);
 
 	esc = (struct fwip_eth_softc *)if_getsoftc(ifp);
 	fc = esc->fwip->fd.fc;
 
 	/* The controller gets 0 for POLL_AND_CHECK_STATUS and 1 otherwise. */
 	if (cmd == POLL_AND_CHECK_STATUS)
 		mode = 0;
 	else
 		mode = 1;
 	fc->poll(fc, mode, count);
 
 	return (0);
 }
 #endif /* DEVICE_POLLING */
 
 /*
  * Identify method: add one "fwip" child under the given parent, using
  * the parent's unit number as the child's unit number.
  */
 static void
 fwip_identify(driver_t *driver, device_t parent)
 {
 	int parent_unit;
 
 	parent_unit = device_get_unit(parent);
 	BUS_ADD_CHILD(parent, 0, "fwip", parent_unit);
 }
 
 /*
  * Probe method: accept the device only when its unit number equals the
  * unit number of its parent.  Returns 0 on a match, ENXIO otherwise.
  */
 static int
 fwip_probe(device_t dev)
 {
 	device_t parent = device_get_parent(dev);
 
 	if (device_get_unit(dev) == device_get_unit(parent)) {
 		device_set_desc(dev, "IP over FireWire");
 		return (0);
 	}
 
 	return (ENXIO);
 }
 
 /*
  * Attach method: allocate the network interface for this fwip instance,
  * fill in the softc and the FireWire hardware address, install the
  * interface callbacks and attach it through firewire_ifattach().
  *
  * Returns ENOSPC when if_alloc() fails, 0 otherwise.
  */
 static int
 fwip_attach(device_t dev)
 {
 	struct fwip_softc *fwip;
 	if_t ifp;
 	int unit, s;
 	struct fw_hwaddr *hwaddr;
 
 	fwip = ((struct fwip_softc *)device_get_softc(dev));
 	unit = device_get_unit(dev);
 	ifp = fwip->fw_softc.fwip_ifp = if_alloc(IFT_IEEE1394);
 	if (ifp == NULL)
 		return (ENOSPC);
 
 	mtx_init(&fwip->mtx, "fwip", NULL, MTX_DEF);
 	/* XXX: a negative dma_ch means no receive DMA channel is open yet */
 	fwip->dma_ch = -1;
 
 	fwip->fd.fc = device_get_ivars(dev);
 	/* A negative tx_speed selects the controller's own speed. */
 	if (tx_speed < 0)
 		tx_speed = fwip->fd.fc->speed;
 
 	fwip->fd.dev = dev;
 	fwip->fd.post_explore = NULL;
 	fwip->fd.post_busreset = fwip_post_busreset;
 	/* Back pointer used by the ifnet callbacks to find the softc. */
 	fwip->fw_softc.fwip = fwip;
 	TASK_INIT(&fwip->start_send, 0, fwip_start_send, fwip);
 
 	/*
 	 * Encode our hardware the way that arp likes it.
 	 * The multi-byte fields are stored in network byte order.
 	 */
 	hwaddr = &IFP2FWC(fwip->fw_softc.fwip_ifp)->fc_hwaddr;
 	hwaddr->sender_unique_ID_hi = htonl(fwip->fd.fc->eui.hi);
 	hwaddr->sender_unique_ID_lo = htonl(fwip->fd.fc->eui.lo);
 	hwaddr->sender_max_rec = fwip->fd.fc->maxrec;
 	hwaddr->sspd = fwip->fd.fc->speed;
 	hwaddr->sender_unicast_FIFO_hi = htons((uint16_t)(INET_FIFO >> 32));
 	hwaddr->sender_unicast_FIFO_lo = htonl((uint32_t)INET_FIFO);
 
 	/* fill the rest and attach interface */
 	if_setsoftc(ifp, &fwip->fw_softc);
 
 	if_initname(ifp, device_get_name(dev), unit);
 	if_setinitfn(ifp, fwip_init);
 	if_setstartfn(ifp, fwip_start);
 	if_setioctlfn(ifp, fwip_ioctl);
 	if_setflags(ifp, (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST));
 	if_setsendqlen(ifp, TX_MAX_QUEUE);
 #ifdef DEVICE_POLLING
 	if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
 #endif
 
 	s = splimp();
 	firewire_ifattach(ifp, hwaddr);
 	splx(s);
 
 	FWIPDEBUG(ifp, "interface created\n");
 	return 0;
 }
 
 /*
  * Stop the interface.  When a receive DMA channel is open, disable it,
  * clear the queue flags and handler set up by fwip_init(), free the
  * receive buffers and both xfer lists, and mark the channel closed.
  * In every case the RUNNING and OACTIVE driver flags are cleared.
  */
 static void
 fwip_stop(struct fwip_softc *fwip)
 {
 	struct firewire_comm *fc;
 	struct fw_xferq *xferq;
 	if_t ifp = fwip->fw_softc.fwip_ifp;
 	struct fw_xfer *xfer, *next;
 	int i;
 
 	fc = fwip->fd.fc;
 
 	if (fwip->dma_ch >= 0) {
 		xferq = fc->ir[fwip->dma_ch];
 
 		if (xferq->flag & FWXFERQ_RUNNING)
 			fc->irx_disable(fc, fwip->dma_ch);
 		xferq->flag &= 
 			~(FWXFERQ_MODEMASK | FWXFERQ_OPEN | FWXFERQ_STREAM |
 			FWXFERQ_EXTBUF | FWXFERQ_HANDLER | FWXFERQ_CHTAGMASK);
 		xferq->hand =  NULL;
 
 		/* Release the stream receive clusters, then the slot array. */
 		for (i = 0; i < xferq->bnchunk; i++)
 			m_freem(xferq->bulkxfer[i].mbuf);
 		free(xferq->bulkxfer, M_FWIP);
 
 		/*
 		 * Unbind the unicast FIFO range and free its receive xfers.
 		 * NOTE(review): fwip_init() attaches an mbuf cluster to each
 		 * of these xfers; whether fw_xfer_free() releases that mbuf
 		 * is not visible here - confirm they are not leaked.
 		 */
 		fw_bindremove(fc, &fwip->fwb);
 		for (xfer = STAILQ_FIRST(&fwip->fwb.xferlist); xfer != NULL;
 					xfer = next) {
 			next = STAILQ_NEXT(xfer, link);
 			fw_xfer_free(xfer);
 		}
 
 		/* Free the idle transmit xfers and reset the list head. */
 		for (xfer = STAILQ_FIRST(&fwip->xferlist); xfer != NULL;
 					xfer = next) {
 			next = STAILQ_NEXT(xfer, link);
 			fw_xfer_free(xfer);
 		}
 		STAILQ_INIT(&fwip->xferlist);
 
 		xferq->bulkxfer =  NULL;
 		/* A negative channel makes fwip_init() set everything up again. */
 		fwip->dma_ch = -1;
 	}
 
 	if_setdrvflagbits(ifp, 0, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE));
 }
 
 /*
  * Detach method: deregister polling if it is enabled, stop the
  * interface, detach and free the ifnet, and destroy the softc mutex.
  * Always returns 0.
  */
 static int
 fwip_detach(device_t dev)
 {
 	struct fwip_softc *fwip;
 	if_t ifp;
 	int s;
 
 	fwip = (struct fwip_softc *)device_get_softc(dev);
 	ifp = fwip->fw_softc.fwip_ifp;
 
 #ifdef DEVICE_POLLING
 	if (if_getcapenable(ifp) & IFCAP_POLLING)
 		ether_poll_deregister(ifp);
 #endif
 
 	s = splimp();
 
 	/* Release DMA and xfer resources before the ifnet goes away. */
 	fwip_stop(fwip);
 	firewire_ifdetach(ifp);
 	if_free(ifp);
 	mtx_destroy(&fwip->mtx);
 
 	splx(s);
 	return 0;
 }
 
 /*
  * Interface init routine.  arg is the struct fwip_eth_softc registered
  * with if_setsoftc().
  *
  * On the first call (dma_ch < 0) this opens an isochronous receive DMA
  * channel, configures it as a stream queue with fwip_stream_input() as
  * handler, allocates the receive clusters, binds the unicast FIFO
  * address range with pre-allocated receive xfers, and pre-allocates the
  * transmit xfers.  In all cases it then starts receive DMA if it is not
  * running and marks the interface RUNNING and not OACTIVE.
  *
  * Returns silently, leaving the interface not running, when no DMA
  * channel can be opened.
  */
 static void
 fwip_init(void *arg)
 {
 	struct fwip_softc *fwip = ((struct fwip_eth_softc *)arg)->fwip;
 	struct firewire_comm *fc;
 	if_t ifp = fwip->fw_softc.fwip_ifp;
 	struct fw_xferq *xferq;
 	struct fw_xfer *xfer;
 	struct mbuf *m;
 	int i;
 
 	FWIPDEBUG(ifp, "initializing\n");
 
 	fc = fwip->fd.fc;
 /* NOTE(review): START is not referenced anywhere in the visible code. */
 #define START 0
 	if (fwip->dma_ch < 0) {
 		fwip->dma_ch = fw_open_isodma(fc, /* tx */0);
 		if (fwip->dma_ch < 0)
 			return;
 		xferq = fc->ir[fwip->dma_ch];
 		xferq->flag |= FWXFERQ_EXTBUF |
 				FWXFERQ_HANDLER | FWXFERQ_STREAM;
 		/* The low 8 bits of the flag word carry the channel/tag. */
 		xferq->flag &= ~0xff;
 		xferq->flag |= broadcast_channel & 0xff;
 		/* register fwip_input handler */
 		xferq->sc = (caddr_t) fwip;
 		xferq->hand = fwip_stream_input;
 		xferq->bnchunk = rx_queue_len;
 		xferq->bnpacket = 1;
 		xferq->psize = MCLBYTES;
 		xferq->queued = 0;
 		xferq->buf = NULL;
 		/*
 		 * NOTE(review): the allocation uses M_WAITOK, so the NULL
 		 * check below is presumably unreachable - confirm against
 		 * malloc(9).  If it ever fired, dma_ch would stay open.
 		 */
 		xferq->bulkxfer = (struct fw_bulkxfer *) malloc(
 			sizeof(struct fw_bulkxfer) * xferq->bnchunk,
 							M_FWIP, M_WAITOK);
 		if (xferq->bulkxfer == NULL) {
 			printf("if_fwip: malloc failed\n");
 			return;
 		}
 		STAILQ_INIT(&xferq->stvalid);
 		STAILQ_INIT(&xferq->stfree);
 		STAILQ_INIT(&xferq->stdma);
 		xferq->stproc = NULL;
 		/* One full-sized cluster per receive slot, all initially free. */
 		for (i = 0; i < xferq->bnchunk; i++) {
 			m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
 			xferq->bulkxfer[i].mbuf = m;
 			m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
 			STAILQ_INSERT_TAIL(&xferq->stfree,
 					&xferq->bulkxfer[i], link);
 		}
 
 		fwip->fwb.start = INET_FIFO;
 		fwip->fwb.end = INET_FIFO + 16384; /* S3200 packet size */
 
 		/*
 		 * pre-allocate xfer
 		 * Each unicast receive xfer gets its own cluster as payload
 		 * buffer; allocation stops early if fw_xfer_alloc() fails.
 		 */
 		STAILQ_INIT(&fwip->fwb.xferlist);
 		for (i = 0; i < rx_queue_len; i++) {
 			xfer = fw_xfer_alloc(M_FWIP);
 			if (xfer == NULL)
 				break;
 			m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
 			xfer->recv.payload = mtod(m, uint32_t *);
 			xfer->recv.pay_len = MCLBYTES;
 			xfer->hand = fwip_unicast_input;
 			xfer->fc = fc;
 			xfer->sc = (caddr_t)fwip;
 			xfer->mbuf = m;
 			STAILQ_INSERT_TAIL(&fwip->fwb.xferlist, xfer, link);
 		}
 		fw_bindadd(fc, &fwip->fwb);
 
 		/* Pool of transmit xfers consumed by fwip_async_output(). */
 		STAILQ_INIT(&fwip->xferlist);
 		for (i = 0; i < TX_MAX_QUEUE; i++) {
 			xfer = fw_xfer_alloc(M_FWIP);
 			if (xfer == NULL)
 				break;
 			xfer->send.spd = tx_speed;
 			xfer->fc = fwip->fd.fc;
 			xfer->sc = (caddr_t)fwip;
 			xfer->hand = fwip_output_callback;
 			STAILQ_INSERT_TAIL(&fwip->xferlist, xfer, link);
 		}
 	} else
 		xferq = fc->ir[fwip->dma_ch];
 
 	/* Forget the cached unicast destination used by the output path. */
 	fwip->last_dest.hi = 0;
 	fwip->last_dest.lo = 0;
 
 	/* start dma */
 	if ((xferq->flag & FWXFERQ_RUNNING) == 0)
 		fc->irx_enable(fc, fwip->dma_ch);
 
 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
 	if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
 
 #if 0
 	/* attempt to start output */
 	fwip_start(ifp);
 #endif
 }
 
 /*
  * Interface ioctl handler.
  *
  * SIOCSIFFLAGS starts or stops the interface so that the RUNNING driver
  * flag follows IFF_UP.  SIOCADDMULTI and SIOCDELMULTI are accepted and
  * ignored.  SIOCSIFCAP toggles polling when DEVICE_POLLING is compiled
  * in and is otherwise a no-op.  Everything else is passed on to
  * firewire_ioctl().
  *
  * Returns 0, or the error reported by the polling registration calls or
  * by firewire_ioctl().
  */
 static int
 fwip_ioctl(if_t ifp, u_long cmd, caddr_t data)
 {
 	struct fwip_softc *fwip = ((struct fwip_eth_softc *)if_getsoftc(ifp))->fwip;
 	int s, error;
 
 	switch (cmd) {
 	case SIOCSIFFLAGS:
 		s = splimp();
 		if (if_getflags(ifp) & IFF_UP) {
 			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
 				fwip_init(&fwip->fw_softc);
 		} else {
 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
 				fwip_stop(fwip);
 		}
 		splx(s);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		break;
 	case SIOCSIFCAP:
 #ifdef DEVICE_POLLING
 	    {
 		struct ifreq *ifr = (struct ifreq *) data;
 		struct firewire_comm *fc = fwip->fd.fc;
 
 		/* Polling requested and currently off: register and enable. */
 		if (ifr->ifr_reqcap & IFCAP_POLLING &&
 		    !(if_getcapenable(ifp) & IFCAP_POLLING)) {
 			error = ether_poll_register(fwip_poll, ifp);
 			if (error)
 				return (error);
 			/* Disable interrupts */
 			fc->set_intr(fc, 0);
 			if_setcapenablebit(ifp, IFCAP_POLLING, 0);
 			return (error);
 		}
 		/* Polling not requested but currently on: deregister. */
 		if (!(ifr->ifr_reqcap & IFCAP_POLLING) &&
 		    if_getcapenable(ifp) & IFCAP_POLLING) {
 			error = ether_poll_deregister(ifp);
 			/* Enable interrupts. */
 			fc->set_intr(fc, 1);
 			if_setcapenablebit(ifp, 0, IFCAP_POLLING);
 			return (error);
 		}
 	    }
 #endif /* DEVICE_POLLING */
 		break;
 	default:
 		s = splimp();
 		error = firewire_ioctl(ifp, cmd, data);
 		splx(s);
 		return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Bus reset hook (installed as fd.post_busreset in fwip_attach).
  *
  * Rebuilds the two unit directories this driver adds to the controller's
  * configuration ROM source - one for IPv4 and one for IPv6 - clears the
  * cached unicast destination and notifies the generic FireWire layer
  * through firewire_busreset().
  */
 static void
 fwip_post_busreset(void *arg)
 {
 	struct fwip_softc *fwip = arg;
 	struct crom_src *src;
 	struct crom_chunk *root;
 
 	src = fwip->fd.fc->crom_src;
 	root = fwip->fd.fc->crom_root;
 
 	/* RFC2734 IPv4 over IEEE1394 */
 	bzero(&fwip->unit4, sizeof(struct crom_chunk));
 	crom_add_chunk(src, root, &fwip->unit4, CROM_UDIR);
 	crom_add_entry(&fwip->unit4, CSRKEY_SPEC, CSRVAL_IETF);
 	crom_add_simple_text(src, &fwip->unit4, &fwip->spec4, "IANA");
 	crom_add_entry(&fwip->unit4, CSRKEY_VER, 1);
 	crom_add_simple_text(src, &fwip->unit4, &fwip->ver4, "IPv4");
 
 	/* RFC3146 IPv6 over IEEE1394 */
 	bzero(&fwip->unit6, sizeof(struct crom_chunk));
 	crom_add_chunk(src, root, &fwip->unit6, CROM_UDIR);
 	crom_add_entry(&fwip->unit6, CSRKEY_SPEC, CSRVAL_IETF);
 	crom_add_simple_text(src, &fwip->unit6, &fwip->spec6, "IANA");
 	crom_add_entry(&fwip->unit6, CSRKEY_VER, 2);
 	crom_add_simple_text(src, &fwip->unit6, &fwip->ver6, "IPv6");
 
 	/* Force the output path to resolve the next unicast destination again. */
 	fwip->last_dest.hi = 0;
 	fwip->last_dest.lo = 0;
 	firewire_busreset(fwip->fw_softc.fwip_ifp);
 }
 
 /*
  * Transmit completion handler for xfers from fwip->xferlist.
  *
  * Counts an output error when the xfer reports a non-zero response,
  * frees the transmitted mbuf, unloads the xfer and returns it to the
  * free list.  If packets are still waiting in the send queue it
  * restarts output.  fwip_async_output() also calls this directly on
  * some of its error paths to dispose of the mbuf and recycle the xfer.
  */
 static void
 fwip_output_callback(struct fw_xfer *xfer)
 {
 	struct fwip_softc *fwip;
 	if_t ifp;
 	int s;
 
 	fwip = (struct fwip_softc *)xfer->sc;
 	ifp = fwip->fw_softc.fwip_ifp;
 	/* XXX error check */
 	FWIPDEBUG(ifp, "resp = %d\n", xfer->resp);
 	if (xfer->resp != 0)
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	m_freem(xfer->mbuf);
 	fw_xfer_unload(xfer);
 
 	/* Put the xfer back on the free list under the softc lock. */
 	s = splimp();
 	FWIP_LOCK(fwip);
 	STAILQ_INSERT_TAIL(&fwip->xferlist, xfer, link);
 	FWIP_UNLOCK(fwip);
 	splx(s);
 
 	/* for queue full */
 	if (!if_sendq_empty(ifp)) {
 		fwip_start(ifp);
 	}
 }
 
 /*
  * Interface start routine.
  *
  * While no receive DMA channel is open (dma_ch < 0) the interface is
  * not ready: every queued packet is dropped and counted as an output
  * error.  Otherwise the send queue is handed to fwip_async_output()
  * with IFF_DRV_OACTIVE set for the duration of the call.
  */
 static void
 fwip_start(if_t ifp)
 {
 	struct fwip_softc *fwip = ((struct fwip_eth_softc *)if_getsoftc(ifp))->fwip;
 	int s;
 
 	FWIPDEBUG(ifp, "starting\n");
 
 	if (fwip->dma_ch < 0) {
 		struct mbuf	*m = NULL;
 
 		FWIPDEBUG(ifp, "not ready\n");
 
 		/*
 		 * Count one output error per packet actually dropped; the
 		 * terminating NULL dequeue must not be counted.
 		 */
 		s = splimp();
 		while ((m = if_dequeue(ifp)) != NULL) {
 			m_freem(m);
 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		}
 		splx(s);
 
 		return;
 	}
 
 	s = splimp();
 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
 
 	if (!if_sendq_empty(ifp))
 		fwip_async_output(fwip, ifp);
 
 	if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
 	splx(s);
 }
 
 /*
  * Async. stream output
  *
  * Move packets from the interface send queue onto the controller's
  * asynchronous transmit queue.  Broadcast packets, and packets without a
  * link-level address tag, are sent as GASP stream packets on the
  * broadcast channel; all others are sent as block writes to the
  * destination's unicast FIFO.  The loop ends when the transmit queue is
  * nearly full, the send queue is empty, no free xfer is left, or
  * fw_asyreq() reports EAGAIN.
  */
 static void
 fwip_async_output(struct fwip_softc *fwip, if_t ifp)
 {
 	struct firewire_comm *fc = fwip->fd.fc;
 	struct mbuf *m;
 	struct m_tag *mtag;
 	struct fw_hwaddr *destfw;
 	struct fw_xfer *xfer;
 	struct fw_xferq *xferq;
 	struct fw_pkt *fp;
 	uint16_t nodeid;
 	int error;
 	int gasp_len;
 	int i = 0;
 
 	xfer = NULL;
 	xferq = fc->atq;
 	while ((xferq->queued < xferq->maxq - 1) &&
 			!if_sendq_empty(ifp)) {
 		FWIP_LOCK(fwip);
 		xfer = STAILQ_FIRST(&fwip->xferlist);
 		if (xfer == NULL) {
 			FWIP_UNLOCK(fwip);
 #if 0
 			printf("if_fwip: lack of xfer\n");
 #endif
 			break;
 		}
 		STAILQ_REMOVE_HEAD(&fwip->xferlist, link);
 		FWIP_UNLOCK(fwip);
 
 		m = if_dequeue(ifp);
 		if (m == NULL) {
 			/* Queue drained under us: give the xfer back. */
 			FWIP_LOCK(fwip);
 			STAILQ_INSERT_HEAD(&fwip->xferlist, xfer, link);
 			FWIP_UNLOCK(fwip);
 			break;
 		}
 
 		/*
 		 * Dig out the link-level address which
 		 * firewire_output got via arp or neighbour
 		 * discovery. If we don't have a link-level address,
 		 * just stick the thing on the broadcast channel.
 		 */
 		mtag = m_tag_locate(m, MTAG_FIREWIRE, MTAG_FIREWIRE_HWADDR, 0);
 		if (mtag == NULL)
 			destfw = NULL;
 		else
 			destfw = (struct fw_hwaddr *) (mtag + 1);
 
 
 		/*
 		 * We don't do any bpf stuff here - the generic code
 		 * in firewire_output gives the packet to bpf before
 		 * it adds the link-level encapsulation.
 		 */
 
 		/*
 		 * Put the mbuf in the xfer early in case we hit an
 		 * error case below - fwip_output_callback will free
 		 * the mbuf.
 		 */
 		xfer->mbuf = m;
 
 		/*
 		 * We use the arp result (if any) to add a suitable firewire
 		 * packet header before handing off to the bus.
 		 */
 		fp = &xfer->send.hdr;
 		nodeid = FWLOCALBUS | fc->nodeid;
 		/* Bytes of GASP header prepended to m; 0 for unicast. */
 		gasp_len = 0;
 		if ((m->m_flags & M_BCAST) || !destfw) {
 			/*
 			 * Broadcast packets are sent as GASP packets with
 			 * specifier ID 0x00005e, version 1 on the broadcast
 			 * channel. To be conservative, we send at the
 			 * slowest possible speed.
 			 */
 			uint32_t *p;
 
 			M_PREPEND(m, 2*sizeof(uint32_t), M_NOWAIT);
 			/*
 			 * M_PREPEND may have replaced the head of the
 			 * chain, or freed the chain and left m NULL when
 			 * no mbuf was available.  Keep xfer->mbuf in step
 			 * so a stale or freed pointer is never released.
 			 */
 			xfer->mbuf = m;
 			if (m == NULL) {
 				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 				FWIP_LOCK(fwip);
 				STAILQ_INSERT_HEAD(&fwip->xferlist, xfer, link);
 				FWIP_UNLOCK(fwip);
 				continue;
 			}
 			gasp_len = 2*sizeof(uint32_t);
 			p = mtod(m, uint32_t *);
 			fp->mode.stream.len = m->m_pkthdr.len;
 			fp->mode.stream.chtag = broadcast_channel;
 			fp->mode.stream.tcode = FWTCODE_STREAM;
 			fp->mode.stream.sy = 0;
 			xfer->send.spd = 0;
 			p[0] = htonl(nodeid << 16);
 			p[1] = htonl((0x5e << 24) | 1);
 		} else {
 			/*
 			 * Unicast packets are sent as block writes to the
 			 * target's unicast fifo address. If we can't
 			 * find the node address, we just give up. We
 			 * could broadcast it but that might overflow
 			 * the packet size limitations due to the
 			 * extra GASP header. Note: the hardware
 			 * address is stored in network byte order to
 			 * make life easier for ARP.
 			 */
 			struct fw_device *fd;
 			struct fw_eui64 eui;
 
 			eui.hi = ntohl(destfw->sender_unique_ID_hi);
 			eui.lo = ntohl(destfw->sender_unique_ID_lo);
 			/*
 			 * The header built for the previous destination is
 			 * cached in last_hdr; rebuild it only when the
 			 * destination EUI-64 changes.
 			 */
 			if (fwip->last_dest.hi != eui.hi ||
 			    fwip->last_dest.lo != eui.lo) {
 				fd = fw_noderesolve_eui64(fc, &eui);
 				if (!fd) {
 					/* error */
 					if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 					/* XXX set error code */
 					fwip_output_callback(xfer);
 					continue;
 
 				}
 				fwip->last_hdr.mode.wreqb.dst = FWLOCALBUS | fd->dst;
 				fwip->last_hdr.mode.wreqb.tlrt = 0;
 				fwip->last_hdr.mode.wreqb.tcode = FWTCODE_WREQB;
 				fwip->last_hdr.mode.wreqb.pri = 0;
 				fwip->last_hdr.mode.wreqb.src = nodeid;
 				fwip->last_hdr.mode.wreqb.dest_hi =
 					ntohs(destfw->sender_unicast_FIFO_hi);
 				fwip->last_hdr.mode.wreqb.dest_lo =
 					ntohl(destfw->sender_unicast_FIFO_lo);
 				fwip->last_hdr.mode.wreqb.extcode = 0;
 				fwip->last_dest = eui;
 			}
 
 			fp->mode.wreqb = fwip->last_hdr.mode.wreqb;
 			fp->mode.wreqb.len = m->m_pkthdr.len;
 			xfer->send.spd = min(destfw->sspd, fc->speed);
 		}
 
 		xfer->send.pay_len = m->m_pkthdr.len;
 
 		error = fw_asyreq(fc, -1, xfer);
 		if (error == EAGAIN) {
 			/*
 			 * We ran out of tlabels - requeue the packet
 			 * for later transmission.
 			 */
 			xfer->mbuf = NULL;
 			FWIP_LOCK(fwip);
 			STAILQ_INSERT_TAIL(&fwip->xferlist, xfer, link);
 			FWIP_UNLOCK(fwip);
 			/*
 			 * Strip the GASP header again before requeueing;
 			 * the retry prepends a fresh one and would
 			 * otherwise send the packet with two headers.
 			 */
 			if (gasp_len != 0)
 				m_adj(m, gasp_len);
 			if_sendq_prepend(ifp, m);
 			break;
 		}
 		if (error) {
 			/* error */
 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 			/* XXX set error code */
 			fwip_output_callback(xfer);
 			continue;
 		} else {
 			if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 			i++;
 		}
 	}
 #if 0
 	if (i > 1)
 		printf("%d queued\n", i);
 #endif
 	/* Kick the transmit queue only if something was queued. */
 	if (i > 0)
 		xferq->start(fc);
 }
 
 static void
 fwip_start_send (void *arg, int count)
 {
 	struct fwip_softc *fwip = arg;
 
 	fwip->fd.fc->atq->start(fwip->fd.fc);
 }
 
 /*
  * Async. stream input
  *
  * Receive handler for the stream queue set up in fwip_init().  For each
  * completed buffer on xferq->stvalid it swaps in a fresh cluster,
  * validates the GASP header, optionally tags the packet with the
  * sender's EUI-64 for bpf, strips the headers and passes the packet to
  * firewire_input().  Receive DMA is re-enabled afterwards when at least
  * one free buffer is available.
  */
 static void
 fwip_stream_input(struct fw_xferq *xferq)
 {
 	struct epoch_tracker et;
 	struct mbuf *m, *m0;
 	struct m_tag *mtag;
 	if_t ifp;
 	struct fwip_softc *fwip;
 	struct fw_bulkxfer *sxfer;
 	struct fw_pkt *fp;
 	uint16_t src;
 	uint32_t *p;
 
 	fwip = (struct fwip_softc *)xferq->sc;
 	ifp = fwip->fw_softc.fwip_ifp;
 
 	NET_EPOCH_ENTER(et);
 	while ((sxfer = STAILQ_FIRST(&xferq->stvalid)) != NULL) {
 		STAILQ_REMOVE_HEAD(&xferq->stvalid, link);
 		fp = mtod(sxfer->mbuf, struct fw_pkt *);
 		if (fwip->fd.fc->irx_post != NULL)
 			fwip->fd.fc->irx_post(fwip->fd.fc, fp->mode.ld);
 		m = sxfer->mbuf;
 
 		/*
 		 * insert new rbuf
 		 * NOTE(review): when m_getcl() fails the slot is left with
 		 * a NULL mbuf and is not put back on stfree, so the receive
 		 * ring shrinks by one until the interface is reinitialised.
 		 */
 		sxfer->mbuf = m0 = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 		if (m0 != NULL) {
 			m0->m_len = m0->m_pkthdr.len = m0->m_ext.ext_size;
 			STAILQ_INSERT_TAIL(&xferq->stfree, sxfer, link);
 		} else
 			printf("fwip_as_input: m_getcl failed\n");
 
 		/*
 		 * We must have a GASP header - leave the
 		 * encapsulation sanity checks to the generic
 		 * code. Remember that we also have the firewire async
 		 * stream header even though that isn't accounted for
 		 * in mode.stream.len.
 		 */
 		if (sxfer->resp != 0 || fp->mode.stream.len <
 		    2*sizeof(uint32_t)) {
 			m_freem(m);
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			continue;
 		}
 		m->m_len = m->m_pkthdr.len = fp->mode.stream.len
 			+ sizeof(fp->mode.stream);
 
 		/*
 		 * If we received the packet on the broadcast channel,
 		 * mark it as broadcast, otherwise we assume it must
 		 * be multicast.
 		 */
 		if (fp->mode.stream.chtag == broadcast_channel)
 			m->m_flags |= M_BCAST;
 		else
 			m->m_flags |= M_MCAST;
 
 		/*
 		 * Make sure we recognise the GASP specifier and
 		 * version.  The specifier ID is assembled from the low
 		 * 16 bits of p[1] and the top 8 bits of p[2]; the
 		 * version is the low 24 bits of p[2].
 		 */
 		p = mtod(m, uint32_t *);
 		if ((((ntohl(p[1]) & 0xffff) << 8) | ntohl(p[2]) >> 24) != 0x00005e
 		    || (ntohl(p[2]) & 0xffffff) != 1) {
 			FWIPDEBUG(ifp, "Unrecognised GASP header %#08x %#08x\n",
 			    ntohl(p[1]), ntohl(p[2]));
 			m_freem(m);
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			continue;
 		}
 
 		/*
 		 * Record the sender ID for possible BPF usage.
 		 * The sender's node ID is the top 16 bits of p[1].
 		 */
 		src = ntohl(p[1]) >> 16;
-		if (bpf_peers_present(if_getbpf(ifp))) {
+		if (bpf_peers_present_if(ifp)) {
 			mtag = m_tag_alloc(MTAG_FIREWIRE,
 			    MTAG_FIREWIRE_SENDER_EUID,
 			    2*sizeof(uint32_t), M_NOWAIT);
 			if (mtag) {
 				/*
 				 * bpf wants it in network byte order
 				 * (this inner p shadows the outer p and
 				 * points at the tag payload instead)
 				 */
 				struct fw_device *fd;
 				uint32_t *p = (uint32_t *) (mtag + 1);
 				fd = fw_noderesolve_nodeid(fwip->fd.fc,
 				    src & 0x3f);
 				if (fd) {
 					p[0] = htonl(fd->eui.hi);
 					p[1] = htonl(fd->eui.lo);
 				} else {
 					p[0] = 0;
 					p[1] = 0;
 				}
 				m_tag_prepend(m, mtag);
 			}
 		}
 
 		/*
 		 * Trim off the GASP header
 		 * (three 32-bit words are removed from the front)
 		 */
 		m_adj(m, 3*sizeof(uint32_t));
 		m->m_pkthdr.rcvif = ifp;
 		firewire_input(ifp, m, src);
 		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	}
 	NET_EPOCH_EXIT(et);
 	if (STAILQ_FIRST(&xferq->stfree) != NULL)
 		fwip->fd.fc->irx_enable(fwip->fd.fc, fwip->dma_ch);
 }
 
 /*
  * Re-arm a unicast receive xfer once its packet has been taken: attach
  * a freshly allocated cluster as receive payload and append the xfer to
  * the tail of the bound xfer list.
  *
  * NOTE(review): the cluster is allocated with M_WAITOK while the only
  * visible caller invokes this inside a NET_EPOCH section - confirm that
  * a blocking allocation is acceptable in that context.
  */
 static __inline void
 fwip_unicast_input_recycle(struct fwip_softc *fwip, struct fw_xfer *xfer)
 {
 	struct mbuf *cl = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
 
 	xfer->recv.payload = mtod(cl, uint32_t *);
 	xfer->recv.pay_len = MCLBYTES;
 	xfer->mbuf = cl;
 	STAILQ_INSERT_TAIL(&fwip->fwb.xferlist, xfer, link);
 }
 
 /*
  * Receive handler for unicast packets written to the bound FIFO range.
  *
  * Takes the mbuf out of the xfer, re-arms the xfer with a new cluster,
  * and drops the packet (counting an input error) unless it is a block
  * write request addressed to exactly INET_FIFO.  Accepted packets are
  * optionally tagged with the sender's EUI-64 for bpf and handed to
  * firewire_input() together with the source node ID.
  */
 static void
 fwip_unicast_input(struct fw_xfer *xfer)
 {
 	uint64_t address;
 	struct mbuf *m;
 	struct m_tag *mtag;
 	struct epoch_tracker et;
 	if_t ifp;
 	struct fwip_softc *fwip;
 	struct fw_pkt *fp;
 	//struct fw_pkt *sfp;
 	int rtcode;
 
 	fwip = (struct fwip_softc *)xfer->sc;
 	ifp = fwip->fw_softc.fwip_ifp;
 	/* Take ownership of the received mbuf away from the xfer. */
 	m = xfer->mbuf;
 	xfer->mbuf = 0;
 	fp = &xfer->recv.hdr;
 
 	/*
 	 * Check the fifo address - we only accept addresses of
 	 * exactly INET_FIFO.
 	 * In this function rtcode is only used to decide whether the
 	 * packet is dropped; it is not sent back to the sender here.
 	 */
 	address = ((uint64_t)fp->mode.wreqb.dest_hi << 32)
 		| fp->mode.wreqb.dest_lo;
 	if (fp->mode.wreqb.tcode != FWTCODE_WREQB) {
 		rtcode = FWRCODE_ER_TYPE;
 	} else if (address != INET_FIFO) {
 		rtcode = FWRCODE_ER_ADDR;
 	} else {
 		rtcode = FWRCODE_COMPLETE;
 	}
 	NET_EPOCH_ENTER(et);
 
 	/*
 	 * Pick up a new mbuf and stick it on the back of the receive
 	 * queue.
 	 */
 	fwip_unicast_input_recycle(fwip, xfer);
 
 	/*
 	 * If we've already rejected the packet, give up now.
 	 */
 	if (rtcode != FWRCODE_COMPLETE) {
 		m_freem(m);
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		goto done;
 	}
 
-	if (bpf_peers_present(if_getbpf(ifp))) {
+	if (bpf_peers_present_if(ifp)) {
 		/*
 		 * Record the sender ID for possible BPF usage.
 		 */
 		mtag = m_tag_alloc(MTAG_FIREWIRE, MTAG_FIREWIRE_SENDER_EUID,
 		    2*sizeof(uint32_t), M_NOWAIT);
 		if (mtag) {
 			/* bpf wants it in network byte order */
 			struct fw_device *fd;
 			uint32_t *p = (uint32_t *) (mtag + 1);
 			fd = fw_noderesolve_nodeid(fwip->fd.fc,
 			    fp->mode.wreqb.src & 0x3f);
 			if (fd) {
 				p[0] = htonl(fd->eui.hi);
 				p[1] = htonl(fd->eui.lo);
 			} else {
 				/* Unknown sender: record an all-zero EUI-64. */
 				p[0] = 0;
 				p[1] = 0;
 			}
 			m_tag_prepend(m, mtag);
 		}
 	}
 
 	/*
 	 * Hand off to the generic encapsulation code. We don't use
 	 * ifp->if_input so that we can pass the source nodeid as an
 	 * argument to facilitate link-level fragment reassembly.
 	 */
 	m->m_len = m->m_pkthdr.len = fp->mode.wreqb.len;
 	m->m_pkthdr.rcvif = ifp;
 	firewire_input(ifp, m, fp->mode.wreqb.src);
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 done:
 	NET_EPOCH_EXIT(et);
 }
 
 /* Device methods implemented by this driver. */
 static device_method_t fwip_methods[] = {
 	/* device interface */
 	DEVMETHOD(device_identify,	fwip_identify),
 	DEVMETHOD(device_probe,		fwip_probe),
 	DEVMETHOD(device_attach,	fwip_attach),
 	DEVMETHOD(device_detach,	fwip_detach),
 	{ 0, 0 }
 };
 
 /* Driver description: name, method table and per-device softc size. */
 static driver_t fwip_driver = {
         "fwip",
 	fwip_methods,
 	sizeof(struct fwip_softc),
 };
 
 
 /* Register fwip as a child driver of firewire and declare the dependency. */
 DRIVER_MODULE(fwip, firewire, fwip_driver, 0, 0);
 MODULE_VERSION(fwip, 1);
 MODULE_DEPEND(fwip, firewire, 1, 1, 1);
diff --git a/sys/dev/hyperv/netvsc/if_hn.c b/sys/dev/hyperv/netvsc/if_hn.c
index 7d8e1914163e..f6f885873a79 100644
--- a/sys/dev/hyperv/netvsc/if_hn.c
+++ b/sys/dev/hyperv/netvsc/if_hn.c
@@ -1,7680 +1,7680 @@
 /*-
  * Copyright (c) 2010-2012 Citrix Inc.
  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
  * Copyright (c) 2012 NetApp Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*-
  * Copyright (c) 2004-2006 Kip Macy
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 #include "opt_hn.h"
 #include "opt_inet6.h"
 #include "opt_inet.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/counter.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/rmlock.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/buf_ring.h>
 #include <sys/eventhandler.h>
 #include <sys/epoch.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 
 #include <machine/atomic.h>
 #include <machine/in_cksum.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/rndis.h>
 #ifdef RSS
 #include <net/rss_config.h>
 #endif
 
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_lro.h>
 #include <netinet/udp.h>
 
 #include <dev/hyperv/include/hyperv.h>
 #include <dev/hyperv/include/hyperv_busdma.h>
 #include <dev/hyperv/include/vmbus.h>
 #include <dev/hyperv/include/vmbus_xact.h>
 
 #include <dev/hyperv/netvsc/ndis.h>
 #include <dev/hyperv/netvsc/if_hnreg.h>
 #include <dev/hyperv/netvsc/if_hnvar.h>
 #include <dev/hyperv/netvsc/hn_nvs.h>
 #include <dev/hyperv/netvsc/hn_rndis.h>
 
 #include "vmbus_if.h"
 
 #define HN_IFSTART_SUPPORT
 
 #define HN_RING_CNT_DEF_MAX		8
 
 #define HN_VFMAP_SIZE_DEF		8
 
 #define HN_XPNT_VF_ATTWAIT_MIN		2	/* seconds */
 
 /* YYY should get it from the underlying channel */
 #define HN_TX_DESC_CNT			512
 
 #define HN_RNDIS_PKT_LEN					\
 	(sizeof(struct rndis_packet_msg) +			\
 	 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) +	\
 	 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +		\
 	 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +		\
 	 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
 #define HN_RNDIS_PKT_BOUNDARY		PAGE_SIZE
 #define HN_RNDIS_PKT_ALIGN		CACHE_LINE_SIZE
 
 #define HN_TX_DATA_BOUNDARY		PAGE_SIZE
 #define HN_TX_DATA_MAXSIZE		IP_MAXPACKET
 #define HN_TX_DATA_SEGSIZE		PAGE_SIZE
 /* -1 for RNDIS packet message */
 #define HN_TX_DATA_SEGCNT_MAX		(HN_GPACNT_MAX - 1)
 
 #define HN_DIRECT_TX_SIZE_DEF		128
 
 #define HN_EARLY_TXEOF_THRESH		8
 
 #define HN_PKTBUF_LEN_DEF		(16 * 1024)
 
 #define HN_LROENT_CNT_DEF		128
 
 #define HN_LRO_LENLIM_MULTIRX_DEF	(12 * ETHERMTU)
 #define HN_LRO_LENLIM_DEF		(25 * ETHERMTU)
 /* YYY 2*MTU is a bit rough, but should be good enough. */
 #define HN_LRO_LENLIM_MIN(ifp)		(2 * if_getmtu(ifp))
 
 #define HN_LRO_ACKCNT_DEF		1
 
 /*
  * Softc lock helpers, all operating on the sx lock (sc)->hn_lock.
  *
  * HN_LOCK() does not block in sx_xlock(); it loops on sx_try_xlock(),
  * yielding the CPU and delaying between attempts until the exclusive
  * lock is obtained.  HN_LOCK_ASSERT() asserts the lock is held
  * exclusively (SA_XLOCKED).
  */
 #define HN_LOCK_INIT(sc)		\
 	sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
 #define HN_LOCK_DESTROY(sc)		sx_destroy(&(sc)->hn_lock)
 #define HN_LOCK_ASSERT(sc)		sx_assert(&(sc)->hn_lock, SA_XLOCKED)
 #define HN_LOCK(sc)					\
 do {							\
 	while (sx_try_xlock(&(sc)->hn_lock) == 0) {	\
 		/* Relinquish cpu to avoid deadlock */	\
 		sched_relinquish(curthread);		\
 		DELAY(1000);				\
 	}						\
 } while (0)
 #define HN_UNLOCK(sc)			sx_xunlock(&(sc)->hn_lock)
 
 #define HN_CSUM_IP_MASK			(CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
 #define HN_CSUM_IP6_MASK		(CSUM_IP6_TCP | CSUM_IP6_UDP)
 #define HN_CSUM_IP_HWASSIST(sc)		\
 	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
 #define HN_CSUM_IP6_HWASSIST(sc)	\
 	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)
 
 #define HN_PKTSIZE_MIN(align)		\
 	roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
 	    HN_RNDIS_PKT_LEN, (align))
 #define HN_PKTSIZE(m, align)		\
 	roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align))
 
 #ifdef RSS
 #define HN_RING_IDX2CPU(sc, idx)	rss_getcpu((idx) % rss_getnumbuckets())
 #else
 #define HN_RING_IDX2CPU(sc, idx)	(((sc)->hn_cpu + (idx)) % mp_ncpus)
 #endif
 
 /*
  * TX descriptor.  Carries the mbuf being sent, the owning TX ring, the
  * chimney buffer index/size and the DMA maps for the packet data and
  * for the RNDIS packet message.  'flags' holds HN_TXD_FLAG_ bits.
  */
 struct hn_txdesc {
 #ifndef HN_USE_TXDESC_BUFRING
 	/* List linkage; only present when buf_ring is not used. */
 	SLIST_ENTRY(hn_txdesc)		link;
 #endif
 	/* Linkage on another descriptor's agg_list. */
 	STAILQ_ENTRY(hn_txdesc)		agg_link;
 
 	/* Aggregated txdescs, in sending order. */
 	STAILQ_HEAD(, hn_txdesc)	agg_list;
 
 	/* The oldest packet, if transmission aggregation happens. */
 	struct mbuf			*m;
 	struct hn_tx_ring		*txr;
 	int				refs;
 	uint32_t			flags;	/* HN_TXD_FLAG_ */
 	struct hn_nvs_sendctx		send_ctx;
 	/* HN_NVS_CHIM_IDX_INVALID / 0 when no chimney buffer is in use. */
 	uint32_t			chim_index;
 	int				chim_size;
 
 	bus_dmamap_t			data_dmap;
 
 	bus_addr_t			rndis_pkt_paddr;
 	struct rndis_packet_msg		*rndis_pkt;
 	bus_dmamap_t			rndis_pkt_dmap;
 };
 
 #define HN_TXD_FLAG_ONLIST		0x0001
 #define HN_TXD_FLAG_DMAMAP		0x0002
 #define HN_TXD_FLAG_ONAGG		0x0004
 
 #define	HN_NDIS_PKTINFO_SUBALLOC	0x01
 #define	HN_NDIS_PKTINFO_1ST_FRAG	0x02
 #define	HN_NDIS_PKTINFO_LAST_FRAG	0x04
 
 /*
  * Packet-id per-packet-info record; NDIS_PKTINFOID_SZ is its size.
  * NOTE(review): 'flag' presumably carries the HN_NDIS_PKTINFO_ bits
  * defined above -- confirm against the RX path.
  */
 struct packet_info_id {
 	uint8_t				ver;
 	uint8_t				flag;
 	uint16_t			pkt_id;
 };
 
 #define NDIS_PKTINFOID_SZ		sizeof(struct packet_info_id)
 
 
 /*
  * Read-only pointers to the per-packet info records of a received
  * packet, one per kind (see the matching HN_RXINFO_ bits below).
  */
 struct hn_rxinfo {
 	const uint32_t			*vlan_info;
 	const uint32_t			*csum_info;
 	const uint32_t			*hash_info;
 	const uint32_t			*hash_value;
 	const struct packet_info_id	*pktinfo_id;
 };
 
 /*
  * Argument bundle pairing an RX ring with a VF interface.
  * NOTE(review): presumably consumed by hn_rxvf_set_task() -- confirm.
  */
 struct hn_rxvf_setarg {
 	struct hn_rx_ring	*rxr;
 	if_t			vf_ifp;
 };
 
 #define HN_RXINFO_VLAN			0x0001
 #define HN_RXINFO_CSUM			0x0002
 #define HN_RXINFO_HASHINF		0x0004
 #define HN_RXINFO_HASHVAL		0x0008
 #define HN_RXINFO_PKTINFO_ID		0x0010
 #define HN_RXINFO_ALL			\
 	(HN_RXINFO_VLAN |		\
 	 HN_RXINFO_CSUM |		\
 	 HN_RXINFO_HASHINF |		\
 	 HN_RXINFO_HASHVAL |		\
 	 HN_RXINFO_PKTINFO_ID)
 
 static int			hn_probe(device_t);
 static int			hn_attach(device_t);
 static int			hn_detach(device_t);
 static int			hn_shutdown(device_t);
 static void			hn_chan_callback(struct vmbus_channel *,
 				    void *);
 
 static void			hn_init(void *);
 static int			hn_ioctl(if_t, u_long, caddr_t);
 #ifdef HN_IFSTART_SUPPORT
 static void			hn_start(if_t);
 #endif
 static int			hn_transmit(if_t, struct mbuf *);
 static void			hn_xmit_qflush(if_t);
 static int			hn_ifmedia_upd(if_t);
 static void			hn_ifmedia_sts(if_t,
 				    struct ifmediareq *);
 
 static void			hn_ifnet_event(void *, if_t, int);
 static void			hn_ifaddr_event(void *, if_t);
 static void			hn_ifnet_attevent(void *, if_t);
 static void			hn_ifnet_detevent(void *, if_t);
 static void			hn_ifnet_lnkevent(void *, if_t, int);
 
 static bool			hn_ismyvf(const struct hn_softc *,
 				    const if_t);
 static void			hn_rxvf_change(struct hn_softc *,
 				    if_t, bool);
 static void			hn_rxvf_set(struct hn_softc *, if_t);
 static void			hn_rxvf_set_task(void *, int);
 static void			hn_xpnt_vf_input(if_t, struct mbuf *);
 static int			hn_xpnt_vf_iocsetflags(struct hn_softc *);
 static int			hn_xpnt_vf_iocsetcaps(struct hn_softc *,
 				    struct ifreq *);
 static void			hn_xpnt_vf_saveifflags(struct hn_softc *);
 static bool			hn_xpnt_vf_isready(struct hn_softc *);
 static void			hn_xpnt_vf_setready(struct hn_softc *);
 static void			hn_xpnt_vf_init_taskfunc(void *, int);
 static void			hn_xpnt_vf_init(struct hn_softc *);
 static void			hn_xpnt_vf_setenable(struct hn_softc *);
 static void			hn_xpnt_vf_setdisable(struct hn_softc *, bool);
 static void			hn_vf_rss_fixup(struct hn_softc *, bool);
 static void			hn_vf_rss_restore(struct hn_softc *);
 
 static int			hn_rndis_rxinfo(const void *, int,
 				    struct hn_rxinfo *);
 static void			hn_rndis_rx_data(struct hn_rx_ring *,
 				    const void *, int);
 static void			hn_rndis_rx_status(struct hn_softc *,
 				    const void *, int);
 static void			hn_rndis_init_fixat(struct hn_softc *, int);
 
 static void			hn_nvs_handle_notify(struct hn_softc *,
 				    const struct vmbus_chanpkt_hdr *);
 static void			hn_nvs_handle_comp(struct hn_softc *,
 				    struct vmbus_channel *,
 				    const struct vmbus_chanpkt_hdr *);
 static void			hn_nvs_handle_rxbuf(struct hn_rx_ring *,
 				    struct vmbus_channel *,
 				    const struct vmbus_chanpkt_hdr *);
 static void			hn_nvs_ack_rxbuf(struct hn_rx_ring *,
 				    struct vmbus_channel *, uint64_t);
 
 static int			hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
 #ifndef RSS
 static int			hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
 #endif
 static int			hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_polling_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_vf_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_vflist_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS);
 static int			hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS);
 
 static void			hn_stop(struct hn_softc *, bool);
 static void			hn_init_locked(struct hn_softc *);
 static int			hn_chan_attach(struct hn_softc *,
 				    struct vmbus_channel *);
 static void			hn_chan_detach(struct hn_softc *,
 				    struct vmbus_channel *);
 static int			hn_attach_subchans(struct hn_softc *);
 static void			hn_detach_allchans(struct hn_softc *);
 static void			hn_chan_rollup(struct hn_rx_ring *,
 				    struct hn_tx_ring *);
 static void			hn_set_ring_inuse(struct hn_softc *, int);
 static int			hn_synth_attach(struct hn_softc *, int);
 static void			hn_synth_detach(struct hn_softc *);
 static int			hn_synth_alloc_subchans(struct hn_softc *,
 				    int *);
 static bool			hn_synth_attachable(const struct hn_softc *);
 static void			hn_suspend(struct hn_softc *);
 static void			hn_suspend_data(struct hn_softc *);
 static void			hn_suspend_mgmt(struct hn_softc *);
 static void			hn_resume(struct hn_softc *);
 static void			hn_resume_data(struct hn_softc *);
 static void			hn_resume_mgmt(struct hn_softc *);
 static void			hn_suspend_mgmt_taskfunc(void *, int);
 static void			hn_chan_drain(struct hn_softc *,
 				    struct vmbus_channel *);
 static void			hn_disable_rx(struct hn_softc *);
 static void			hn_drain_rxtx(struct hn_softc *, int);
 static void			hn_polling(struct hn_softc *, u_int);
 static void			hn_chan_polling(struct vmbus_channel *, u_int);
 static void			hn_mtu_change_fixup(struct hn_softc *);
 
 static void			hn_update_link_status(struct hn_softc *);
 static void			hn_change_network(struct hn_softc *);
 static void			hn_link_taskfunc(void *, int);
 static void			hn_netchg_init_taskfunc(void *, int);
 static void			hn_netchg_status_taskfunc(void *, int);
 static void			hn_link_status(struct hn_softc *);
 
 static int			hn_create_rx_data(struct hn_softc *, int);
 static void			hn_destroy_rx_data(struct hn_softc *);
 static int			hn_check_iplen(const struct mbuf *, int);
 static void			hn_rxpkt_proto(const struct mbuf *, int *, int *);
 static int			hn_set_rxfilter(struct hn_softc *, uint32_t);
 static int			hn_rxfilter_config(struct hn_softc *);
 static int			hn_rss_reconfig(struct hn_softc *);
 static void			hn_rss_ind_fixup(struct hn_softc *);
 static void			hn_rss_mbuf_hash(struct hn_softc *, uint32_t);
 static int			hn_rxpkt(struct hn_rx_ring *);
 static uint32_t			hn_rss_type_fromndis(uint32_t);
 static uint32_t			hn_rss_type_tondis(uint32_t);
 
 static int			hn_tx_ring_create(struct hn_softc *, int);
 static void			hn_tx_ring_destroy(struct hn_tx_ring *);
 static int			hn_create_tx_data(struct hn_softc *, int);
 static void			hn_fixup_tx_data(struct hn_softc *);
 static void			hn_fixup_rx_data(struct hn_softc *);
 static void			hn_destroy_tx_data(struct hn_softc *);
 static void			hn_txdesc_dmamap_destroy(struct hn_txdesc *);
 static void			hn_txdesc_gc(struct hn_tx_ring *,
 				    struct hn_txdesc *);
 static int			hn_encap(if_t, struct hn_tx_ring *,
 				    struct hn_txdesc *, struct mbuf **);
 static int			hn_txpkt(if_t, struct hn_tx_ring *,
 				    struct hn_txdesc *);
 static void			hn_set_chim_size(struct hn_softc *, int);
 static void			hn_set_tso_maxsize(struct hn_softc *, int, int);
 static bool			hn_tx_ring_pending(struct hn_tx_ring *);
 static void			hn_tx_ring_qflush(struct hn_tx_ring *);
 static void			hn_resume_tx(struct hn_softc *, int);
 static void			hn_set_txagg(struct hn_softc *);
 static void			*hn_try_txagg(if_t,
 				    struct hn_tx_ring *, struct hn_txdesc *,
 				    int);
 static int			hn_get_txswq_depth(const struct hn_tx_ring *);
 static void			hn_txpkt_done(struct hn_nvs_sendctx *,
 				    struct hn_softc *, struct vmbus_channel *,
 				    const void *, int);
 static int			hn_txpkt_sglist(struct hn_tx_ring *,
 				    struct hn_txdesc *);
 static int			hn_txpkt_chim(struct hn_tx_ring *,
 				    struct hn_txdesc *);
 static int			hn_xmit(struct hn_tx_ring *, int);
 static void			hn_xmit_taskfunc(void *, int);
 static void			hn_xmit_txeof(struct hn_tx_ring *);
 static void			hn_xmit_txeof_taskfunc(void *, int);
 #ifdef HN_IFSTART_SUPPORT
 static int			hn_start_locked(struct hn_tx_ring *, int);
 static void			hn_start_taskfunc(void *, int);
 static void			hn_start_txeof(struct hn_tx_ring *);
 static void			hn_start_txeof_taskfunc(void *, int);
 #endif
 
 static int			hn_rsc_sysctl(SYSCTL_HANDLER_ARGS);
 
 SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
     "Hyper-V network interface");
 
 /* Trust tcp segment verification on host side. */
 static int			hn_trust_hosttcp = 1;
 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
     &hn_trust_hosttcp, 0,
     "Trust tcp segment verification on host side, "
     "when csum info is missing (global setting)");
 
 /* Trust udp datagrams verification on host side. */
 static int			hn_trust_hostudp = 1;
 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
     &hn_trust_hostudp, 0,
     "Trust udp datagram verification on host side, "
     "when csum info is missing (global setting)");
 
 /* Trust ip packets verification on host side. */
 static int			hn_trust_hostip = 1;
 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
     &hn_trust_hostip, 0,
     "Trust ip packet verification on host side, "
     "when csum info is missing (global setting)");
 
 /*
  * Offload UDP/IPv4 checksum.
  */
 static int			hn_enable_udp4cs = 1;
 SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp4cs, CTLFLAG_RDTUN,
     &hn_enable_udp4cs, 0, "Offload UDP/IPv4 checksum");
 
 /*
  * Offload UDP/IPv6 checksum.
  */
 static int			hn_enable_udp6cs = 1;
 SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp6cs, CTLFLAG_RDTUN,
     &hn_enable_udp6cs, 0, "Offload UDP/IPv6 checksum");
 
 /* Stats. */
 static counter_u64_t		hn_udpcs_fixup;
 SYSCTL_COUNTER_U64(_hw_hn, OID_AUTO, udpcs_fixup, CTLFLAG_RW,
     &hn_udpcs_fixup, "# of UDP checksum fixup");
 
 /*
  * See hn_set_hlen().
  *
  * This value is for Azure.  For Hyper-V, set this above
  * 65536 to disable UDP datagram checksum fixup.
  */
 static int			hn_udpcs_fixup_mtu = 1420;
 SYSCTL_INT(_hw_hn, OID_AUTO, udpcs_fixup_mtu, CTLFLAG_RWTUN,
     &hn_udpcs_fixup_mtu, 0, "UDP checksum fixup MTU threshold");
 
 /* Limit TSO burst size */
 static int			hn_tso_maxlen = IP_MAXPACKET;
 SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
     &hn_tso_maxlen, 0, "TSO burst limit");
 
 /* Limit chimney send size */
 static int			hn_tx_chimney_size = 0;
 SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
     &hn_tx_chimney_size, 0, "Chimney send packet size limit");
 
 /* Limit the size of packet for direct transmission */
 static int			hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
 SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
     &hn_direct_tx_size, 0, "Size of the packet for direct transmission");
 
 /* # of LRO entries per RX ring */
 #if defined(INET) || defined(INET6)
 static int			hn_lro_entry_count = HN_LROENT_CNT_DEF;
 SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
     &hn_lro_entry_count, 0, "LRO entry count");
 #endif
 
 static int			hn_tx_taskq_cnt = 1;
 SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
     &hn_tx_taskq_cnt, 0, "# of TX taskqueues");
 
 #define HN_TX_TASKQ_M_INDEP	0
 #define HN_TX_TASKQ_M_GLOBAL	1
 #define HN_TX_TASKQ_M_EVTTQ	2
 
 static int			hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
 SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
     &hn_tx_taskq_mode, 0, "TX taskqueue modes: "
     "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");
 
 #ifndef HN_USE_TXDESC_BUFRING
 static int			hn_use_txdesc_bufring = 0;
 #else
 static int			hn_use_txdesc_bufring = 1;
 #endif
 SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
     &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");
 
 #ifdef HN_IFSTART_SUPPORT
 /* Use ifnet.if_start instead of ifnet.if_transmit */
 static int			hn_use_if_start = 0;
 SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
     &hn_use_if_start, 0, "Use if_start TX method");
 #endif
 
 /* # of channels to use */
 static int			hn_chan_cnt = 0;
 SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
     &hn_chan_cnt, 0,
     "# of channels to use; each channel has one RX ring and one TX ring");
 
 /* # of transmit rings to use */
 static int			hn_tx_ring_cnt = 0;
 SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
     &hn_tx_ring_cnt, 0, "# of TX rings to use");
 
 /* Software TX ring depth */
 static int			hn_tx_swq_depth = 0;
 SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
     &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");
 
 /* Enable sorted LRO, and the depth of the per-channel mbuf queue */
 static u_int			hn_lro_mbufq_depth = 0;
 SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
     &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
 
 /* Packet transmission aggregation size limit */
 static int			hn_tx_agg_size = -1;
 SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
     &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");
 
 /* Packet transmission aggregation count limit */
 static int			hn_tx_agg_pkts = -1;
 SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
     &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");
 
 /* VF list */
 SYSCTL_PROC(_hw_hn, OID_AUTO, vflist,
     CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, 0, 0,
     hn_vflist_sysctl, "A",
     "VF list");
 
 /* VF mapping */
 SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap,
     CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, 0, 0,
     hn_vfmap_sysctl, "A",
     "VF mapping");
 
 /* Transparent VF */
 static int			hn_xpnt_vf = 1;
 SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN,
     &hn_xpnt_vf, 0, "Transparent VF mod");
 
 /* Accurate BPF support for Transparent VF */
 static int			hn_xpnt_vf_accbpf = 0;
 SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN,
     &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF");
 
 /* Extra wait for transparent VF attach routing; unit seconds. */
 static int			hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
 SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN,
     &hn_xpnt_vf_attwait, 0,
     "Extra wait for transparent VF attach routing; unit: seconds");
 
 static u_int			hn_cpu_index;	/* next CPU for channel */
 static struct taskqueue		**hn_tx_taskque;/* shared TX taskqueues */
 
 static struct rmlock		hn_vfmap_lock;
 static int			hn_vfmap_size;
 static if_t			*hn_vfmap;
 
 #ifndef RSS
 /*
  * Built-in Toeplitz RSS hash key (NDIS_HASH_KEYSIZE_TOEPLITZ bytes);
  * only compiled when the kernel RSS option is not configured.
  */
 static const uint8_t
 hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
 	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
 	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
 	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
 	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
 	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
 };
 #endif	/* !RSS */
 
 /*
  * 16-byte Hyper-V GUID for this driver.
  * NOTE(review): presumably matched against the channel type in
  * hn_probe() -- confirm.
  */
 static const struct hyperv_guid	hn_guid = {
 	.hv_guid = {
 	    0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46,
 	    0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e }
 };
 
 /* Device method table for the hn driver. */
 static device_method_t hn_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		hn_probe),
 	DEVMETHOD(device_attach,	hn_attach),
 	DEVMETHOD(device_detach,	hn_detach),
 	DEVMETHOD(device_shutdown,	hn_shutdown),
 	DEVMETHOD_END
 };
 
 /*
  * Driver description handed to DRIVER_MODULE() below: the driver name
  * string, its method table, and the size of struct hn_softc.
  */
 static driver_t hn_driver = {
 	"hn",
 	hn_methods,
 	sizeof(struct hn_softc)
 };
 
 DRIVER_MODULE(hn, vmbus, hn_driver, 0, 0);
 MODULE_VERSION(hn, 1);
 MODULE_DEPEND(hn, vmbus, 1, 1, 1);
 
 static void
 hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
 {
 	int i;
 
 	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
 		sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
 }
 
 static int
 hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
 {
 
 	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
 	    txd->chim_size == 0, ("invalid rndis sglist txd"));
 	return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
 	    &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
 }
 
 static int
 hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd)
 {
 	struct hn_nvs_rndis rndis;
 
 	KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID &&
 	    txd->chim_size > 0, ("invalid rndis chim txd"));
 
 	rndis.nvs_type = HN_NVS_TYPE_RNDIS;
 	rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA;
 	rndis.nvs_chim_idx = txd->chim_index;
 	rndis.nvs_chim_sz = txd->chim_size;
 
 	return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC,
 	    &rndis, sizeof(rndis), &txd->send_ctx));
 }
 
 /*
  * Claim one free slot in the chimney buffer bitmap of 'sc'.
  *
  * Each bitmap word is scanned for its lowest clear bit, which is then
  * claimed with an atomic test-and-set.  If the test-and-set reports
  * the bit as already set, the scan moves on to the next word.
  *
  * Returns the claimed chimney index, or HN_NVS_CHIM_IDX_INVALID when
  * no slot could be claimed.
  */
 static __inline uint32_t
 hn_chim_alloc(struct hn_softc *sc)
 {
 	int nwords = sc->hn_chim_bmap_cnt;
 	u_long *words = sc->hn_chim_bmap;
 	int w;
 
 	for (w = 0; w < nwords; w++) {
 		/* ffsl() is 1-based; -1 here means no clear bit. */
 		int bit = ffsl(~words[w]) - 1;
 
 		if (bit < 0)
 			continue;
 
 		KASSERT(w * LONG_BIT + bit < sc->hn_chim_cnt,
 		    ("invalid i %d and idx %d", w, bit));
 
 		if (atomic_testandset_long(&words[w], bit) == 0)
 			return (w * LONG_BIT + bit);
 	}
 	return (HN_NVS_CHIM_IDX_INVALID);
 }
 
 /*
  * Release chimney buffer slot 'chim_idx' by atomically clearing its
  * bit in the bitmap of 'sc'.  The slot must currently be marked as
  * allocated (asserted).
  */
 static __inline void
 hn_chim_free(struct hn_softc *sc, uint32_t chim_idx)
 {
 	u_long mask;
 	uint32_t idx;
 
 	/* Bitmap word holding this slot. */
 	idx = chim_idx / LONG_BIT;
 	KASSERT(idx < sc->hn_chim_bmap_cnt,
 	    ("invalid chimney index 0x%x", chim_idx));
 
 	/* Bit within that word. */
 	mask = 1UL << (chim_idx % LONG_BIT);
 	KASSERT(sc->hn_chim_bmap[idx] & mask,
 	    ("index bitmap 0x%lx, chimney index %u, "
 	     "bitmap idx %u, bitmask 0x%lx",
 	     sc->hn_chim_bmap[idx], chim_idx, idx, mask));
 
 	atomic_clear_long(&sc->hn_chim_bmap[idx], mask);
 }
 
 #if defined(INET6) || defined(INET)
 
 /*
  * Make sure the leading mbuf of 'm' holds at least 'len' bytes,
  * calling m_pullup() only when it does not.  'm' is reassigned to the
  * m_pullup() result; when that is NULL the *enclosing function*
  * returns NULL.  Both arguments are evaluated more than once.
  */
 #define PULLUP_HDR(m, len)				\
 do {							\
 	if (__predict_false((m)->m_len < (len))) {	\
 		(m) = m_pullup((m), (len));		\
 		if ((m) == NULL)			\
 			return (NULL);			\
 	}						\
 } while (0)
 
 /*
  * Prepare a TSO packet for transmission: record the L2 and L3 header
  * lengths in the packet header, zero the IP length field (and the
  * IPv4 header checksum), and seed th_sum with the pseudo-header
  * checksum.
  *
  * Returns the head of the mbuf chain, which may differ from the one
  * passed in, or NULL on failure.
  *
  * NOTE: If this function failed, the m_head would be freed.
  */
 static __inline struct mbuf *
 hn_tso_fixup(struct mbuf *m_head)
 {
 	struct ether_vlan_header *evl;
 	struct tcphdr *th;
 	int ehlen;
 
 	KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable"));
 
 	PULLUP_HDR(m_head, sizeof(*evl));
 	evl = mtod(m_head, struct ether_vlan_header *);
 	/* evl_encap_proto is in network order; convert the constant. */
 	if (evl->evl_encap_proto == htons(ETHERTYPE_VLAN))
 		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
 	else
 		ehlen = ETHER_HDR_LEN;
 	m_head->m_pkthdr.l2hlen = ehlen;
 
 #ifdef INET
 	if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
 		struct ip *ip;
 		int iphlen;
 
 		PULLUP_HDR(m_head, ehlen + sizeof(*ip));
 		ip = mtodo(m_head, ehlen);
 		iphlen = ip->ip_hl << 2;
 		m_head->m_pkthdr.l3hlen = iphlen;
 
 		PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
 		/*
 		 * The pullup above may have replaced the leading mbuf,
 		 * so the earlier 'ip' pointer can be stale; reload it
 		 * before writing through it.
 		 */
 		ip = mtodo(m_head, ehlen);
 		th = mtodo(m_head, ehlen + iphlen);
 
 		ip->ip_len = 0;
 		ip->ip_sum = 0;
 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
 	}
 #endif
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET6
 	{
 		struct ip6_hdr *ip6;
 
 		PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
 		ip6 = mtodo(m_head, ehlen);
 		/* Only TCP directly following the IPv6 header is handled. */
 		if (ip6->ip6_nxt != IPPROTO_TCP) {
 			m_freem(m_head);
 			return (NULL);
 		}
 		m_head->m_pkthdr.l3hlen = sizeof(*ip6);
 
 		PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
 		/* Reload for the same reason as in the IPv4 case. */
 		ip6 = mtodo(m_head, ehlen);
 		th = mtodo(m_head, ehlen + sizeof(*ip6));
 
 		ip6->ip6_plen = 0;
 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
 	}
 #endif
 	return (m_head);
 }
 
 /*
  * Record the L2 and L3 header lengths of 'm_head' in its packet
  * header.  For IPv4 UDP datagrams that are longer than
  * hn_udpcs_fixup_mtu (plus the L2 header) and do not have IP_DF set,
  * the UDP checksum is computed in software and CSUM_IP_UDP is cleared.
  *
  * Returns the head of the mbuf chain, which may differ from the one
  * passed in, or NULL on failure.
  *
  * NOTE: If this function failed, the m_head would be freed.
  */
 static __inline struct mbuf *
 hn_set_hlen(struct mbuf *m_head)
 {
 	const struct ether_vlan_header *evl;
 	int ehlen;
 
 	PULLUP_HDR(m_head, sizeof(*evl));
 	evl = mtod(m_head, const struct ether_vlan_header *);
 	/* evl_encap_proto is in network order; convert the constant. */
 	if (evl->evl_encap_proto == htons(ETHERTYPE_VLAN))
 		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
 	else
 		ehlen = ETHER_HDR_LEN;
 	m_head->m_pkthdr.l2hlen = ehlen;
 
 #ifdef INET
 	if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP_UDP)) {
 		const struct ip *ip;
 		int iphlen;
 
 		PULLUP_HDR(m_head, ehlen + sizeof(*ip));
 		ip = mtodo(m_head, ehlen);
 		iphlen = ip->ip_hl << 2;
 		m_head->m_pkthdr.l3hlen = iphlen;
 
 		/*
 		 * UDP checksum offload does not work in Azure, if the
 		 * following conditions meet:
 		 * - sizeof(IP hdr + UDP hdr + payload) > 1420.
 		 * - IP_DF is not set in the IP hdr.
 		 *
 		 * Fallback to software checksum for these UDP datagrams.
 		 */
 		if ((m_head->m_pkthdr.csum_flags & CSUM_IP_UDP) &&
 		    m_head->m_pkthdr.len > hn_udpcs_fixup_mtu + ehlen &&
 		    (ntohs(ip->ip_off) & IP_DF) == 0) {
 			uint16_t off = ehlen + iphlen;
 			uint16_t csum;
 
 			counter_u64_add(hn_udpcs_fixup, 1);
 			/* 'ip' is not used past this pullup. */
 			PULLUP_HDR(m_head, off + sizeof(struct udphdr));
 			csum = in_cksum_skip(m_head, m_head->m_pkthdr.len,
 			    off);
 			/*
 			 * A zero UDP checksum on the wire means "no
 			 * checksum"; a computed zero is sent as all ones.
 			 */
 			if (csum == 0)
 				csum = 0xffff;
 			*(uint16_t *)(m_head->m_data + off +
 			    m_head->m_pkthdr.csum_data) = csum;
 			m_head->m_pkthdr.csum_flags &= ~CSUM_IP_UDP;
 		}
 	}
 #endif
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET6
 	{
 		const struct ip6_hdr *ip6;
 
 		PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
 		ip6 = mtodo(m_head, ehlen);
 		/* Only TCP/UDP directly after the IPv6 header is handled. */
 		if (ip6->ip6_nxt != IPPROTO_TCP &&
 		    ip6->ip6_nxt != IPPROTO_UDP) {
 			m_freem(m_head);
 			return (NULL);
 		}
 		m_head->m_pkthdr.l3hlen = sizeof(*ip6);
 	}
 #endif
 	return (m_head);
 }
 
 /*
  * Report through '*tcpsyn' (1 or 0) whether the TCP header of
  * 'm_head' has TH_SYN set.  The header is located using the l2hlen
  * and l3hlen already recorded in the packet header.
  *
  * Returns the head of the mbuf chain, or NULL on failure, in which
  * case '*tcpsyn' is left at 0.
  *
  * NOTE: If this function failed, the m_head would be freed.
  */
 static __inline struct mbuf *
 hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn)
 {
 	const struct tcphdr *tcp;
 	int l2len, l3len;
 
 	/* Set before the pullup so the failure path reports 0. */
 	*tcpsyn = 0;
 	l2len = m_head->m_pkthdr.l2hlen;
 	l3len = m_head->m_pkthdr.l3hlen;
 
 	PULLUP_HDR(m_head, l2len + l3len + sizeof(*tcp));
 	tcp = mtodo(m_head, l2len + l3len);
 	if ((tcp->th_flags & TH_SYN) != 0)
 		*tcpsyn = 1;
 	return (m_head);
 }
 
 #undef PULLUP_HDR
 
 #endif	/* INET6 || INET */
 
 /*
  * Program RX filter 'filter' through RNDIS unless it is already the
  * cached value.  The cache (sc->hn_rx_filter) is updated only when
  * the RNDIS request succeeds.  The softc lock must be held.
  *
  * Returns 0 on success or when nothing had to change, otherwise the
  * error from hn_rndis_set_rxfilter().
  */
 static int
 hn_set_rxfilter(struct hn_softc *sc, uint32_t filter)
 {
 	int error;
 
 	HN_LOCK_ASSERT(sc);
 
 	if (sc->hn_rx_filter == filter)
 		return (0);
 
 	error = hn_rndis_set_rxfilter(sc, filter);
 	if (error == 0)
 		sc->hn_rx_filter = filter;
 	return (error);
 }
 
 /*
  * Derive the NDIS RX filter from the interface flags and multicast
  * list, then apply it with hn_set_rxfilter().  The softc lock must be
  * held.  Returns the result of hn_set_rxfilter().
  */
 static int
 hn_rxfilter_config(struct hn_softc *sc)
 {
 	if_t ifp = sc->hn_ifp;
 	uint32_t filter;
 
 	HN_LOCK_ASSERT(sc);
 
 	/*
 	 * If the non-transparent mode VF is activated, we don't know how
 	 * its RX filter is configured, so stick the synthetic device in
 	 * the promiscuous mode.
 	 */
 	if ((if_getflags(ifp) & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF))
 		return (hn_set_rxfilter(sc, NDIS_PACKET_TYPE_PROMISCUOUS));
 
 	filter = NDIS_PACKET_TYPE_DIRECTED;
 	if (if_getflags(ifp) & IFF_BROADCAST)
 		filter |= NDIS_PACKET_TYPE_BROADCAST;
 
 	/* TODO: support multicast list */
 	if ((if_getflags(ifp) & IFF_ALLMULTI) || !if_maddr_empty(ifp))
 		filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
 
 	return (hn_set_rxfilter(sc, filter));
 }
 
 /*
  * Compute the TX aggregation limits (total size, packet count and
  * alignment) from the softc settings and the RNDIS-reported limits,
  * then store them in every TX ring under that ring's hn_tx_lock.
  *
  * A size and packet count of 0 mean aggregation is disabled.
  */
 static void
 hn_set_txagg(struct hn_softc *sc)
 {
 	uint32_t size, pkts;
 	int i;
 
 	/*
 	 * Setup aggregation size.
 	 */
 	/* A negative setting means "no limit from this knob". */
 	if (sc->hn_agg_size < 0)
 		size = UINT32_MAX;
 	else
 		size = sc->hn_agg_size;
 
 	if (sc->hn_rndis_agg_size < size)
 		size = sc->hn_rndis_agg_size;
 
 	/* NOTE: We only aggregate packets using chimney sending buffers. */
 	if (size > (uint32_t)sc->hn_chim_szmax)
 		size = sc->hn_chim_szmax;
 
 	/* Not worth it unless more than two minimum-size packets fit. */
 	if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
 		/* Disable */
 		size = 0;
 		pkts = 0;
 		goto done;
 	}
 
 	/* NOTE: Type of the per TX ring setting is 'int'. */
 	if (size > INT_MAX)
 		size = INT_MAX;
 
 	/*
 	 * Setup aggregation packet count.
 	 */
 	/* A negative setting means "no limit from this knob". */
 	if (sc->hn_agg_pkts < 0)
 		pkts = UINT32_MAX;
 	else
 		pkts = sc->hn_agg_pkts;
 
 	if (sc->hn_rndis_agg_pkts < pkts)
 		pkts = sc->hn_rndis_agg_pkts;
 
 	/* Aggregating a single packet is pointless. */
 	if (pkts <= 1) {
 		/* Disable */
 		size = 0;
 		pkts = 0;
 		goto done;
 	}
 
 	/* NOTE: Type of the per TX ring setting is 'short'. */
 	if (pkts > SHRT_MAX)
 		pkts = SHRT_MAX;
 
 done:
 	/* NOTE: Type of the per TX ring setting is 'short'. */
 	if (sc->hn_rndis_agg_align > SHRT_MAX) {
 		/* Disable */
 		size = 0;
 		pkts = 0;
 	}
 
 	if (bootverbose) {
 		if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n",
 		    size, pkts, sc->hn_rndis_agg_align);
 	}
 
 	/* Publish the limits to each TX ring under its own lock. */
 	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
 		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
 
 		mtx_lock(&txr->hn_tx_lock);
 		txr->hn_agg_szmax = size;
 		txr->hn_agg_pktmax = pkts;
 		txr->hn_agg_align = sc->hn_rndis_agg_align;
 		mtx_unlock(&txr->hn_tx_lock);
 	}
 }
 
 /*
  * Return the software TX queue depth for a ring: the larger of
  * hn_tx_swq_depth and the ring's TX descriptor count.
  */
 static int
 hn_get_txswq_depth(const struct hn_tx_ring *txr)
 {
 	int depth;
 
 	KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
 
 	depth = hn_tx_swq_depth;
 	if (depth < txr->hn_txdesc_cnt)
 		depth = txr->hn_txdesc_cnt;
 	return (depth);
 }
 
 /*
  * Reconfigure RSS by disabling it and then enabling it again through
  * hn_rndis_conf_rss().
  *
  * Returns ENXIO if the synthetic parts are not attached, the error of
  * the failing hn_rndis_conf_rss() call, or 0 on success.
  */
 static int
 hn_rss_reconfig(struct hn_softc *sc)
 {
 	int rc;
 
 	HN_LOCK_ASSERT(sc);
 
 	if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
 		return (ENXIO);
 
 	/*
 	 * Disable RSS first.
 	 *
 	 * NOTE:
 	 * Direct reconfiguration by setting the UNCHG flags does
 	 * _not_ work properly.
 	 */
 	if (bootverbose)
 		if_printf(sc->hn_ifp, "disable RSS\n");
 	rc = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
 	if (rc != 0) {
 		if_printf(sc->hn_ifp, "RSS disable failed\n");
 		return (rc);
 	}
 
 	/*
 	 * Reenable the RSS w/ the updated RSS key or indirect
 	 * table.
 	 */
 	if (bootverbose)
 		if_printf(sc->hn_ifp, "reconfig RSS\n");
 	rc = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
 	if (rc != 0)
 		if_printf(sc->hn_ifp, "RSS reconfig failed\n");
 	return (rc);
 }
 
 /*
  * Clamp every entry of the RSS indirect table that refers to a channel
  * index at or beyond the number of RX rings in use to the last usable
  * channel, logging each change.
  */
 static void
 hn_rss_ind_fixup(struct hn_softc *sc)
 {
 	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
 	int idx, nchan = sc->hn_rx_ring_inuse;
 
 	KASSERT(nchan > 1, ("invalid # of channels %d", nchan));
 
 	/*
 	 * Check indirect table to make sure that all channels in it
 	 * can be used.
 	 */
 	for (idx = 0; idx < NDIS_HASH_INDCNT; idx++) {
 		if (rss->rss_ind[idx] < nchan)
 			continue;
 
 		if_printf(sc->hn_ifp,
 		    "RSS indirect table %d fixup: %u -> %d\n",
 		    idx, rss->rss_ind[idx], nchan - 1);
 		rss->rss_ind[idx] = nchan - 1;
 	}
 }
 
 /*
  * ifmedia change callback: changing the media is not supported, so
  * this always returns EOPNOTSUPP.
  */
 static int
 hn_ifmedia_upd(if_t ifp __unused)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * ifmedia status callback: fill in *ifmr from the softc's link flags.
  * With the link up the media is reported as active 10G-T full duplex;
  * otherwise IFM_NONE is reported.
  */
 static void
 hn_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr)
 {
 	struct hn_softc *sc = if_getsoftc(ifp);
 
 	ifmr->ifm_status = IFM_AVALID;
 	ifmr->ifm_active = IFM_ETHER;
 
 	if (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) {
 		ifmr->ifm_status |= IFM_ACTIVE;
 		ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
 	} else {
 		ifmr->ifm_active |= IFM_NONE;
 	}
 }
 
 /*
  * Task handler used by hn_rxvf_set(): store the VF ifnet carried in the
  * argument into the RX ring carried in the same argument.
  */
 static void
 hn_rxvf_set_task(void *xarg, int pending __unused)
 {
 	struct hn_rxvf_setarg *setarg = xarg;
 	struct hn_rx_ring *rxr = setarg->rxr;
 
 	rxr->hn_rxvf_ifp = setarg->vf_ifp;
 }
 
 /*
  * Record vf_ifp (which may be NULL) in every RX ring.  For rings in use
  * the store is performed by hn_rxvf_set_task() via
  * vmbus_chan_run_task() on the ring's channel; the remaining rings are
  * updated directly.
  */
 static void
 hn_rxvf_set(struct hn_softc *sc, if_t vf_ifp)
 {
 	struct hn_rxvf_setarg arg;
 	struct hn_rx_ring *rxr;
 	struct task task;
 	int idx;
 
 	HN_LOCK_ASSERT(sc);
 
 	TASK_INIT(&task, 0, hn_rxvf_set_task, &arg);
 
 	/* The VF is the same for every ring; only the ring changes. */
 	arg.vf_ifp = vf_ifp;
 
 	for (idx = 0; idx < sc->hn_rx_ring_cnt; idx++) {
 		rxr = &sc->hn_rx_ring[idx];
 
 		if (idx >= sc->hn_rx_ring_inuse) {
 			rxr->hn_rxvf_ifp = vf_ifp;
 			continue;
 		}
 
 		arg.rxr = rxr;
 		vmbus_chan_run_task(rxr->hn_chan, &task);
 	}
 }
 
 /*
  * Decide whether ifp is the VF paired with this hn(4) instance: it must
  * be a different Ethernet interface, not a lagg/vlan interface, and
  * carry the same link-level address as hn(4)'s own ifnet.
  */
 static bool
 hn_ismyvf(const struct hn_softc *sc, const if_t ifp)
 {
 	if_t hn_ifp = sc->hn_ifp;
 	const char *dname;
 
 	if (ifp == hn_ifp || if_getalloctype(ifp) != IFT_ETHER)
 		return (false);
 
 	/* Ignore lagg/vlan interfaces */
 	dname = if_getdname(ifp);
 	if (strcmp(dname, "lagg") == 0 || strcmp(dname, "vlan") == 0)
 		return (false);
 
 	/*
 	 * During detach events if_getifaddr(ifp) might be NULL.
 	 * Make sure the bcmp() below doesn't panic on that:
 	 */
 	if (if_getifaddr(ifp) == NULL || if_getifaddr(hn_ifp) == NULL)
 		return (false);
 
 	return (bcmp(if_getlladdr(ifp), if_getlladdr(hn_ifp),
 	    ETHER_ADDR_LEN) == 0);
 }
 
 /*
  * Switch the datapath to (rxvf == true) or away from (rxvf == false)
  * the VF 'ifp'.  Nothing is done unless the synthetic parts are
  * attached, ifp passes hn_ismyvf(), and the requested state differs
  * from the current HN_FLAG_RXVF state.
  */
 static void
 hn_rxvf_change(struct hn_softc *sc, if_t ifp, bool rxvf)
 {
 	if_t hn_ifp;
 	bool rxvf_active;
 
 	HN_LOCK(sc);
 
 	if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) || !hn_ismyvf(sc, ifp))
 		goto out;
 	hn_ifp = sc->hn_ifp;
 
 	/* Already in the requested state; nothing to change. */
 	rxvf_active = (sc->hn_flags & HN_FLAG_RXVF) != 0;
 	if (rxvf_active == rxvf)
 		goto out;
 
 	if (rxvf) {
 		sc->hn_flags |= HN_FLAG_RXVF;
 		hn_rxfilter_config(sc);
 	} else {
 		sc->hn_flags &= ~HN_FLAG_RXVF;
 		if (if_getdrvflags(hn_ifp) & IFF_DRV_RUNNING)
 			hn_rxfilter_config(sc);
 		else
 			hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);
 	}
 
 	hn_nvs_set_datapath(sc,
 	    rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH);
 
 	hn_rxvf_set(sc, rxvf ? ifp : NULL);
 
 	if (!rxvf) {
 		hn_vf_rss_restore(sc);
 		hn_resume_mgmt(sc);
 	} else {
 		hn_vf_rss_fixup(sc, true);
 		hn_suspend_mgmt(sc);
 		/* Clear hn(4)'s own link state and report it as down. */
 		sc->hn_link_flags &=
 		    ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG);
 		if_link_state_change(hn_ifp, LINK_STATE_DOWN);
 	}
 
 	devctl_notify("HYPERV_NIC_VF", if_name(hn_ifp),
 	    rxvf ? "VF_UP" : "VF_DOWN", NULL);
 
 	if (bootverbose) {
 		if_printf(hn_ifp, "datapath is switched %s %s\n",
 		    rxvf ? "to" : "from", if_name(ifp));
 	}
 out:
 	HN_UNLOCK(sc);
 }
 
 /*
  * ifnet event handler: forward interface UP/DOWN events to
  * hn_rxvf_change(); every other event is ignored.
  */
 static void
 hn_ifnet_event(void *arg, if_t ifp, int event)
 {
 	switch (event) {
 	case IFNET_EVENT_UP:
 		hn_rxvf_change(arg, ifp, true);
 		break;
 	case IFNET_EVENT_DOWN:
 		hn_rxvf_change(arg, ifp, false);
 		break;
 	default:
 		break;
 	}
 }
 
 /*
  * Interface address event handler: call hn_rxvf_change() with the
  * current IFF_UP state of ifp.
  */
 static void
 hn_ifaddr_event(void *arg, if_t ifp)
 {
 	bool is_up = (if_getflags(ifp) & IFF_UP) != 0;
 
 	hn_rxvf_change(arg, ifp, is_up);
 }
 
 /*
  * Copy the VF's enabled capabilities and hwassist bits onto hn(4)'s
  * ifnet.  The request argument is unused.  Always returns 0.
  */
 static int
 hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr __unused)
 {
 	if_t hn_ifp, vf;
 
 	HN_LOCK_ASSERT(sc);
 
 	hn_ifp = sc->hn_ifp;
 	vf = sc->hn_vf_ifp;
 
 	/*
 	 * Just sync up with VF's enabled capabilities.
 	 */
 	if_setcapenable(hn_ifp, if_getcapenable(vf));
 	if_sethwassist(hn_ifp, if_gethwassist(vf));
 
 	return (0);
 }
 
 /*
  * Issue SIOCSIFFLAGS to the VF with the flags currently stored in the
  * VF's ifnet, split into the low 16 bits (ifr_flags) and the high bits
  * (ifr_flagshigh).  Returns the result of ifhwioctl().
  */
 static int
 hn_xpnt_vf_iocsetflags(struct hn_softc *sc)
 {
 	struct ifreq ifr;
 	if_t vf_ifp;
 	int vf_flags;
 
 	HN_LOCK_ASSERT(sc);
 	vf_ifp = sc->hn_vf_ifp;
 
 	memset(&ifr, 0, sizeof(ifr));
 	strlcpy(ifr.ifr_name, if_name(vf_ifp), sizeof(ifr.ifr_name));
 
 	vf_flags = if_getflags(vf_ifp);
 	ifr.ifr_flags = vf_flags & 0xffff;
 	ifr.ifr_flagshigh = vf_flags >> 16;
 
 	return (ifhwioctl(SIOCSIFFLAGS, vf_ifp, (caddr_t)&ifr, curthread));
 }
 
 /*
  * Overwrite the VF's interface flags with hn(4)'s flags, adding
  * IFF_ALLMULTI whenever hn(4) has a non-empty multicast address list.
  */
 static void
 hn_xpnt_vf_saveifflags(struct hn_softc *sc)
 {
 	if_t ifp = sc->hn_ifp;
 	int extra;
 
 	HN_LOCK_ASSERT(sc);
 
 	/* XXX vlan(4) style mcast addr maintenance */
 	extra = if_maddr_empty(ifp) ? 0 : IFF_ALLMULTI;
 
 	/* Always set the VF's if_flags */
 	if_setflags(sc->hn_vf_ifp, if_getflags(ifp) | extra);
 }
 
 /*
  * Input routine installed on the VF's ifnet.  It looks up the hn(4)
  * ifnet mapped to the VF in hn_vfmap and hands the packet chain to that
  * interface's if_input after retargeting every packet's rcvif.  When no
  * mapping exists the whole chain is freed.
  */
 static void
 hn_xpnt_vf_input(if_t vf_ifp, struct mbuf *m)
 {
 	struct rm_priotracker tracker;
 	struct mbuf *pkt, *next;
 	if_t hn_ifp = NULL;
 
 	/*
 	 * XXX racy, if hn(4) ever detached.
 	 */
 	rm_rlock(&hn_vfmap_lock, &tracker);
 	if (if_getindex(vf_ifp) < hn_vfmap_size)
 		hn_ifp = hn_vfmap[if_getindex(vf_ifp)];
 	rm_runlock(&hn_vfmap_lock, &tracker);
 
 	if (hn_ifp == NULL) {
 		/*
 		 * In the middle of the transition; free this
 		 * mbuf chain.
 		 */
 		for (pkt = m; pkt != NULL; pkt = next) {
 			next = pkt->m_nextpkt;
 			pkt->m_nextpkt = NULL;
 			m_freem(pkt);
 		}
 		return;
 	}
 
 	for (pkt = m; pkt != NULL; pkt = pkt->m_nextpkt) {
 		/*
 		 * Allow tapping on the VF.
 		 */
 		ETHER_BPF_MTAP(vf_ifp, pkt);
 
 		/*
 		 * Update VF stats.
 		 */
 		if (!(if_getcapenable(vf_ifp) & IFCAP_HWSTATS)) {
 			if_inc_counter(vf_ifp, IFCOUNTER_IBYTES,
 			    pkt->m_pkthdr.len);
 		}
 		/*
 		 * XXX IFCOUNTER_IMCAST
 		 * This stat updating is kinda invasive, since it
 		 * requires two checks on the mbuf: the length check
 		 * and the ethernet header check.  As of this writing,
 		 * all multicast packets go directly to hn(4), which
 		 * makes imcast stat updating in the VF a try in vain.
 		 */
 
 		/*
 		 * Fix up rcvif and increase hn(4)'s ipackets.
 		 */
 		pkt->m_pkthdr.rcvif = hn_ifp;
 		if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1);
 	}
 
 	/*
 	 * Go through hn(4)'s if_input.
 	 */
 	if_input(hn_ifp, m);
 }
 
 /*
  * Re-derive MTU dependent settings: recompute the TSO maximum size from
  * the interface's current MTU and raise the LRO length limit to
  * HN_LRO_LENLIM_MIN() when the first RX ring's limit is below it.
  */
 static void
 hn_mtu_change_fixup(struct hn_softc *sc)
 {
 	if_t ifp;
 
 	HN_LOCK_ASSERT(sc);
 
 	ifp = sc->hn_ifp;
 	hn_set_tso_maxsize(sc, hn_tso_maxlen, if_getmtu(ifp));
 
 	/* Only ring 0 is inspected to decide whether to raise the limit. */
 	if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) {
 		hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
 	}
 }
 
 /*
  * Translate a set of NDIS_HASH_* bits into the corresponding
  * RSS_TYPE_* bits.  Bits without an entry in the table below are
  * dropped.
  */
 static uint32_t
 hn_rss_type_fromndis(uint32_t rss_hash)
 {
 	static const struct {
 		uint32_t	ndis_bit;
 		uint32_t	rss_type;
 	} hash_map[] = {
 		{ NDIS_HASH_IPV4,		RSS_TYPE_IPV4 },
 		{ NDIS_HASH_TCP_IPV4,		RSS_TYPE_TCP_IPV4 },
 		{ NDIS_HASH_IPV6,		RSS_TYPE_IPV6 },
 		{ NDIS_HASH_IPV6_EX,		RSS_TYPE_IPV6_EX },
 		{ NDIS_HASH_TCP_IPV6,		RSS_TYPE_TCP_IPV6 },
 		{ NDIS_HASH_TCP_IPV6_EX,	RSS_TYPE_TCP_IPV6_EX },
 		{ NDIS_HASH_UDP_IPV4_X,		RSS_TYPE_UDP_IPV4 },
 	};
 	uint32_t types = 0;
 	unsigned int idx;
 
 	for (idx = 0; idx < sizeof(hash_map) / sizeof(hash_map[0]); idx++) {
 		if (rss_hash & hash_map[idx].ndis_bit)
 			types |= hash_map[idx].rss_type;
 	}
 	return (types);
 }
 
 /*
  * Translate a set of RSS_TYPE_* bits into the corresponding
  * NDIS_HASH_* bits.  The UDP/IPv6 types are asserted to be absent;
  * bits without an entry in the table below are dropped.
  */
 static uint32_t
 hn_rss_type_tondis(uint32_t types)
 {
 	static const struct {
 		uint32_t	rss_type;
 		uint32_t	ndis_bit;
 	} type_map[] = {
 		{ RSS_TYPE_IPV4,		NDIS_HASH_IPV4 },
 		{ RSS_TYPE_TCP_IPV4,		NDIS_HASH_TCP_IPV4 },
 		{ RSS_TYPE_IPV6,		NDIS_HASH_IPV6 },
 		{ RSS_TYPE_IPV6_EX,		NDIS_HASH_IPV6_EX },
 		{ RSS_TYPE_TCP_IPV6,		NDIS_HASH_TCP_IPV6 },
 		{ RSS_TYPE_TCP_IPV6_EX,		NDIS_HASH_TCP_IPV6_EX },
 		{ RSS_TYPE_UDP_IPV4,		NDIS_HASH_UDP_IPV4_X },
 	};
 	uint32_t rss_hash = 0;
 	unsigned int idx;
 
 	KASSERT((types & (RSS_TYPE_UDP_IPV6 | RSS_TYPE_UDP_IPV6_EX)) == 0,
 	    ("UDP6 and UDP6EX are not supported"));
 
 	for (idx = 0; idx < sizeof(type_map) / sizeof(type_map[0]); idx++) {
 		if (types & type_map[idx].rss_type)
 			rss_hash |= type_map[idx].ndis_bit;
 	}
 	return (rss_hash);
 }
 
 static void
 hn_rss_mbuf_hash(struct hn_softc *sc, uint32_t mbuf_hash)
 {
 	int i;
 
 	HN_LOCK_ASSERT(sc);
 
 	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
 		sc->hn_rx_ring[i].hn_mbuf_hash = mbuf_hash;
 }
 
 /*
  * Align the synthetic device's RSS settings with the VF's.
  *
  * The VF's RSS key and hash types are queried through ifhwioctl().
  * When both use Toeplitz, the key has the expected length and the hash
  * types overlap with hn(4)'s capabilities, the VF's key is copied into
  * the softc, hn_rss_hash is narrowed to the common hash types and, if
  * 'reconf' is true, RSS is reconfigured through hn_rss_reconfig().
  *
  * Finally the per RX ring mbuf hash mask is set.  On any of the failure
  * paths that jump to 'done', mbuf_types is still 0, so an empty mask is
  * installed.  The two early returns (only one RX ring in use, or no
  * Toeplitz capability) leave everything untouched.
  */
 static void
 hn_vf_rss_fixup(struct hn_softc *sc, bool reconf)
 {
 	if_t ifp, vf_ifp;
 	struct ifrsshash ifrh;
 	struct ifrsskey ifrk;
 	int error;
 	uint32_t my_types, diff_types, mbuf_types = 0;
 
 	HN_LOCK_ASSERT(sc);
 	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
 	    ("%s: synthetic parts are not attached", if_name(sc->hn_ifp)));
 
 	if (sc->hn_rx_ring_inuse == 1) {
 		/* No RSS on synthetic parts; done. */
 		return;
 	}
 	if ((sc->hn_rss_hcap & NDIS_HASH_FUNCTION_TOEPLITZ) == 0) {
 		/* Synthetic parts do not support Toeplitz; done. */
 		return;
 	}
 
 	ifp = sc->hn_ifp;
 	vf_ifp = sc->hn_vf_ifp;
 
 	/*
 	 * Extract VF's RSS key.  Only 40 bytes key for Toeplitz is
 	 * supported.
 	 */
 	memset(&ifrk, 0, sizeof(ifrk));
 	strlcpy(ifrk.ifrk_name, if_name(vf_ifp), sizeof(ifrk.ifrk_name));
 	error = ifhwioctl(SIOCGIFRSSKEY, vf_ifp, (caddr_t)&ifrk, curthread);
 	if (error) {
 		if_printf(ifp, "%s SIOCGIFRSSKEY failed: %d\n",
 		    if_name(vf_ifp), error);
 		goto done;
 	}
 	if (ifrk.ifrk_func != RSS_FUNC_TOEPLITZ) {
 		if_printf(ifp, "%s RSS function %u is not Toeplitz\n",
 		    if_name(vf_ifp), ifrk.ifrk_func);
 		goto done;
 	}
 	if (ifrk.ifrk_keylen != NDIS_HASH_KEYSIZE_TOEPLITZ) {
 		if_printf(ifp, "%s invalid RSS Toeplitz key length %d\n",
 		    if_name(vf_ifp), ifrk.ifrk_keylen);
 		goto done;
 	}
 
 	/*
 	 * Extract VF's RSS hash.  Only Toeplitz is supported.
 	 */
 	memset(&ifrh, 0, sizeof(ifrh));
 	strlcpy(ifrh.ifrh_name, if_name(vf_ifp), sizeof(ifrh.ifrh_name));
 	error = ifhwioctl(SIOCGIFRSSHASH, vf_ifp, (caddr_t)&ifrh, curthread);
 	if (error) {
 		if_printf(ifp, "%s SIOCGRSSHASH failed: %d\n",
 		    if_name(vf_ifp), error);
 		goto done;
 	}
 	if (ifrh.ifrh_func != RSS_FUNC_TOEPLITZ) {
 		if_printf(ifp, "%s RSS function %u is not Toeplitz\n",
 		    if_name(vf_ifp), ifrh.ifrh_func);
 		goto done;
 	}
 
 	my_types = hn_rss_type_fromndis(sc->hn_rss_hcap);
 	if ((ifrh.ifrh_types & my_types) == 0) {
 		/* This disables RSS; ignore it then */
 		if_printf(ifp, "%s intersection of RSS types failed.  "
 		    "VF %#x, mine %#x\n", if_name(vf_ifp),
 		    ifrh.ifrh_types, my_types);
 		goto done;
 	}
 
 	/*
 	 * diff_types: hash types set on exactly one of the two sides.
 	 * my_types: hash types common to both sides from here on.
 	 * mbuf_types: starts as the common set and is reduced below.
 	 */
 	diff_types = my_types ^ ifrh.ifrh_types;
 	my_types &= ifrh.ifrh_types;
 	mbuf_types = my_types;
 
 	/*
 	 * Detect RSS hash value/type confliction.
 	 *
 	 * NOTE:
 	 * We don't disable the hash type, but stop delivery the hash
 	 * value/type through mbufs on RX path.
 	 *
 	 * XXX If HN_CAP_UDPHASH is set in hn_caps, then UDP 4-tuple
 	 * hash is delivered with type of TCP_IPV4.  This means if
 	 * UDP_IPV4 is enabled, then TCP_IPV4 should be forced, at
 	 * least to hn_mbuf_hash.  However, given that _all_ of the
 	 * NICs implement TCP_IPV4, this will _not_ impose any issues
 	 * here.
 	 */
 	/*
 	 * Each check below tests (diff_types & ifrh.ifrh_types), i.e. hash
 	 * types that only the VF has enabled.
 	 */
 	if ((my_types & RSS_TYPE_IPV4) &&
 	    (diff_types & ifrh.ifrh_types &
 	     (RSS_TYPE_TCP_IPV4 | RSS_TYPE_UDP_IPV4))) {
 		/* Conflict; disable IPV4 hash type/value delivery. */
 		if_printf(ifp, "disable IPV4 mbuf hash delivery\n");
 		mbuf_types &= ~RSS_TYPE_IPV4;
 	}
 	if ((my_types & RSS_TYPE_IPV6) &&
 	    (diff_types & ifrh.ifrh_types &
 	     (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 |
 	      RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX |
 	      RSS_TYPE_IPV6_EX))) {
 		/* Conflict; disable IPV6 hash type/value delivery. */
 		if_printf(ifp, "disable IPV6 mbuf hash delivery\n");
 		mbuf_types &= ~RSS_TYPE_IPV6;
 	}
 	if ((my_types & RSS_TYPE_IPV6_EX) &&
 	    (diff_types & ifrh.ifrh_types &
 	     (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 |
 	      RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX |
 	      RSS_TYPE_IPV6))) {
 		/* Conflict; disable IPV6_EX hash type/value delivery. */
 		if_printf(ifp, "disable IPV6_EX mbuf hash delivery\n");
 		mbuf_types &= ~RSS_TYPE_IPV6_EX;
 	}
 	if ((my_types & RSS_TYPE_TCP_IPV6) &&
 	    (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6_EX)) {
 		/* Conflict; disable TCP_IPV6 hash type/value delivery. */
 		if_printf(ifp, "disable TCP_IPV6 mbuf hash delivery\n");
 		mbuf_types &= ~RSS_TYPE_TCP_IPV6;
 	}
 	if ((my_types & RSS_TYPE_TCP_IPV6_EX) &&
 	    (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6)) {
 		/* Conflict; disable TCP_IPV6_EX hash type/value delivery. */
 		if_printf(ifp, "disable TCP_IPV6_EX mbuf hash delivery\n");
 		mbuf_types &= ~RSS_TYPE_TCP_IPV6_EX;
 	}
 	if ((my_types & RSS_TYPE_UDP_IPV6) &&
 	    (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6_EX)) {
 		/* Conflict; disable UDP_IPV6 hash type/value delivery. */
 		if_printf(ifp, "disable UDP_IPV6 mbuf hash delivery\n");
 		mbuf_types &= ~RSS_TYPE_UDP_IPV6;
 	}
 	if ((my_types & RSS_TYPE_UDP_IPV6_EX) &&
 	    (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6)) {
 		/* Conflict; disable UDP_IPV6_EX hash type/value delivery. */
 		if_printf(ifp, "disable UDP_IPV6_EX mbuf hash delivery\n");
 		mbuf_types &= ~RSS_TYPE_UDP_IPV6_EX;
 	}
 
 	/*
 	 * Indirect table does not matter.
 	 */
 
 	/* Adopt the common hash types and the VF's key. */
 	sc->hn_rss_hash = (sc->hn_rss_hcap & NDIS_HASH_FUNCTION_MASK) |
 	    hn_rss_type_tondis(my_types);
 	memcpy(sc->hn_rss.rss_key, ifrk.ifrk_key, sizeof(sc->hn_rss.rss_key));
 	sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
 
 	if (reconf) {
 		error = hn_rss_reconfig(sc);
 		if (error) {
 			/* XXX roll-back? */
 			if_printf(ifp, "hn_rss_reconfig failed: %d\n", error);
 			/* XXX keep going. */
 		}
 	}
 done:
 	/* Hash deliverability for mbufs. */
 	hn_rss_mbuf_hash(sc, hn_rss_type_tondis(mbuf_types));
 }
 
 /*
  * Undo the RSS adjustments made for a VF: when more than one RX ring
  * is in use and the configured hash differs from the hash
  * capabilities, restore the hash to the capabilities and reconfigure
  * RSS.  In every case the mbuf hash mask of all RX rings is reset to
  * NDIS_HASH_ALL.
  */
 static void
 hn_vf_rss_restore(struct hn_softc *sc)
 {
 
 	HN_LOCK_ASSERT(sc);
 	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
 	    ("%s: synthetic parts are not attached", if_name(sc->hn_ifp)));
 
 	/*
 	 * Restore hash types.  Key does _not_ matter.
 	 */
 	if (sc->hn_rx_ring_inuse != 1 && sc->hn_rss_hash != sc->hn_rss_hcap) {
 		int rc;
 
 		sc->hn_rss_hash = sc->hn_rss_hcap;
 		rc = hn_rss_reconfig(sc);
 		if (rc != 0) {
 			if_printf(sc->hn_ifp, "hn_rss_reconfig failed: %d\n",
 			    rc);
 			/* XXX keep going. */
 		}
 	}
 
 	/* Hash deliverability for mbufs. */
 	hn_rss_mbuf_hash(sc, NDIS_HASH_ALL);
 }
 
 /*
  * Mark the transparent mode VF as ready (hn_vf_rdytick = 0) and adapt
  * hn(4)'s ifnet to it:
  *  - save hn(4)'s capabilities, TSO limits, enabled capabilities and
  *    hwassist in the softc for later restoration;
  *  - reduce hn(4)'s capabilities to those the VF also supports and its
  *    TSO limits to the smaller of the two interfaces' limits;
  *  - sync the enabled capabilities through hn_xpnt_vf_iocsetcaps();
  *  - if hn(4)'s MTU is not ETHERMTU, try to set the same MTU on the VF
  *    and, should that fail for an MTU above ETHERMTU, fall back to
  *    ETHERMTU on hn(4).
  */
 static void
 hn_xpnt_vf_setready(struct hn_softc *sc)
 {
 	if_t ifp, vf_ifp;
 	struct ifreq ifr;
 
 	HN_LOCK_ASSERT(sc);
 	ifp = sc->hn_ifp;
 	vf_ifp = sc->hn_vf_ifp;
 
 	/*
 	 * Mark the VF ready.
 	 */
 	sc->hn_vf_rdytick = 0;
 
 	/*
 	 * Save information for restoration.
 	 */
 	sc->hn_saved_caps = if_getcapabilities(ifp);
 	sc->hn_saved_tsomax = if_gethwtsomax(ifp);
 	sc->hn_saved_tsosegcnt = if_gethwtsomaxsegcount(ifp);
 	sc->hn_saved_tsosegsz = if_gethwtsomaxsegsize(ifp);
 	sc->hn_saved_capenable = if_getcapenable(ifp);
 	sc->hn_saved_hwassist = if_gethwassist(ifp);
 
 	/*
 	 * Intersect supported/enabled capabilities.
 	 *
 	 * NOTE:
 	 * if_hwassist is not changed here.
 	 *
 	 * The intersection is computed explicitly.  Passing the mask as
 	 * the "clear" argument of if_setcapabilitiesbit() or
 	 * if_setcapenablebit() would strip the common bits instead of
 	 * keeping them.
 	 */
 	if_setcapabilities(ifp,
 	    if_getcapabilities(ifp) & if_getcapabilities(vf_ifp));
 	if_setcapenable(ifp, if_getcapenable(ifp) & if_getcapabilities(ifp));
 
 	/*
 	 * Fix TSO settings.
 	 */
 	if (if_gethwtsomax(ifp) > if_gethwtsomax(vf_ifp))
 		if_sethwtsomax(ifp, if_gethwtsomax(vf_ifp));
 	if (if_gethwtsomaxsegcount(ifp) > if_gethwtsomaxsegcount(vf_ifp))
 		if_sethwtsomaxsegcount(ifp, if_gethwtsomaxsegcount(vf_ifp));
 	if (if_gethwtsomaxsegsize(ifp) > if_gethwtsomaxsegsize(vf_ifp))
 		if_sethwtsomaxsegsize(ifp, if_gethwtsomaxsegsize(vf_ifp));
 
 	/*
 	 * Change VF's enabled capabilities.
 	 */
 	memset(&ifr, 0, sizeof(ifr));
 	strlcpy(ifr.ifr_name, if_name(vf_ifp), sizeof(ifr.ifr_name));
 	ifr.ifr_reqcap = if_getcapenable(ifp);
 	hn_xpnt_vf_iocsetcaps(sc, &ifr);
 
 	if (if_getmtu(ifp) != ETHERMTU) {
 		int error;
 
 		/*
 		 * Change VF's MTU.
 		 */
 		memset(&ifr, 0, sizeof(ifr));
 		strlcpy(ifr.ifr_name, if_name(vf_ifp), sizeof(ifr.ifr_name));
 		ifr.ifr_mtu = if_getmtu(ifp);
 		error = ifhwioctl(SIOCSIFMTU, vf_ifp, (caddr_t)&ifr, curthread);
 		if (error) {
 			if_printf(ifp, "%s SIOCSIFMTU %u failed\n",
 			    if_name(vf_ifp), if_getmtu(ifp));
 			if (if_getmtu(ifp) > ETHERMTU) {
 				if_printf(ifp, "change MTU to %d\n", ETHERMTU);
 
 				/*
 				 * XXX
 				 * No need to adjust the synthetic parts' MTU;
 				 * failure of the adjustment will cause us
 				 * infinite headache.
 				 */
 				if_setmtu(ifp, ETHERMTU);
 				hn_mtu_change_fixup(sc);
 			}
 		}
 	}
 }
 
 /*
  * Tell whether the transparent mode VF may be used.  Returns false when
  * transparent VF mode is off, no VF is attached, or the ready tick
  * still lies in the future.  When the ready tick has passed, the VF is
  * marked ready as a side effect before true is returned.
  */
 static bool
 hn_xpnt_vf_isready(struct hn_softc *sc)
 {
 
 	HN_LOCK_ASSERT(sc);
 
 	if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL)
 		return (false);
 
 	/* A ready tick of 0 means the VF has already been marked ready. */
 	if (sc->hn_vf_rdytick != 0) {
 		if (sc->hn_vf_rdytick > ticks)
 			return (false);
 
 		/* Mark VF as ready. */
 		hn_xpnt_vf_setready(sc);
 	}
 	return (true);
 }
 
 static void
 hn_xpnt_vf_setenable(struct hn_softc *sc)
 {
 	int i;
 
 	HN_LOCK_ASSERT(sc);
 
 	/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
 	rm_wlock(&sc->hn_vf_lock);
 	sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED;
 	rm_wunlock(&sc->hn_vf_lock);
 
 	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
 		sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF;
 }
 
 static void
 hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf)
 {
 	int i;
 
 	HN_LOCK_ASSERT(sc);
 
 	/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
 	rm_wlock(&sc->hn_vf_lock);
 	sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED;
 	if (clear_vf)
 		sc->hn_vf_ifp = NULL;
 	rm_wunlock(&sc->hn_vf_lock);
 
 	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
 		sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF;
 }
 
 /*
  * Bring the transparent mode VF up and switch to it: copy hn(4)'s
  * interface flags to the VF, set IFF_UP on the VF and push the flags
  * with SIOCSIFFLAGS; on success switch the NVS datapath to the VF, fix
  * up RSS and mark the transparent VF as enabled.  If bringing the VF
  * up fails, the error is logged and nothing else is changed.
  *
  * Must not be called while the transparent VF is already enabled.
  */
 static void
 hn_xpnt_vf_init(struct hn_softc *sc)
 {
 	int error;
 
 	HN_LOCK_ASSERT(sc);
 
 	KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
 	    ("%s: transparent VF was enabled", if_name(sc->hn_ifp)));
 
 	if (bootverbose) {
 		if_printf(sc->hn_ifp, "try bringing up %s\n",
 		    if_name(sc->hn_vf_ifp));
 	}
 
 	/*
 	 * Bring the VF up.
 	 *
 	 * IFF_UP has to be set on the VF's ifnet, since
 	 * hn_xpnt_vf_iocsetflags() sends the VF's flags.
 	 */
 	hn_xpnt_vf_saveifflags(sc);
 	if_setflagbits(sc->hn_vf_ifp, IFF_UP, 0);
 	error = hn_xpnt_vf_iocsetflags(sc);
 	if (error) {
 		if_printf(sc->hn_ifp, "bringing up %s failed: %d\n",
 		    if_name(sc->hn_vf_ifp), error);
 		return;
 	}
 
 	/*
 	 * NOTE:
 	 * Datapath setting must happen _after_ bringing the VF up.
 	 */
 	hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);
 
 	/*
 	 * NOTE:
 	 * Fixup RSS related bits _after_ the VF is brought up, since
 	 * many VFs generate RSS key during their initialization.
 	 */
 	hn_vf_rss_fixup(sc, true);
 
 	/* Mark transparent mode VF as enabled. */
 	hn_xpnt_vf_setenable(sc);
 }
 
 /*
  * Task handler for the delayed transparent VF initialization.  It does
  * nothing unless the synthetic parts are attached, a VF is present and
  * the transparent VF is not enabled yet.  Otherwise the VF is marked
  * ready if it was not already, and brought up when hn(4) is running.
  */
 static void
 hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused)
 {
 	struct hn_softc *sc = xsc;
 
 	HN_LOCK(sc);
 
 	if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) ||
 	    sc->hn_vf_ifp == NULL ||
 	    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) != 0)
 		goto done;
 
 	/* Mark VF as ready. */
 	if (sc->hn_vf_rdytick != 0)
 		hn_xpnt_vf_setready(sc);
 
 	if ((if_getdrvflags(sc->hn_ifp) & IFF_DRV_RUNNING) != 0) {
 		/*
 		 * Delayed VF initialization.
 		 */
 		if (bootverbose) {
 			if_printf(sc->hn_ifp, "delayed initialize %s\n",
 			    if_name(sc->hn_vf_ifp));
 		}
 		hn_xpnt_vf_init(sc);
 	}
 done:
 	HN_UNLOCK(sc);
 }
 
 /*
  * ifnet attach event handler.  If the newly attached 'ifp' passes
  * hn_ismyvf() and no VF is recorded yet, register it:
  *  - map the VF's ifindex to hn(4)'s ifnet in hn_vfmap, growing the
  *    map if needed;
  *  - record the VF in the softc under hn_vf_lock;
  *  - in transparent VF mode, additionally take over the VF's input
  *    routine, suspend hn(4)'s link status management and schedule the
  *    delayed VF initialization task.
  *
  * In transparent VF mode a VF that uses if_start is rejected.
  */
 static void
 hn_ifnet_attevent(void *xsc, if_t ifp)
 {
 	struct hn_softc *sc = xsc;
 
 	HN_LOCK(sc);
 
 	if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
 		goto done;
 
 	if (!hn_ismyvf(sc, ifp))
 		goto done;
 
 	if (sc->hn_vf_ifp != NULL) {
 		if_printf(sc->hn_ifp, "%s was attached as VF\n",
 		    if_name(sc->hn_vf_ifp));
 		goto done;
 	}
 
 	if (hn_xpnt_vf && if_getstartfn(ifp) != NULL) {
 		/*
 		 * ifnet.if_start is _not_ supported by transparent
 		 * mode VF; mainly due to the IFF_DRV_OACTIVE flag.
 		 *
 		 * NOTE:
 		 * sc->hn_vf_ifp is NULL here (checked above), so the
 		 * message must name the candidate ifp.
 		 */
 		if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported "
 		    "in transparent VF mode.\n", if_name(ifp));
 
 		goto done;
 	}
 
 	rm_wlock(&hn_vfmap_lock);
 
 	if (if_getindex(ifp) >= hn_vfmap_size) {
 		if_t *newmap;
 		int newsize;
 
 		/* Grow the map so that it covers this ifindex. */
 		newsize = if_getindex(ifp) + HN_VFMAP_SIZE_DEF;
 		newmap = malloc(sizeof(if_t) * newsize, M_DEVBUF,
 		    M_WAITOK | M_ZERO);
 
 		memcpy(newmap, hn_vfmap,
 		    sizeof(if_t) * hn_vfmap_size);
 		free(hn_vfmap, M_DEVBUF);
 		hn_vfmap = newmap;
 		hn_vfmap_size = newsize;
 	}
 	KASSERT(hn_vfmap[if_getindex(ifp)] == NULL,
 	    ("%s: ifindex %d was mapped to %s",
 	     if_name(ifp), if_getindex(ifp), if_name(hn_vfmap[if_getindex(ifp)])));
 	hn_vfmap[if_getindex(ifp)] = sc->hn_ifp;
 
 	rm_wunlock(&hn_vfmap_lock);
 
 	/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
 	rm_wlock(&sc->hn_vf_lock);
 	KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
 	    ("%s: transparent VF was enabled", if_name(sc->hn_ifp)));
 	sc->hn_vf_ifp = ifp;
 	rm_wunlock(&sc->hn_vf_lock);
 
 	if (hn_xpnt_vf) {
 		int wait_ticks;
 
 		/*
 		 * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp.
 		 * Save vf_ifp's current if_input for later restoration.
 		 */
 		sc->hn_vf_input = if_getinputfn(ifp);
 		if_setinputfn(ifp, hn_xpnt_vf_input);
 
 		/*
 		 * Stop link status management; use the VF's.
 		 */
 		hn_suspend_mgmt(sc);
 
 		/*
 		 * Give the VF some time to complete its attach routine.
 		 */
 		wait_ticks = hn_xpnt_vf_attwait * hz;
 		sc->hn_vf_rdytick = ticks + wait_ticks;
 
 		taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init,
 		    wait_ticks);
 	}
 done:
 	HN_UNLOCK(sc);
 }
 
 /*
  * ifnet detach event handler.  If the departing 'ifp' is the VF
  * recorded for this hn(4), undo what was set up for it:
  *  - in transparent VF mode: drain the delayed init task, give the VF
  *    its input routine back, switch the datapath back to the synthetic
  *    parts if the transparent VF was enabled, restore hn(4)'s saved
  *    capabilities/TSO limits/hwassist if the VF had been marked ready,
  *    and restore RSS plus link status management;
  *  - mark the transparent VF as disabled and forget the VF;
  *  - remove the VF's ifindex mapping from hn_vfmap.
  */
 static void
 hn_ifnet_detevent(void *xsc, if_t ifp)
 {
 	struct hn_softc *sc = xsc;
 
 	HN_LOCK(sc);
 
 	if (sc->hn_vf_ifp == NULL)
 		goto done;
 
 	if (!hn_ismyvf(sc, ifp))
 		goto done;
 
 	if (hn_xpnt_vf) {
 		/*
 		 * Make sure that the delayed initialization is not running.
 		 *
 		 * NOTE:
 		 * - This lock _must_ be released, since the hn_vf_init task
 		 *   will try holding this lock.
 		 * - It is safe to release this lock here, since the
 		 *   hn_ifnet_attevent() is interlocked by the hn_vf_ifp.
 		 *
 		 * XXX racy, if hn(4) ever detached.
 		 */
 		HN_UNLOCK(sc);
 		taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init);
 		HN_LOCK(sc);
 
 		KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved",
 		    if_name(sc->hn_ifp)));
 		if_setinputfn(ifp, sc->hn_vf_input);
 		sc->hn_vf_input = NULL;
 
 		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) &&
 		    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED))
 			hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH);
 
 		if (sc->hn_vf_rdytick == 0) {
 			/*
 			 * The VF was ready; restore some settings.
 			 *
 			 * NOTE:
 			 * The saved values were taken from hn(4)'s own
 			 * ifnet, so they are restored onto sc->hn_ifp and
 			 * not onto the detaching VF 'ifp'.
 			 */
 			if_setcapabilities(sc->hn_ifp, sc->hn_saved_caps);
 
 			if_sethwtsomax(sc->hn_ifp, sc->hn_saved_tsomax);
 			if_sethwtsomaxsegcount(sc->hn_ifp,
 			    sc->hn_saved_tsosegcnt);
 			if_sethwtsomaxsegsize(sc->hn_ifp,
 			    sc->hn_saved_tsosegsz);
 
 			if_setcapenable(sc->hn_ifp, sc->hn_saved_capenable);
 			if_sethwassist(sc->hn_ifp, sc->hn_saved_hwassist);
 		}
 
 		if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
 			/*
 			 * Restore RSS settings.
 			 */
 			hn_vf_rss_restore(sc);
 
 			/*
 			 * Resume link status management, which was suspended
 			 * by hn_ifnet_attevent().
 			 */
 			hn_resume_mgmt(sc);
 		}
 	}
 
 	/* Mark transparent mode VF as disabled. */
 	hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */);
 
 	rm_wlock(&hn_vfmap_lock);
 
 	KASSERT(if_getindex(ifp) < hn_vfmap_size,
 	    ("ifindex %d, vfmapsize %d", if_getindex(ifp), hn_vfmap_size));
 	if (hn_vfmap[if_getindex(ifp)] != NULL) {
 		KASSERT(hn_vfmap[if_getindex(ifp)] == sc->hn_ifp,
 		    ("%s: ifindex %d was mapped to %s",
 		     if_name(ifp), if_getindex(ifp),
 		     if_name(hn_vfmap[if_getindex(ifp)])));
 		hn_vfmap[if_getindex(ifp)] = NULL;
 	}
 
 	rm_wunlock(&hn_vfmap_lock);
 done:
 	HN_UNLOCK(sc);
 }
 
 /*
  * ifnet link event handler: when the event comes from the recorded VF,
  * report the same link state on hn(4)'s own ifnet.
  */
 static void
 hn_ifnet_lnkevent(void *xsc, if_t ifp, int link_state)
 {
 	struct hn_softc *sc = xsc;
 
 	/* Link events of unrelated interfaces are ignored. */
 	if (ifp != sc->hn_vf_ifp)
 		return;
 
 	if_link_state_change(sc->hn_ifp, link_state);
 }
 
 /*
  * Sysctl handler exporting the interface's current hardware TSO maximum
  * through sysctl_handle_int() on a local copy of the value.
  */
 static int
 hn_tsomax_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	unsigned int tsomax = if_gethwtsomax(sc->hn_ifp);
 
 	return (sysctl_handle_int(oidp, &tsomax, 0, req));
 }
 
 /*
  * Sysctl handler exporting the interface's current hardware TSO maximum
  * segment count through sysctl_handle_int() on a local copy of the
  * value.
  */
 static int
 hn_tsomaxsegcnt_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	unsigned int tsomaxsegcnt = if_gethwtsomaxsegcount(sc->hn_ifp);
 
 	return (sysctl_handle_int(oidp, &tsomaxsegcnt, 0, req));
 }
 
 /*
  * Sysctl handler exporting the interface's current hardware TSO maximum
  * segment size through sysctl_handle_int() on a local copy of the
  * value.
  */
 static int
 hn_tsomaxsegsz_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	unsigned int tsomaxsegsz = if_gethwtsomaxsegsize(sc->hn_ifp);
 
 	return (sysctl_handle_int(oidp, &tsomaxsegsz, 0, req));
 }
 
 /*
  * Device probe method: claim the device with BUS_PROBE_DEFAULT when
  * VMBUS_PROBE_GUID() matches hn_guid, otherwise return ENXIO.
  */
 static int
 hn_probe(device_t dev)
 {
 
 	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) != 0)
 		return (ENXIO);
 
 	device_set_desc(dev, "Hyper-V Network Interface");
 	return (BUS_PROBE_DEFAULT);
 }
 
 static int
 hn_attach(device_t dev)
 {
 	struct hn_softc *sc = device_get_softc(dev);
 	struct sysctl_oid_list *child;
 	struct sysctl_ctx_list *ctx;
 	uint8_t eaddr[ETHER_ADDR_LEN];
 	if_t ifp = NULL;
 	int error, ring_cnt, tx_ring_cnt;
 	uint32_t mtu;
 
 	sc->hn_dev = dev;
 	sc->hn_prichan = vmbus_get_channel(dev);
 	HN_LOCK_INIT(sc);
 	rm_init(&sc->hn_vf_lock, "hnvf");
 	if (hn_xpnt_vf && hn_xpnt_vf_accbpf)
 		sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;
 
 	/*
 	 * Initialize these tunables once.
 	 */
 	sc->hn_agg_size = hn_tx_agg_size;
 	sc->hn_agg_pkts = hn_tx_agg_pkts;
 
 	/*
 	 * Setup taskqueue for transmission.
 	 */
 	if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) {
 		int i;
 
 		sc->hn_tx_taskqs =
 		    malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
 		    M_DEVBUF, M_WAITOK);
 		for (i = 0; i < hn_tx_taskq_cnt; ++i) {
 			sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx",
 			    M_WAITOK, taskqueue_thread_enqueue,
 			    &sc->hn_tx_taskqs[i]);
 			taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET,
 			    "%s tx%d", device_get_nameunit(dev), i);
 		}
 	} else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) {
 		sc->hn_tx_taskqs = hn_tx_taskque;
 	}
 
 	/*
 	 * Setup taskqueue for management tasks, e.g. link status.
 	 */
 	sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
 	    taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
 	taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
 	    device_get_nameunit(dev));
 	TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
 	TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
 	TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
 	    hn_netchg_status_taskfunc, sc);
 
 	if (hn_xpnt_vf) {
 		/*
 		 * Setup taskqueue for VF tasks, e.g. delayed VF bringing up.
 		 */
 		sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK,
 		    taskqueue_thread_enqueue, &sc->hn_vf_taskq);
 		taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf",
 		    device_get_nameunit(dev));
 		TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0,
 		    hn_xpnt_vf_init_taskfunc, sc);
 	}
 
 	/*
 	 * Allocate ifnet and setup its name earlier, so that if_printf
 	 * can be used by functions, which will be called after
 	 * ether_ifattach().
 	 */
 	ifp = sc->hn_ifp = if_alloc(IFT_ETHER);
 	if_setsoftc(ifp, sc);
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 
 	/*
 	 * Initialize ifmedia earlier so that it can be unconditionally
 	 * destroyed, if error happened later on.
 	 */
 	ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);
 
 	/*
 	 * Figure out the # of RX rings (ring_cnt) and the # of TX rings
 	 * to use (tx_ring_cnt).
 	 *
 	 * NOTE:
 	 * The # of RX rings to use is same as the # of channels to use.
 	 */
 	ring_cnt = hn_chan_cnt;
 	if (ring_cnt <= 0) {
 		/* Default */
 		ring_cnt = mp_ncpus;
 		if (ring_cnt > HN_RING_CNT_DEF_MAX)
 			ring_cnt = HN_RING_CNT_DEF_MAX;
 	} else if (ring_cnt > mp_ncpus) {
 		ring_cnt = mp_ncpus;
 	}
 #ifdef RSS
 	if (ring_cnt > rss_getnumbuckets())
 		ring_cnt = rss_getnumbuckets();
 #endif
 
 	tx_ring_cnt = hn_tx_ring_cnt;
 	if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
 		tx_ring_cnt = ring_cnt;
 #ifdef HN_IFSTART_SUPPORT
 	if (hn_use_if_start) {
 		/* ifnet.if_start only needs one TX ring. */
 		tx_ring_cnt = 1;
 	}
 #endif
 
 	/*
 	 * Set the leader CPU for channels.
 	 */
 	sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;
 
 	/*
 	 * Create enough TX/RX rings, even if only limited number of
 	 * channels can be allocated.
 	 */
 	error = hn_create_tx_data(sc, tx_ring_cnt);
 	if (error)
 		goto failed;
 	error = hn_create_rx_data(sc, ring_cnt);
 	if (error)
 		goto failed;
 
 	/*
 	 * Create transaction context for NVS and RNDIS transactions.
 	 */
 	sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
 	    HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
 	if (sc->hn_xact == NULL) {
 		error = ENXIO;
 		goto failed;
 	}
 
 	/*
 	 * Install orphan handler for the revocation of this device's
 	 * primary channel.
 	 *
 	 * NOTE:
 	 * The processing order is critical here:
 	 * Install the orphan handler, _before_ testing whether this
 	 * device's primary channel has been revoked or not.
 	 */
 	vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact);
 	if (vmbus_chan_is_revoked(sc->hn_prichan)) {
 		error = ENXIO;
 		goto failed;
 	}
 
 	/*
 	 * Attach the synthetic parts, i.e. NVS and RNDIS.
 	 */
 	error = hn_synth_attach(sc, ETHERMTU);
 	if (error)
 		goto failed;
 
 	error = hn_rndis_get_eaddr(sc, eaddr);
 	if (error)
 		goto failed;
 
 	error = hn_rndis_get_mtu(sc, &mtu);
 	if (error)
 		mtu = ETHERMTU;
 	else if (bootverbose)
 		device_printf(dev, "RNDIS mtu %u\n", mtu);
 
 	if (sc->hn_rx_ring_inuse > 1) {
 		/*
 		 * Reduce TCP segment aggregation limit for multiple
 		 * RX rings to increase ACK timeliness.
 		 */
 		hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
 	}
 
 	/*
 	 * Fixup TX/RX stuffs after synthetic parts are attached.
 	 */
 	hn_fixup_tx_data(sc);
 	hn_fixup_rx_data(sc);
 
 	ctx = device_get_sysctl_ctx(dev);
 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD,
 	    &sc->hn_nvs_ver, 0, "NVS version");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version",
 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
 	    hn_ndis_version_sysctl, "A", "NDIS version");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps",
 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
 	    hn_caps_sysctl, "A", "capabilities");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
 	    hn_hwassist_sysctl, "A", "hwassist");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tso_max",
 	    CTLTYPE_UINT | CTLFLAG_RD, sc, 0, hn_tsomax_sysctl,
 	    "IU", "max TSO size");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tso_maxsegcnt",
 	    CTLTYPE_UINT | CTLFLAG_RD, sc, 0, hn_tsomaxsegcnt_sysctl,
 	    "IU", "max # of TSO segments");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tso_maxsegsz",
 	    CTLTYPE_UINT | CTLFLAG_RD, sc, 0, hn_tsomaxsegsz_sysctl,
 	    "IU", "max size of TSO segment");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
 	    hn_rxfilter_sysctl, "A", "rxfilter");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash",
 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
 	    hn_rss_hash_sysctl, "A", "RSS hash");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hashcap",
 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
 	    hn_rss_hcap_sysctl, "A", "RSS hash capabilities");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "mbuf_hash",
 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
 	    hn_rss_mbuf_sysctl, "A", "RSS hash for mbufs");
 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size",
 	    CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count");
 #ifndef RSS
 	/*
 	 * Don't allow RSS key/indirect table changes, if RSS is defined.
 	 */
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key",
 	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
 	    hn_rss_key_sysctl, "IU", "RSS key");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
 	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
 	    hn_rss_ind_sysctl, "IU", "RSS indirect table");
 #endif
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size",
 	    CTLFLAG_RD, &sc->hn_rndis_agg_size, 0,
 	    "RNDIS offered packet transmission aggregation size limit");
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts",
 	    CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0,
 	    "RNDIS offered packet transmission aggregation count limit");
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align",
 	    CTLFLAG_RD, &sc->hn_rndis_agg_align, 0,
 	    "RNDIS packet transmission aggregation alignment");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size",
 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
 	    hn_txagg_size_sysctl, "I",
 	    "Packet transmission aggregation size, 0 -- disable, -1 -- auto");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts",
 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
 	    hn_txagg_pkts_sysctl, "I",
 	    "Packet transmission aggregation packets, "
 	    "0 -- disable, -1 -- auto");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling",
 	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
 	    hn_polling_sysctl, "I",
 	    "Polling frequency: [100,1000000], 0 disable polling");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf",
 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
 	    hn_vf_sysctl, "A", "Virtual Function's name");
 	if (!hn_xpnt_vf) {
 		SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf",
 		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
 		    hn_rxvf_sysctl, "A", "activated Virtual Function's name");
 	} else {
 		SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled",
 		    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
 		    hn_xpnt_vf_enabled_sysctl, "I",
 		    "Transparent VF enabled");
 		SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf",
 		    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
 		    hn_xpnt_vf_accbpf_sysctl, "I",
 		    "Accurate BPF for transparent VF");
 	}
 
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rsc_switch",
 	    CTLTYPE_UINT | CTLFLAG_RW, sc, 0, hn_rsc_sysctl, "A",
 	    "switch to rsc");
 
 	/*
 	 * Setup the ifmedia, which has been initialized earlier.
 	 */
 	ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
 	ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
 	/* XXX ifmedia_set really should do this for us */
 	sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;
 
 	/*
 	 * Setup the ifnet for this interface.
 	 */
 
 	if_setbaudrate(ifp, IF_Gbps(10));
 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
 	if_setioctlfn(ifp, hn_ioctl);
 	if_setinitfn(ifp, hn_init);
 #ifdef HN_IFSTART_SUPPORT
 	if (hn_use_if_start) {
 		int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);
 
 		if_setstartfn(ifp, hn_start);
 		if_setsendqlen(ifp, qdepth);
 		if_setsendqready(ifp);
 	} else
 #endif
 	{
 		if_settransmitfn(ifp, hn_transmit);
 		if_setqflushfn(ifp, hn_xmit_qflush);
 	}
 
 	if_setcapabilitiesbit(ifp, IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE, 0);
 #ifdef foo
 	/* We can't diff IPv6 packets from IPv4 packets on RX path. */
 	if_setcapabilitiesbit(ifp, IFCAP_RXCSUM_IPV6, 0);
 #endif
 	if (sc->hn_caps & HN_CAP_VLAN) {
 		/* XXX not sure about VLAN_MTU. */
 		if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU, 0);
 	}
 
 	if_sethwassist(ifp, sc->hn_tx_ring[0].hn_csum_assist);
 	if (if_gethwassist(ifp) & HN_CSUM_IP_MASK)
 		if_setcapabilitiesbit(ifp, IFCAP_TXCSUM, 0);
 	if (if_gethwassist(ifp) & HN_CSUM_IP6_MASK)
 		if_setcapabilitiesbit(ifp, IFCAP_TXCSUM_IPV6, 0);
 	if (sc->hn_caps & HN_CAP_TSO4) {
 		if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
 		if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
 	}
 	if (sc->hn_caps & HN_CAP_TSO6) {
 		if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
 		if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
 	}
 
 	/* Enable all available capabilities by default. */
 	if_setcapenable(ifp, if_getcapabilities(ifp));
 
 	/*
 	 * Disable IPv6 TSO and TXCSUM by default, they still can
 	 * be enabled through SIOCSIFCAP.
 	 */
 	if_setcapenablebit(ifp, 0, (IFCAP_TXCSUM_IPV6 | IFCAP_TSO6));
 	if_sethwassistbits(ifp, 0, (HN_CSUM_IP6_MASK | CSUM_IP6_TSO));
 
 	if (if_getcapabilities(ifp) & (IFCAP_TSO6 | IFCAP_TSO4)) {
 		/*
 		 * Lock hn_set_tso_maxsize() to simplify its
 		 * internal logic.
 		 */
 		HN_LOCK(sc);
 		hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
 		HN_UNLOCK(sc);
 		if_sethwtsomaxsegcount(ifp, HN_TX_DATA_SEGCNT_MAX);
 		if_sethwtsomaxsegsize(ifp, PAGE_SIZE);
 	}
 
 	ether_ifattach(ifp, eaddr);
 
 	if ((if_getcapabilities(ifp) & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) {
 		if_printf(ifp, "TSO segcnt %u segsz %u\n",
 		    if_gethwtsomaxsegcount(ifp), if_gethwtsomaxsegsize(ifp));
 	}
 	if (mtu < ETHERMTU) {
 
 		if_setmtu(ifp, mtu);
 	}
 
 	/* Inform the upper layer about the long frame support. */
 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
 
 	/*
 	 * Kick off link status check.
 	 */
 	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
 	hn_update_link_status(sc);
 
 	if (!hn_xpnt_vf) {
 		sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event,
 		    hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY);
 		sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event,
 		    hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY);
 	} else {
 		sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event,
 		    hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY);
 	}
 
 	/*
 	 * NOTE:
 	 * Subscribe ether_ifattach event, instead of ifnet_arrival event,
 	 * since interface's LLADDR is needed; interface LLADDR is not
 	 * available when ifnet_arrival event is triggered.
 	 */
 	sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event,
 	    hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY);
 	sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event,
 	    hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY);
 
 	return (0);
 failed:
 	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)
 		hn_synth_detach(sc);
 	hn_detach(dev);
 	return (error);
 }
 
 /*
  * Device detach method; also invoked from the failure path of
  * hn_attach() to undo a partially completed attach.
  *
  * The teardown proceeds in this order: orphan the transaction context
  * if the primary channel is already revoked, deregister the event
  * handlers, detach the VF (if any), stop and detach the synthetic
  * parts, release media/ring data and taskqueues, destroy the
  * transaction context, free the ifnet and finally destroy the locks.
  *
  * Always returns 0.
  */
 static int
 hn_detach(device_t dev)
 {
 	struct hn_softc *sc = device_get_softc(dev);
 	if_t ifp = sc->hn_ifp, vf_ifp;
 
 	if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
 		/*
 		 * In case that the vmbus missed the orphan handler
 		 * installation.
 		 */
 		vmbus_xact_ctx_orphan(sc->hn_xact);
 	}
 
 	/* Only handlers that were actually registered are deregistered. */
 	if (sc->hn_ifaddr_evthand != NULL)
 		EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand);
 	if (sc->hn_ifnet_evthand != NULL)
 		EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand);
 	if (sc->hn_ifnet_atthand != NULL) {
 		EVENTHANDLER_DEREGISTER(ether_ifattach_event,
 		    sc->hn_ifnet_atthand);
 	}
 	if (sc->hn_ifnet_dethand != NULL) {
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 		    sc->hn_ifnet_dethand);
 	}
 	if (sc->hn_ifnet_lnkhand != NULL)
 		EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand);
 
 	/*
 	 * Take one snapshot of the VF ifnet pointer and use that copy
 	 * for both the NULL test and the detach-event call below.
 	 */
 	vf_ifp = sc->hn_vf_ifp;
 	__compiler_membar();
 	if (vf_ifp != NULL)
 		hn_ifnet_detevent(sc, vf_ifp);
 
 	/*
 	 * NOTE(review): presumably device_is_attached() is false when
 	 * this runs from hn_attach()'s failure path, so the ifnet is
 	 * only detached after a successful attach -- confirm.
 	 */
 	if (device_is_attached(dev)) {
 		HN_LOCK(sc);
 		if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
 				hn_stop(sc, true);
 			/*
 			 * NOTE:
 			 * hn_stop() only suspends data, so management
 			 * stuffs have to be suspended manually here.
 			 */
 			hn_suspend_mgmt(sc);
 			hn_synth_detach(sc);
 		}
 		HN_UNLOCK(sc);
 		ether_ifdetach(ifp);
 	}
 
 	ifmedia_removeall(&sc->hn_media);
 	hn_destroy_rx_data(sc);
 	hn_destroy_tx_data(sc);
 
 	/*
 	 * The TX taskqueue array is freed here only when it is not the
 	 * hn_tx_taskque array.
 	 */
 	if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) {
 		int i;
 
 		for (i = 0; i < hn_tx_taskq_cnt; ++i)
 			taskqueue_free(sc->hn_tx_taskqs[i]);
 		free(sc->hn_tx_taskqs, M_DEVBUF);
 	}
 	taskqueue_free(sc->hn_mgmt_taskq0);
 	if (sc->hn_vf_taskq != NULL)
 		taskqueue_free(sc->hn_vf_taskq);
 
 	if (sc->hn_xact != NULL) {
 		/*
 		 * Uninstall the orphan handler _before_ the xact is
 		 * destructed.
 		 */
 		vmbus_chan_unset_orphan(sc->hn_prichan);
 		vmbus_xact_ctx_destroy(sc->hn_xact);
 	}
 
 	if_free(ifp);
 
 	HN_LOCK_DESTROY(sc);
 	rm_destroy(&sc->hn_vf_lock);
 	return (0);
 }
 
 /*
  * Device shutdown method.  No work is performed; success is always
  * reported.
  */
 static int
 hn_shutdown(device_t dev)
 {
 	/* Nothing to do. */
 	return 0;
 }
 
 /*
  * Query the RNDIS link status, record it in the softc's link flags
  * and report the matching link state to the ifnet.  If the query
  * fails, nothing is changed.
  */
 static void
 hn_link_status(struct hn_softc *sc)
 {
 	uint32_t media_state;
 	int link_state;
 
 	if (hn_rndis_get_linkstatus(sc, &media_state) != 0) {
 		/* XXX what to do? */
 		return;
 	}
 
 	if (media_state != NDIS_MEDIA_STATE_CONNECTED) {
 		sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
 		link_state = LINK_STATE_DOWN;
 	} else {
 		sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
 		link_state = LINK_STATE_UP;
 	}
 	if_link_state_change(sc->hn_ifp, link_state);
 }
 
 /*
  * Task handler: refresh the link status, unless link status checks
  * are currently blocked by HN_LINK_FLAG_NETCHG.
  */
 static void
 hn_link_taskfunc(void *xsc, int pending __unused)
 {
 	struct hn_softc *sc = xsc;
 
 	if ((sc->hn_link_flags & HN_LINK_FLAG_NETCHG) == 0)
 		hn_link_status(sc);
 }
 
 /*
  * Task handler: first half of a network change.  Blocks link status
  * checks, reports the link as down and schedules the second half
  * (hn_netchg_status) to run after a delay.
  */
 static void
 hn_netchg_init_taskfunc(void *xsc, int pending __unused)
 {
 	struct hn_softc *sc = xsc;
 	const int delay = 5 * hz;
 
 	/* No link status checks may run until the change completes. */
 	sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;
 
 	/*
 	 * Simulate a [link down --> link up] transition.  The 5 seconds
 	 * between the two halves closely matches how miibus reacts to
 	 * a link down event.
 	 */
 	sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
 	if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
 	taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status,
 	    delay);
 }
 
 /*
  * Task handler: second half of a network change.  Unblocks link
  * status checks and immediately performs one.
  */
 static void
 hn_netchg_status_taskfunc(void *xsc, int pending __unused)
 {
 	struct hn_softc *sc = xsc;
 
 	/* Link status checks are allowed again from here on. */
 	sc->hn_link_flags = sc->hn_link_flags & ~HN_LINK_FLAG_NETCHG;
 	hn_link_status(sc);
 }
 
 /*
  * Queue the link status task on the management taskqueue; does
  * nothing when no management taskqueue is currently set.
  */
 static void
 hn_update_link_status(struct hn_softc *sc)
 {
 	if (sc->hn_mgmt_taskq == NULL)
 		return;
 	taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);
 }
 
 /*
  * Queue the network change initiation task on the management
  * taskqueue; does nothing when no management taskqueue is currently
  * set.
  */
 static void
 hn_change_network(struct hn_softc *sc)
 {
 	if (sc->hn_mgmt_taskq == NULL)
 		return;
 	taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);
 }
 
 /*
  * Load the mbuf chain *m_head into txd's data DMA map.
  *
  * If the first load fails with EFBIG, the chain is collapsed to at
  * most HN_TX_DATA_SEGCNT_MAX mbufs, *m_head is updated to the new
  * chain and the load is retried once.  On success the map is synced
  * for writing and the txd is flagged with HN_TXD_FLAG_DMAMAP.
  *
  * Returns 0 on success, ENOBUFS if the collapse fails, otherwise the
  * error of the (last) DMA load.
  */
 static __inline int
 hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
     struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
 {
 	struct mbuf *chain = *m_head;
 	int rc;
 
 	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));
 
 	rc = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
 	    chain, segs, nsegs, BUS_DMA_NOWAIT);
 	if (rc == EFBIG) {
 		struct mbuf *collapsed;
 
 		/* Too many segments; squeeze the chain and try once more. */
 		collapsed = m_collapse(chain, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
 		if (collapsed == NULL)
 			return (ENOBUFS);
 		chain = collapsed;
 		*m_head = chain;
 		txr->hn_tx_collapsed++;
 
 		rc = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
 		    txd->data_dmap, chain, segs, nsegs, BUS_DMA_NOWAIT);
 	}
 	if (rc != 0)
 		return (rc);
 
 	bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
 	    BUS_DMASYNC_PREWRITE);
 	txd->flags |= HN_TXD_FLAG_DMAMAP;
 	return (0);
 }
 
 /*
  * Drop one reference on txd.
  *
  * If other references remain, nothing else happens and 0 is returned.
  * When the last reference is dropped, every txdesc aggregated on txd
  * is released first, then txd's chimney sending buffer is freed or its
  * DMA map is unloaded, its mbuf (if any) is freed, and txd is put back
  * on the TX ring's free list (or buf_ring); 1 is returned.
  */
 static __inline int
 hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
 {
 
 	KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
 	    ("put an onlist txd %#x", txd->flags));
 	KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
 	    ("put an onagg txd %#x", txd->flags));
 
 	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
 	/* The previous value being 1 means this was the last reference. */
 	if (atomic_fetchadd_int(&txd->refs, -1) != 1)
 		return 0;
 
 	if (!STAILQ_EMPTY(&txd->agg_list)) {
 		struct hn_txdesc *tmp_txd;
 
 		/* Unlink and release every txdesc aggregated on this one. */
 		while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) {
 			int freed __diagused;
 
 			KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list),
 			    ("resursive aggregation on aggregated txdesc"));
 			KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG),
 			    ("not aggregated txdesc"));
 			KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
 			    ("aggregated txdesc uses dmamap"));
 			KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
 			    ("aggregated txdesc consumes "
 			     "chimney sending buffer"));
 			KASSERT(tmp_txd->chim_size == 0,
 			    ("aggregated txdesc has non-zero "
 			     "chimney sending size"));
 
 			STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link);
 			/*
 			 * Clear ONAGG before the recursive put, which
 			 * asserts that the flag is not set.
 			 */
 			tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG;
 			freed = hn_txdesc_put(txr, tmp_txd);
 			KASSERT(freed, ("failed to free aggregated txdesc"));
 		}
 	}
 
 	/* A txdesc uses either a chimney sending buffer or a DMA map. */
 	if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
 		KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
 		    ("chim txd uses dmamap"));
 		hn_chim_free(txr->hn_sc, txd->chim_index);
 		txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
 		txd->chim_size = 0;
 	} else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
 		bus_dmamap_sync(txr->hn_tx_data_dtag,
 		    txd->data_dmap, BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(txr->hn_tx_data_dtag,
 		    txd->data_dmap);
 		txd->flags &= ~HN_TXD_FLAG_DMAMAP;
 	}
 
 	/* Callers that want to keep the mbuf set txd->m to NULL first. */
 	if (txd->m != NULL) {
 		m_freem(txd->m);
 		txd->m = NULL;
 	}
 
 	/* Return the txdesc to the ring's pool of free descriptors. */
 	txd->flags |= HN_TXD_FLAG_ONLIST;
 #ifndef HN_USE_TXDESC_BUFRING
 	mtx_lock_spin(&txr->hn_txlist_spin);
 	KASSERT(txr->hn_txdesc_avail >= 0 &&
 	    txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
 	    ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
 	txr->hn_txdesc_avail++;
 	SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
 	mtx_unlock_spin(&txr->hn_txlist_spin);
 #else	/* HN_USE_TXDESC_BUFRING */
 #ifdef HN_DEBUG
 	atomic_add_int(&txr->hn_txdesc_avail, 1);
 #endif
 	buf_ring_enqueue(txr->hn_txdesc_br, txd);
 #endif	/* !HN_USE_TXDESC_BUFRING */
 
 	return 1;
 }
 
 /*
  * Take a free txdesc from the TX ring's free list (or buf_ring).
  *
  * Returns NULL if none is available.  Otherwise the returned txdesc
  * is no longer marked as on-list and holds exactly one reference.
  */
 static __inline struct hn_txdesc *
 hn_txdesc_get(struct hn_tx_ring *txr)
 {
 	struct hn_txdesc *txd;
 
 #ifdef HN_USE_TXDESC_BUFRING
 	txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);
 	if (txd == NULL)
 		return (NULL);
 #ifdef HN_DEBUG
 	atomic_subtract_int(&txr->hn_txdesc_avail, 1);
 #endif
 #else	/* !HN_USE_TXDESC_BUFRING */
 	mtx_lock_spin(&txr->hn_txlist_spin);
 	txd = SLIST_FIRST(&txr->hn_txlist);
 	if (txd != NULL) {
 		KASSERT(txr->hn_txdesc_avail > 0,
 		    ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
 		txr->hn_txdesc_avail--;
 		SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
 	}
 	mtx_unlock_spin(&txr->hn_txlist_spin);
 	if (txd == NULL)
 		return (NULL);
 #endif	/* HN_USE_TXDESC_BUFRING */
 
 	/* A free txdesc must be completely idle. */
 	KASSERT(txd->m == NULL && txd->refs == 0 &&
 	    STAILQ_EMPTY(&txd->agg_list) &&
 	    txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
 	    txd->chim_size == 0 &&
 	    (txd->flags & HN_TXD_FLAG_ONLIST) &&
 	    (txd->flags & HN_TXD_FLAG_ONAGG) == 0 &&
 	    (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
 	txd->flags &= ~HN_TXD_FLAG_ONLIST;
 	txd->refs = 1;
 	return (txd);
 }
 
 /*
  * Acquire one more reference on a txdesc that is already referenced.
  */
 static __inline void
 hn_txdesc_hold(struct hn_txdesc *txd)
 {
 	/* Only a held txdesc may be held again; 0->1 will never work. */
 	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
 
 	atomic_add_int(&txd->refs, 1);
 }
 
 /*
  * Link txd onto the tail of agg_txd's aggregation list and mark it
  * with HN_TXD_FLAG_ONAGG.  Neither txdesc may already take part in
  * another aggregation.
  */
 static __inline void
 hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd)
 {
 	/* The parent must not itself be aggregated. */
 	KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0,
 	    ("recursive aggregation on aggregating txdesc"));
 
 	/* The child must be neither aggregated nor aggregating. */
 	KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
 	    ("already aggregated"));
 	KASSERT(STAILQ_EMPTY(&txd->agg_list),
 	    ("recursive aggregation on to-be-aggregated txdesc"));
 
 	STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link);
 	txd->flags |= HN_TXD_FLAG_ONAGG;
 }
 
 /*
  * Report whether any of the TX ring's txdescs is currently in use,
  * i.e. not all of them sit on the free list (or buf_ring).
  */
 static bool
 hn_tx_ring_pending(struct hn_tx_ring *txr)
 {
 	bool in_use;
 
 #ifdef HN_USE_TXDESC_BUFRING
 	in_use = !buf_ring_full(txr->hn_txdesc_br);
 #else
 	mtx_lock_spin(&txr->hn_txlist_spin);
 	in_use = (txr->hn_txdesc_avail != txr->hn_txdesc_cnt);
 	mtx_unlock_spin(&txr->hn_txlist_spin);
 #endif
 	return (in_use);
 }
 
 /*
  * Clear the ring's pending-txeof indication, then run the ring's
  * txeof handler.
  */
 static __inline void
 hn_txeof(struct hn_tx_ring *txr)
 {
 
 	txr->hn_has_txeof = 0;
 	(*txr->hn_txeof)(txr);
 }
 
 /*
  * Send completion callback for a txdesc: marks the ring as having a
  * txeof pending and drops the txdesc reference.  After every
  * HN_EARLY_TXEOF_THRESH completions the counter is reset and, if the
  * ring is marked oactive, txeof is run right away.
  */
 static void
 hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
     struct vmbus_channel *chan, const void *data __unused, int dlen __unused)
 {
 	struct hn_txdesc *txd = sndc->hn_cbarg;
 	struct hn_tx_ring *txr = txd->txr;
 
 	KASSERT(txr->hn_chan == chan,
 	    ("channel mismatch, on chan%u, should be chan%u",
 	     vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan)));
 
 	txr->hn_has_txeof = 1;
 	hn_txdesc_put(txr, txd);
 
 	if (++txr->hn_txdone_cnt < HN_EARLY_TXEOF_THRESH)
 		return;
 
 	txr->hn_txdone_cnt = 0;
 	if (txr->hn_oactive)
 		hn_txeof(txr);
 }
 
 /*
  * Per-channel rollup: flush all pending LRO work of the RX ring (when
  * INET or INET6 is configured) and, if the TX ring has a txeof
  * pending, run it.
  */
 static void
 hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr)
 {
 #if defined(INET) || defined(INET6)
 	struct epoch_tracker et;
 
 	NET_EPOCH_ENTER(et);
 	tcp_lro_flush_all(&rxr->hn_lro);
 	NET_EPOCH_EXIT(et);
 #endif
 
 	/*
 	 * NOTE:
 	 * With multiple channels and the ifnet.if_start method
 	 * enabled, 'txr' could be NULL.
 	 */
 	if (txr != NULL && txr->hn_has_txeof) {
 		txr->hn_txdone_cnt = 0;
 		hn_txeof(txr);
 	}
 }
 
 /*
  * Convert an offset counted from the beginning of the RNDIS packet
  * message into one counted from the rm_dataoffset field.
  */
 static __inline uint32_t
 hn_rndis_pktmsg_offset(uint32_t ofs)
 {
 	const uint32_t base =
 	    __offsetof(struct rndis_packet_msg, rm_dataoffset);
 
 	KASSERT(ofs >= sizeof(struct rndis_packet_msg),
 	    ("invalid RNDIS packet msg offset %u", ofs));
 	return (ofs - base);
 }
 
 /*
  * Append one per-packet-info element of type pi_type, carrying
  * pi_dlen bytes of data, to the end of pkt's per-packet-info area.
  * pktsize is the total space available for the RNDIS packet message.
  *
  * Returns a pointer to the element's data area, which the caller
  * fills in.
  */
 static __inline void *
 hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
     size_t pi_dlen, uint32_t pi_type)
 {
 	const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen);
 	struct rndis_pktinfo *pi;
 	uint8_t *tail;
 
 	KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0,
 	    ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen));
 
 	/*
 	 * The per-packet-info area never moves, it only grows.
 	 *
 	 * NOTE:
 	 * At this stage rm_pktinfooffset is still counted from the
 	 * beginning of rndis_packet_msg.
 	 */
 	KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize,
 	    ("%u pktinfo overflows RNDIS packet msg", pi_type));
 	tail = (uint8_t *)pkt + pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
 	pi = (struct rndis_pktinfo *)tail;
 	pkt->rm_pktinfolen += pi_size;
 
 	pi->rm_type = pi_type;
 	pi->rm_size = pi_size;
 	pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET;
 	pi->rm_internal = 0;
 
 	return (pi->rm_data);
 }
 
 /*
  * Send the TX ring's currently aggregating txdesc and reset the
  * ring's aggregation state.  On send failure the saved mbuf is freed
  * and the saved packet count is added to the interface's output
  * errors.
  *
  * Returns the result of hn_txpkt().
  */
 static __inline int
 hn_flush_txagg(if_t ifp, struct hn_tx_ring *txr)
 {
 	struct hn_txdesc *agg_txd = txr->hn_agg_txd;
 	struct mbuf *saved_m;
 	int rc, saved_pkts;
 
 	KASSERT(agg_txd != NULL, ("no aggregate txdesc"));
 
 	/*
 	 * hn_txpkt() resets this temporary stat, so remember it now;
 	 * it is needed to account oerrors if hn_txpkt() fails.
 	 */
 	saved_pkts = txr->hn_stat_pkts;
 
 	/*
 	 * hn_txpkt() does _not_ free the txd's mbuf upon failure, so
 	 * remember it for freeing in that case.
 	 */
 	saved_m = agg_txd->m;
 
 	rc = hn_txpkt(ifp, txr, agg_txd);
 	if (__predict_false(rc)) {
 		/* The txd is gone, its mbuf is not. */
 		m_freem(saved_m);
 
 		txr->hn_flush_failed++;
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, saved_pkts);
 	}
 
 	/* Aggregation starts over from scratch. */
 	txr->hn_agg_prevpkt = NULL;
 	txr->hn_agg_pktleft = 0;
 	txr->hn_agg_szleft = 0;
 	txr->hn_agg_txd = NULL;
 
 	return (rc);
 }
 
 /*
  * Find room in a chimney sending buffer for a packet of 'pktsize'
  * bytes.
  *
  * If the TX ring has an aggregating txdesc with enough packet and
  * size budget left, 'txd' is linked to it and the packet goes right
  * after the (alignment padded) previous packet in that txdesc's
  * chimney sending buffer.  Otherwise any aggregating txdesc is flushed
  * and a new chimney sending buffer is allocated for 'txd', which
  * becomes the new aggregating txdesc if the ring's aggregation limits
  * allow it.
  *
  * Returns the address at which the RNDIS packet message is to be
  * built, or NULL if no chimney sending buffer could be allocated.
  */
 static void *
 hn_try_txagg(if_t ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
     int pktsize)
 {
 	void *chim;
 
 	if (txr->hn_agg_txd != NULL) {
 		if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) {
 			struct hn_txdesc *agg_txd = txr->hn_agg_txd;
 			struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt;
 			int olen;
 
 			/*
 			 * Update the previous RNDIS packet's total length,
 			 * it can be increased due to the mandatory alignment
 			 * padding for this RNDIS packet.  And update the
 			 * aggregating txdesc's chimney sending buffer size
 			 * accordingly.
 			 *
 			 * XXX
 			 * Zero-out the padding, as required by the RNDIS spec.
 			 */
 			olen = pkt->rm_len;
 			pkt->rm_len = roundup2(olen, txr->hn_agg_align);
 			agg_txd->chim_size += pkt->rm_len - olen;
 
 			/* Link this txdesc to the parent. */
 			hn_txdesc_agg(agg_txd, txd);
 
 			/* This packet starts right after the padded previous one. */
 			chim = (uint8_t *)pkt + pkt->rm_len;
 			/* Save the current packet for later fixup. */
 			txr->hn_agg_prevpkt = chim;
 
 			txr->hn_agg_pktleft--;
 			txr->hn_agg_szleft -= pktsize;
 			if (txr->hn_agg_szleft <=
 			    HN_PKTSIZE_MIN(txr->hn_agg_align)) {
 				/*
 				 * Probably can't aggregate more packets,
 				 * flush this aggregating txdesc proactively.
 				 */
 				txr->hn_agg_pktleft = 0;
 			}
 			/* Done! */
 			return (chim);
 		}
 		/*
 		 * No budget left for this packet; send what has been
 		 * aggregated so far.  The flush result is not checked here.
 		 */
 		hn_flush_txagg(ifp, txr);
 	}
 	KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
 
 	txr->hn_tx_chimney_tried++;
 	txd->chim_index = hn_chim_alloc(txr->hn_sc);
 	if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID)
 		return (NULL);
 	txr->hn_tx_chimney++;
 
 	/* Locate the allocated slot: slots are hn_chim_szmax bytes apart. */
 	chim = txr->hn_sc->hn_chim +
 	    (txd->chim_index * txr->hn_sc->hn_chim_szmax);
 
 	/*
 	 * Start a new aggregation on this txdesc, if more than one packet
 	 * is allowed and at least one more minimum sized packet would fit.
 	 */
 	if (txr->hn_agg_pktmax > 1 &&
 	    txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) {
 		txr->hn_agg_txd = txd;
 		txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1;
 		txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize;
 		txr->hn_agg_prevpkt = chim;
 	}
 	return (chim);
 }
 
 /*
  * Build the RNDIS packet message for the mbuf chain *m_head0 and
  * prepare 'txd' for sending it, either through a chimney sending
  * buffer (the packet data is copied) or through a scatter/gather list
  * of page buffers (the mbuf chain is DMA loaded).  The send routine to
  * use is left in txr->hn_sendpkt.
  *
  * *m_head0 may be replaced, if the mbuf chain had to be collapsed for
  * the DMA load.
  *
  * Returns 0 on success, or the DMA load error.
  *
  * NOTE:
  * If this function fails, then both txd and m_head0 will be freed.
  */
 static int
 hn_encap(if_t ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
     struct mbuf **m_head0)
 {
 	bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
 	int error, nsegs, i;
 	struct mbuf *m_head = *m_head0;
 	struct rndis_packet_msg *pkt;
 	uint32_t *pi_data;
 	void *chim = NULL;
 	int pkt_hlen, pkt_size;
 
 	/*
 	 * Build the RNDIS packet message in the txdesc's own buffer,
 	 * unless the packet is small enough for chimney sending and a
 	 * chimney sending buffer is available.
 	 */
 	pkt = txd->rndis_pkt;
 	pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align);
 	if (pkt_size < txr->hn_chim_size) {
 		chim = hn_try_txagg(ifp, txr, txd, pkt_size);
 		if (chim != NULL)
 			pkt = chim;
 	} else {
 		if (txr->hn_agg_txd != NULL)
 			hn_flush_txagg(ifp, txr);
 	}
 
 	/*
 	 * rm_len and rm_dataoffset are provisional here; they are fixed
 	 * up below, once the per-packet-info length is known.
 	 */
 	pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
 	pkt->rm_len = m_head->m_pkthdr.len;
 	pkt->rm_dataoffset = 0;
 	pkt->rm_datalen = m_head->m_pkthdr.len;
 	pkt->rm_oobdataoffset = 0;
 	pkt->rm_oobdatalen = 0;
 	pkt->rm_oobdataelements = 0;
 	pkt->rm_pktinfooffset = sizeof(*pkt);
 	pkt->rm_pktinfolen = 0;
 	pkt->rm_vchandle = 0;
 	pkt->rm_reserved = 0;
 
 	if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) {
 		/*
 		 * Set the hash value for this packet.
 		 */
 		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
 		    HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL);
 
 		if (M_HASHTYPE_ISHASH(m_head))
 			/*
 			 * The flowid field contains the hash value host
 			 * set in the rx queue if it is a ip forwarding pkt.
 			 * Set the same hash value so host can send on the
 			 * cpu it was received.
 			 */
 			*pi_data = m_head->m_pkthdr.flowid;
 		else
 			/*
 			 * Otherwise just put the tx queue index.
 			 */
 			*pi_data = txr->hn_tx_idx;
 	}
 
 	/* Pass the VLAN tag along as per-packet-info. */
 	if (m_head->m_flags & M_VLANTAG) {
 		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
 		    NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
 		*pi_data = NDIS_VLAN_INFO_MAKE(
 		    EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag),
 		    EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag),
 		    EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag));
 	}
 
 	/* TSO and checksum offload requests are mutually exclusive here. */
 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
 #if defined(INET6) || defined(INET)
 		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
 		    NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO);
 #ifdef INET
 		if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
 			*pi_data = NDIS_LSO2_INFO_MAKEIPV4(
 			    m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen,
 			    m_head->m_pkthdr.tso_segsz);
 		}
 #endif
 #if defined(INET6) && defined(INET)
 		else
 #endif
 #ifdef INET6
 		{
 			*pi_data = NDIS_LSO2_INFO_MAKEIPV6(
 			    m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen,
 			    m_head->m_pkthdr.tso_segsz);
 		}
 #endif
 #endif	/* INET6 || INET */
 	} else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
 		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
 		    NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
 		if (m_head->m_pkthdr.csum_flags &
 		    (CSUM_IP6_TCP | CSUM_IP6_UDP)) {
 			*pi_data = NDIS_TXCSUM_INFO_IPV6;
 		} else {
 			*pi_data = NDIS_TXCSUM_INFO_IPV4;
 			if (m_head->m_pkthdr.csum_flags & CSUM_IP)
 				*pi_data |= NDIS_TXCSUM_INFO_IPCS;
 		}
 
 		if (m_head->m_pkthdr.csum_flags &
 		    (CSUM_IP_TCP | CSUM_IP6_TCP)) {
 			*pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(
 			    m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen);
 		} else if (m_head->m_pkthdr.csum_flags &
 		    (CSUM_IP_UDP | CSUM_IP6_UDP)) {
 			*pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(
 			    m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen);
 		}
 	}
 
 	/* The header is the fixed message plus all per-packet-info. */
 	pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
 	/* Fixup RNDIS packet message total length */
 	pkt->rm_len += pkt_hlen;
 	/* Convert RNDIS packet message offsets */
 	pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
 	pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset);
 
 	/*
 	 * Fast path: Chimney sending.
 	 */
 	if (chim != NULL) {
 		struct hn_txdesc *tgt_txd = txd;
 
 		/*
 		 * While aggregating, the chimney sending buffer and its
 		 * size belong to the aggregating txdesc.
 		 */
 		if (txr->hn_agg_txd != NULL) {
 			tgt_txd = txr->hn_agg_txd;
 #ifdef INVARIANTS
 			*m_head0 = NULL;
 #endif
 		}
 
 		KASSERT(pkt == chim,
 		    ("RNDIS pkt not in chimney sending buffer"));
 		KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID,
 		    ("chimney sending buffer is not used"));
 		tgt_txd->chim_size += pkt->rm_len;
 
 		/* Copy the packet data right behind the RNDIS header. */
 		m_copydata(m_head, 0, m_head->m_pkthdr.len,
 		    ((uint8_t *)chim) + pkt_hlen);
 
 		txr->hn_gpa_cnt = 0;
 		txr->hn_sendpkt = hn_txpkt_chim;
 		goto done;
 	}
 
 	KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc"));
 	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
 	    ("chimney buffer is used"));
 	KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc"));
 
 	error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
 	if (__predict_false(error)) {
 		int freed __diagused;
 
 		/*
 		 * This mbuf is not linked w/ the txd yet, so free it now.
 		 */
 		m_freem(m_head);
 		*m_head0 = NULL;
 
 		freed = hn_txdesc_put(txr, txd);
 		KASSERT(freed != 0,
 		    ("fail to free txd upon txdma error"));
 
 		txr->hn_txdma_failed++;
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		return error;
 	}
 	/* The DMA load may have replaced the mbuf chain. */
 	*m_head0 = m_head;
 
 	/* +1 RNDIS packet message */
 	txr->hn_gpa_cnt = nsegs + 1;
 
 	/* send packet with page buffer */
 	txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr);
 	txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK;
 	txr->hn_gpa[0].gpa_len = pkt_hlen;
 
 	/*
 	 * Fill the page buffers with mbuf info after the page
 	 * buffer for RNDIS packet message.
 	 */
 	for (i = 0; i < nsegs; ++i) {
 		struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1];
 
 		gpa->gpa_page = atop(segs[i].ds_addr);
 		gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK;
 		gpa->gpa_len = segs[i].ds_len;
 	}
 
 	txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
 	txd->chim_size = 0;
 	txr->hn_sendpkt = hn_txpkt_sglist;
 done:
 	/* From here on the mbuf is owned by the txd. */
 	txd->m = m_head;
 
 	/* Set the completion routine */
 	hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd);
 
 	/* Update temporary stats for later use. */
 	txr->hn_stat_pkts++;
 	txr->hn_stat_size += m_head->m_pkthdr.len;
 	if (m_head->m_flags & M_MCAST)
 		txr->hn_stat_mcasts++;
 
 	return 0;
 }
 
 /*
  * Hand 'txd' to the send routine selected in txr->hn_sendpkt.  A
  * failed send is retried once.  On success the packet(s) are tapped
  * to BPF, if there are listeners, and the interface and ring
  * statistics are updated from the ring's temporary stats.  The
  * temporary stats are reset in either case.
  *
  * Returns 0 on success, or the send routine's error.
  *
  * NOTE:
  * If this function fails, then txd will be freed, but the mbuf
  * associated w/ the txd will _not_ be freed.
  */
 static int
 hn_txpkt(if_t ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd)
 {
 	int error, send_failed = 0, has_bpf;
 
 again:
-	has_bpf = bpf_peers_present(if_getbpf(ifp));
+	has_bpf = bpf_peers_present_if(ifp);
 	if (has_bpf) {
 		/*
 		 * Make sure that this txd and any aggregated txds are not
 		 * freed before ETHER_BPF_MTAP.
 		 */
 		hn_txdesc_hold(txd);
 	}
 	error = txr->hn_sendpkt(txr, txd);
 	if (!error) {
 		if (has_bpf) {
 			const struct hn_txdesc *tmp_txd;
 
 			/* Tap this packet and every aggregated one. */
 			ETHER_BPF_MTAP(ifp, txd->m);
 			STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link)
 				ETHER_BPF_MTAP(ifp, tmp_txd->m);
 		}
 
 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts);
 #ifdef HN_IFSTART_SUPPORT
 		if (!hn_use_if_start)
 #endif
 		{
 			if_inc_counter(ifp, IFCOUNTER_OBYTES,
 			    txr->hn_stat_size);
 			if (txr->hn_stat_mcasts != 0) {
 				if_inc_counter(ifp, IFCOUNTER_OMCASTS,
 				    txr->hn_stat_mcasts);
 			}
 		}
 		txr->hn_pkts += txr->hn_stat_pkts;
 		txr->hn_sends++;
 	}
 	/* Drop the reference taken for BPF above. */
 	if (has_bpf)
 		hn_txdesc_put(txr, txd);
 
 	if (__predict_false(error)) {
 		int freed __diagused;
 
 		/*
 		 * This should "really rarely" happen.
 		 *
 		 * XXX Too many RX to be acked or too many sideband
 		 * commands to run?  Ask netvsc_channel_rollup()
 		 * to kick start later.
 		 */
 		txr->hn_has_txeof = 1;
 		if (!send_failed) {
 			txr->hn_send_failed++;
 			send_failed = 1;
 			/*
 			 * Try sending again after set hn_has_txeof;
 			 * in case that we missed the last
 			 * netvsc_channel_rollup().
 			 */
 			goto again;
 		}
 		if_printf(ifp, "send failed\n");
 
 		/*
 		 * Caller will perform further processing on the
 		 * associated mbuf, so don't free it in hn_txdesc_put();
 		 * only unload it from the DMA map in hn_txdesc_put(),
 		 * if it was loaded.
 		 */
 		txd->m = NULL;
 		freed = hn_txdesc_put(txr, txd);
 		KASSERT(freed != 0,
 		    ("fail to free txd upon send error"));
 
 		/*
 		 * NOTE(review): hn_send_failed was already incremented
 		 * before the retry, so a send that fails twice is counted
 		 * twice -- confirm that this is intended.
 		 */
 		txr->hn_send_failed++;
 	}
 
 	/* Reset temporary stats, after this sending is done. */
 	txr->hn_stat_size = 0;
 	txr->hn_stat_pkts = 0;
 	txr->hn_stat_mcasts = 0;
 
 	return (error);
 }
 
 /*
  * Append 'len' bytes starting at 'cp' to the end of the mbuf chain
  * 'm0'.  The trailing space of the chain's last mbuf is used first;
  * whatever does not fit goes into newly allocated mbufs that are
  * linked to the end of the chain.
  *
  * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
  * There should be an equivalent in the kernel mbuf code,
  * but there does not appear to be one yet.
  *
  * Unlike m_append(), the additional mbufs are allocated with
  * clusters of size MJUMPAGESIZE and filled accordingly.
  *
  * Returns the last mbuf of the chain, or NULL if a new mbuf could
  * not be allocated.
  */
 static struct mbuf *
 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
 {
 	struct mbuf *tail, *n;
 	int left, chunk;
 
 	/* Walk to the last mbuf of the chain. */
 	tail = m0;
 	while (tail->m_next != NULL)
 		tail = tail->m_next;
 
 	left = len;
 	chunk = M_TRAILINGSPACE(tail);
 	if (chunk > 0) {
 		/*
 		 * Fill up the room left in the last mbuf.
 		 */
 		if (chunk > left)
 			chunk = left;
 		bcopy(cp, mtod(tail, caddr_t) + tail->m_len, chunk);
 		tail->m_len += chunk;
 		cp += chunk;
 		left -= chunk;
 	}
 	while (left > 0) {
 		/*
 		 * Allocate a new mbuf; could check space
 		 * and allocate a cluster instead.
 		 */
 		n = m_getjcl(M_NOWAIT, tail->m_type, 0, MJUMPAGESIZE);
 		if (n == NULL)
 			return (NULL);
 		chunk = min(MJUMPAGESIZE, left);
 		n->m_len = chunk;
 		bcopy(cp, mtod(n, caddr_t), chunk);
 		cp += chunk;
 		left -= chunk;
 		tail->m_next = n;
 		tail = n;
 	}
 
 	return (tail);
 }
 
 #if defined(INET) || defined(INET6)
 /*
  * Feed one mbuf to LRO: queue it when hn_lro_mbufq_depth is non-zero,
  * otherwise pass it to tcp_lro_rx() and return its result.
  */
 static __inline int
 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m)
 {
 	if (hn_lro_mbufq_depth == 0)
 		return (tcp_lro_rx(lc, m, 0));
 
 	tcp_lro_queue_mbuf(lc, m);
 	return (0);
 }
 #endif
 
 /*
  * Build an mbuf chain from the fragments recorded in rxr->rsc, fill in
  * the checksum, VLAN and hash metadata, and hand the packet either to
  * LRO or to if_input().
  *
  * Packets that cannot be delivered (interface not running, runt frame,
  * mbuf allocation failure) are dropped; the function always returns 0.
  */
 static int
 hn_rxpkt(struct hn_rx_ring *rxr)
 {
 	if_t ifp, hn_ifp = rxr->hn_ifp;
 	struct mbuf *m_new, *n;
 	int size, do_lro = 0, do_csum = 1, is_vf = 0;
 	int hash_type = M_HASHTYPE_NONE;
 	int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE;
 	int i;
 
 	ifp = hn_ifp;
 	if (rxr->hn_rxvf_ifp != NULL) {
 		/*
 		 * Non-transparent mode VF; pretend this packet is from
 		 * the VF.
 		 */
 		ifp = rxr->hn_rxvf_ifp;
 		is_vf = 1;
 	} else if (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF) {
 		/* Transparent mode VF. */
 		is_vf = 1;
 	}
 
 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
 		/*
 		 * NOTE:
 		 * See the NOTE of hn_rndis_init_fixat().  This
 		 * function can be reached, immediately after the
 		 * RNDIS is initialized but before the ifnet is
 		 * setup on the hn_attach() path; drop the unexpected
 		 * packets.
 		 */
 		return (0);
 	}
 
 	if (__predict_false(rxr->rsc.pktlen < ETHER_HDR_LEN)) {
 		if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1);
 		return (0);
 	}
 
 	if (rxr->rsc.cnt == 1 && rxr->rsc.pktlen <= MHLEN) {
 		m_new = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m_new == NULL) {
 			if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1);
 			return (0);
 		}
 		memcpy(mtod(m_new, void *), rxr->rsc.frag_data[0],
 		    rxr->rsc.frag_len[0]);
 		m_new->m_pkthdr.len = m_new->m_len = rxr->rsc.frag_len[0];
 	} else {
 		/*
 		 * Get an mbuf with a cluster.  For packets 2K or less,
 		 * get a standard 2K cluster.  For anything larger, get a
 		 * 4K cluster.  Any buffers larger than 4K can cause problems
 		 * if looped around to the Hyper-V TX channel, so avoid them.
 		 */
 		size = MCLBYTES;
 		if (rxr->rsc.pktlen > MCLBYTES) {
 			/* 4096 */
 			size = MJUMPAGESIZE;
 		}
 
 		m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
 		if (m_new == NULL) {
 			if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1);
 			return (0);
 		}
 
 		n = m_new;
 		for (i = 0; i < rxr->rsc.cnt; i++) {
 			n = hv_m_append(n, rxr->rsc.frag_len[i],
 			    rxr->rsc.frag_data[i]);
 			if (n == NULL) {
 				/*
 				 * hv_m_append() failed to allocate; the
 				 * head and everything appended so far are
 				 * still linked from m_new, so release the
 				 * whole chain instead of leaking it.
 				 */
 				m_freem(m_new);
 				if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1);
 				return (0);
 			} else {
 				m_new->m_pkthdr.len += rxr->rsc.frag_len[i];
 			}
 		}
 	}
 	if (rxr->rsc.pktlen <= MHLEN)
 		rxr->hn_small_pkts++;
 
 	m_new->m_pkthdr.rcvif = ifp;
 
 	if (__predict_false((if_getcapenable(hn_ifp) & IFCAP_RXCSUM) == 0))
 		do_csum = 0;
 
 	/* receive side checksum offload */
 	if (rxr->rsc.csum_info != NULL) {
 		/* IP csum offload */
 		if ((*(rxr->rsc.csum_info) & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) {
 			m_new->m_pkthdr.csum_flags |=
 			    (CSUM_IP_CHECKED | CSUM_IP_VALID);
 			rxr->hn_csum_ip++;
 		}
 
 		/* TCP/UDP csum offload */
 		if ((*(rxr->rsc.csum_info) & (NDIS_RXCSUM_INFO_UDPCS_OK |
 		     NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) {
 			m_new->m_pkthdr.csum_flags |=
 			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 			m_new->m_pkthdr.csum_data = 0xffff;
 			if (*(rxr->rsc.csum_info) & NDIS_RXCSUM_INFO_TCPCS_OK)
 				rxr->hn_csum_tcp++;
 			else
 				rxr->hn_csum_udp++;
 		}
 
 		/*
 		 * XXX
 		 * As of this write (Oct 28th, 2016), host side will turn
 		 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so
 		 * the do_lro setting here is actually _not_ accurate.  We
 		 * depend on the RSS hash type check to reset do_lro.
 		 */
 		if ((*(rxr->rsc.csum_info) &
 		     (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) ==
 		    (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
 			do_lro = 1;
 	} else {
 		hn_rxpkt_proto(m_new, &l3proto, &l4proto);
 		if (l3proto == ETHERTYPE_IP) {
 			if (l4proto == IPPROTO_TCP) {
 				if (do_csum &&
 				    (rxr->hn_trust_hcsum &
 				     HN_TRUST_HCSUM_TCP)) {
 					rxr->hn_csum_trusted++;
 					m_new->m_pkthdr.csum_flags |=
 					   (CSUM_IP_CHECKED | CSUM_IP_VALID |
 					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 					m_new->m_pkthdr.csum_data = 0xffff;
 				}
 				do_lro = 1;
 			} else if (l4proto == IPPROTO_UDP) {
 				if (do_csum &&
 				    (rxr->hn_trust_hcsum &
 				     HN_TRUST_HCSUM_UDP)) {
 					rxr->hn_csum_trusted++;
 					m_new->m_pkthdr.csum_flags |=
 					   (CSUM_IP_CHECKED | CSUM_IP_VALID |
 					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 					m_new->m_pkthdr.csum_data = 0xffff;
 				}
 			} else if (l4proto != IPPROTO_DONE && do_csum &&
 			    (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
 				rxr->hn_csum_trusted++;
 				m_new->m_pkthdr.csum_flags |=
 				    (CSUM_IP_CHECKED | CSUM_IP_VALID);
 			}
 		}
 	}
 
 	if (rxr->rsc.vlan_info != NULL) {
 		m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
 		    NDIS_VLAN_INFO_ID(*(rxr->rsc.vlan_info)),
 		    NDIS_VLAN_INFO_PRI(*(rxr->rsc.vlan_info)),
 		    NDIS_VLAN_INFO_CFI(*(rxr->rsc.vlan_info)));
 		m_new->m_flags |= M_VLANTAG;
 	}
 
 	/*
 	 * If VF is activated (transparent/non-transparent mode does not
 	 * matter here).
 	 *
 	 * - Disable LRO
 	 *
 	 *   hn(4) will only receive broadcast packets, multicast packets,
 	 *   TCP SYN and SYN|ACK (in Azure), LRO is useless for these
 	 *   packet types.
 	 *
 	 *   For non-transparent, we definitely _cannot_ enable LRO at
 	 *   all, since the LRO flush will use hn(4) as the receiving
 	 *   interface; i.e. hn_ifp->if_input(hn_ifp, m).
 	 */
 	if (is_vf)
 		do_lro = 0;
 
 	/*
 	 * If VF is activated (transparent/non-transparent mode does not
 	 * matter here), do _not_ mess with unsupported hash types or
 	 * functions.
 	 */
 	if (rxr->rsc.hash_info != NULL) {
 		rxr->hn_rss_pkts++;
 		m_new->m_pkthdr.flowid = *(rxr->rsc.hash_value);
 		if (!is_vf)
 			hash_type = M_HASHTYPE_OPAQUE_HASH;
 		if ((*(rxr->rsc.hash_info) & NDIS_HASH_FUNCTION_MASK) ==
 		    NDIS_HASH_FUNCTION_TOEPLITZ) {
 			uint32_t type = (*(rxr->rsc.hash_info) & NDIS_HASH_TYPE_MASK &
 			    rxr->hn_mbuf_hash);
 
 			/*
 			 * NOTE:
 			 * do_lro is reset, if the hash types are not TCP
 			 * related.  See the comment in the above csum_flags
 			 * setup section.
 			 */
 			switch (type) {
 			case NDIS_HASH_IPV4:
 				hash_type = M_HASHTYPE_RSS_IPV4;
 				do_lro = 0;
 				break;
 
 			case NDIS_HASH_TCP_IPV4:
 				hash_type = M_HASHTYPE_RSS_TCP_IPV4;
 				if (rxr->hn_rx_flags & HN_RX_FLAG_UDP_HASH) {
 					int def_htype = M_HASHTYPE_OPAQUE_HASH;
 
 					if (is_vf)
 						def_htype = M_HASHTYPE_NONE;
 
 					/*
 					 * UDP 4-tuple hash is delivered as
 					 * TCP 4-tuple hash.
 					 */
 					if (l3proto == ETHERTYPE_MAX) {
 						hn_rxpkt_proto(m_new,
 						    &l3proto, &l4proto);
 					}
 					if (l3proto == ETHERTYPE_IP) {
 						if (l4proto == IPPROTO_UDP &&
 						    (rxr->hn_mbuf_hash &
 						     NDIS_HASH_UDP_IPV4_X)) {
 							hash_type =
 							M_HASHTYPE_RSS_UDP_IPV4;
 							do_lro = 0;
 						} else if (l4proto !=
 						    IPPROTO_TCP) {
 							hash_type = def_htype;
 							do_lro = 0;
 						}
 					} else {
 						hash_type = def_htype;
 						do_lro = 0;
 					}
 				}
 				break;
 
 			case NDIS_HASH_IPV6:
 				hash_type = M_HASHTYPE_RSS_IPV6;
 				do_lro = 0;
 				break;
 
 			case NDIS_HASH_IPV6_EX:
 				hash_type = M_HASHTYPE_RSS_IPV6_EX;
 				do_lro = 0;
 				break;
 
 			case NDIS_HASH_TCP_IPV6:
 				hash_type = M_HASHTYPE_RSS_TCP_IPV6;
 				break;
 
 			case NDIS_HASH_TCP_IPV6_EX:
 				hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX;
 				break;
 			}
 		}
 	} else if (!is_vf) {
 		m_new->m_pkthdr.flowid = rxr->hn_rx_idx;
 		hash_type = M_HASHTYPE_OPAQUE;
 	}
 	M_HASHTYPE_SET(m_new, hash_type);
 
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	if (hn_ifp != ifp) {
 		const struct ether_header *eh;
 
 		/*
 		 * Non-transparent mode VF is activated.
 		 */
 
 		/*
 		 * Allow tapping on hn(4).
 		 */
 		ETHER_BPF_MTAP(hn_ifp, m_new);
 
 		/*
 		 * Update hn(4)'s stats.
 		 */
 		if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1);
 		if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len);
 		/* Checked at the beginning of this function. */
 		KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame"));
 		eh = mtod(m_new, struct ether_header *);
 		if (ETHER_IS_MULTICAST(eh->ether_dhost))
 			if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1);
 	}
 	rxr->hn_pkts++;
 
 	if ((if_getcapenable(hn_ifp) & IFCAP_LRO) && do_lro) {
 #if defined(INET) || defined(INET6)
 		struct lro_ctrl *lro = &rxr->hn_lro;
 
 		if (lro->lro_cnt) {
 			rxr->hn_lro_tried++;
 			if (hn_lro_rx(lro, m_new) == 0) {
 				/* DONE! */
 				return 0;
 			}
 		}
 #endif
 	}
 	if_input(ifp, m_new);
 
 	return (0);
 }
 
 /*
  * Interface ioctl handler for hn(4).
  *
  * Handles MTU, flags, capability, multicast, media and RSS query
  * requests itself; every other command is passed to ether_ioctl().
  * When a transparent mode VF is ready (hn_xpnt_vf_isready()), several
  * requests are also forwarded to, or mirrored on, the VF interface.
  *
  * Returns 0 on success or an errno value.
  */
 static int
 hn_ioctl(if_t ifp, u_long cmd, caddr_t data)
 {
 	struct hn_softc *sc = if_getsoftc(ifp);
 	struct ifreq *ifr = (struct ifreq *)data, ifr_vf;
 	if_t vf_ifp;
 	int mask, error = 0;
 	struct ifrsskey *ifrk;
 	struct ifrsshash *ifrh;
 	uint32_t mtu;
 
 	switch (cmd) {
 	case SIOCSIFMTU:
 		if (ifr->ifr_mtu > HN_MTU_MAX) {
 			error = EINVAL;
 			break;
 		}
 
 		HN_LOCK(sc);
 
 		/* Nothing to do (and no error) if the synthetic parts are gone. */
 		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
 			HN_UNLOCK(sc);
 			break;
 		}
 
 		if ((sc->hn_caps & HN_CAP_MTU) == 0) {
 			/* Can't change MTU */
 			HN_UNLOCK(sc);
 			error = EOPNOTSUPP;
 			break;
 		}
 
 		/* The requested MTU is already in effect. */
 		if (if_getmtu(ifp) == ifr->ifr_mtu) {
 			HN_UNLOCK(sc);
 			break;
 		}
 
 		/*
 		 * Change the VF's MTU first, using a copy of the request
 		 * that carries the VF's interface name.
 		 */
 		if (hn_xpnt_vf_isready(sc)) {
 			vf_ifp = sc->hn_vf_ifp;
 			ifr_vf = *ifr;
 			strlcpy(ifr_vf.ifr_name, if_name(vf_ifp),
 			    sizeof(ifr_vf.ifr_name));
 			error = ifhwioctl(SIOCSIFMTU,vf_ifp, 
 			    (caddr_t)&ifr_vf, curthread);
 			if (error) {
 				HN_UNLOCK(sc);
 				if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n",
 				    if_name(vf_ifp), ifr->ifr_mtu, error);
 				break;
 			}
 		}
 
 		/*
 		 * Suspend this interface before the synthetic parts
 		 * are ripped.
 		 */
 		hn_suspend(sc);
 
 		/*
 		 * Detach the synthetics parts, i.e. NVS and RNDIS.
 		 */
 		hn_synth_detach(sc);
 
 		/*
 		 * Reattach the synthetic parts, i.e. NVS and RNDIS,
 		 * with the new MTU setting.
 		 */
 		error = hn_synth_attach(sc, ifr->ifr_mtu);
 		if (error) {
 			HN_UNLOCK(sc);
 			break;
 		}
 
 		/* Fall back to the requested MTU if RNDIS cannot report one. */
 		error = hn_rndis_get_mtu(sc, &mtu);
 		if (error)
 			mtu = ifr->ifr_mtu;
 		else if (bootverbose)
 			if_printf(ifp, "RNDIS mtu %u\n", mtu);
 
 		/*
 		 * Commit the requested MTU, after the synthetic parts
 		 * have been successfully attached.
 		 */
 		if (mtu >= ifr->ifr_mtu) {
 			mtu = ifr->ifr_mtu;
 		} else {
 			if_printf(ifp, "fixup mtu %d -> %u\n",
 			    ifr->ifr_mtu, mtu);
 		}
 		if_setmtu(ifp, mtu);
 
 		/*
 		 * Synthetic parts' reattach may change the chimney
 		 * sending size; update it.
 		 */
 		if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
 			hn_set_chim_size(sc, sc->hn_chim_szmax);
 
 		/*
 		 * Make sure that various parameters based on MTU are
 		 * still valid, after the MTU change.
 		 */
 		hn_mtu_change_fixup(sc);
 
 		/*
 		 * All done!  Resume the interface now.
 		 */
 		hn_resume(sc);
 
 		if ((sc->hn_flags & HN_FLAG_RXVF) ||
 		    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
 			/*
 			 * Since we have reattached the NVS part,
 			 * change the datapath to VF again; in case
 			 * that it is lost, after the NVS was detached.
 			 */
 			hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);
 		}
 
 		HN_UNLOCK(sc);
 		break;
 
 	case SIOCSIFFLAGS:
 		HN_LOCK(sc);
 
 		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
 			HN_UNLOCK(sc);
 			break;
 		}
 
 		if (hn_xpnt_vf_isready(sc))
 			hn_xpnt_vf_saveifflags(sc);
 
 		if (if_getflags(ifp) & IFF_UP) {
 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
 				/*
 				 * Caller might hold mutex, e.g.
 				 * bpf; use busy-wait for the RNDIS
 				 * reply.
 				 */
 				HN_NO_SLEEPING(sc);
 				hn_rxfilter_config(sc);
 				HN_SLEEPING_OK(sc);
 
 				if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
 					error = hn_xpnt_vf_iocsetflags(sc);
 			} else {
 				hn_init_locked(sc);
 			}
 		} else {
 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
 				hn_stop(sc, false);
 		}
 		/* Remember the flags that have just been applied. */
 		sc->hn_if_flags = if_getflags(ifp);
 
 		HN_UNLOCK(sc);
 		break;
 
 	case SIOCSIFCAP:
 		HN_LOCK(sc);
 
 		/*
 		 * With a ready transparent mode VF the request is handed
 		 * to hn_xpnt_vf_iocsetcaps() under the VF's name.
 		 */
 		if (hn_xpnt_vf_isready(sc)) {
 			ifr_vf = *ifr;
 			strlcpy(ifr_vf.ifr_name, if_name(sc->hn_vf_ifp),
 			    sizeof(ifr_vf.ifr_name));
 			error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf);
 			HN_UNLOCK(sc);
 			break;
 		}
 
 		/*
 		 * Fix up requested capabilities w/ supported capabilities,
 		 * since the supported capabilities could have been changed.
 		 */
 		mask = (ifr->ifr_reqcap & if_getcapabilities(ifp)) ^
 		    if_getcapenable(ifp);
 
 		/*
 		 * 'mask' now holds the capabilities whose enabled state has
 		 * to flip; each toggle below also keeps the hwassist bits
 		 * in step with the new state.
 		 */
 		if (mask & IFCAP_TXCSUM) {
 			if_togglecapenable(ifp, IFCAP_TXCSUM);
 			if (if_getcapenable(ifp) & IFCAP_TXCSUM)
 				if_sethwassistbits(ifp, HN_CSUM_IP_HWASSIST(sc), 0);
 			else
 				if_sethwassistbits(ifp, 0, HN_CSUM_IP_HWASSIST(sc));
 		}
 		if (mask & IFCAP_TXCSUM_IPV6) {
 			if_togglecapenable(ifp, IFCAP_TXCSUM_IPV6);
 			if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
 				if_sethwassistbits(ifp, HN_CSUM_IP6_HWASSIST(sc), 0);
 			else
 				if_sethwassistbits(ifp, 0, HN_CSUM_IP6_HWASSIST(sc));
 		}
 
 		/* TODO: flip RNDIS offload parameters for RXCSUM. */
 		if (mask & IFCAP_RXCSUM)
 			if_togglecapenable(ifp, IFCAP_RXCSUM);
 #ifdef foo
 		/* We can't diff IPv6 packets from IPv4 packets on RX path. */
 		if (mask & IFCAP_RXCSUM_IPV6)
 			if_togglecapenable(ifp, IFCAP_RXCSUM_IPV6);
 #endif
 
 		if (mask & IFCAP_LRO)
 			if_togglecapenable(ifp, IFCAP_LRO);
 
 		if (mask & IFCAP_TSO4) {
 			if_togglecapenable(ifp, IFCAP_TSO4);
 			if (if_getcapenable(ifp) & IFCAP_TSO4)
 				if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
 			else
 				if_sethwassistbits(ifp, 0, CSUM_IP_TSO);
 		}
 		if (mask & IFCAP_TSO6) {
 			if_togglecapenable(ifp, IFCAP_TSO6);
 			if (if_getcapenable(ifp) & IFCAP_TSO6)
 				if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
 			else
 				if_sethwassistbits(ifp, 0, CSUM_IP6_TSO);
 		}
 
 		HN_UNLOCK(sc);
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		HN_LOCK(sc);
 
 		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
 			HN_UNLOCK(sc);
 			break;
 		}
 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
 			/*
 			 * Multicast uses mutex; use busy-wait for
 			 * the RNDIS reply.
 			 */
 			HN_NO_SLEEPING(sc);
 			hn_rxfilter_config(sc);
 			HN_SLEEPING_OK(sc);
 		}
 
 		/* XXX vlan(4) style mcast addr maintenance */
 		if (hn_xpnt_vf_isready(sc)) {
 			int old_if_flags;
 
 			old_if_flags = if_getflags(sc->hn_vf_ifp);
 			hn_xpnt_vf_saveifflags(sc);
 
 			/*
 			 * Only push the flags to the VF if its
 			 * IFF_ALLMULTI bit actually changed.
 			 */
 			if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) &&
 			    ((old_if_flags ^ if_getflags(sc->hn_vf_ifp)) &
 			     IFF_ALLMULTI))
 				error = hn_xpnt_vf_iocsetflags(sc);
 		}
 
 		HN_UNLOCK(sc);
 		break;
 
 	case SIOCSIFMEDIA:
 	case SIOCGIFMEDIA:
 		HN_LOCK(sc);
 		if (hn_xpnt_vf_isready(sc)) {
 			/*
 			 * SIOCGIFMEDIA expects ifmediareq, so don't
 			 * create and pass ifr_vf to the VF here; just
 			 * replace the ifr_name.
 			 */
 			vf_ifp = sc->hn_vf_ifp;
 			strlcpy(ifr->ifr_name, if_name(vf_ifp),
 			    sizeof(ifr->ifr_name));
 			error = ifhwioctl(cmd, vf_ifp, data, curthread);
 			/* Restore the ifr_name. */
 			strlcpy(ifr->ifr_name, if_name(ifp),
 			    sizeof(ifr->ifr_name));
 			HN_UNLOCK(sc);
 			break;
 		}
 		HN_UNLOCK(sc);
 		error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
 		break;
 
 	case SIOCGIFRSSHASH:
 		ifrh = (struct ifrsshash *)data;
 		HN_LOCK(sc);
 		/* With a single RX ring in use, report that RSS is not active. */
 		if (sc->hn_rx_ring_inuse == 1) {
 			HN_UNLOCK(sc);
 			ifrh->ifrh_func = RSS_FUNC_NONE;
 			ifrh->ifrh_types = 0;
 			break;
 		}
 
 		if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ)
 			ifrh->ifrh_func = RSS_FUNC_TOEPLITZ;
 		else
 			ifrh->ifrh_func = RSS_FUNC_PRIVATE;
 		ifrh->ifrh_types = hn_rss_type_fromndis(sc->hn_rss_hash);
 		HN_UNLOCK(sc);
 		break;
 
 	case SIOCGIFRSSKEY:
 		ifrk = (struct ifrsskey *)data;
 		HN_LOCK(sc);
 		/* With a single RX ring in use, report an empty key. */
 		if (sc->hn_rx_ring_inuse == 1) {
 			HN_UNLOCK(sc);
 			ifrk->ifrk_func = RSS_FUNC_NONE;
 			ifrk->ifrk_keylen = 0;
 			break;
 		}
 		if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ)
 			ifrk->ifrk_func = RSS_FUNC_TOEPLITZ;
 		else
 			ifrk->ifrk_func = RSS_FUNC_PRIVATE;
 		ifrk->ifrk_keylen = NDIS_HASH_KEYSIZE_TOEPLITZ;
 		memcpy(ifrk->ifrk_key, sc->hn_rss.rss_key,
 		    NDIS_HASH_KEYSIZE_TOEPLITZ);
 		HN_UNLOCK(sc);
 		break;
 
 	default:
 		error = ether_ioctl(ifp, cmd, data);
 		break;
 	}
 	return (error);
 }
 
 /*
  * Stop the interface; the caller must hold the softc lock and the
  * synthetic parts must be attached.
  *
  * Clears IFF_DRV_RUNNING, disables polling, tears down an enabled
  * transparent mode VF, suspends data transfers and clears the OACTIVE
  * state.  Unless 'detaching' is true, the RX filter is reconfigured
  * afterwards when HN_FLAG_RXVF is set.
  */
 static void
 hn_stop(struct hn_softc *sc, bool detaching)
 {
 	if_t ifp = sc->hn_ifp;
 	int i;
 
 	HN_LOCK_ASSERT(sc);
 
 	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
 	    ("synthetic parts were not attached"));
 
 	/* Clear RUNNING bit ASAP. */
 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
 
 	/* Disable polling. */
 	hn_polling(sc, 0);
 
 	if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) {
 		KASSERT(sc->hn_vf_ifp != NULL,
 		    ("%s: VF is not attached", if_name(ifp)));
 
 		/* Mark transparent mode VF as disabled. */
 		hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */);
 
 		/*
 		 * NOTE:
 		 * Datapath setting must happen _before_ bringing
 		 * the VF down.
 		 */
 		hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH);
 
 		/*
 		 * Bring the VF down.
 		 *
 		 * IFF_UP has to be cleared on the VF's ifnet, not on
 		 * hn(4)'s own: it is the VF's flags that are pushed
 		 * to the VF by hn_xpnt_vf_iocsetflags().
 		 */
 		hn_xpnt_vf_saveifflags(sc);
 		if_setflagbits(sc->hn_vf_ifp, 0, IFF_UP);
 		hn_xpnt_vf_iocsetflags(sc);
 	}
 
 	/* Suspend data transfers. */
 	hn_suspend_data(sc);
 
 	/* Clear OACTIVE bit. */
 	if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
 	for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
 		sc->hn_tx_ring[i].hn_oactive = 0;
 
 	/*
 	 * If the non-transparent mode VF is active, make sure
 	 * that the RX filter still allows packet reception.
 	 */
 	if (!detaching && (sc->hn_flags & HN_FLAG_RXVF))
 		hn_rxfilter_config(sc);
 }
 
 /*
  * Bring the interface up with the softc lock held.  Does nothing when
  * the synthetic parts are not attached or the interface already runs.
  */
 static void
 hn_init_locked(struct hn_softc *sc)
 {
 	if_t ifp = sc->hn_ifp;
 	int ring;
 
 	HN_LOCK_ASSERT(sc);
 
 	if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0 ||
 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)
 		return;
 
 	/* Program the RX filter first. */
 	hn_rxfilter_config(sc);
 
 	/* Drop the OACTIVE state, globally and on every TX ring in use. */
 	if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
 	for (ring = 0; ring < sc->hn_tx_ring_inuse; ring++)
 		sc->hn_tx_ring[ring].hn_oactive = 0;
 
 	/* Let the TX rings run again. */
 	hn_resume_tx(sc, sc->hn_tx_ring_inuse);
 
 	/* Set up the transparent VF, if one is ready. */
 	if (hn_xpnt_vf_isready(sc))
 		hn_xpnt_vf_init(sc);
 
 	/* From here on the interface counts as running. */
 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
 
 	/* Restart polling when a polling rate is configured. */
 	if (sc->hn_pollhz > 0)
 		hn_polling(sc, sc->hn_pollhz);
 }
 
 /*
  * Locking wrapper around hn_init_locked(); 'xsc' is the softc.
  */
 static void
 hn_init(void *xsc)
 {
 	struct hn_softc *softc;
 
 	softc = xsc;
 	HN_LOCK(softc);
 	hn_init_locked(softc);
 	HN_UNLOCK(softc);
 }
 
 static int
 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	unsigned int lenlim;
 	int error;
 
 	lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim;
 	error = sysctl_handle_int(oidp, &lenlim, 0, req);
 	if (error || req->newptr == NULL)
 		return error;
 
 	HN_LOCK(sc);
 	if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) ||
 	    lenlim > TCP_LRO_LENGTH_MAX) {
 		HN_UNLOCK(sc);
 		return EINVAL;
 	}
 	hn_set_lro_lenlim(sc, lenlim);
 	HN_UNLOCK(sc);
 
 	return 0;
 }
 
 static int
 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int ackcnt, error, i;
 
 	/*
 	 * lro_ackcnt_lim is append count limit,
 	 * +1 to turn it into aggregation limit.
 	 */
 	ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1;
 	error = sysctl_handle_int(oidp, &ackcnt, 0, req);
 	if (error || req->newptr == NULL)
 		return error;
 
 	if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1))
 		return EINVAL;
 
 	/*
 	 * Convert aggregation limit back to append
 	 * count limit.
 	 */
 	--ackcnt;
 	HN_LOCK(sc);
 	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
 		sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt;
 	HN_UNLOCK(sc);
 	return 0;
 }
 
 static int
 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int hcsum = arg2;
 	int on, error, i;
 
 	on = 0;
 	if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum)
 		on = 1;
 
 	error = sysctl_handle_int(oidp, &on, 0, req);
 	if (error || req->newptr == NULL)
 		return error;
 
 	HN_LOCK(sc);
 	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
 		struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
 
 		if (on)
 			rxr->hn_trust_hcsum |= hcsum;
 		else
 			rxr->hn_trust_hcsum &= ~hcsum;
 	}
 	HN_UNLOCK(sc);
 	return 0;
 }
 
 static int
 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int chim_size, error;
 
 	chim_size = sc->hn_tx_ring[0].hn_chim_size;
 	error = sysctl_handle_int(oidp, &chim_size, 0, req);
 	if (error || req->newptr == NULL)
 		return error;
 
 	if (chim_size > sc->hn_chim_szmax || chim_size <= 0)
 		return EINVAL;
 
 	HN_LOCK(sc);
 	hn_set_chim_size(sc, chim_size);
 	HN_UNLOCK(sc);
 	return 0;
 }
 
 static int
 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int ofs = arg2, i, error;
 	struct hn_rx_ring *rxr;
 	uint64_t stat;
 
 	stat = 0;
 	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
 		rxr = &sc->hn_rx_ring[i];
 		stat += *((uint64_t *)((uint8_t *)rxr + ofs));
 	}
 
 	error = sysctl_handle_64(oidp, &stat, 0, req);
 	if (error || req->newptr == NULL)
 		return error;
 
 	/* Zero out this stat. */
 	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
 		rxr = &sc->hn_rx_ring[i];
 		*((uint64_t *)((uint8_t *)rxr + ofs)) = 0;
 	}
 	return 0;
 }
 
 static int
 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int ofs = arg2, i, error;
 	struct hn_rx_ring *rxr;
 	u_long stat;
 
 	stat = 0;
 	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
 		rxr = &sc->hn_rx_ring[i];
 		stat += *((u_long *)((uint8_t *)rxr + ofs));
 	}
 
 	error = sysctl_handle_long(oidp, &stat, 0, req);
 	if (error || req->newptr == NULL)
 		return error;
 
 	/* Zero out this stat. */
 	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
 		rxr = &sc->hn_rx_ring[i];
 		*((u_long *)((uint8_t *)rxr + ofs)) = 0;
 	}
 	return 0;
 }
 
 static int
 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int ofs = arg2, i, error;
 	struct hn_tx_ring *txr;
 	u_long stat;
 
 	stat = 0;
 	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
 		txr = &sc->hn_tx_ring[i];
 		stat += *((u_long *)((uint8_t *)txr + ofs));
 	}
 
 	error = sysctl_handle_long(oidp, &stat, 0, req);
 	if (error || req->newptr == NULL)
 		return error;
 
 	/* Zero out this stat. */
 	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
 		txr = &sc->hn_tx_ring[i];
 		*((u_long *)((uint8_t *)txr + ofs)) = 0;
 	}
 	return 0;
 }
 
 static int
 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int ofs = arg2, i, error, conf;
 	struct hn_tx_ring *txr;
 
 	txr = &sc->hn_tx_ring[0];
 	conf = *((int *)((uint8_t *)txr + ofs));
 
 	error = sysctl_handle_int(oidp, &conf, 0, req);
 	if (error || req->newptr == NULL)
 		return error;
 
 	HN_LOCK(sc);
 	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
 		txr = &sc->hn_tx_ring[i];
 		*((int *)((uint8_t *)txr + ofs)) = conf;
 	}
 	HN_UNLOCK(sc);
 
 	return 0;
 }
 
 static int
 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int error, size;
 
 	size = sc->hn_agg_size;
 	error = sysctl_handle_int(oidp, &size, 0, req);
 	if (error || req->newptr == NULL)
 		return (error);
 
 	HN_LOCK(sc);
 	sc->hn_agg_size = size;
 	hn_set_txagg(sc);
 	HN_UNLOCK(sc);
 
 	return (0);
 }
 
 static int
 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int error, pkts;
 
 	pkts = sc->hn_agg_pkts;
 	error = sysctl_handle_int(oidp, &pkts, 0, req);
 	if (error || req->newptr == NULL)
 		return (error);
 
 	HN_LOCK(sc);
 	sc->hn_agg_pkts = pkts;
 	hn_set_txagg(sc);
 	HN_UNLOCK(sc);
 
 	return (0);
 }
 
 /*
  * Sysctl handler reporting hn_agg_pktmax of TX ring 0.  Only a local
  * copy is handed to sysctl_handle_int(), so nothing is written back.
  */
 static int
 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int pktmax = sc->hn_tx_ring[0].hn_agg_pktmax;
 
 	return (sysctl_handle_int(oidp, &pktmax, 0, req));
 }
 
 /*
  * Sysctl handler reporting hn_agg_align of TX ring 0.  Only a local
  * copy is handed to sysctl_handle_int(), so nothing is written back.
  */
 static int
 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int agg_align = sc->hn_tx_ring[0].hn_agg_align;
 
 	return (sysctl_handle_int(oidp, &agg_align, 0, req));
 }
 
 /*
  * Enable polling on one channel at 'pollhz', or disable polling when
  * 'pollhz' is zero.
  */
 static void
 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz)
 {
 	if (pollhz != 0) {
 		vmbus_chan_poll_enable(chan, pollhz);
 		return;
 	}
 	vmbus_chan_poll_disable(chan);
 }
 
 /*
  * Apply the polling setting 'pollhz' to all sub-channels and then to
  * the primary channel.  The softc lock must be held.
  */
 static void
 hn_polling(struct hn_softc *sc, u_int pollhz)
 {
 	struct vmbus_channel **subchans;
 	int idx, subchan_cnt;
 
 	subchan_cnt = sc->hn_rx_ring_inuse - 1;
 
 	HN_LOCK_ASSERT(sc);
 
 	if (subchan_cnt > 0) {
 		subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
 		for (idx = 0; idx < subchan_cnt; idx++)
 			hn_chan_polling(subchans[idx], pollhz);
 		vmbus_subchan_rel(subchans, subchan_cnt);
 	}
 
 	hn_chan_polling(sc->hn_prichan, pollhz);
 }
 
 static int
 hn_polling_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int pollhz, error;
 
 	pollhz = sc->hn_pollhz;
 	error = sysctl_handle_int(oidp, &pollhz, 0, req);
 	if (error || req->newptr == NULL)
 		return (error);
 
 	if (pollhz != 0 &&
 	    (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX))
 		return (EINVAL);
 
 	HN_LOCK(sc);
 	if (sc->hn_pollhz != pollhz) {
 		sc->hn_pollhz = pollhz;
 		if ((if_getdrvflags(sc->hn_ifp) & IFF_DRV_RUNNING) &&
 		    (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
 			hn_polling(sc, sc->hn_pollhz);
 	}
 	HN_UNLOCK(sc);
 
 	return (0);
 }
 
 /*
  * Sysctl handler that formats sc->hn_ndis_ver as "major.minor".
  */
 static int
 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	char ver[16];
 
 	snprintf(ver, sizeof(ver), "%u.%u",
 	    HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
 	    HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver));
 
 	return (sysctl_handle_string(oidp, ver, sizeof(ver), req));
 }
 
 /*
  * Sysctl handler that renders sc->hn_caps with "%b" and HN_CAP_BITS.
  */
 static int
 hn_caps_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	uint32_t snapshot;
 	char text[128];
 
 	/* Take the value under the lock, format it outside. */
 	HN_LOCK(sc);
 	snapshot = sc->hn_caps;
 	HN_UNLOCK(sc);
 
 	snprintf(text, sizeof(text), "%b", snapshot, HN_CAP_BITS);
 	return (sysctl_handle_string(oidp, text, sizeof(text), req));
 }
 
 /*
  * Sysctl handler that renders the interface's hwassist bits with "%b"
  * and CSUM_BITS.
  */
 static int
 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	uint32_t snapshot;
 	char text[128];
 
 	/* Take the value under the lock, format it outside. */
 	HN_LOCK(sc);
 	snapshot = if_gethwassist(sc->hn_ifp);
 	HN_UNLOCK(sc);
 
 	snprintf(text, sizeof(text), "%b", snapshot, CSUM_BITS);
 	return (sysctl_handle_string(oidp, text, sizeof(text), req));
 }
 
 /*
  * Sysctl handler that renders sc->hn_rx_filter with "%b" and
  * NDIS_PACKET_TYPES.
  */
 static int
 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	uint32_t snapshot;
 	char text[128];
 
 	/* Take the value under the lock, format it outside. */
 	HN_LOCK(sc);
 	snapshot = sc->hn_rx_filter;
 	HN_UNLOCK(sc);
 
 	snprintf(text, sizeof(text), "%b", snapshot, NDIS_PACKET_TYPES);
 	return (sysctl_handle_string(oidp, text, sizeof(text), req));
 }
 
 static int
 hn_rsc_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	uint32_t mtu;
 	int error;
 	HN_LOCK(sc);
 	error = hn_rndis_get_mtu(sc, &mtu);
 	if (error) {
 		if_printf(sc->hn_ifp, "failed to get mtu\n");
 		goto back;
 	}
 	error = SYSCTL_OUT(req, &(sc->hn_rsc_ctrl), sizeof(sc->hn_rsc_ctrl));
 	if (error || req->newptr == NULL)
 		goto back;
 
 	error = SYSCTL_IN(req, &(sc->hn_rsc_ctrl), sizeof(sc->hn_rsc_ctrl));
 	if (error)
 		goto back;
 	error = hn_rndis_reconf_offload(sc, mtu);
 back:
 	HN_UNLOCK(sc);
 	return (error);
 }
 #ifndef RSS
 
 static int
 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int error;
 
 	HN_LOCK(sc);
 
 	error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
 	if (error || req->newptr == NULL)
 		goto back;
 
 	if ((sc->hn_flags & HN_FLAG_RXVF) ||
 	    (hn_xpnt_vf && sc->hn_vf_ifp != NULL)) {
 		/*
 		 * RSS key is synchronized w/ VF's, don't allow users
 		 * to change it.
 		 */
 		error = EBUSY;
 		goto back;
 	}
 
 	error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
 	if (error)
 		goto back;
 	sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
 
 	if (sc->hn_rx_ring_inuse > 1) {
 		error = hn_rss_reconfig(sc);
 	} else {
 		/* Not RSS capable, at least for now; just save the RSS key. */
 		error = 0;
 	}
 back:
 	HN_UNLOCK(sc);
 	return (error);
 }
 
 /*
  * Sysctl handler for the RSS indirect table.  The table is copied
  * out; a write is refused with EOPNOTSUPP while only one RX ring is
  * in use, otherwise the new table is copied in, fixed up and RSS is
  * reconfigured.
  */
 static int
 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int error;
 
 	HN_LOCK(sc);
 
 	error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
 	if (error || req->newptr == NULL)
 		goto done;
 
 	/* No table changes while the interface is not RSS capable. */
 	if (sc->hn_rx_ring_inuse == 1) {
 		error = EOPNOTSUPP;
 		goto done;
 	}
 
 	error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
 	if (error)
 		goto done;
 
 	sc->hn_flags |= HN_FLAG_HAS_RSSIND;
 	hn_rss_ind_fixup(sc);
 	error = hn_rss_reconfig(sc);
 done:
 	HN_UNLOCK(sc);
 	return (error);
 }
 
 #endif	/* !RSS */
 
 /*
  * Sysctl handler that renders sc->hn_rss_hash with "%b" and
  * NDIS_HASH_BITS.
  */
 static int
 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	uint32_t snapshot;
 	char text[128];
 
 	/* Take the value under the lock, format it outside. */
 	HN_LOCK(sc);
 	snapshot = sc->hn_rss_hash;
 	HN_UNLOCK(sc);
 
 	snprintf(text, sizeof(text), "%b", snapshot, NDIS_HASH_BITS);
 	return (sysctl_handle_string(oidp, text, sizeof(text), req));
 }
 
 /*
  * Sysctl handler that renders sc->hn_rss_hcap with "%b" and
  * NDIS_HASH_BITS.
  */
 static int
 hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	uint32_t snapshot;
 	char text[128];
 
 	/* Take the value under the lock, format it outside. */
 	HN_LOCK(sc);
 	snapshot = sc->hn_rss_hcap;
 	HN_UNLOCK(sc);
 
 	snprintf(text, sizeof(text), "%b", snapshot, NDIS_HASH_BITS);
 	return (sysctl_handle_string(oidp, text, sizeof(text), req));
 }
 
 /*
  * Sysctl handler that renders hn_mbuf_hash of RX ring 0 with "%b" and
  * NDIS_HASH_BITS.
  */
 static int
 hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	uint32_t snapshot;
 	char text[128];
 
 	/* Take the value under the lock, format it outside. */
 	HN_LOCK(sc);
 	snapshot = sc->hn_rx_ring[0].hn_mbuf_hash;
 	HN_UNLOCK(sc);
 
 	snprintf(text, sizeof(text), "%b", snapshot, NDIS_HASH_BITS);
 	return (sysctl_handle_string(oidp, text, sizeof(text), req));
 }
 
 /*
  * Sysctl handler reporting the name of sc->hn_vf_ifp, or an empty
  * string when no VF interface is set.
  */
 static int
 hn_vf_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	char name[IFNAMSIZ + 1];
 	if_t vf;
 
 	name[0] = '\0';
 
 	HN_LOCK(sc);
 	vf = sc->hn_vf_ifp;
 	if (vf != NULL)
 		snprintf(name, sizeof(name), "%s", if_name(vf));
 	HN_UNLOCK(sc);
 
 	return (sysctl_handle_string(oidp, name, sizeof(name), req));
 }
 
 /*
  * Sysctl handler reporting the name of RX ring 0's hn_rxvf_ifp, or an
  * empty string when it is not set.
  */
 static int
 hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	char name[IFNAMSIZ + 1];
 	if_t vf;
 
 	name[0] = '\0';
 
 	HN_LOCK(sc);
 	vf = sc->hn_rx_ring[0].hn_rxvf_ifp;
 	if (vf != NULL)
 		snprintf(name, sizeof(name), "%s", if_name(vf));
 	HN_UNLOCK(sc);
 
 	return (sysctl_handle_string(oidp, name, sizeof(name), req));
 }
 
 static int
 hn_vflist_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct rm_priotracker pt;
 	struct sbuf *sb;
 	int error, i;
 	bool first;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	rm_rlock(&hn_vfmap_lock, &pt);
 
 	first = true;
 	for (i = 0; i < hn_vfmap_size; ++i) {
 		struct epoch_tracker et;
 		if_t ifp;
 
 		if (hn_vfmap[i] == NULL)
 			continue;
 
 		NET_EPOCH_ENTER(et);
 		ifp = ifnet_byindex(i);
 		if (ifp != NULL) {
 			if (first)
 				sbuf_printf(sb, "%s", if_name(ifp));
 			else
 				sbuf_printf(sb, " %s", if_name(ifp));
 			first = false;
 		}
 		NET_EPOCH_EXIT(et);
 	}
 
 	rm_runlock(&hn_vfmap_lock, &pt);
 
 	error = sbuf_finish(sb);
 	sbuf_delete(sb);
 	return (error);
 }
 
 static int
 hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct rm_priotracker pt;
 	struct sbuf *sb;
 	int error, i;
 	bool first;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	rm_rlock(&hn_vfmap_lock, &pt);
 
 	first = true;
 	for (i = 0; i < hn_vfmap_size; ++i) {
 		struct epoch_tracker et;
 		if_t ifp, hn_ifp;
 
 		hn_ifp = hn_vfmap[i];
 		if (hn_ifp == NULL)
 			continue;
 
 		NET_EPOCH_ENTER(et);
 		ifp = ifnet_byindex(i);
 		if (ifp != NULL) {
 			if (first) {
 				sbuf_printf(sb, "%s:%s", if_name(ifp),
 				    if_name(hn_ifp));
 			} else {
 				sbuf_printf(sb, " %s:%s", if_name(ifp),
 				    if_name(hn_ifp));
 			}
 			first = false;
 		}
 		NET_EPOCH_EXIT(et);
 	}
 
 	rm_runlock(&hn_vfmap_lock, &pt);
 
 	error = sbuf_finish(sb);
 	sbuf_delete(sb);
 	return (error);
 }
 
 /*
  * Sysctl handler: read or change the HN_XVFFLAG_ACCBPF bit of
  * hn_xvf_flags.  Reads report 0 or 1; a write with a non-zero value
  * sets the bit and a write of zero clears it.
  */
 static int
 hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int accbpf, rc;
 
 	accbpf = (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) ? 1 : 0;
 	rc = sysctl_handle_int(oidp, &accbpf, 0, req);
 	if (rc || req->newptr == NULL)
 		return (rc);
 
 	HN_LOCK(sc);
 	/* NOTE: hn_vf_lock for hn_transmit() */
 	rm_wlock(&sc->hn_vf_lock);
 	if (accbpf != 0)
 		sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;
 	else
 		sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF;
 	rm_wunlock(&sc->hn_vf_lock);
 	HN_UNLOCK(sc);
 
 	return (0);
 }
 
 /*
  * Sysctl handler: report 1 when HN_XVFFLAG_ENABLED is set in
  * hn_xvf_flags, 0 otherwise.
  */
 static int
 hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct hn_softc *sc = arg1;
 	int val;
 
 	val = (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) ? 1 : 0;
 	return (sysctl_handle_int(oidp, &val, 0, req));
 }
 
 /*
  * Sanity check the IPv4 packet located hoff bytes into mbuf m.
  *
  * Returns the IP protocol number (ip_p) when the IP header -- and, for
  * TCP and UDP, the transport header as well -- lies completely inside
  * the first mbuf and is consistent with the lengths stated in the
  * packet.  Returns IPPROTO_DONE for IP fragments and for any packet
  * that fails one of those checks.
  */
 static int
 hn_check_iplen(const struct mbuf *m, int hoff)
 {
 	const struct tcphdr *tcp;
 	const struct ip *iph;
 	int need, ip_hdrlen, ip_totlen;
 	int tcp_hdrlen;				/* TCP data offset */
 
 	need = hoff + sizeof(struct ip);
 
 	/*
 	 * The packet has to be at least as large as a fixed IP header,
 	 * and that fixed header has to sit completely in the first mbuf.
 	 */
 	if (m->m_pkthdr.len < need || m->m_len < need)
 		return (IPPROTO_DONE);
 
 	iph = mtodo(m, hoff);
 
 	/* The stated header length must not be below the minimum. */
 	ip_hdrlen = iph->ip_hl << 2;
 	if (ip_hdrlen < sizeof(struct ip))
 		return (IPPROTO_DONE);
 
 	/* The full IP header, options included, must be in this mbuf. */
 	if (m->m_len < hoff + ip_hdrlen)
 		return (IPPROTO_DONE);
 
 	/*
 	 * The buffers must hold at least as much data as the IP header
 	 * says the packet contains.
 	 */
 	ip_totlen = ntohs(iph->ip_len);
 	if (m->m_pkthdr.len < hoff + ip_totlen)
 		return (IPPROTO_DONE);
 
 	/* IP fragments are ignored. */
 	if (ntohs(iph->ip_off) & (IP_OFFMASK | IP_MF))
 		return (IPPROTO_DONE);
 
 	/*
 	 * The TCP or UDP header has to be entirely contained in the
 	 * first mbuf and has to fit inside the stated IP length.
 	 */
 	switch (iph->ip_p) {
 	case IPPROTO_TCP:
 		if (ip_totlen < ip_hdrlen + sizeof(struct tcphdr))
 			return (IPPROTO_DONE);
 		if (m->m_len < hoff + ip_hdrlen + sizeof(struct tcphdr))
 			return (IPPROTO_DONE);
 		tcp = (const struct tcphdr *)((const uint8_t *)iph + ip_hdrlen);
 		tcp_hdrlen = tcp->th_off << 2;
 		if (tcp_hdrlen < sizeof(struct tcphdr) ||
 		    tcp_hdrlen + ip_hdrlen > ip_totlen)
 			return (IPPROTO_DONE);
 		if (m->m_len < hoff + ip_hdrlen + tcp_hdrlen)
 			return (IPPROTO_DONE);
 		break;
 
 	case IPPROTO_UDP:
 		if (ip_totlen < ip_hdrlen + sizeof(struct udphdr))
 			return (IPPROTO_DONE);
 		if (m->m_len < hoff + ip_hdrlen + sizeof(struct udphdr))
 			return (IPPROTO_DONE);
 		break;
 
 	default:
 		if (ip_totlen < ip_hdrlen)
 			return (IPPROTO_DONE);
 		break;
 	}
 
 	return (iph->ip_p);
 }
 
 /*
  * Determine the layer 3 and layer 4 protocols of a received Ethernet
  * frame.
  *
  * *l3proto receives the ether type (the encapsulated one for 802.1Q
  * frames).  *l4proto receives the result of hn_check_iplen() for IPv4
  * and IPPROTO_DONE for everything else.  When a VLAN tagged frame is
  * shorter than a VLAN header in the first mbuf, neither output is
  * written.
  */
 static void
 hn_rxpkt_proto(const struct mbuf *m_new, int *l3proto, int *l4proto)
 {
 	const struct ether_header *eth;
 	uint16_t proto;
 	int l3off;
 
 	l3off = sizeof(*eth);
 	/* The first mbuf is expected to hold a whole Ethernet header. */
 	KASSERT(m_new->m_len >= l3off, ("not ethernet frame"));
 
 	eth = mtod(m_new, const struct ether_header *);
 	proto = ntohs(eth->ether_type);
 	if (proto == ETHERTYPE_VLAN) {
 		const struct ether_vlan_header *vlan;
 
 		/* Skip the tag; the real ether type follows it. */
 		l3off = sizeof(*vlan);
 		if (m_new->m_len < l3off)
 			return;
 		vlan = mtod(m_new, const struct ether_vlan_header *);
 		proto = ntohs(vlan->evl_proto);
 	}
 
 	*l3proto = proto;
 	*l4proto = (proto == ETHERTYPE_IP) ?
 	    hn_check_iplen(m_new, l3off) : IPPROTO_DONE;
 }
 
 /*
  * Allocate and initialize the receive side state for ring_cnt RX rings.
  *
  * This allocates the RXBUF shared by all channels and the hn_rx_ring
  * array, sets up every ring (bufring memory, host checksum trust flags,
  * packet buffer, LRO) and registers the dev.hn.UNIT.rx sysctl tree plus
  * the aggregate RX sysctls.
  *
  * Returns 0 on success and ENOMEM when the RXBUF or a bufring cannot be
  * allocated.  Nothing that was already allocated is released here on
  * failure.
  * NOTE(review): presumably the caller tears down via
  * hn_destroy_rx_data() on error -- confirm.
  */
 static int
 hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
 {
 	struct sysctl_oid_list *child;
 	struct sysctl_ctx_list *ctx;
 	device_t dev = sc->hn_dev;
 #if defined(INET) || defined(INET6)
 	int lroent_cnt;
 #endif
 	int i;
 
 	/*
 	 * Create RXBUF for reception.
 	 *
 	 * NOTE:
 	 * - It is shared by all channels.
 	 * - A large enough buffer is allocated, certain version of NVSes
 	 *   may further limit the usable space.
 	 */
 	sc->hn_rxbuf = contigmalloc(HN_RXBUF_SIZE, M_DEVBUF, M_WAITOK | M_ZERO,
 	    0ul, ~0ul, PAGE_SIZE, 0);
 	if (sc->hn_rxbuf == NULL) {
 		device_printf(sc->hn_dev, "allocate rxbuf failed\n");
 		return (ENOMEM);
 	}
 
 	/* All created rings start out as in use. */
 	sc->hn_rx_ring_cnt = ring_cnt;
 	sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt;
 
 	sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt,
 	    M_DEVBUF, M_WAITOK | M_ZERO);
 
 #if defined(INET) || defined(INET6)
 	/* Never use fewer LRO entries than TCP_LRO_ENTRIES. */
 	lroent_cnt = hn_lro_entry_count;
 	if (lroent_cnt < TCP_LRO_ENTRIES)
 		lroent_cnt = TCP_LRO_ENTRIES;
 	if (bootverbose)
 		device_printf(dev, "LRO: entry count %d\n", lroent_cnt);
 #endif	/* INET || INET6 */
 
 	ctx = device_get_sysctl_ctx(dev);
 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
 
 	/* Create dev.hn.UNIT.rx sysctl tree */
 	sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
 	    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
 
 	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
 		struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
 
 		/* One allocation sized for both the TX and the RX bufring. */
 		rxr->hn_br = contigmalloc(HN_TXBR_SIZE + HN_RXBR_SIZE, M_DEVBUF,
 		    M_WAITOK | M_ZERO, 0ul, ~0ul, PAGE_SIZE, 0);
 		if (rxr->hn_br == NULL) {
 			device_printf(dev, "allocate bufring failed\n");
 			return (ENOMEM);
 		}
 
 		/* Seed the per-ring trust flags from the global settings. */
 		if (hn_trust_hosttcp)
 			rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP;
 		if (hn_trust_hostudp)
 			rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP;
 		if (hn_trust_hostip)
 			rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP;
 		rxr->hn_mbuf_hash = NDIS_HASH_ALL;
 		rxr->hn_ifp = sc->hn_ifp;
 		/* Pair with the TX ring of the same index, if there is one. */
 		if (i < sc->hn_tx_ring_cnt)
 			rxr->hn_txr = &sc->hn_tx_ring[i];
 		rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF;
 		rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK);
 		rxr->hn_rx_idx = i;
 		rxr->hn_rxbuf = sc->hn_rxbuf;
 
 		/*
 		 * Initialize LRO.
 		 */
 #if defined(INET) || defined(INET6)
 		tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt,
 		    hn_lro_mbufq_depth);
 		rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
 		rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
 #endif	/* INET || INET6 */
 
 		if (sc->hn_rx_sysctl_tree != NULL) {
 			char name[16];
 
 			/*
 			 * Create per RX ring sysctl tree:
 			 * dev.hn.UNIT.rx.RINGID
 			 */
 			snprintf(name, sizeof(name), "%d", i);
 			rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
 			    SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
 			    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
 
 			if (rxr->hn_rx_sysctl_tree != NULL) {
 				SYSCTL_ADD_ULONG(ctx,
 				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
 				    OID_AUTO, "packets",
 				    CTLFLAG_RW | CTLFLAG_STATS, &rxr->hn_pkts,
 				    "# of packets received");
 				SYSCTL_ADD_ULONG(ctx,
 				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
 				    OID_AUTO, "rss_pkts",
 				    CTLFLAG_RW | CTLFLAG_STATS,
 				    &rxr->hn_rss_pkts,
 				    "# of packets w/ RSS info received");
 				SYSCTL_ADD_ULONG(ctx,
 				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
 				    OID_AUTO, "rsc_pkts",
 				    CTLFLAG_RW | CTLFLAG_STATS,
 				    &rxr->hn_rsc_pkts,
 				    "# of RSC packets received");
 				SYSCTL_ADD_ULONG(ctx,
 				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
 				    OID_AUTO, "rsc_drop",
 				    CTLFLAG_RW | CTLFLAG_STATS,
 				    &rxr->hn_rsc_drop,
 				    "# of RSC fragments dropped");
 				SYSCTL_ADD_INT(ctx,
 				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
 				    OID_AUTO, "pktbuf_len", CTLFLAG_RD,
 				    &rxr->hn_pktbuf_len, 0,
 				    "Temporary channel packet buffer length");
 			}
 		}
 	}
 
 	/*
 	 * Device wide RX sysctls.  The handlers that take an
 	 * __offsetof() argument are given the offset of a field inside
 	 * struct hn_rx_ring rather than a pointer to one ring.
 	 */
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
 	    CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS , sc,
 	    __offsetof(struct hn_rx_ring, hn_lro.lro_queued),
 	    hn_rx_stat_u64_sysctl,
 	    "LU", "LRO queued");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
 	    CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS , sc,
 	    __offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
 	    hn_rx_stat_u64_sysctl,
 	    "LU", "LRO flushed");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
 	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS , sc,
 	    __offsetof(struct hn_rx_ring, hn_lro_tried),
 	    hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
 	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
 	    hn_lro_lenlim_sysctl, "IU",
 	    "Max # of data bytes to be aggregated by LRO");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
 	    hn_lro_ackcnt_sysctl, "I",
 	    "Max # of ACKs to be aggregated by LRO");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP,
 	    hn_trust_hcsum_sysctl, "I",
 	    "Trust tcp segment verification on host side, "
 	    "when csum info is missing");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP,
 	    hn_trust_hcsum_sysctl, "I",
 	    "Trust udp datagram verification on host side, "
 	    "when csum info is missing");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip",
 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP,
 	    hn_trust_hcsum_sysctl, "I",
 	    "Trust ip packet verification on host side, "
 	    "when csum info is missing");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip",
 	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS , sc,
 	    __offsetof(struct hn_rx_ring, hn_csum_ip),
 	    hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp",
 	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS , sc,
 	    __offsetof(struct hn_rx_ring, hn_csum_tcp),
 	    hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp",
 	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS , sc,
 	    __offsetof(struct hn_rx_ring, hn_csum_udp),
 	    hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted",
 	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
 	    __offsetof(struct hn_rx_ring, hn_csum_trusted),
 	    hn_rx_stat_ulong_sysctl, "LU",
 	    "# of packets that we trust host's csum verification");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts",
 	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS , sc,
 	    __offsetof(struct hn_rx_ring, hn_small_pkts),
 	    hn_rx_stat_ulong_sysctl, "LU", "# of small packets received");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed",
 	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS , sc,
 	    __offsetof(struct hn_rx_ring, hn_ack_failed),
 	    hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures");
 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt",
 	    CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings");
 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse",
 	    CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings");
 
 	return (0);
 }
 
 /*
  * Release the receive side state of the softc.
  *
  * The shared RXBUF and each ring's bufring are only freed when they are
  * not flagged as still referenced (HN_FLAG_RXBUF_REF and
  * HN_RX_FLAG_BR_REF respectively); otherwise a message is logged and
  * the memory is left allocated.  Either way the pointers are cleared.
  * Rings without a bufring are skipped entirely.  Finally the ring array
  * is freed and the ring counters are reset to 0.
  */
 static void
 hn_destroy_rx_data(struct hn_softc *sc)
 {
 	int i;
 
 	if (sc->hn_rxbuf != NULL) {
 		if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0)
 			contigfree(sc->hn_rxbuf, HN_RXBUF_SIZE, M_DEVBUF);
 		else
 			device_printf(sc->hn_dev, "RXBUF is referenced\n");
 		sc->hn_rxbuf = NULL;
 	}
 
 	/* No rings were created, or they are already gone. */
 	if (sc->hn_rx_ring_cnt == 0)
 		return;
 
 	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
 		struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
 
 		if (rxr->hn_br == NULL)
 			continue;
 		if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) {
 			contigfree(rxr->hn_br, HN_TXBR_SIZE + HN_RXBR_SIZE,
 			    M_DEVBUF);
 		} else {
 			/*
 			 * Terminate the message with a newline, like the
 			 * RXBUF message above, so that it does not run
 			 * into the next console line.
 			 */
 			device_printf(sc->hn_dev,
 			    "%dth channel bufring is referenced\n", i);
 		}
 		rxr->hn_br = NULL;
 
 #if defined(INET) || defined(INET6)
 		tcp_lro_free(&rxr->hn_lro);
 #endif
 		free(rxr->hn_pktbuf, M_DEVBUF);
 	}
 	free(sc->hn_rx_ring, M_DEVBUF);
 	sc->hn_rx_ring = NULL;
 
 	sc->hn_rx_ring_cnt = 0;
 	sc->hn_rx_ring_inuse = 0;
 }
 
 /*
  * Initialize TX ring number id of the softc.
  *
  * Sets up the ring locks, the TX descriptor array and its free list (or
  * buf_ring), selects the taskqueue and tasks used for transmission,
  * creates the busdma tags, gives every descriptor a loaded RNDIS packet
  * message buffer and a data DMA map, and finally registers the
  * dev.hn.UNIT.tx.RINGID sysctl nodes.
  *
  * Returns 0 on success or the error reported by the failing busdma
  * call.  On failure only the resources of the descriptor being set up
  * are undone here; everything created earlier is left in place.
  * NOTE(review): presumably the caller cleans up through
  * hn_tx_ring_destroy() -- confirm that it copes with descriptors that
  * were never (or only partially) initialized.
  */
 static int
 hn_tx_ring_create(struct hn_softc *sc, int id)
 {
 	struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
 	device_t dev = sc->hn_dev;
 	bus_dma_tag_t parent_dtag;
 	int error, i;
 
 	txr->hn_sc = sc;
 	txr->hn_tx_idx = id;
 
 #ifndef HN_USE_TXDESC_BUFRING
 	mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
 #endif
 	mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);
 
 	txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
 	txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
 	    M_DEVBUF, M_WAITOK | M_ZERO);
 #ifndef HN_USE_TXDESC_BUFRING
 	SLIST_INIT(&txr->hn_txlist);
 #else
 	txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF,
 	    M_WAITOK, &txr->hn_tx_lock);
 #endif
 
 	/*
 	 * Either borrow the vmbus event taskqueue of the CPU this ring
 	 * maps to, or pick one of the driver's own TX taskqueues.
 	 */
 	if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) {
 		txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ(
 		    device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id));
 	} else {
 		txr->hn_tx_taskq = sc->hn_tx_taskqs[id % hn_tx_taskq_cnt];
 	}
 
 	/* Hook up the if_start or the if_transmit flavor of the TX path. */
 #ifdef HN_IFSTART_SUPPORT
 	if (hn_use_if_start) {
 		txr->hn_txeof = hn_start_txeof;
 		TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr);
 		TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr);
 	} else
 #endif
 	{
 		int br_depth;
 
 		txr->hn_txeof = hn_xmit_txeof;
 		TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr);
 		TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr);
 
 		/* Only this flavor queues mbufs in a per-ring buf_ring. */
 		br_depth = hn_get_txswq_depth(txr);
 		txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF,
 		    M_WAITOK, &txr->hn_tx_lock);
 	}
 
 	txr->hn_direct_tx_size = hn_direct_tx_size;
 
 	/*
 	 * Always schedule transmission instead of trying to do direct
 	 * transmission.  This one gives the best performance so far.
 	 */
 	txr->hn_sched_tx = 1;
 
 	parent_dtag = bus_get_dma_tag(dev);
 
 	/* DMA tag for RNDIS packet messages. */
 	error = bus_dma_tag_create(parent_dtag, /* parent */
 	    HN_RNDIS_PKT_ALIGN,		/* alignment */
 	    HN_RNDIS_PKT_BOUNDARY,	/* boundary */
 	    BUS_SPACE_MAXADDR,		/* lowaddr */
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filter, filterarg */
 	    HN_RNDIS_PKT_LEN,		/* maxsize */
 	    1,				/* nsegments */
 	    HN_RNDIS_PKT_LEN,		/* maxsegsize */
 	    0,				/* flags */
 	    NULL,			/* lockfunc */
 	    NULL,			/* lockfuncarg */
 	    &txr->hn_tx_rndis_dtag);
 	if (error) {
 		device_printf(dev, "failed to create rndis dmatag\n");
 		return error;
 	}
 
 	/* DMA tag for data. */
 	error = bus_dma_tag_create(parent_dtag, /* parent */
 	    1,				/* alignment */
 	    HN_TX_DATA_BOUNDARY,	/* boundary */
 	    BUS_SPACE_MAXADDR,		/* lowaddr */
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filter, filterarg */
 	    HN_TX_DATA_MAXSIZE,		/* maxsize */
 	    HN_TX_DATA_SEGCNT_MAX,	/* nsegments */
 	    HN_TX_DATA_SEGSIZE,		/* maxsegsize */
 	    0,				/* flags */
 	    NULL,			/* lockfunc */
 	    NULL,			/* lockfuncarg */
 	    &txr->hn_tx_data_dtag);
 	if (error) {
 		device_printf(dev, "failed to create data dmatag\n");
 		return error;
 	}
 
 	for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
 		struct hn_txdesc *txd = &txr->hn_txdesc[i];
 
 		txd->txr = txr;
 		txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
 		STAILQ_INIT(&txd->agg_list);
 
 		/*
 		 * Allocate and load RNDIS packet message.
 		 */
         	error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
 		    (void **)&txd->rndis_pkt,
 		    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
 		    &txd->rndis_pkt_dmap);
 		if (error) {
 			device_printf(dev,
 			    "failed to allocate rndis_packet_msg, %d\n", i);
 			return error;
 		}
 
 		error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
 		    txd->rndis_pkt_dmap,
 		    txd->rndis_pkt, HN_RNDIS_PKT_LEN,
 		    hyperv_dma_map_paddr, &txd->rndis_pkt_paddr,
 		    BUS_DMA_NOWAIT);
 		if (error) {
 			device_printf(dev,
 			    "failed to load rndis_packet_msg, %d\n", i);
 			/* Undo the allocation made for this descriptor. */
 			bus_dmamem_free(txr->hn_tx_rndis_dtag,
 			    txd->rndis_pkt, txd->rndis_pkt_dmap);
 			return error;
 		}
 
 		/* DMA map for TX data. */
 		error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
 		    &txd->data_dmap);
 		if (error) {
 			device_printf(dev,
 			    "failed to allocate tx data dmamap\n");
 			/* Undo the load and allocation of this descriptor. */
 			bus_dmamap_unload(txr->hn_tx_rndis_dtag,
 			    txd->rndis_pkt_dmap);
 			bus_dmamem_free(txr->hn_tx_rndis_dtag,
 			    txd->rndis_pkt, txd->rndis_pkt_dmap);
 			return error;
 		}
 
 		/* All set, put it to list */
 		txd->flags |= HN_TXD_FLAG_ONLIST;
 #ifndef HN_USE_TXDESC_BUFRING
 		SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
 #else
 		buf_ring_enqueue(txr->hn_txdesc_br, txd);
 #endif
 	}
 	/* Every descriptor is free at this point. */
 	txr->hn_txdesc_avail = txr->hn_txdesc_cnt;
 
 	if (sc->hn_tx_sysctl_tree != NULL) {
 		struct sysctl_oid_list *child;
 		struct sysctl_ctx_list *ctx;
 		char name[16];
 
 		/*
 		 * Create per TX ring sysctl tree:
 		 * dev.hn.UNIT.tx.RINGID
 		 */
 		ctx = device_get_sysctl_ctx(dev);
 		child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);
 
 		snprintf(name, sizeof(name), "%d", id);
 		txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
 		    name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
 
 		if (txr->hn_tx_sysctl_tree != NULL) {
 			child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
 
 #ifdef HN_DEBUG
 			SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
 			    CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
 			    "# of available TX descs");
 #endif
 			/* "oactive" is not exported in if_start mode. */
 #ifdef HN_IFSTART_SUPPORT
 			if (!hn_use_if_start)
 #endif
 			{
 				SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive",
 				    CTLFLAG_RD, &txr->hn_oactive, 0,
 				    "over active");
 			}
 			SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
 			    CTLFLAG_RW | CTLFLAG_STATS, &txr->hn_pkts,
 			    "# of packets transmitted");
 			SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends",
 			    CTLFLAG_RW | CTLFLAG_STATS, &txr->hn_sends,
 			    "# of sends");
 		}
 	}
 
 	return 0;
 }
 
 /*
  * Release the busdma resources of one TX descriptor: unload and free
  * the RNDIS packet message memory, then destroy the data DMA map.  The
  * descriptor must neither carry an mbuf nor be DMA mapped any more.
  */
 static void
 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd)
 {
 	struct hn_tx_ring *ring;
 
 	KASSERT(txd->m == NULL, ("still has mbuf installed"));
 	KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped"));
 
 	ring = txd->txr;
 
 	/* RNDIS packet message: unload first, then give the memory back. */
 	bus_dmamap_unload(ring->hn_tx_rndis_dtag, txd->rndis_pkt_dmap);
 	bus_dmamem_free(ring->hn_tx_rndis_dtag, txd->rndis_pkt,
 	    txd->rndis_pkt_dmap);
 
 	/* Map used for the TX data. */
 	bus_dmamap_destroy(ring->hn_tx_data_dtag, txd->data_dmap);
 }
 
 /*
  * Drop the reference of a TX descriptor that is still pending.
  * Descriptors without a reference are left alone, and so are the ones
  * flagged HN_TXD_FLAG_ONAGG, since those get freed by the descriptor
  * that aggregates them.
  */
 static void
 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd)
 {
 	int freed __diagused;
 
 	KASSERT(txd->refs == 0 || txd->refs == 1,
 	    ("invalid txd refs %d", txd->refs));
 
 	if (txd->refs <= 0 || (txd->flags & HN_TXD_FLAG_ONAGG) != 0)
 		return;
 
 	freed = hn_txdesc_put(txr, txd);
 	KASSERT(freed, ("can't free txdesc"));
 }
 
 /*
  * Tear down one TX ring: reclaim pending descriptors, release their
  * busdma resources, destroy the DMA tags, free the descriptor array
  * and the buf_ring(s), and destroy the ring locks.  A ring without a
  * descriptor array is left untouched.
  */
 static void
 hn_tx_ring_destroy(struct hn_tx_ring *txr)
 {
 	int idx;
 
 	if (txr->hn_txdesc == NULL)
 		return;
 
 	/*
 	 * NOTE:
 	 * Two separate passes are required, because an aggregated txd
 	 * is only freed once its aggregating txd is freed:
 	 * - Pass one GCes any txd that is still pending.  This cannot
 	 *   be skipped; if the channels are revoked, the hypervisor
 	 *   does not deliver send-done for all pending txds.
 	 * - Pass two releases the busdma resources, i.e. after all
 	 *   txds were freed.
 	 */
 	for (idx = 0; idx < txr->hn_txdesc_cnt; ++idx) {
 		hn_txdesc_gc(txr, &txr->hn_txdesc[idx]);
 	}
 	for (idx = 0; idx < txr->hn_txdesc_cnt; ++idx) {
 		hn_txdesc_dmamap_destroy(&txr->hn_txdesc[idx]);
 	}
 
 	if (txr->hn_tx_data_dtag != NULL) {
 		bus_dma_tag_destroy(txr->hn_tx_data_dtag);
 	}
 	if (txr->hn_tx_rndis_dtag != NULL) {
 		bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);
 	}
 
 #ifdef HN_USE_TXDESC_BUFRING
 	buf_ring_free(txr->hn_txdesc_br, M_DEVBUF);
 #endif
 
 	free(txr->hn_txdesc, M_DEVBUF);
 	txr->hn_txdesc = NULL;
 
 	if (txr->hn_mbuf_br != NULL) {
 		buf_ring_free(txr->hn_mbuf_br, M_DEVBUF);
 	}
 
 #ifndef HN_USE_TXDESC_BUFRING
 	mtx_destroy(&txr->hn_txlist_spin);
 #endif
 	mtx_destroy(&txr->hn_tx_lock);
 }
 
 /*
  * Allocate and initialize the transmit side state for ring_cnt TX
  * rings.
  *
  * This allocates the chimney sending buffer shared by all channels and
  * the hn_tx_ring array, creates every ring through hn_tx_ring_create()
  * and registers the dev.hn.UNIT.tx sysctl tree plus the aggregate TX
  * sysctls.
  *
  * Returns 0 on success, ENOMEM when the chimney buffer cannot be
  * allocated, or the error of the first ring that failed to be created.
  * Nothing that was already set up is released here on failure.
  * NOTE(review): presumably the caller tears down via
  * hn_destroy_tx_data() on error -- confirm.
  */
 static int
 hn_create_tx_data(struct hn_softc *sc, int ring_cnt)
 {
 	struct sysctl_oid_list *child;
 	struct sysctl_ctx_list *ctx;
 	int i;
 
 	/*
 	 * Create TXBUF for chimney sending.
 	 *
 	 * NOTE: It is shared by all channels.
 	 */
 	sc->hn_chim = contigmalloc(HN_CHIM_SIZE, M_DEVBUF, M_WAITOK | M_ZERO,
 	    0ul, ~0ul, PAGE_SIZE, 0);
 	if (sc->hn_chim == NULL) {
 		device_printf(sc->hn_dev, "allocate txbuf failed\n");
 		return (ENOMEM);
 	}
 
 	/* All created rings start out as in use. */
 	sc->hn_tx_ring_cnt = ring_cnt;
 	sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
 
 	sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
 	    M_DEVBUF, M_WAITOK | M_ZERO);
 
 	ctx = device_get_sysctl_ctx(sc->hn_dev);
 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));
 
 	/* Create dev.hn.UNIT.tx sysctl tree */
 	sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
 	    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
 
 	/* Stop at the first ring that cannot be created. */
 	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
 		int error;
 
 		error = hn_tx_ring_create(sc, i);
 		if (error)
 			return error;
 	}
 
 	/*
 	 * Device wide TX sysctls.  The handlers that take an
 	 * __offsetof() argument are given the offset of a field inside
 	 * struct hn_tx_ring rather than a pointer to one ring.
 	 */
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
 	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS, sc,
 	    __offsetof(struct hn_tx_ring, hn_no_txdescs),
 	    hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
 	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS, sc,
 	    __offsetof(struct hn_tx_ring, hn_send_failed),
 	    hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
 	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS, sc,
 	    __offsetof(struct hn_tx_ring, hn_txdma_failed),
 	    hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed",
 	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS, sc,
 	    __offsetof(struct hn_tx_ring, hn_flush_failed),
 	    hn_tx_stat_ulong_sysctl, "LU",
 	    "# of packet transmission aggregation flush failure");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
 	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS, sc,
 	    __offsetof(struct hn_tx_ring, hn_tx_collapsed),
 	    hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
 	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS, sc,
 	    __offsetof(struct hn_tx_ring, hn_tx_chimney),
 	    hn_tx_stat_ulong_sysctl, "LU", "# of chimney send");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried",
 	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE | CTLFLAG_STATS, sc,
 	    __offsetof(struct hn_tx_ring, hn_tx_chimney_tried),
 	    hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries");
 	/* Exported from ring 0 only. */
 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
 	    CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
 	    "# of total TX descs");
 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
 	    CTLFLAG_RD, &sc->hn_chim_szmax, 0,
 	    "Chimney send packet size upper boundary");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
 	    hn_chim_size_sysctl, "I", "Chimney send packet size limit");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
 	    __offsetof(struct hn_tx_ring, hn_direct_tx_size),
 	    hn_tx_conf_int_sysctl, "I",
 	    "Size of the packet for direct transmission");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
 	    __offsetof(struct hn_tx_ring, hn_sched_tx),
 	    hn_tx_conf_int_sysctl, "I",
 	    "Always schedule transmission "
 	    "instead of doing direct transmission");
 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt",
 	    CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings");
 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse",
 	    CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings");
 	/* Exported from ring 0 only. */
 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax",
 	    CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0,
 	    "Applied packet transmission aggregation size");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax",
 	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
 	    hn_txagg_pktmax_sysctl, "I",
 	    "Applied packet transmission aggregation packets");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align",
 	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
 	    hn_txagg_align_sysctl, "I",
 	    "Applied packet transmission aggregation alignment");
 
 	return 0;
 }
 
 static void
 hn_set_chim_size(struct hn_softc *sc, int chim_size)
 {
 	int i;
 
 	for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
 		sc->hn_tx_ring[i].hn_chim_size = chim_size;
 }
 
 /*
  * Program the interface's maximum TSO size.
  *
  * The requested tso_maxlen is clamped to the range
  * [hn_ndis_tso_sgmin * mtu, IP_MAXPACKET], capped by hn_ndis_tso_szmax,
  * and reduced by the size of an Ethernet header plus VLAN
  * encapsulation.  When hn_xpnt_vf_isready() says so, the result is
  * additionally limited to the VF interface's own maximum.  Nothing
  * happens when the interface has neither TSO4 nor TSO6 capability.
  *
  * The softc lock must be held.
  */
 static void
 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu)
 {
 	if_t ifp = sc->hn_ifp;
 	int limit, floor;
 	u_int hw_max;
 
 	HN_LOCK_ASSERT(sc);
 
 	if ((if_getcapabilities(ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) == 0)
 		return;
 
 	KASSERT(sc->hn_ndis_tso_sgmin >= 2,
 	    ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin));
 	floor = sc->hn_ndis_tso_sgmin * mtu;
 
 	KASSERT(sc->hn_ndis_tso_szmax >= floor &&
 	    sc->hn_ndis_tso_szmax <= IP_MAXPACKET,
 	    ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax));
 
 	/* Clamp the request, then apply the NDIS imposed ceiling. */
 	limit = tso_maxlen;
 	if (limit < floor) {
 		limit = floor;
 	} else if (limit > IP_MAXPACKET) {
 		limit = IP_MAXPACKET;
 	}
 	if (limit > sc->hn_ndis_tso_szmax) {
 		limit = sc->hn_ndis_tso_szmax;
 	}
 	hw_max = limit - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
 
 	/* Never exceed what the VF interface advertises. */
 	if (hn_xpnt_vf_isready(sc)) {
 		if (hw_max > if_gethwtsomax(sc->hn_vf_ifp)) {
 			hw_max = if_gethwtsomax(sc->hn_vf_ifp);
 		}
 	}
 
 	if_sethwtsomax(ifp, hw_max);
 	if (bootverbose) {
 		if_printf(ifp, "TSO size max %u\n", if_gethwtsomax(ifp));
 	}
 }
 
 /*
  * Apply the device capabilities (hn_caps) and tunables to all TX
  * rings: the chimney size, the set of checksum offloads the rings may
  * use, and the HASHVAL pktinfo flag.
  */
 static void
 hn_fixup_tx_data(struct hn_softc *sc)
 {
 	uint64_t assist = 0;
 	int ring;
 
 	/*
 	 * Start from the maximum, then apply the tunable when it is a
 	 * positive value below that maximum.
 	 */
 	hn_set_chim_size(sc, sc->hn_chim_szmax);
 	if (hn_tx_chimney_size > 0 && hn_tx_chimney_size < sc->hn_chim_szmax) {
 		hn_set_chim_size(sc, hn_tx_chimney_size);
 	}
 
 	/* UDP checksum offload additionally depends on its tunable. */
 	if (sc->hn_caps & HN_CAP_IPCS) {
 		assist |= CSUM_IP;
 	}
 	if (sc->hn_caps & HN_CAP_TCP4CS) {
 		assist |= CSUM_IP_TCP;
 	}
 	if ((sc->hn_caps & HN_CAP_UDP4CS) && hn_enable_udp4cs) {
 		assist |= CSUM_IP_UDP;
 	}
 	if (sc->hn_caps & HN_CAP_TCP6CS) {
 		assist |= CSUM_IP6_TCP;
 	}
 	if ((sc->hn_caps & HN_CAP_UDP6CS) && hn_enable_udp6cs) {
 		assist |= CSUM_IP6_UDP;
 	}
 	for (ring = 0; ring < sc->hn_tx_ring_cnt; ++ring) {
 		sc->hn_tx_ring[ring].hn_csum_assist = assist;
 	}
 
 	if ((sc->hn_caps & HN_CAP_HASHVAL) == 0)
 		return;
 
 	/*
 	 * Support HASHVAL pktinfo on TX path.
 	 */
 	if (bootverbose) {
 		if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n");
 	}
 	for (ring = 0; ring < sc->hn_tx_ring_cnt; ++ring) {
 		sc->hn_tx_ring[ring].hn_tx_flags |= HN_TX_FLAG_HASHVAL;
 	}
 }
 
 /*
  * Set HN_RX_FLAG_UDP_HASH on every RX ring when the device has the
  * HN_CAP_UDPHASH capability.
  */
 static void
 hn_fixup_rx_data(struct hn_softc *sc)
 {
 	int ring;
 
 	if ((sc->hn_caps & HN_CAP_UDPHASH) == 0)
 		return;
 
 	for (ring = 0; ring < sc->hn_rx_ring_cnt; ++ring) {
 		sc->hn_rx_ring[ring].hn_rx_flags |= HN_RX_FLAG_UDP_HASH;
 	}
 }
 
 /*
  * Release the transmit side state of the softc.
  *
  * The chimney sending buffer is only freed when it is not flagged as
  * still referenced (HN_FLAG_CHIM_REF); otherwise a message is logged
  * and the memory is left allocated.  Either way the pointer is cleared.
  * Afterwards every TX ring is destroyed, the ring array is freed and
  * the ring counters are reset to 0.
  */
 static void
 hn_destroy_tx_data(struct hn_softc *sc)
 {
 	int i;
 
 	if (sc->hn_chim != NULL) {
 		if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) {
 			contigfree(sc->hn_chim, HN_CHIM_SIZE, M_DEVBUF);
 		} else {
 			/*
 			 * Terminate the message with a newline so that
 			 * it does not run into the next console line.
 			 */
 			device_printf(sc->hn_dev,
 			    "chimney sending buffer is referenced\n");
 		}
 		sc->hn_chim = NULL;
 	}
 
 	/* No rings were created, or they are already gone. */
 	if (sc->hn_tx_ring_cnt == 0)
 		return;
 
 	for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
 		hn_tx_ring_destroy(&sc->hn_tx_ring[i]);
 
 	free(sc->hn_tx_ring, M_DEVBUF);
 	sc->hn_tx_ring = NULL;
 
 	sc->hn_tx_ring_cnt = 0;
 	sc->hn_tx_ring_inuse = 0;
 }
 
 #ifdef HN_IFSTART_SUPPORT
 
 /*
  * Task handler: run the if_start transmit loop for the given TX ring
  * with the ring lock held and without a packet size limit.
  */
 static void
 hn_start_taskfunc(void *xtxr, int pending __unused)
 {
 	struct hn_tx_ring *ring;
 
 	ring = xtxr;
 	mtx_lock(&ring->hn_tx_lock);
 	hn_start_locked(ring, 0);
 	mtx_unlock(&ring->hn_tx_lock);
 }
 
 /*
  * Transmit loop of the if_start flavor; drains the interface send
  * queue through the first TX ring.
  *
  * The caller must hold txr->hn_tx_lock.  When len is greater than 0,
  * the loop stops at the first packet longer than len, puts that packet
  * back at the head of the send queue and returns 1, so that the caller
  * can hand the work to the TX taskqueue.  In every other case 0 is
  * returned.
  *
  * Nothing is sent while the ring is suspended, or unless the interface
  * is running and not marked IFF_DRV_OACTIVE.
  */
 static int
 hn_start_locked(struct hn_tx_ring *txr, int len)
 {
 	struct hn_softc *sc = txr->hn_sc;
 	if_t ifp = sc->hn_ifp;
 	int sched = 0;
 
 	KASSERT(hn_use_if_start,
 	    ("hn_start_locked is called, when if_start is disabled"));
 	KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
 	mtx_assert(&txr->hn_tx_lock, MA_OWNED);
 	KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
 
 	if (__predict_false(txr->hn_suspended))
 		return (0);
 
 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
 	    IFF_DRV_RUNNING)
 		return (0);
 
 	while (!if_sendq_empty(ifp)) {
 		struct hn_txdesc *txd;
 		struct mbuf *m_head;
 		int error;
 
 		m_head = if_dequeue(ifp);
 		if (m_head == NULL)
 			break;
 
 		if (len > 0 && m_head->m_pkthdr.len > len) {
 			/*
 			 * This sending could be time consuming; let callers
 			 * dispatch this packet sending (and sending of any
 			 * following up packets) to tx taskqueue.
 			 */
 			if_sendq_prepend(ifp, m_head);
 			sched = 1;
 			break;
 		}
 
 #if defined(INET6) || defined(INET)
 		/*
 		 * A NULL return from either fixup is counted as an output
 		 * error and the loop moves on to the next packet.
 		 */
 		if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
 			m_head = hn_tso_fixup(m_head);
 			if (__predict_false(m_head == NULL)) {
 				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 				continue;
 			}
 		} else if (m_head->m_pkthdr.csum_flags &
 		    (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) {
 			m_head = hn_set_hlen(m_head);
 			if (__predict_false(m_head == NULL)) {
 				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 				continue;
 			}
 		}
 #endif
 
 		txd = hn_txdesc_get(txr);
 		if (txd == NULL) {
 			/*
 			 * Out of descriptors: requeue the packet and mark
 			 * the interface as output active.
 			 */
 			txr->hn_no_txdescs++;
 			if_sendq_prepend(ifp, m_head);
 			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
 			break;
 		}
 
 		error = hn_encap(ifp, txr, txd, &m_head);
 		if (error) {
 			/* Both txd and m_head are freed */
 			KASSERT(txr->hn_agg_txd == NULL,
 			    ("encap failed w/ pending aggregating txdesc"));
 			continue;
 		}
 
 		/* No room for further aggregation; send what we have. */
 		if (txr->hn_agg_pktleft == 0) {
 			if (txr->hn_agg_txd != NULL) {
 				KASSERT(m_head == NULL,
 				    ("pending mbuf for aggregating txdesc"));
 				error = hn_flush_txagg(ifp, txr);
 				if (__predict_false(error)) {
 					if_setdrvflagbits(ifp,
 					    IFF_DRV_OACTIVE, 0);
 					break;
 				}
 			} else {
 				KASSERT(m_head != NULL, ("mbuf was freed"));
 				error = hn_txpkt(ifp, txr, txd);
 				if (__predict_false(error)) {
 					/* txd is freed, but m_head is not */
 					if_sendq_prepend(ifp, m_head);
 					if_setdrvflagbits(ifp,
 					    IFF_DRV_OACTIVE, 0);
 					break;
 				}
 			}
 		}
 #ifdef INVARIANTS
 		else {
 			KASSERT(txr->hn_agg_txd != NULL,
 			    ("no aggregating txdesc"));
 			KASSERT(m_head == NULL,
 			    ("pending mbuf for aggregating txdesc"));
 		}
 #endif
 	}
 
 	/* Flush pending aggregated transmission. */
 	if (txr->hn_agg_txd != NULL)
 		hn_flush_txagg(ifp, txr);
 	return (sched);
 }
 
 /*
  * if_start entry point.  Unless the first TX ring is configured to
  * always schedule, try to transmit directly when the ring lock can be
  * taken without blocking.  The TX task is enqueued when direct
  * transmission was not attempted or when it asked for rescheduling.
  */
 static void
 hn_start(if_t ifp)
 {
 	struct hn_softc *sc = if_getsoftc(ifp);
 	struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
 
 	if (!txr->hn_sched_tx && mtx_trylock(&txr->hn_tx_lock)) {
 		int resched;
 
 		resched = hn_start_locked(txr, txr->hn_direct_tx_size);
 		mtx_unlock(&txr->hn_tx_lock);
 		if (!resched)
 			return;
 	}
 
 	taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
 }
 
 /*
  * Task handler: with the ring lock held, clear IFF_DRV_OACTIVE on the
  * interface and run the if_start transmit loop without a packet size
  * limit.
  */
 static void
 hn_start_txeof_taskfunc(void *xtxr, int pending __unused)
 {
 	struct hn_tx_ring *ring;
 
 	ring = xtxr;
 	mtx_lock(&ring->hn_tx_lock);
 	if_setdrvflagbits(ring->hn_sc->hn_ifp, 0, IFF_DRV_OACTIVE);
 	hn_start_locked(ring, 0);
 	mtx_unlock(&ring->hn_tx_lock);
 }
 
 /*
  * TX completion hook of the if_start flavor (first TX ring only).
  * Clears IFF_DRV_OACTIVE and restarts transmission, either directly
  * when that is allowed and the ring lock is free, or through the txeof
  * task otherwise.
  */
 static void
 hn_start_txeof(struct hn_tx_ring *txr)
 {
 	struct hn_softc *sc = txr->hn_sc;
 	if_t ifp = sc->hn_ifp;
 
 	KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
 
 	if (!txr->hn_sched_tx && mtx_trylock(&txr->hn_tx_lock)) {
 		int resched;
 
 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
 		resched = hn_start_locked(txr, txr->hn_direct_tx_size);
 		mtx_unlock(&txr->hn_tx_lock);
 		if (resched) {
 			taskqueue_enqueue(txr->hn_tx_taskq,
 			    &txr->hn_tx_task);
 		}
 		return;
 	}
 
 	/*
 	 * Release the OACTIVE earlier, with the hope, that
 	 * others could catch up.  The task will clear the
 	 * flag again with the hn_tx_lock to avoid possible
 	 * races.
 	 */
 	if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
 	taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
 }
 
 #endif	/* HN_IFSTART_SUPPORT */
 
 /*
  * Send the mbufs queued on txr->hn_mbuf_br.
  *
  * Must be called with txr->hn_tx_lock held.  Does nothing (returns 0)
  * when the ring is suspended, the interface is not running, or the ring
  * is marked oactive.
  *
  * len: when > 0, a packet longer than this is not sent here; it is put
  *      back and the function returns 1 so that the caller dispatches the
  *      sending to the TX taskqueue.  0 means no limit.
  *
  * Returns 1 if the caller should schedule the TX task, 0 otherwise.
  */
 static int
 hn_xmit(struct hn_tx_ring *txr, int len)
 {
 	struct hn_softc *sc = txr->hn_sc;
 	if_t ifp = sc->hn_ifp;
 	struct mbuf *m_head;
 	int sched = 0;
 
 	mtx_assert(&txr->hn_tx_lock, MA_OWNED);
 #ifdef HN_IFSTART_SUPPORT
 	KASSERT(hn_use_if_start == 0,
 	    ("hn_xmit is called, when if_start is enabled"));
 #endif
 	KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
 
 	if (__predict_false(txr->hn_suspended))
 		return (0);
 
 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
 		return (0);
 
 	/*
 	 * Each peeked mbuf must be resolved by exactly one of
 	 * drbr_putback() (retry later) or drbr_advance() (consumed).
 	 */
 	while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
 		struct hn_txdesc *txd;
 		int error;
 
 		if (len > 0 && m_head->m_pkthdr.len > len) {
 			/*
 			 * This sending could be time consuming; let callers
 			 * dispatch this packet sending (and sending of any
 			 * following up packets) to tx taskqueue.
 			 */
 			drbr_putback(ifp, txr->hn_mbuf_br, m_head);
 			sched = 1;
 			break;
 		}
 
 		txd = hn_txdesc_get(txr);
 		if (txd == NULL) {
 			/* Out of TX descriptors: mark the ring oactive. */
 			txr->hn_no_txdescs++;
 			drbr_putback(ifp, txr->hn_mbuf_br, m_head);
 			txr->hn_oactive = 1;
 			break;
 		}
 
 		error = hn_encap(ifp, txr, txd, &m_head);
 		if (error) {
 			/* Both txd and m_head are freed; discard */
 			KASSERT(txr->hn_agg_txd == NULL,
 			    ("encap failed w/ pending aggregating txdesc"));
 			drbr_advance(ifp, txr->hn_mbuf_br);
 			continue;
 		}
 
 		if (txr->hn_agg_pktleft == 0) {
 			if (txr->hn_agg_txd != NULL) {
 				/* Aggregation is full; flush it now. */
 				KASSERT(m_head == NULL,
 				    ("pending mbuf for aggregating txdesc"));
 				error = hn_flush_txagg(ifp, txr);
 				if (__predict_false(error)) {
 					txr->hn_oactive = 1;
 					break;
 				}
 			} else {
 				/* Not aggregating; send this packet alone. */
 				KASSERT(m_head != NULL, ("mbuf was freed"));
 				error = hn_txpkt(ifp, txr, txd);
 				if (__predict_false(error)) {
 					/* txd is freed, but m_head is not */
 					drbr_putback(ifp, txr->hn_mbuf_br,
 					    m_head);
 					txr->hn_oactive = 1;
 					break;
 				}
 			}
 		}
 #ifdef INVARIANTS
 		else {
 			KASSERT(txr->hn_agg_txd != NULL,
 			    ("no aggregating txdesc"));
 			KASSERT(m_head == NULL,
 			    ("pending mbuf for aggregating txdesc"));
 		}
 #endif
 
 		/* Sent */
 		drbr_advance(ifp, txr->hn_mbuf_br);
 	}
 
 	/* Flush pending aggregated transmission. */
 	if (txr->hn_agg_txd != NULL)
 		hn_flush_txagg(ifp, txr);
 	return (sched);
 }
 
 /*
  * if_transmit entry point.
  *
  * When HN_XVFFLAG_ENABLED is set, the mbuf is handed to sc->hn_vf_ifp
  * via if_transmit() under the hn_vf_lock read lock, with BPF tapping
  * and output counters maintained on this interface; the result of that
  * if_transmit() is returned.
  *
  * Otherwise the packet headers are fixed up (TSO / header lengths), a
  * TX ring is selected from the mbuf's flow hash, the mbuf is enqueued
  * on that ring's drbr, and the ring is either drained directly or the
  * TX task is scheduled.
  *
  * Returns 0 on success, EIO if a fixup helper returned NULL, or the
  * error from drbr_enqueue().
  */
 static int
 hn_transmit(if_t ifp, struct mbuf *m)
 {
 	struct hn_softc *sc = if_getsoftc(ifp);
 	struct hn_tx_ring *txr;
 	int error, idx = 0;
 
 	/* Unlocked check first; re-checked below under the read lock. */
 	if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) {
 		struct rm_priotracker pt;
 
 		rm_rlock(&sc->hn_vf_lock, &pt);
 		if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
 			struct mbuf *m_bpf = NULL;
 			int obytes, omcast;
 
 			/* Sample before if_transmit(); m is not used after. */
 			obytes = m->m_pkthdr.len;
 			omcast = (m->m_flags & M_MCAST) != 0;
 
 			if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) {
-				if (bpf_peers_present(if_getbpf(ifp))) {
+				if (bpf_peers_present_if(ifp)) {
 					m_bpf = m_copypacket(m, M_NOWAIT);
 					if (m_bpf == NULL) {
 						/*
 						 * Failed to grab a shallow
 						 * copy; tap now.
 						 */
 						ETHER_BPF_MTAP(ifp, m);
 					}
 				}
 			} else {
 				ETHER_BPF_MTAP(ifp, m);
 			}
 
 			error = if_transmit(sc->hn_vf_ifp, m);
 			rm_runlock(&sc->hn_vf_lock, &pt);
 
 			/* Deferred tap: only packets that were sent. */
 			if (m_bpf != NULL) {
 				if (!error)
 					ETHER_BPF_MTAP(ifp, m_bpf);
 				m_freem(m_bpf);
 			}
 
 			if (error == ENOBUFS) {
 				if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
 			} else if (error) {
 				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 			} else {
 				if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 				if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes);
 				if (omcast) {
 					if_inc_counter(ifp, IFCOUNTER_OMCASTS,
 					    omcast);
 				}
 			}
 			return (error);
 		}
 		rm_runlock(&sc->hn_vf_lock, &pt);
 	}
 
 #if defined(INET6) || defined(INET)
 	/*
 	 * Perform TSO packet header fixup or get l2/l3 header length now,
 	 * since packet headers should be cache-hot.
 	 */
 	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
 		m = hn_tso_fixup(m);
 		if (__predict_false(m == NULL)) {
 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 			return EIO;
 		}
 	} else if (m->m_pkthdr.csum_flags &
 	    (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) {
 		m = hn_set_hlen(m);
 		if (__predict_false(m == NULL)) {
 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 			return EIO;
 		}
 	}
 #endif
 
 	/*
 	 * Select the TX ring based on flowid
 	 */
 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
 #ifdef RSS
 		uint32_t bid;
 
 		if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
 		    &bid) == 0)
 			idx = bid % sc->hn_tx_ring_inuse;
 		else
 #endif
 		{
 #if defined(INET6) || defined(INET)
 			int tcpsyn = 0;
 
 			/* Small non-TSO TCP packets are checked for SYN. */
 			if (m->m_pkthdr.len < 128 &&
 			    (m->m_pkthdr.csum_flags &
 			     (CSUM_IP_TCP | CSUM_IP6_TCP)) &&
 			    (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) {
 				m = hn_check_tcpsyn(m, &tcpsyn);
 				if (__predict_false(m == NULL)) {
 					if_inc_counter(ifp,
 					    IFCOUNTER_OERRORS, 1);
 					return (EIO);
 				}
 			}
 #else
 			const int tcpsyn = 0;
 #endif
 			/* TCP SYN packets always go out on ring 0. */
 			if (tcpsyn)
 				idx = 0;
 			else
 				idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
 		}
 	}
 	txr = &sc->hn_tx_ring[idx];
 
 	error = drbr_enqueue(ifp, txr->hn_mbuf_br, m);
 	if (error) {
 		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
 		return error;
 	}
 
 	/* Ring is oactive; the packet stays queued on the drbr. */
 	if (txr->hn_oactive)
 		return 0;
 
 	if (txr->hn_sched_tx)
 		goto do_sched;
 
 	if (mtx_trylock(&txr->hn_tx_lock)) {
 		int sched;
 
 		sched = hn_xmit(txr, txr->hn_direct_tx_size);
 		mtx_unlock(&txr->hn_tx_lock);
 		if (!sched)
 			return 0;
 	}
 do_sched:
 	taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
 	return 0;
 }
 
 /*
  * Free every mbuf still queued on the ring's buf_ring, holding the
  * ring's TX lock for the duration.
  */
 static void
 hn_tx_ring_qflush(struct hn_tx_ring *txr)
 {
 	struct mbuf *pending;
 
 	mtx_lock(&txr->hn_tx_lock);
 	for (;;) {
 		pending = buf_ring_dequeue_sc(txr->hn_mbuf_br);
 		if (pending == NULL)
 			break;
 		m_freem(pending);
 	}
 	mtx_unlock(&txr->hn_tx_lock);
 }
 
 /*
  * if_qflush entry point: flush every in-use TX ring, then the generic
  * interface queue, and finally the queue of sc->hn_vf_ifp when
  * HN_XVFFLAG_ENABLED is set (checked under the hn_vf_lock read lock).
  */
 static void
 hn_xmit_qflush(if_t ifp)
 {
 	struct hn_softc *sc = if_getsoftc(ifp);
 	struct rm_priotracker pt;
 	int ring = 0;
 
 	while (ring < sc->hn_tx_ring_inuse) {
 		hn_tx_ring_qflush(&sc->hn_tx_ring[ring]);
 		ring++;
 	}
 	if_qflush(ifp);
 
 	rm_rlock(&sc->hn_vf_lock, &pt);
 	if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) != 0)
 		if_qflush(sc->hn_vf_ifp);
 	rm_runlock(&sc->hn_vf_lock, &pt);
 }
 
 /*
  * TX-completion hook for the if_transmit path.
  *
  * Fast path: take the TX lock without blocking, clear oactive and
  * transmit directly; enqueue the TX task only if hn_xmit() requests
  * it.  Slow path: clear oactive and enqueue the txeof task.
  */
 static void
 hn_xmit_txeof(struct hn_tx_ring *txr)
 {
 
 	if (!txr->hn_sched_tx && mtx_trylock(&txr->hn_tx_lock)) {
 		int resched;
 
 		txr->hn_oactive = 0;
 		resched = hn_xmit(txr, txr->hn_direct_tx_size);
 		mtx_unlock(&txr->hn_tx_lock);
 		if (resched)
 			taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
 		return;
 	}
 
 	/*
 	 * Release the oactive earlier, with the hope, that others could
 	 * catch up.  The task will clear the oactive again with the
 	 * hn_tx_lock to avoid possible races.
 	 */
 	txr->hn_oactive = 0;
 	taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
 }
 
 /*
  * Taskqueue handler: drain the ring with the TX lock held and no
  * per-packet size limit.
  */
 static void
 hn_xmit_taskfunc(void *xtxr, int pending __unused)
 {
 	struct hn_tx_ring *ring = xtxr;
 
 	mtx_lock(&ring->hn_tx_lock);
 	(void)hn_xmit(ring, 0);
 	mtx_unlock(&ring->hn_tx_lock);
 }
 
 /*
  * Taskqueue handler: with the TX lock held, clear the ring's oactive
  * state and then drain the ring with no per-packet size limit.
  */
 static void
 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
 {
 	struct hn_tx_ring *ring = xtxr;
 
 	mtx_lock(&ring->hn_tx_lock);
 	ring->hn_oactive = 0;
 	(void)hn_xmit(ring, 0);
 	mtx_unlock(&ring->hn_tx_lock);
 }
 
 /*
  * Attach a vmbus channel: link it to the RX ring (and, if one is in
  * use at that index, the TX ring) selected by the channel's sub-index,
  * bind it to a CPU, and open it on the RX ring's bufring.
  *
  * Returns 0 on success or the error from vmbus_chan_open_br().  On
  * EISCONN the RX ring is flagged HN_RX_FLAG_BR_REF.  The ATTACHED
  * flags are left set on failure.
  */
 static int
 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
 {
 	struct vmbus_chan_br cbr;
 	struct hn_rx_ring *rxr;
 	int idx, error;
 
 	idx = vmbus_chan_subidx(chan);
 
 	/*
 	 * Link this channel to RX/TX ring.
 	 */
 	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
 	    ("invalid channel index %d, should >= 0 && < %d",
 	     idx, sc->hn_rx_ring_inuse));
 	rxr = &sc->hn_rx_ring[idx];
 	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
 	    ("RX ring %d already attached", idx));
 	rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
 	rxr->hn_chan = chan;
 
 	if (bootverbose) {
 		if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
 		    idx, vmbus_chan_id(chan));
 	}
 
 	/* There may be fewer TX rings in use than RX rings. */
 	if (idx < sc->hn_tx_ring_inuse) {
 		struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
 
 		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
 		    ("TX ring %d already attached", idx));
 		txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;
 
 		txr->hn_chan = chan;
 		if (bootverbose) {
 			if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
 			    idx, vmbus_chan_id(chan));
 		}
 	}
 
 	/* Bind this channel to a proper CPU. */
 	vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx));
 
 	/*
 	 * Open this channel
 	 */
 	cbr.cbr = rxr->hn_br;
 	cbr.cbr_paddr = pmap_kextract((vm_offset_t)rxr->hn_br);
 	cbr.cbr_txsz = HN_TXBR_SIZE;
 	cbr.cbr_rxsz = HN_RXBR_SIZE;
 	error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
 	if (error) {
 		if (error == EISCONN) {
 			if_printf(sc->hn_ifp, "bufring is connected after "
 			    "chan%u open failure\n", vmbus_chan_id(chan));
 			rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
 		} else {
 			if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
 			    vmbus_chan_id(chan), error);
 		}
 	}
 	return (error);
 }
 
 /*
  * Detach a vmbus channel: clear the ATTACHED flag on the RX ring (and
  * the TX ring, if one is in use at that index) selected by the
  * channel's sub-index, then close the channel.
  *
  * If closing reports EISCONN the RX ring is flagged HN_RX_FLAG_BR_REF;
  * other close errors are only logged.
  */
 static void
 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
 {
 	struct hn_rx_ring *rxr;
 	int idx, error;
 
 	idx = vmbus_chan_subidx(chan);
 
 	/*
 	 * Unlink this channel from RX/TX ring.
 	 */
 	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
 	    ("invalid channel index %d, should >= 0 && < %d",
 	     idx, sc->hn_rx_ring_inuse));
 	rxr = &sc->hn_rx_ring[idx];
 	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
 	    ("RX ring %d is not attached", idx));
 	rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
 
 	/* There may be fewer TX rings in use than RX rings. */
 	if (idx < sc->hn_tx_ring_inuse) {
 		struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
 
 		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
 		    ("TX ring %d is not attached", idx));
 		txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
 	}
 
 	/*
 	 * Close this channel.
 	 *
 	 * NOTE:
 	 * Channel closing does _not_ destroy the target channel.
 	 */
 	error = vmbus_chan_close_direct(chan);
 	if (error == EISCONN) {
 		if_printf(sc->hn_ifp, "chan%u bufring is connected "
 		    "after being closed\n", vmbus_chan_id(chan));
 		rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
 	} else if (error) {
 		if_printf(sc->hn_ifp, "chan%u close failed: %d\n",
 		    vmbus_chan_id(chan), error);
 	}
 }
 
 /*
  * Attach all (hn_rx_ring_inuse - 1) sub-channels of the primary
  * channel.
  *
  * Every sub-channel is tried even after a failure; the error of the
  * last failing attach is returned, or 0 if all succeeded.
  */
 static int
 hn_attach_subchans(struct hn_softc *sc)
 {
 	struct vmbus_channel **subchans;
 	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
 	int error = 0;
 	int n;
 
 	KASSERT(subchan_cnt > 0, ("no sub-channels"));
 
 	/* Attach the sub-channels. */
 	subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
 	for (n = 0; n < subchan_cnt; n++) {
 		const int rc = hn_chan_attach(sc, subchans[n]);
 
 		/* Move on; all channels will be detached later. */
 		if (rc != 0)
 			error = rc;
 	}
 	vmbus_subchan_rel(subchans, subchan_cnt);
 
 	if (error != 0) {
 		if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
 	} else if (bootverbose) {
 		if_printf(sc->hn_ifp, "%d sub-channels attached\n",
 		    subchan_cnt);
 	}
 	return (error);
 }
 
 /*
  * Detach every channel: the sub-channels first (if any), then the
  * primary channel, and finally wait for the sub-channels to drain.
  */
 static void
 hn_detach_allchans(struct hn_softc *sc)
 {
 	struct vmbus_channel **subchans;
 	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
 	int i;
 
 	if (subchan_cnt != 0) {
 		/* Detach the sub-channels. */
 		subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
 		for (i = 0; i < subchan_cnt; ++i)
 			hn_chan_detach(sc, subchans[i]);
 		vmbus_subchan_rel(subchans, subchan_cnt);
 	}
 
 	/*
 	 * The primary channel goes last, _after_ all sub-channels
 	 * are detached.
 	 */
 	hn_chan_detach(sc, sc->hn_prichan);
 
 	/* Wait for sub-channels to be destroyed, if any. */
 	vmbus_subchan_drain(sc->hn_prichan);
 
 #ifdef INVARIANTS
 	/* No ring, in use or not, may remain marked as attached. */
 	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
 		KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
 		    HN_RX_FLAG_ATTACHED) == 0,
 		    ("%dth RX ring is still attached", i));
 	}
 	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
 		KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
 		    HN_TX_FLAG_ATTACHED) == 0,
 		    ("%dth TX ring is still attached", i));
 	}
 #endif
 }
 
 /*
  * Try to allocate sub-channels for multi-ring operation.
  *
  * nsubch: in  - number of sub-channels wanted;
  *         out - number of sub-channels actually allocated (0 if none
  *               were requested, RSS capabilities could not be queried,
  *               only one channel is supported, or allocation failed).
  *
  * Always returns 0; failing to get sub-channels is not an error.
  */
 static int
 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
 {
 	struct vmbus_channel **subchans;
 	int nchan, rxr_cnt;
 
 	nchan = *nsubch + 1;
 	if (nchan == 1) {
 		/* Multiple RX/TX rings are not requested. */
 		goto no_subchans;
 	}
 
 	/*
 	 * Query RSS capabilities, e.g. # of RX rings, and # of indirect
 	 * table entries.
 	 */
 	if (hn_rndis_query_rsscaps(sc, &rxr_cnt) != 0) {
 		/* No RSS; this is benign. */
 		goto no_subchans;
 	}
 	if (bootverbose) {
 		if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n",
 		    rxr_cnt, nchan);
 	}
 
 	/* Never ask for more channels than the RX rings offered. */
 	if (rxr_cnt < nchan)
 		nchan = rxr_cnt;
 	if (nchan == 1) {
 		if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n");
 		goto no_subchans;
 	}
 
 	/*
 	 * Allocate sub-channels from NVS.
 	 */
 	*nsubch = nchan - 1;
 	if (hn_nvs_alloc_subchans(sc, nsubch) != 0 || *nsubch == 0) {
 		/* Failed to allocate sub-channels. */
 		goto no_subchans;
 	}
 
 	/*
 	 * Wait for all sub-channels to become ready before moving on.
 	 */
 	subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch);
 	vmbus_subchan_rel(subchans, *nsubch);
 	return (0);
 
 no_subchans:
 	*nsubch = 0;
 	return (0);
 }
 
 /*
  * Report whether the synthetic parts may be attached: false when the
  * softc has HN_FLAG_ERRORS set or any RX ring has HN_RX_FLAG_BR_REF
  * set, true otherwise.
  */
 static bool
 hn_synth_attachable(const struct hn_softc *sc)
 {
 	int ring = 0;
 
 	if ((sc->hn_flags & HN_FLAG_ERRORS) != 0)
 		return (false);
 
 	while (ring < sc->hn_rx_ring_cnt) {
 		if ((sc->hn_rx_ring[ring].hn_rx_flags &
 		    HN_RX_FLAG_BR_REF) != 0)
 			return (false);
 		ring++;
 	}
 	return (true);
 }
 
 /*
  * Make sure that the RX filter is zero after the successful
  * RNDIS initialization.
  *
  * NOTE:
  * Under certain conditions on certain versions of Hyper-V,
  * the RNDIS rxfilter is _not_ zero on the hypervisor side
  * after the successful RNDIS initialization, which breaks
  * the assumption of any following code (well, it breaks the
  * RNDIS API contract actually).  Clear the RNDIS rxfilter
  * explicitly, drain packets sneaking through, and drain the
  * interrupt taskqueues scheduled due to the stealth packets.
  *
  * nchan is the number of channels (primary plus sub-channels)
  * passed on to hn_drain_rxtx().
  */
 static void
 hn_rndis_init_fixat(struct hn_softc *sc, int nchan)
 {
 
 	/* Clear the RX filter first, then drain what slipped through. */
 	hn_disable_rx(sc);
 	hn_drain_rxtx(sc, nchan);
 }
 
 /*
  * Attach the synthetic parts: primary channel, NVS, RNDIS, optional
  * sub-channels, and the RSS configuration.
  *
  * mtu is passed on to hn_nvs_attach() and hn_rndis_attach().
  *
  * Returns 0 on success.  On failure everything attached so far is
  * detached again and an errno value is returned; ENXIO is returned if
  * hn_synth_attachable() says no, or if the capabilities of an already
  * attached device changed.
  */
 static int
 hn_synth_attach(struct hn_softc *sc, int mtu)
 {
 /* Bits recorded in 'attached' to drive the partial rollback below. */
 #define ATTACHED_NVS		0x0002
 #define ATTACHED_RNDIS		0x0004
 
 	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
 	int error, nsubch, nchan = 1, i, rndis_inited;
 	uint32_t old_caps, attached = 0;
 
 	KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0,
 	    ("synthetic parts were attached"));
 
 	if (!hn_synth_attachable(sc))
 		return (ENXIO);
 
 	/* Save capabilities for later verification. */
 	old_caps = sc->hn_caps;
 	sc->hn_caps = 0;
 
 	/* Clear RSS stuffs. */
 	sc->hn_rss_ind_size = 0;
 	sc->hn_rss_hash = 0;
 	sc->hn_rss_hcap = 0;
 
 	/*
 	 * Attach the primary channel _before_ attaching NVS and RNDIS.
 	 */
 	error = hn_chan_attach(sc, sc->hn_prichan);
 	if (error)
 		goto failed;
 
 	/*
 	 * Attach NVS.
 	 */
 	error = hn_nvs_attach(sc, mtu);
 	if (error)
 		goto failed;
 	attached |= ATTACHED_NVS;
 
 	/*
 	 * Attach RNDIS _after_ NVS is attached.
 	 */
 	error = hn_rndis_attach(sc, mtu, &rndis_inited);
 	/* Record the RNDIS state even on error, so it gets torn down. */
 	if (rndis_inited)
 		attached |= ATTACHED_RNDIS;
 	if (error)
 		goto failed;
 
 	/*
 	 * Make sure capabilities are not changed.
 	 */
 	if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
 		if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
 		    old_caps, sc->hn_caps);
 		error = ENXIO;
 		goto failed;
 	}
 
 	/*
 	 * Allocate sub-channels for multi-TX/RX rings.
 	 *
 	 * NOTE:
 	 * The # of RX rings that can be used is equivalent to the # of
 	 * channels to be requested.
 	 */
 	nsubch = sc->hn_rx_ring_cnt - 1;
 	error = hn_synth_alloc_subchans(sc, &nsubch);
 	if (error)
 		goto failed;
 	/* NOTE: _Full_ synthetic parts detach is required now. */
 	sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;
 
 	/*
 	 * Set the # of TX/RX rings that could be used according to
 	 * the # of channels that NVS offered.
 	 */
 	nchan = nsubch + 1;
 	hn_set_ring_inuse(sc, nchan);
 	if (nchan == 1) {
 		/* Only the primary channel can be used; done */
 		goto back;
 	}
 
 	/*
 	 * Attach the sub-channels.
 	 *
 	 * NOTE: hn_set_ring_inuse() _must_ have been called.
 	 */
 	error = hn_attach_subchans(sc);
 	if (error)
 		goto failed;
 
 	/*
 	 * Configure RSS key and indirect table _after_ all sub-channels
 	 * are attached.
 	 */
 	if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
 		/*
 		 * RSS key is not set yet; set it to the default RSS key.
 		 */
 		if (bootverbose)
 			if_printf(sc->hn_ifp, "setup default RSS key\n");
 #ifdef RSS
 		rss_getkey(rss->rss_key);
 #else
 		memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
 #endif
 		sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
 	}
 
 	if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
 		/*
 		 * RSS indirect table is not set yet; set it up in round-
 		 * robin fashion.
 		 */
 		if (bootverbose) {
 			if_printf(sc->hn_ifp, "setup default RSS indirect "
 			    "table\n");
 		}
 		for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
 			uint32_t subidx;
 
 #ifdef RSS
 			subidx = rss_get_indirection_to_bucket(i);
 #else
 			subidx = i;
 #endif
 			rss->rss_ind[i] = subidx % nchan;
 		}
 		sc->hn_flags |= HN_FLAG_HAS_RSSIND;
 	} else {
 		/*
 		 * # of usable channels may be changed, so we have to
 		 * make sure that all entries in RSS indirect table
 		 * are valid.
 		 *
 		 * NOTE: hn_set_ring_inuse() _must_ have been called.
 		 */
 		hn_rss_ind_fixup(sc);
 	}
 
 	sc->hn_rss_hash = sc->hn_rss_hcap;
 	if ((sc->hn_flags & HN_FLAG_RXVF) ||
 	    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
 		/* NOTE: Don't reconfigure RSS; will do immediately. */
 		hn_vf_rss_fixup(sc, false);
 	}
 	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
 	if (error)
 		goto failed;
 back:
 	/*
 	 * Fixup transmission aggregation setup.
 	 */
 	hn_set_txagg(sc);
 	hn_rndis_init_fixat(sc, nchan);
 	return (0);
 
 failed:
 	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
 		/* Past the point of no return: do the full detach. */
 		hn_rndis_init_fixat(sc, nchan);
 		hn_synth_detach(sc);
 	} else {
 		/* Undo only what was attached, in reverse order. */
 		if (attached & ATTACHED_RNDIS) {
 			hn_rndis_init_fixat(sc, nchan);
 			hn_rndis_detach(sc);
 		}
 		if (attached & ATTACHED_NVS)
 			hn_nvs_detach(sc);
 		hn_chan_detach(sc, sc->hn_prichan);
 		/* Restore old capabilities. */
 		sc->hn_caps = old_caps;
 	}
 	return (error);
 
 #undef ATTACHED_RNDIS
 #undef ATTACHED_NVS
 }
 
 /*
  * Detach the synthetic parts: RNDIS, then NVS, then all channels, and
  * clear HN_FLAG_SYNTH_ATTACHED.
  *
  * When the vmbus version is at least VMBUS_VERSION_WIN10, the RXBUF and
  * chimney sending buffer GPADLs (if connected) are disconnected from
  * the primary channel here; a failed disconnect sets the matching
  * HN_FLAG_*_REF flag.
  *
  * NOTE:
  * The interface must have been suspended through hn_suspend(), before
  * this function gets called.
  */
 static void
 hn_synth_detach(struct hn_softc *sc)
 {
 	int error;
 
 	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
 	    ("synthetic parts were not attached"));
 
 	/* Detach the RNDIS first. */
 	hn_rndis_detach(sc);
 
 	/* Detach NVS. */
 	hn_nvs_detach(sc);
 
 	/* Detach all of the channels. */
 	hn_detach_allchans(sc);
 
 	if (vmbus_current_version >= VMBUS_VERSION_WIN10) {
 		/*
 		 * Host is post-Win2016; the buffers are disconnected from
 		 * the primary channel here.
 		 */
 		if (sc->hn_rxbuf_gpadl != 0) {
 			/* RXBUF. */
 			error = vmbus_chan_gpadl_disconnect(sc->hn_prichan,
 			    sc->hn_rxbuf_gpadl);
 			if (error) {
 				if_printf(sc->hn_ifp,
 				    "rxbuf gpadl disconn failed: %d\n", error);
 				sc->hn_flags |= HN_FLAG_RXBUF_REF;
 			}
 			sc->hn_rxbuf_gpadl = 0;
 		}
 
 		if (sc->hn_chim_gpadl != 0) {
 			/* Chimney sending buffer. */
 			error = vmbus_chan_gpadl_disconnect(sc->hn_prichan,
 			    sc->hn_chim_gpadl);
 			if (error) {
 				if_printf(sc->hn_ifp,
 				    "chim gpadl disconn failed: %d\n", error);
 				sc->hn_flags |= HN_FLAG_CHIM_REF;
 			}
 			sc->hn_chim_gpadl = 0;
 		}
 	}
 	sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
 }
 
 /*
  * Record how many rings are in use: ring_cnt RX rings, and the smaller
  * of ring_cnt and hn_tx_ring_cnt TX rings.
  */
 static void
 hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
 {
 	KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
 	    ("invalid ring count %d", ring_cnt));
 
 	/* There cannot be more TX rings in use than were created. */
 	sc->hn_tx_ring_inuse = (ring_cnt < sc->hn_tx_ring_cnt) ?
 	    ring_cnt : sc->hn_tx_ring_cnt;
 	sc->hn_rx_ring_inuse = ring_cnt;
 
 #ifdef RSS
 	if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) {
 		if_printf(sc->hn_ifp, "# of RX rings (%d) does not match "
 		    "# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse,
 		    rss_getnumbuckets());
 	}
 #endif
 
 	if (bootverbose) {
 		if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
 		    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
 	}
 }
 
 static void
 hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan)
 {
 
 	/*
 	 * NOTE:
 	 * The TX bufring will not be drained by the hypervisor,
 	 * if the primary channel is revoked.
 	 */
 	while (!vmbus_chan_rx_empty(chan) ||
 	    (!vmbus_chan_is_revoked(sc->hn_prichan) &&
 	     !vmbus_chan_tx_empty(chan)))
 		pause("waitch", 1);
 	vmbus_chan_intr_drain(chan);
 }
 
 /*
  * Disable RX by forcing the RX filter to NDIS_PACKET_TYPE_NONE, then
  * sleep for 200ms worth of ticks.  A failure to set the filter is
  * ignored.
  */
 static void
 hn_disable_rx(struct hn_softc *sc)
 {
 	/* Give RNDIS enough time to flush all pending data packets. */
 	const int flush_ticks = (200 * hz) / 1000;
 
 	sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
 	(void)hn_rndis_set_rxfilter(sc, sc->hn_rx_filter);
 
 	pause("waitrx", flush_ticks);
 }
 
 /*
  * Drain RX/TX bufrings and interrupts of the first (nchan - 1)
  * sub-channels, followed by the primary channel.
  *
  * NOTE:
  * RX/TX _must_ have been suspended/disabled, before this function
  * is called.
  */
 static void
 hn_drain_rxtx(struct hn_softc *sc, int nchan)
 {
 	const int nsubch = nchan - 1;
 	struct vmbus_channel **subch;
 	int n;
 
 	subch = (nsubch > 0) ?
 	    vmbus_subchan_get(sc->hn_prichan, nsubch) : NULL;
 
 	/* Sub-channels first ... */
 	if (subch != NULL) {
 		for (n = 0; n < nsubch; n++)
 			hn_chan_drain(sc, subch[n]);
 	}
 
 	/* ... then the primary channel. */
 	hn_chan_drain(sc, sc->hn_prichan);
 
 	if (subch != NULL)
 		vmbus_subchan_rel(subch, nsubch);
 }
 
 /*
  * Suspend the data path: mark every in-use TX ring suspended and wait
  * for its pending sends, disable RX, drain the channels, and finally
  * drain the per-ring TX tasks.
  *
  * HN_LOCK_ASSERT() requires the softc lock to be held by the caller.
  */
 static void
 hn_suspend_data(struct hn_softc *sc)
 {
 	struct hn_tx_ring *txr;
 	int i;
 
 	HN_LOCK_ASSERT(sc);
 
 	/*
 	 * Suspend TX.
 	 */
 	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
 		txr = &sc->hn_tx_ring[i];
 
 		/* Set under the TX lock so senders see it on next entry. */
 		mtx_lock(&txr->hn_tx_lock);
 		txr->hn_suspended = 1;
 		mtx_unlock(&txr->hn_tx_lock);
 		/* No one is able send more packets now. */
 
 		/*
 		 * Wait for all pending sends to finish.
 		 *
 		 * NOTE:
 		 * We will _not_ receive all pending send-done, if the
 		 * primary channel is revoked.
 		 */
 		while (hn_tx_ring_pending(txr) &&
 		    !vmbus_chan_is_revoked(sc->hn_prichan))
 			pause("hnwtx", 1 /* 1 tick */);
 	}
 
 	/*
 	 * Disable RX.
 	 */
 	hn_disable_rx(sc);
 
 	/*
 	 * Drain RX/TX.
 	 */
 	hn_drain_rxtx(sc, sc->hn_rx_ring_inuse);
 
 	/*
 	 * Drain any pending TX tasks.
 	 *
 	 * NOTE:
 	 * The above hn_drain_rxtx() can dispatch TX tasks, so the TX
 	 * tasks will have to be drained _after_ the above hn_drain_rxtx().
 	 */
 	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
 		txr = &sc->hn_tx_ring[i];
 
 		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
 		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
 	}
 }
 
 /*
  * Task body used by hn_suspend_mgmt(): clear the softc's hn_mgmt_taskq
  * pointer.
  */
 static void
 hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
 {
 	struct hn_softc *sc = xsc;
 
 	sc->hn_mgmt_taskq = NULL;
 }
 
 /*
  * Suspend management work: clear hn_mgmt_taskq via a task run on the
  * primary channel, then drain everything still queued on
  * hn_mgmt_taskq0.
  *
  * HN_LOCK_ASSERT() requires the softc lock to be held by the caller.
  */
 static void
 hn_suspend_mgmt(struct hn_softc *sc)
 {
 	struct task task;
 
 	HN_LOCK_ASSERT(sc);
 
 	/*
 	 * Make sure that hn_mgmt_taskq0 can no longer be accessed
 	 * through hn_mgmt_taskq.
 	 */
 	TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
 	vmbus_chan_run_task(sc->hn_prichan, &task);
 
 	/*
 	 * Make sure that all pending management tasks are completed.
 	 */
 	taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
 	taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
 	taskqueue_drain_all(sc->hn_mgmt_taskq0);
 }
 
 static void
 hn_suspend(struct hn_softc *sc)
 {
 
 	/* Disable polling. */
 	hn_polling(sc, 0);
 
 	/*
 	 * If the non-transparent mode VF is activated, the synthetic
 	 * device is receiving packets, so the data path of the
 	 * synthetic device must be suspended.
 	 */
 	if ((if_getdrvflags(sc->hn_ifp) & IFF_DRV_RUNNING) ||
 	    (sc->hn_flags & HN_FLAG_RXVF))
 		hn_suspend_data(sc);
 	hn_suspend_mgmt(sc);
 }
 
 /*
  * Clear the suspended state of the first tx_ring_cnt TX rings, each
  * under its own TX lock.
  */
 static void
 hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
 {
 	int ring;
 
 	KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
 	    ("invalid TX ring count %d", tx_ring_cnt));
 
 	ring = 0;
 	while (ring < tx_ring_cnt) {
 		struct hn_tx_ring *txr = &sc->hn_tx_ring[ring];
 
 		mtx_lock(&txr->hn_tx_lock);
 		txr->hn_suspended = 0;
 		mtx_unlock(&txr->hn_tx_lock);
 		ring++;
 	}
 }
 
 /*
  * Resume the data path: reconfigure the RX filter, clear the suspended
  * state of all TX rings, flush the drbrs of rings that are not in use,
  * and kick each in-use ring through its txeof task.
  *
  * HN_LOCK_ASSERT() requires the softc lock to be held by the caller.
  */
 static void
 hn_resume_data(struct hn_softc *sc)
 {
 	int ring;
 
 	HN_LOCK_ASSERT(sc);
 
 	/* Re-enable RX. */
 	hn_rxfilter_config(sc);
 
 	/*
 	 * Suspend status is cleared on "all" TX rings, not only the
 	 * in-use ones, since hn_tx_ring_inuse can be changed after
 	 * hn_suspend_data().
 	 */
 	hn_resume_tx(sc, sc->hn_tx_ring_cnt);
 
 #ifdef HN_IFSTART_SUPPORT
 	if (!hn_use_if_start)
 #endif
 	{
 		/*
 		 * Flush unused drbrs, since hn_tx_ring_inuse may be
 		 * reduced.
 		 */
 		ring = sc->hn_tx_ring_inuse;
 		while (ring < sc->hn_tx_ring_cnt) {
 			hn_tx_ring_qflush(&sc->hn_tx_ring[ring]);
 			ring++;
 		}
 	}
 
 	/*
 	 * Kick start TX.  The txeof task is used, so that any pending
 	 * oactive can be cleared properly.
 	 */
 	for (ring = 0; ring < sc->hn_tx_ring_inuse; ring++) {
 		struct hn_tx_ring *txr = &sc->hn_tx_ring[ring];
 
 		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
 	}
 }
 
 /*
  * Resume management work: make hn_mgmt_taskq point at hn_mgmt_taskq0
  * again, then restart either network change detection or the link
  * status check.
  */
 static void
 hn_resume_mgmt(struct hn_softc *sc)
 {
 
 	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
 
 	if ((sc->hn_link_flags & HN_LINK_FLAG_NETCHG) == 0) {
 		/*
 		 * No network change was pending; start link status
 		 * checks, which is more lightweight than network change
 		 * detection.
 		 */
 		hn_update_link_status(sc);
 	} else {
 		/* Kick off the pending network change detection. */
 		hn_change_network(sc);
 	}
 }
 
 /*
  * Resume the interface: the data path when it is active, management
  * work unless a VF is attached/activated, and polling when requested.
  */
 static void
 hn_resume(struct hn_softc *sc)
 {
 
 	/*
 	 * If the non-transparent mode VF is activated, the synthetic
 	 * device have to receive packets, so the data path of the
 	 * synthetic device must be resumed.
 	 */
 	if ((if_getdrvflags(sc->hn_ifp) & IFF_DRV_RUNNING) != 0 ||
 	    (sc->hn_flags & HN_FLAG_RXVF) != 0)
 		hn_resume_data(sc);
 
 	/*
 	 * Don't resume link status change if VF is attached/activated.
 	 * - In the non-transparent VF mode, the synthetic device marks
 	 *   link down until the VF is deactivated; i.e. VF is down.
 	 * - In transparent VF mode, VF's media status is used until
 	 *   the VF is detached.
 	 */
 	if (!((sc->hn_flags & HN_FLAG_RXVF) != 0 ||
 	    (hn_xpnt_vf && sc->hn_vf_ifp != NULL)))
 		hn_resume_mgmt(sc);
 
 	/*
 	 * Re-enable polling if this interface is running and
 	 * the polling is requested.
 	 */
 	if (sc->hn_pollhz > 0 || 0) {
 		if ((if_getdrvflags(sc->hn_ifp) & IFF_DRV_RUNNING) != 0 &&
 		    sc->hn_pollhz > 0)
 			hn_polling(sc, sc->hn_pollhz);
 	}
 }
 
 /*
  * Handle a received RNDIS status message.
  *
  * data/dlen describe the message buffer.  Media connect/disconnect
  * triggers a link status update, a network change triggers
  * hn_change_network(), offload-config and link-speed notifications are
  * ignored, and anything else is logged as unknown.
  */
 static void 
 hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
 {
 	const struct rndis_status_msg *msg;
 	int ofs;
 
 	if (dlen < sizeof(*msg)) {
 		if_printf(sc->hn_ifp, "invalid RNDIS status\n");
 		return;
 	}
 	msg = data;
 
 	switch (msg->rm_status) {
 	case RNDIS_STATUS_MEDIA_CONNECT:
 	case RNDIS_STATUS_MEDIA_DISCONNECT:
 		hn_update_link_status(sc);
 		break;
 
 	case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
 	case RNDIS_STATUS_LINK_SPEED_CHANGE:
 		/* Not really useful; ignore. */
 		break;
 
 	case RNDIS_STATUS_NETWORK_CHANGE:
 		/*
 		 * The status buffer is optional detail; it is only read
 		 * when it lies within dlen and holds at least a uint32_t.
 		 *
 		 * NOTE(review): ofs + msg->rm_stbuflen presumably cannot
 		 * wrap for the values the host sends -- confirm.
 		 */
 		ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
 		if (dlen < ofs + msg->rm_stbuflen ||
 		    msg->rm_stbuflen < sizeof(uint32_t)) {
 			if_printf(sc->hn_ifp, "network changed\n");
 		} else {
 			uint32_t change;
 
 			/* memcpy: the buffer offset may be unaligned. */
 			memcpy(&change, ((const uint8_t *)msg) + ofs,
 			    sizeof(change));
 			if_printf(sc->hn_ifp, "network changed, change %u\n",
 			    change);
 		}
 		hn_change_network(sc);
 		break;
 
 	default:
 		if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
 		    msg->rm_status);
 		break;
 	}
 }
 
 /*
  * Walk the RNDIS per-packet-info records in info_data and store
  * pointers to the recognized ones (packet info id, VLAN, checksum,
  * hash value, hash info) in *info.
  *
  * info_data/info_dlen: the per-packet-info area and its length.
  * info: receives pointers into info_data; only the fields whose
  *       records are found are written, except hash_info, which is
  *       reset to NULL when no hash value record was found.
  *
  * Returns 0 on success, or EINVAL if a record is truncated, smaller
  * than its own header, misaligned, or too short for its type.
  */
 static int
 hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
 {
 	const struct rndis_pktinfo *pi = info_data;
 	uint32_t mask = 0;
 
 	while (info_dlen != 0) {
 		const void *data;
 		uint32_t dlen;
 
 		if (__predict_false(info_dlen < sizeof(*pi)))
 			return (EINVAL);
 		if (__predict_false(info_dlen < pi->rm_size))
 			return (EINVAL);
 		/*
 		 * A record can not be smaller than its own header.  In
 		 * particular, a zero rm_size with an unrecognized type
 		 * would neither shrink info_dlen nor advance pi below,
 		 * and this loop would never terminate.
 		 */
 		if (__predict_false(pi->rm_size < sizeof(*pi)))
 			return (EINVAL);
 		info_dlen -= pi->rm_size;
 
 		if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
 			return (EINVAL);
 		if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
 			return (EINVAL);
 		dlen = pi->rm_size - pi->rm_pktinfooffset;
 		data = pi->rm_data;
 
 		if (pi->rm_internal == 1) {
 			switch (pi->rm_type) {
 			case NDIS_PKTINFO_IT_PKTINFO_ID:
 				if (__predict_false(dlen < NDIS_PKTINFOID_SZ))
 					return (EINVAL);
 				info->pktinfo_id =
 				    (const struct packet_info_id *)data;
 				mask |= HN_RXINFO_PKTINFO_ID;
 				break;
 
 			default:
 				/* Unrecognized record; skip it. */
 				goto next;
 			}
 		} else {
 			switch (pi->rm_type) {
 			case NDIS_PKTINFO_TYPE_VLAN:
 				if (__predict_false(dlen
 				    < NDIS_VLAN_INFO_SIZE))
 					return (EINVAL);
 				info->vlan_info = (const uint32_t *)data;
 				mask |= HN_RXINFO_VLAN;
 				break;
 
 			case NDIS_PKTINFO_TYPE_CSUM:
 				if (__predict_false(dlen
 				    < NDIS_RXCSUM_INFO_SIZE))
 					return (EINVAL);
 				info->csum_info = (const uint32_t *)data;
 				mask |= HN_RXINFO_CSUM;
 				break;
 
 			case HN_NDIS_PKTINFO_TYPE_HASHVAL:
 				if (__predict_false(dlen
 				    < HN_NDIS_HASH_VALUE_SIZE))
 					return (EINVAL);
 				info->hash_value = (const uint32_t *)data;
 				mask |= HN_RXINFO_HASHVAL;
 				break;
 
 			case HN_NDIS_PKTINFO_TYPE_HASHINF:
 				if (__predict_false(dlen
 				    < HN_NDIS_HASH_INFO_SIZE))
 					return (EINVAL);
 				info->hash_info = (const uint32_t *)data;
 				mask |= HN_RXINFO_HASHINF;
 				break;
 
 			default:
 				/* Unrecognized record; skip it. */
 				goto next;
 			}
 		}
 
 		if (mask == HN_RXINFO_ALL) {
 			/* All found; done */
 			break;
 		}
 next:
 		pi = (const struct rndis_pktinfo *)
 		    ((const uint8_t *)pi + pi->rm_size);
 	}
 
 	/*
 	 * Final fixup.
 	 * - If there is no hash value, invalidate the hash info.
 	 */
 	if ((mask & HN_RXINFO_HASHVAL) == 0)
 		info->hash_info = NULL;
 	return (0);
 }
 
 /*
  * Report whether the region starting at 'off' with length 'len' is
  * considered to overlap the region starting at 'check_off' with length
  * 'check_len'.
  *
  * Returns false only when the lower region ends at or before the start
  * of the higher one.  Regions with identical start offsets are always
  * reported as overlapping, whatever their lengths.
  */
 static __inline bool
 hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
 {
 
 	if (off == check_off)
 		return (true);
 
 	if (off < check_off) {
 		/* Region comes first; it must end by check_off. */
 		return (!__predict_true(off + len <= check_off));
 	}
 
 	/* Checked region comes first; it must end by off. */
 	return (!__predict_true(check_off + check_len <= off));
 }
 
 /*
  * Append one data fragment to the RX ring's rsc state.
  *
  * The first fragment (rsc.cnt == 0) also records the VLAN, checksum
  * and hash pointers from 'info' and starts the packet length at 'len';
  * later fragments only add 'len' to the packet length.
  *
  * NOTE(review): no bound on rsc.cnt is checked here -- presumably the
  * caller guarantees room in frag_data[]/frag_len[]; confirm.
  */
 static __inline void
 hn_rsc_add_data(struct hn_rx_ring *rxr, const void *data,
 		uint32_t len, struct hn_rxinfo *info)
 {
 	const uint32_t slot = rxr->rsc.cnt;
 
 	if (slot == 0) {
 		rxr->rsc.vlan_info = info->vlan_info;
 		rxr->rsc.csum_info = info->csum_info;
 		rxr->rsc.hash_info = info->hash_info;
 		rxr->rsc.hash_value = info->hash_value;
 		rxr->rsc.pktlen = len;
 	} else {
 		rxr->rsc.pktlen += len;
 	}
 
 	rxr->rsc.frag_data[slot] = data;
 	rxr->rsc.frag_len[slot] = len;
 	rxr->rsc.cnt++;
 }
 
 /*
  * Validate one RNDIS packet message and hand its payload to the RX path.
  *
  * rxr  - RX ring the message arrived on.
  * data - start of the RNDIS packet message.
  * dlen - number of bytes available at 'data'.
  *
  * The message length, the data/OOB/per-packet-info offsets and the
  * mutual non-overlap of those regions are checked first; any violation
  * is logged with if_printf() and the message is discarded.  Useful
  * per-packet-info is then extracted with hn_rndis_rxinfo().
  *
  * RSC handling: a fragment flagged HN_NDIS_PKTINFO_SUBALLOC is appended
  * to rxr->rsc; hn_rxpkt() is only called once the last fragment (or a
  * non-RSC packet) has been stored.  Out-of-sequence or excess fragments
  * are counted in rxr->hn_rsc_drop and dropped.
  */
 static void
 hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
 {
 	const struct rndis_packet_msg *pkt;
 	struct hn_rxinfo info;
 	int data_off, pktinfo_off, data_len, pktinfo_len;
 	bool rsc_more = false;
 
 	/*
 	 * Check length.
 	 */
 	if (__predict_false(dlen < sizeof(*pkt))) {
 		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
 		return;
 	}
 	pkt = data;
 
 	if (__predict_false(dlen < pkt->rm_len)) {
 		if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
 		    "dlen %d, msglen %u\n", dlen, pkt->rm_len);
 		return;
 	}
 	if (__predict_false(pkt->rm_len <
 	    pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
 		if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
 		    "msglen %u, data %u, oob %u, pktinfo %u\n",
 		    pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
 		    pkt->rm_pktinfolen);
 		return;
 	}
 	if (__predict_false(pkt->rm_datalen == 0)) {
 		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
 		return;
 	}
 
 	/*
 	 * Check offsets.
 	 */
 #define IS_OFFSET_INVALID(ofs)			\
 	((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN ||	\
 	 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))
 
 	/* XXX Hyper-V does not meet data offset alignment requirement */
 	if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
 		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
 		    "data offset %u\n", pkt->rm_dataoffset);
 		return;
 	}
 	if (__predict_false(pkt->rm_oobdataoffset > 0 &&
 	    IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
 		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
 		    "oob offset %u\n", pkt->rm_oobdataoffset);
 		return;
 	}
 	if (__predict_true(pkt->rm_pktinfooffset > 0) &&
 	    __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
 		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
 		    "pktinfo offset %u\n", pkt->rm_pktinfooffset);
 		return;
 	}
 
 #undef IS_OFFSET_INVALID
 
 	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
 	data_len = pkt->rm_datalen;
 	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
 	pktinfo_len = pkt->rm_pktinfolen;
 
 	/*
 	 * Check OOB coverage.
 	 */
 	if (__predict_false(pkt->rm_oobdatalen != 0)) {
 		int oob_off, oob_len;
 
 		if_printf(rxr->hn_ifp, "got oobdata\n");
 		oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
 		oob_len = pkt->rm_oobdatalen;
 
 		if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
 			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
 			    "oob overflow, msglen %u, oob abs %d len %d\n",
 			    pkt->rm_len, oob_off, oob_len);
 			return;
 		}
 
 		/*
 		 * Check against data.
 		 */
 		if (hn_rndis_check_overlap(oob_off, oob_len,
 		    data_off, data_len)) {
 			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
 			    "oob overlaps data, oob abs %d len %d, "
 			    "data abs %d len %d\n",
 			    oob_off, oob_len, data_off, data_len);
 			return;
 		}
 
 		/*
 		 * Check against pktinfo.
 		 */
 		if (pktinfo_len != 0 &&
 		    hn_rndis_check_overlap(oob_off, oob_len,
 		    pktinfo_off, pktinfo_len)) {
 			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
 			    "oob overlaps pktinfo, oob abs %d len %d, "
 			    "pktinfo abs %d len %d\n",
 			    oob_off, oob_len, pktinfo_off, pktinfo_len);
 			return;
 		}
 	}
 
 	/*
 	 * Check per-packet-info coverage and find useful per-packet-info.
 	 *
 	 * Every pointer in 'info' is cleared up front, including
 	 * hash_value: hn_rsc_add_data() copies all of them regardless of
 	 * which per-packet-info records were actually present.
 	 */
 	info.vlan_info = NULL;
 	info.csum_info = NULL;
 	info.hash_info = NULL;
 	info.hash_value = NULL;
 	info.pktinfo_id = NULL;
 
 	if (__predict_true(pktinfo_len != 0)) {
 		bool overlap;
 		int error;
 
 		if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) {
 			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
 			    "pktinfo overflow, msglen %u, "
 			    "pktinfo abs %d len %d\n",
 			    pkt->rm_len, pktinfo_off, pktinfo_len);
 			return;
 		}
 
 		/*
 		 * Check packet info coverage.
 		 */
 		overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len,
 		    data_off, data_len);
 		if (__predict_false(overlap)) {
 			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
 			    "pktinfo overlap data, pktinfo abs %d len %d, "
 			    "data abs %d len %d\n",
 			    pktinfo_off, pktinfo_len, data_off, data_len);
 			return;
 		}
 
 		/*
 		 * Find useful per-packet-info.
 		 */
 		error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off,
 		    pktinfo_len, &info);
 		if (__predict_false(error)) {
 			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg "
 			    "pktinfo\n");
 			return;
 		}
 	}
 
 	if (__predict_false(data_off + data_len > pkt->rm_len)) {
 		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
 		    "data overflow, msglen %u, data abs %d len %d\n",
 		    pkt->rm_len, data_off, data_len);
 		return;
 	}
 
 	/* Identify RSC fragments, drop invalid packets */
 	if ((info.pktinfo_id != NULL) &&
 	    (info.pktinfo_id->flag & HN_NDIS_PKTINFO_SUBALLOC)) {
 		if (info.pktinfo_id->flag & HN_NDIS_PKTINFO_1ST_FRAG) {
 			/* A first fragment restarts the aggregation. */
 			rxr->rsc.cnt = 0;
 			rxr->hn_rsc_pkts++;
 		} else if (rxr->rsc.cnt == 0)
 			/* Continuation without a preceding first fragment. */
 			goto drop;
 
 		rsc_more = true;
 
 		if (info.pktinfo_id->flag & HN_NDIS_PKTINFO_LAST_FRAG)
 			rsc_more = false;
 
 		/* More fragments are expected but this was the last range. */
 		if (rsc_more && rxr->rsc.is_last)
 			goto drop;
 	} else {
 		rxr->rsc.cnt = 0;
 	}
 
 	if (__predict_false(rxr->rsc.cnt >= HN_NVS_RSC_MAX))
 		goto drop;
 
 	/* Store data in per rx ring structure */
 	hn_rsc_add_data(rxr,((const uint8_t *)pkt) + data_off,
 	    data_len, &info);
 
 	if (rsc_more)
 		return;
 
 	hn_rxpkt(rxr);
 	rxr->rsc.cnt = 0;
 	return;
 drop:
 	rxr->hn_rsc_drop++;
 	return;
 }
 
 /*
  * Dispatch one RNDIS message according to its type field.
  *
  * Packet messages go to hn_rndis_rx_data(); status indications go to
  * hn_rndis_rx_status(); everything else goes to hn_rndis_rx_ctrl().
  * A message shorter than the common RNDIS header is logged and ignored.
  */
 static __inline void
 hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
 {
 	const struct rndis_msghdr *msg;
 
 	if (__predict_false(dlen < sizeof(*msg))) {
 		if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
 		return;
 	}
 	msg = data;
 
 	if (__predict_true(msg->rm_type == REMOTE_NDIS_PACKET_MSG)) {
 		/* Hot data path. */
 		hn_rndis_rx_data(rxr, data, dlen);
 	} else if (msg->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG) {
 		hn_rndis_rx_status(if_getsoftc(rxr->hn_ifp), data, dlen);
 	} else {
 		hn_rndis_rx_ctrl(if_getsoftc(rxr->hn_ifp), data, dlen);
 	}
 }
 
 /*
  * Handle an inband NVS notification channel packet.
  *
  * HN_NVS_TYPE_TXTBL_NOTE notifications are silently ignored; any other
  * type is only logged.  A packet too short to hold an NVS header is
  * reported as invalid.
  */
 static void
 hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
 {
 	const struct hn_nvs_hdr *nvs;
 
 	if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*nvs)) {
 		if_printf(sc->hn_ifp, "invalid nvs notify\n");
 		return;
 	}
 	nvs = VMBUS_CHANPKT_CONST_DATA(pkt);
 
 	/* TX table notes are useless to us; everything else gets logged. */
 	if (nvs->nvs_type != HN_NVS_TYPE_TXTBL_NOTE) {
 		if_printf(sc->hn_ifp, "got notify, nvs type %u\n",
 		    nvs->nvs_type);
 	}
 }
 
 /*
  * Handle a completion channel packet.
  *
  * The packet's transaction id is interpreted as a pointer to the
  * hn_nvs_sendctx of the originating send, and that context's callback
  * is invoked with the packet payload.
  */
 static void
 hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
     const struct vmbus_chanpkt_hdr *pkt)
 {
 	struct hn_nvs_sendctx *ctx =
 	    (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
 
 	ctx->hn_cb(ctx, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
 	    VMBUS_CHANPKT_DATALEN(pkt));
 	/*
 	 * NOTE:
 	 * 'ctx' must not be touched past this point; the callback is
 	 * allowed to free it.
 	 */
 }
 
 /*
  * Handle an RXBUF channel packet.
  *
  * After validating the NVS header, the channel packet header length, the
  * RXBUF signature and the range count, each range is treated as one RNDIS
  * message located inside rxr->hn_rxbuf and passed to hn_rndis_rxpkt()
  * within a network epoch section.  Ranges that would run past
  * HN_RXBUF_SIZE are logged and skipped.  Finally the RXBUF is acked
  * through hn_nvs_ack_rxbuf().
  */
 static void
 hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
     const struct vmbus_chanpkt_hdr *pkthdr)
 {
 	const struct vmbus_chanpkt_rxbuf *rxpkt;
 	const struct hn_nvs_hdr *nvs;
 	struct epoch_tracker et;
 	int nranges, idx, hdrlen;
 
 	if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs))) {
 		if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
 		return;
 	}
 	nvs = VMBUS_CHANPKT_CONST_DATA(pkthdr);
 
 	/* Only RNDIS payloads are expected on this path. */
 	if (__predict_false(nvs->nvs_type != HN_NVS_TYPE_RNDIS)) {
 		if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
 		    nvs->nvs_type);
 		return;
 	}
 
 	hdrlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
 	if (__predict_false(hdrlen < sizeof(*rxpkt))) {
 		if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
 		return;
 	}
 	rxpkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;
 
 	if (__predict_false(rxpkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
 		if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
 		    rxpkt->cp_rxbuf_id);
 		return;
 	}
 
 	/* The header must be large enough to hold all advertised ranges. */
 	nranges = rxpkt->cp_rxbuf_cnt;
 	if (__predict_false(hdrlen <
 	    __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[nranges]))) {
 		if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", nranges);
 		return;
 	}
 
 	NET_EPOCH_ENTER(et);
 	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
 	for (idx = 0; idx < nranges; idx++) {
 		int rb_off = rxpkt->cp_rxbuf[idx].rb_ofs;
 		int rb_len = rxpkt->cp_rxbuf[idx].rb_len;
 
 		if (__predict_false(rb_off + rb_len > HN_RXBUF_SIZE)) {
 			if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
 			    "ofs %d, len %d\n", idx, rb_off, rb_len);
 			continue;
 		}
 
 		/* Let the RSC logic know when this is the final range. */
 		rxr->rsc.is_last = (idx == (nranges - 1));
 		hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + rb_off, rb_len);
 	}
 	NET_EPOCH_EXIT(et);
 
 	/*
 	 * Ack the consumed RXBUF associated w/ this channel packet,
 	 * so that this RXBUF can be recycled by the hypervisor.
 	 */
 	hn_nvs_ack_rxbuf(rxr, chan, rxpkt->cp_hdr.cph_xactid);
 }
 
 /*
  * Send the RNDIS ack completion for transaction 'tid' on 'chan'.
  *
  * If vmbus_chan_send() reports EAGAIN the send is retried after a
  * DELAY(100), for at most 10 attempts in total; every EAGAIN bumps
  * rxr->hn_ack_failed.  Any other result ends the function immediately.
  * When all attempts fail the RXBUF is leaked and a message is logged.
  */
 static void
 hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
     uint64_t tid)
 {
 	struct hn_nvs_rndis_ack ack;
 	int attempt, rc;
 
 	ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
 	ack.nvs_status = HN_NVS_STATUS_OK;
 
 	for (attempt = 1; ; attempt++) {
 		rc = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
 		    VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
 		if (__predict_true(rc != EAGAIN))
 			return;
 
 		/*
 		 * NOTE:
 		 * This should _not_ happen in real world, since the
 		 * consumption of the TX bufring from the TX path is
 		 * controlled.
 		 */
 		if (rxr->hn_ack_failed == 0)
 			if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
 		rxr->hn_ack_failed++;
 
 		if (attempt >= 10)
 			break;
 		DELAY(100);
 	}
 
 	/* RXBUF leaks! */
 	if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
 }
 
 static void
 hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
 {
 	struct hn_rx_ring *rxr = xrxr;
 	struct hn_softc *sc = if_getsoftc(rxr->hn_ifp);
 
 	for (;;) {
 		struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
 		int error, pktlen;
 
 		pktlen = rxr->hn_pktbuf_len;
 		error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
 		if (__predict_false(error == ENOBUFS)) {
 			void *nbuf;
 			int nlen;
 
 			/*
 			 * Expand channel packet buffer.
 			 *
 			 * XXX
 			 * Use M_WAITOK here, since allocation failure
 			 * is fatal.
 			 */
 			nlen = rxr->hn_pktbuf_len * 2;
 			while (nlen < pktlen)
 				nlen *= 2;
 			nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);
 
 			if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
 			    rxr->hn_pktbuf_len, nlen);
 
 			free(rxr->hn_pktbuf, M_DEVBUF);
 			rxr->hn_pktbuf = nbuf;
 			rxr->hn_pktbuf_len = nlen;
 			/* Retry! */
 			continue;
 		} else if (__predict_false(error == EAGAIN)) {
 			/* No more channel packets; done! */
 			break;
 		}
 		KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));
 
 		switch (pkt->cph_type) {
 		case VMBUS_CHANPKT_TYPE_COMP:
 			hn_nvs_handle_comp(sc, chan, pkt);
 			break;
 
 		case VMBUS_CHANPKT_TYPE_RXBUF:
 			hn_nvs_handle_rxbuf(rxr, chan, pkt);
 			break;
 
 		case VMBUS_CHANPKT_TYPE_INBAND:
 			hn_nvs_handle_notify(sc, pkt);
 			break;
 
 		default:
 			if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
 			    pkt->cph_type);
 			break;
 		}
 	}
 	hn_chan_rollup(rxr, rxr->hn_txr);
 }
 
 /*
  * One-time driver-wide initialization, registered through SYSINIT below.
  *
  * - Allocates the hn_udpcs_fixup counter.
  * - With HN_IFSTART_SUPPORT, turns off hn_use_if_start when transparent
  *   VF mode is requested.
  * - Clamps hn_xpnt_vf_attwait to at least HN_XPNT_VF_ATTWAIT_MIN.
  * - Sets up the VF map lock and a zeroed VF map of HN_VFMAP_SIZE_DEF
  *   entries.
  * - Normalizes hn_tx_taskq_cnt to [1, mp_ncpus] and falls back to
  *   HN_TX_TASKQ_M_INDEP for an unknown hn_tx_taskq_mode.
  * - Only when running as a Hyper-V guest in HN_TX_TASKQ_M_GLOBAL mode,
  *   creates and starts the global TX taskqueues.
  */
 static void
 hn_sysinit(void *arg __unused)
 {
 	int i;
 
 	hn_udpcs_fixup = counter_u64_alloc(M_WAITOK);
 
 #ifdef HN_IFSTART_SUPPORT
 	/*
 	 * Don't use ifnet.if_start if transparent VF mode is requested;
 	 * mainly due to the IFF_DRV_OACTIVE flag.
 	 */
 	if (hn_xpnt_vf && hn_use_if_start) {
 		hn_use_if_start = 0;
 		printf("hn: transparent VF mode, if_transmit will be used, "
 		    "instead of if_start\n");
 	}
 #endif
 	if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) {
 		printf("hn: invalid transparent VF attach routing "
 		    "wait timeout %d, reset to %d\n",
 		    hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN);
 		hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
 	}
 
 	/*
 	 * Initialize VF map.
 	 */
 	rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE);
 	hn_vfmap_size = HN_VFMAP_SIZE_DEF;
 	hn_vfmap = malloc(sizeof(if_t) * hn_vfmap_size, M_DEVBUF,
 	    M_WAITOK | M_ZERO);
 
 	/*
 	 * Fix the # of TX taskqueues.
 	 */
 	if (hn_tx_taskq_cnt <= 0)
 		hn_tx_taskq_cnt = 1;
 	else if (hn_tx_taskq_cnt > mp_ncpus)
 		hn_tx_taskq_cnt = mp_ncpus;
 
 	/*
 	 * Fix the TX taskqueue mode.
 	 */
 	switch (hn_tx_taskq_mode) {
 	case HN_TX_TASKQ_M_INDEP:
 	case HN_TX_TASKQ_M_GLOBAL:
 	case HN_TX_TASKQ_M_EVTTQ:
 		break;
 	default:
 		hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
 		break;
 	}
 
 	/* Global TX taskqueues are only created on Hyper-V guests. */
 	if (vm_guest != VM_GUEST_HV)
 		return;
 
 	if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
 		return;
 
 	hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
 	    M_DEVBUF, M_WAITOK);
 	for (i = 0; i < hn_tx_taskq_cnt; ++i) {
 		hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
 		    taskqueue_thread_enqueue, &hn_tx_taskque[i]);
 		taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
 		    "hn tx%d", i);
 	}
 }
 SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL);
 
 /*
  * Driver-wide teardown, registered through SYSUNINIT below: releases the
  * global TX taskqueues (when they were created), the VF map and its
  * lock, and the hn_udpcs_fixup counter.
  */
 static void
 hn_sysuninit(void *arg __unused)
 {
 	int idx;
 
 	/* The taskqueue array only exists in global taskqueue mode. */
 	if (hn_tx_taskque != NULL) {
 		for (idx = 0; idx < hn_tx_taskq_cnt; idx++)
 			taskqueue_free(hn_tx_taskque[idx]);
 		free(hn_tx_taskque, M_DEVBUF);
 	}
 
 	if (hn_vfmap != NULL)
 		free(hn_vfmap, M_DEVBUF);
 	rm_destroy(&hn_vfmap_lock);
 
 	counter_u64_free(hn_udpcs_fixup);
 }
 SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL);
diff --git a/sys/dev/my/if_my.c b/sys/dev/my/if_my.c
index 2bf4573d337b..631c38df9dca 100644
--- a/sys/dev/my/if_my.c
+++ b/sys/dev/my/if_my.c
@@ -1,1761 +1,1761 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Written by: yen_cw@myson.com.tw
  * Copyright (c) 2002 Myson Technology Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Myson fast ethernet PCI NIC driver, available at: http://www.myson.com.tw/
  */
 
 #include <sys/cdefs.h>
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/socket.h>
 #include <sys/queue.h>
 #include <sys/types.h>
 #include <sys/module.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 
 #define NBPFILTER	1
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/ethernet.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 #include <net/if_dl.h>
 #include <net/bpf.h>
 
 #include <vm/vm.h>		/* for vtophys */
 #include <vm/pmap.h>		/* for vtophys */
 #include <machine/bus.h>
 #include <machine/resource.h>
 #include <sys/bus.h>
 #include <sys/rman.h>
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 
 /*
  * #define MY_USEIOSPACE
  */
 
 /*
  * NOTE(review): MY_USEIOSPACE is declared here as a static int variable,
  * not as a preprocessor macro (the #define above is commented out).  The
  * #ifdef below only sees macros, so unless MY_USEIOSPACE is defined as a
  * macro somewhere outside this view, the #else branch (memory-mapped
  * resource) is the one that gets compiled.  Confirm this is intended.
  */
 static int      MY_USEIOSPACE = 1;
 
 /* Select the bus resource type and PCI resource id used by the driver. */
 #ifdef MY_USEIOSPACE
 #define MY_RES                  SYS_RES_IOPORT
 #define MY_RID                  MY_PCI_LOIO
 #else
 #define MY_RES                  SYS_RES_MEMORY
 #define MY_RID                  MY_PCI_LOMEM
 #endif
 
 #include <dev/my/if_myreg.h>
 
 /*
  * Various supported device vendors/types and their names.
  *
  * my_devs is terminated by an all-zero entry with a NULL name; the
  * MODULE_PNP_INFO() declaration for this driver passes
  * nitems(my_devs) - 1 so that the terminator is not exported.
  */
 /* NOTE(review): my_info_tmp is a non-static global; its use is not visible here. */
 struct my_type *my_info_tmp;
 static struct my_type my_devs[] = {
 	{MYSONVENDORID, MTD800ID, "Myson MTD80X Based Fast Ethernet Card"},
 	{MYSONVENDORID, MTD803ID, "Myson MTD80X Based Fast Ethernet Card"},
 	{MYSONVENDORID, MTD891ID, "Myson MTD89X Based Giga Ethernet Card"},
 	{0, 0, NULL}
 };
 
 /*
  * Various supported PHY vendors/types and their names. Note that this driver
  * will work with pretty much any MII-compliant PHY, so failure to positively
  * identify the chip is not a fatal error.
  *
  * Each known PHY row repeats the same PHY ID constant in its first two
  * fields.  The final row (ids 0, 0) carries the generic name used for an
  * unrecognized MII-compliant PHY.
  */
 static struct my_type my_phys[] = {
 	{MysonPHYID0, MysonPHYID0, "<MYSON MTD981>"},
 	{SeeqPHYID0, SeeqPHYID0, "<SEEQ 80225>"},
 	{AhdocPHYID0, AhdocPHYID0, "<AHDOC 101>"},
 	{MarvellPHYID0, MarvellPHYID0, "<MARVELL 88E1000>"},
 	{LevelOnePHYID0, LevelOnePHYID0, "<LevelOne LXT1000>"},
 	{0, 0, "<MII-compliant physical interface>"}
 };
 
 /*
  * Forward declarations of the file-local driver routines: newbus entry
  * points, RX/TX and interrupt handling, ifnet/ifmedia callbacks, and the
  * PHY/MII helpers.
  */
 static int      my_probe(device_t);
 static int      my_attach(device_t);
 static int      my_detach(device_t);
 static int      my_newbuf(struct my_softc *, struct my_chain_onefrag *);
 static int      my_encap(struct my_softc *, struct my_chain *, struct mbuf *);
 static void     my_rxeof(struct my_softc *);
 static void     my_txeof(struct my_softc *);
 static void     my_txeoc(struct my_softc *);
 static void     my_intr(void *);
 static void     my_start(if_t);
 static void     my_start_locked(if_t);
 static int      my_ioctl(if_t, u_long, caddr_t);
 static void     my_init(void *);
 static void     my_init_locked(struct my_softc *);
 static void     my_stop(struct my_softc *);
 static void     my_autoneg_timeout(void *);
 static void     my_watchdog(void *);
 static int      my_shutdown(device_t);
 static int      my_ifmedia_upd(if_t);
 static void     my_ifmedia_sts(if_t, struct ifmediareq *);
 static u_int16_t my_phy_readreg(struct my_softc *, int);
 static void     my_phy_writereg(struct my_softc *, int, int);
 static void     my_autoneg_xmit(struct my_softc *);
 static void     my_autoneg_mii(struct my_softc *, int, int);
 static void     my_setmode_mii(struct my_softc *, int);
 static void     my_getmode_mii(struct my_softc *);
 static void     my_setcfg(struct my_softc *, int);
 static void     my_setmulti(struct my_softc *);
 static void     my_reset(struct my_softc *);
 static int      my_list_rx_init(struct my_softc *);
 static int      my_list_tx_init(struct my_softc *);
 static long     my_send_cmd_to_phy(struct my_softc *, int, int);
 
 /*
  * Read-modify-write helpers for 32-bit CSRs: MY_SETBIT ORs the bits in
  * 'x' into register 'reg', MY_CLRBIT masks them out.  Both expand 'sc'
  * and 'reg' twice, so arguments should be free of side effects.
  */
 #define MY_SETBIT(sc, reg, x) CSR_WRITE_4(sc, reg, CSR_READ_4(sc, reg) | (x))
 #define MY_CLRBIT(sc, reg, x) CSR_WRITE_4(sc, reg, CSR_READ_4(sc, reg) & ~(x))
 
 /* Newbus method table: probe/attach/detach/shutdown entry points. */
 static device_method_t my_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe, my_probe),
 	DEVMETHOD(device_attach, my_attach),
 	DEVMETHOD(device_detach, my_detach),
 	DEVMETHOD(device_shutdown, my_shutdown),
 
 	DEVMETHOD_END
 };
 
 /* Driver description: name "my", the methods above, softc size. */
 static driver_t my_driver = {
 	"my",
 	my_methods,
 	sizeof(struct my_softc)
 };
 
 /*
  * Register the driver on the pci bus, export the PCI id table for PNP
  * matching (last, all-zero my_devs entry excluded) and declare the
  * module dependencies on pci and ether.
  */
 DRIVER_MODULE(my, pci, my_driver, 0, 0);
 MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, my, my_devs,
     nitems(my_devs) - 1);
 MODULE_DEPEND(my, pci, 1, 1, 1);
 MODULE_DEPEND(my, ether, 1, 1, 1);
 
 /*
  * Bit-bang the leading part of an MII management frame to the PHY via the
  * MY_MANAGEMENT register: a preamble of 32 one-bits followed by the 16
  * bits built from 'opcode', the softc's PHY address and 'regad'.
  *
  * For MY_OP_READ the MII write bit is dropped from the working register
  * value right before the last two bits are clocked out.  Returns the last
  * register value written so the caller can continue the transaction.
  * The softc lock must be held.
  */
 static long
 my_send_cmd_to_phy(struct my_softc * sc, int opcode, int regad)
 {
 	long            miir;
 	int             clk;
 	int             bit, frame;
 
 	MY_LOCK_ASSERT(sc);
 
 	/* enable MII output */
 	miir = CSR_READ_4(sc, MY_MANAGEMENT);
 	miir &= 0xfffffff0;
 	miir |= MY_MASK_MIIR_MII_WRITE + MY_MASK_MIIR_MII_MDO;
 
 	/* preamble: 32 clock pulses while MDO stays high */
 	for (clk = 0; clk < 32; clk++) {
 		/* falling edge */
 		miir &= ~MY_MASK_MIIR_MII_MDC;
 		CSR_WRITE_4(sc, MY_MANAGEMENT, miir);
 
 		/* rising edge */
 		miir |= MY_MASK_MIIR_MII_MDC;
 		CSR_WRITE_4(sc, MY_MANAGEMENT, miir);
 	}
 
 	/* ST+OP+PHYAD+REGAD+TA */
 	frame = opcode | (sc->my_phy_addr << 7) | (regad << 2);
 
 	/* shift the frame out, most significant bit first */
 	for (bit = 0x8000; bit != 0; bit >>= 1) {
 		/* MDC low, MDO set up for this bit */
 		miir &= ~(MY_MASK_MIIR_MII_MDC + MY_MASK_MIIR_MII_MDO);
 		if (frame & bit)
 			miir |= MY_MASK_MIIR_MII_MDO;
 		CSR_WRITE_4(sc, MY_MANAGEMENT, miir);
 
 		/* MDC high */
 		miir |= MY_MASK_MIIR_MII_MDC;
 		CSR_WRITE_4(sc, MY_MANAGEMENT, miir);
 		DELAY(30);
 
 		/* for reads, stop driving once the next bit is 0x2 */
 		if ((bit >> 1) == 0x2 && opcode == MY_OP_READ)
 			miir &= ~MY_MASK_MIIR_MII_WRITE;
 	}
 
 	return miir;
 }
 
 /*
  * Read PHY register 'reg' and return its 16-bit value.
  *
  * On the MTD803ID device the register is read directly with a 16-bit
  * CSR access at MY_PHYBASE + reg * 2.  Otherwise the read command is sent
  * with my_send_cmd_to_phy() and 16 data bits are clocked in, most
  * significant bit first, from the MDI bit of MY_MANAGEMENT.
  * The softc lock must be held.
  */
 static u_int16_t
 my_phy_readreg(struct my_softc * sc, int reg)
 {
 	long            miir;
 	int             bit, data;
 
 	MY_LOCK_ASSERT(sc);
 
 	if (sc->my_info->my_did == MTD803ID) {
 		data = CSR_READ_2(sc, MY_PHYBASE + reg * 2);
 		return (u_int16_t) data;
 	}
 
 	miir = my_send_cmd_to_phy(sc, MY_OP_READ, reg);
 
 	/* clock in the 16 data bits */
 	data = 0;
 	for (bit = 0x8000; bit != 0; bit >>= 1) {
 		/* MDC low */
 		miir &= ~MY_MASK_MIIR_MII_MDC;
 		CSR_WRITE_4(sc, MY_MANAGEMENT, miir);
 
 		/* sample MDI */
 		miir = CSR_READ_4(sc, MY_MANAGEMENT);
 		if (miir & MY_MASK_MIIR_MII_MDI)
 			data |= bit;
 
 		/* MDC high, then wait */
 		miir |= MY_MASK_MIIR_MII_MDC;
 		CSR_WRITE_4(sc, MY_MANAGEMENT, miir);
 		DELAY(30);
 	}
 
 	/* leave MDC low */
 	miir &= ~MY_MASK_MIIR_MII_MDC;
 	CSR_WRITE_4(sc, MY_MANAGEMENT, miir);
 
 	return (u_int16_t) data;
 }
 
 /*
  * Write the low 16 bits of 'data' to PHY register 'reg'.
  *
  * On the MTD803ID device the register is written directly with a 16-bit
  * CSR access at MY_PHYBASE + reg * 2.  Otherwise the write command is
  * sent with my_send_cmd_to_phy() and the 16 data bits are clocked out,
  * most significant bit first, on the MDO bit of MY_MANAGEMENT.
  * The softc lock must be held.
  */
 static void
 my_phy_writereg(struct my_softc * sc, int reg, int data)
 {
 	long            miir;
 	int             bit;
 
 	MY_LOCK_ASSERT(sc);
 
 	if (sc->my_info->my_did == MTD803ID) {
 		CSR_WRITE_2(sc, MY_PHYBASE + reg * 2, data);
 		return;
 	}
 
 	miir = my_send_cmd_to_phy(sc, MY_OP_WRITE, reg);
 
 	/* clock out the 16 data bits */
 	for (bit = 0x8000; bit != 0; bit >>= 1) {
 		/* MDC low, MDO set up for this bit */
 		miir &= ~(MY_MASK_MIIR_MII_MDC + MY_MASK_MIIR_MII_MDO);
 		if (data & bit)
 			miir |= MY_MASK_MIIR_MII_MDO;
 		CSR_WRITE_4(sc, MY_MANAGEMENT, miir);
 		DELAY(1);
 
 		/* MDC high */
 		miir |= MY_MASK_MIIR_MII_MDC;
 		CSR_WRITE_4(sc, MY_MANAGEMENT, miir);
 		DELAY(1);
 	}
 
 	/* leave MDC low */
 	miir &= ~MY_MASK_MIIR_MII_MDC;
 	CSR_WRITE_4(sc, MY_MANAGEMENT, miir);
 }
 
 /*
  * if_foreach_llmaddr() callback: fold one link-level multicast address
  * into the 64-bit hash filter.
  *
  * arg - pointer to a two-element uint32_t array (low word, high word).
  * sdl - the multicast address being visited.
  * cnt - running count supplied by the iterator; unused here.
  *
  * The bit index is the top 6 bits of the inverted big-endian CRC32 of
  * the address.  Always returns 1 so the iterator counts the address.
  */
 static u_int
 my_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
 {
 	uint32_t *hashes = arg;
 	int h;
 
 	h = ~ether_crc32_be(LLADDR(sdl), ETHER_ADDR_LEN) >> 26;
 	/*
 	 * Shift an unsigned 1: with a signed int, a shift count of 31
 	 * would move a bit into the sign position, which is undefined.
 	 */
 	if (h < 32)
 		hashes[0] |= (1U << h);
 	else
 		hashes[1] |= (1U << (h - 32));
 
 	return (1);
 }
 /*
  * Program the 64-bit multicast hash filter.
  *
  * With IFF_ALLMULTI or IFF_PROMISC set, multicast reception is enabled
  * and both hash registers are filled with ones.  Otherwise the hash
  * registers are cleared, rebuilt from the interface's link-level
  * multicast addresses, and MY_AM is set only when at least one address
  * was found.  The softc lock must be held.
  */
 static void
 my_setmulti(struct my_softc * sc)
 {
 	u_int32_t       hashes[2] = {0, 0};
 	u_int32_t       rxfilt;
 	if_t		ifp = sc->my_ifp;
 
 	MY_LOCK_ASSERT(sc);
 
 	rxfilt = CSR_READ_4(sc, MY_TCRRCR);
 
 	if ((if_getflags(ifp) & (IFF_ALLMULTI | IFF_PROMISC)) != 0) {
 		/* accept everything: all hash bits on */
 		rxfilt |= MY_AM;
 		CSR_WRITE_4(sc, MY_TCRRCR, rxfilt);
 		CSR_WRITE_4(sc, MY_MAR0, 0xFFFFFFFF);
 		CSR_WRITE_4(sc, MY_MAR1, 0xFFFFFFFF);
 		return;
 	}
 
 	/* wipe the current hash bits before rebuilding them */
 	CSR_WRITE_4(sc, MY_MAR0, 0);
 	CSR_WRITE_4(sc, MY_MAR1, 0);
 
 	/* collect the new hash bits; MY_AM follows the address count */
 	if (if_foreach_llmaddr(ifp, my_hash_maddr, hashes) == 0)
 		rxfilt &= ~MY_AM;
 	else
 		rxfilt |= MY_AM;
 
 	CSR_WRITE_4(sc, MY_MAR0, hashes[0]);
 	CSR_WRITE_4(sc, MY_MAR1, hashes[1]);
 	CSR_WRITE_4(sc, MY_TCRRCR, rxfilt);
 }
 
 /*
  * Initiate an autonegotiation session.
  *
  * The PHY is reset through PHY_BMCR, the reset bit is polled until it
  * clears (there is no timeout on this wait), and then autonegotiation is
  * enabled and restarted.  The softc lock must be held.
  */
 static void
 my_autoneg_xmit(struct my_softc * sc)
 {
 	u_int16_t       bmcr;
 
 	MY_LOCK_ASSERT(sc);
 
 	my_phy_writereg(sc, PHY_BMCR, PHY_BMCR_RESET);
 	DELAY(500);
 	/* spin until the PHY reports that the reset has finished */
 	while (my_phy_readreg(sc, PHY_BMCR) & PHY_BMCR_RESET)
 		continue;
 
 	bmcr = my_phy_readreg(sc, PHY_BMCR);
 	bmcr |= PHY_BMCR_AUTONEGENBL | PHY_BMCR_AUTONEGRSTR;
 	my_phy_writereg(sc, PHY_BMCR, bmcr);
 }
 
 /*
  * Callout handler for the autonegotiation timer: finishes the session by
  * calling my_autoneg_mii() with MY_FLAG_DELAYTIMEO (verbose).  Expects
  * the softc lock to be held when it runs.
  */
 static void
 my_autoneg_timeout(void *arg)
 {
 	struct my_softc *sc = arg;
 
 	MY_LOCK_ASSERT(sc);
 	my_autoneg_mii(sc, MY_FLAG_DELAYTIMEO, 1);
 }
 
 /*
  * Invoke autonegotiation on a PHY.
  */
 static void
 my_autoneg_mii(struct my_softc * sc, int flag, int verbose)
 {
 	u_int16_t       phy_sts = 0, media, advert, ability;
 	u_int16_t       ability2 = 0;
 	if_t		ifp;
 	struct ifmedia *ifm;
 
 	MY_LOCK_ASSERT(sc);
 
 	ifm = &sc->ifmedia;
 	ifp = sc->my_ifp;
 
 	ifm->ifm_media = IFM_ETHER | IFM_AUTO;
 
 #ifndef FORCE_AUTONEG_TFOUR
 	/*
 	 * First, see if autoneg is supported. If not, there's no point in
 	 * continuing.
 	 */
 	phy_sts = my_phy_readreg(sc, PHY_BMSR);
 	if (!(phy_sts & PHY_BMSR_CANAUTONEG)) {
 		if (verbose)
 			device_printf(sc->my_dev,
 			    "autonegotiation not supported\n");
 		ifm->ifm_media = IFM_ETHER | IFM_10_T | IFM_HDX;
 		return;
 	}
 #endif
 	switch (flag) {
 	case MY_FLAG_FORCEDELAY:
 		/*
 		 * XXX Never use this option anywhere but in the probe
 		 * routine: making the kernel stop dead in its tracks for
 		 * three whole seconds after we've gone multi-user is really
 		 * bad manners.
 		 */
 		my_autoneg_xmit(sc);
 		DELAY(5000000);
 		break;
 	case MY_FLAG_SCHEDDELAY:
 		/*
 		 * Wait for the transmitter to go idle before starting an
 		 * autoneg session, otherwise my_start() may clobber our
 		 * timeout, and we don't want to allow transmission during an
 		 * autoneg session since that can screw it up.
 		 */
 		if (sc->my_cdata.my_tx_head != NULL) {
 			sc->my_want_auto = 1;
 			MY_UNLOCK(sc);
 			return;
 		}
 		my_autoneg_xmit(sc);
 		callout_reset(&sc->my_autoneg_timer, hz * 5, my_autoneg_timeout,
 		    sc);
 		sc->my_autoneg = 1;
 		sc->my_want_auto = 0;
 		return;
 	case MY_FLAG_DELAYTIMEO:
 		callout_stop(&sc->my_autoneg_timer);
 		sc->my_autoneg = 0;
 		break;
 	default:
 		device_printf(sc->my_dev, "invalid autoneg flag: %d\n", flag);
 		return;
 	}
 
 	if (my_phy_readreg(sc, PHY_BMSR) & PHY_BMSR_AUTONEGCOMP) {
 		if (verbose)
 			device_printf(sc->my_dev, "autoneg complete, ");
 		phy_sts = my_phy_readreg(sc, PHY_BMSR);
 	} else {
 		if (verbose)
 			device_printf(sc->my_dev, "autoneg not complete, ");
 	}
 
 	media = my_phy_readreg(sc, PHY_BMCR);
 
 	/* Link is good. Report modes and set duplex mode. */
 	if (my_phy_readreg(sc, PHY_BMSR) & PHY_BMSR_LINKSTAT) {
 		if (verbose)
 			device_printf(sc->my_dev, "link status good. ");
 		advert = my_phy_readreg(sc, PHY_ANAR);
 		ability = my_phy_readreg(sc, PHY_LPAR);
 		if ((sc->my_pinfo->my_vid == MarvellPHYID0) ||
 		    (sc->my_pinfo->my_vid == LevelOnePHYID0)) {
 			ability2 = my_phy_readreg(sc, PHY_1000SR);
 			if (ability2 & PHY_1000SR_1000BTXFULL) {
 				advert = 0;
 				ability = 0;
 				/*
 				 * this version did not support 1000M,
 				 * ifm->ifm_media =
 				 * IFM_ETHER|IFM_1000_T|IFM_FDX;
 				 */
 				ifm->ifm_media =
 				    IFM_ETHER | IFM_100_TX | IFM_FDX;
 				media &= ~PHY_BMCR_SPEEDSEL;
 				media |= PHY_BMCR_1000;
 				media |= PHY_BMCR_DUPLEX;
 				printf("(full-duplex, 1000Mbps)\n");
 			} else if (ability2 & PHY_1000SR_1000BTXHALF) {
 				advert = 0;
 				ability = 0;
 				/*
 				 * this version did not support 1000M,
 				 * ifm->ifm_media = IFM_ETHER|IFM_1000_T;
 				 */
 				ifm->ifm_media = IFM_ETHER | IFM_100_TX;
 				media &= ~PHY_BMCR_SPEEDSEL;
 				media &= ~PHY_BMCR_DUPLEX;
 				media |= PHY_BMCR_1000;
 				printf("(half-duplex, 1000Mbps)\n");
 			}
 		}
 		if (advert & PHY_ANAR_100BT4 && ability & PHY_ANAR_100BT4) {
 			ifm->ifm_media = IFM_ETHER | IFM_100_T4;
 			media |= PHY_BMCR_SPEEDSEL;
 			media &= ~PHY_BMCR_DUPLEX;
 			printf("(100baseT4)\n");
 		} else if (advert & PHY_ANAR_100BTXFULL &&
 			   ability & PHY_ANAR_100BTXFULL) {
 			ifm->ifm_media = IFM_ETHER | IFM_100_TX | IFM_FDX;
 			media |= PHY_BMCR_SPEEDSEL;
 			media |= PHY_BMCR_DUPLEX;
 			printf("(full-duplex, 100Mbps)\n");
 		} else if (advert & PHY_ANAR_100BTXHALF &&
 			   ability & PHY_ANAR_100BTXHALF) {
 			ifm->ifm_media = IFM_ETHER | IFM_100_TX | IFM_HDX;
 			media |= PHY_BMCR_SPEEDSEL;
 			media &= ~PHY_BMCR_DUPLEX;
 			printf("(half-duplex, 100Mbps)\n");
 		} else if (advert & PHY_ANAR_10BTFULL &&
 			   ability & PHY_ANAR_10BTFULL) {
 			ifm->ifm_media = IFM_ETHER | IFM_10_T | IFM_FDX;
 			media &= ~PHY_BMCR_SPEEDSEL;
 			media |= PHY_BMCR_DUPLEX;
 			printf("(full-duplex, 10Mbps)\n");
 		} else if (advert) {
 			ifm->ifm_media = IFM_ETHER | IFM_10_T | IFM_HDX;
 			media &= ~PHY_BMCR_SPEEDSEL;
 			media &= ~PHY_BMCR_DUPLEX;
 			printf("(half-duplex, 10Mbps)\n");
 		}
 		media &= ~PHY_BMCR_AUTONEGENBL;
 
 		/* Set ASIC's duplex mode to match the PHY. */
 		my_phy_writereg(sc, PHY_BMCR, media);
 		my_setcfg(sc, media);
 	} else {
 		if (verbose)
 			device_printf(sc->my_dev, "no carrier\n");
 	}
 
 	my_init_locked(sc);
 	if (sc->my_tx_pend) {
 		sc->my_autoneg = 0;
 		sc->my_tx_pend = 0;
 		my_start_locked(ifp);
 	}
 	return;
 }
 
 /*
  * To get PHY ability.
  *
  * Reads PHY_BMSR and registers an ifmedia entry for every mode the
  * status word advertises.  sc->ifmedia.ifm_media starts at 10baseT
  * half-duplex and is overwritten by each later matching mode, so the
  * last matching test below decides the final value.  The interface baud
  * rate is set to 100Mbps when any 100Mbps mode is reported.
  * The softc lock must be held.
  */
 static void
 my_getmode_mii(struct my_softc * sc)
 {
 	u_int16_t       bmsr;
 	if_t		ifp;
 
 	MY_LOCK_ASSERT(sc);
 	ifp = sc->my_ifp;
 	bmsr = my_phy_readreg(sc, PHY_BMSR);
 	if (bootverbose)
 		device_printf(sc->my_dev, "PHY status word: %x\n", bmsr);
 
 	/* fallback */
 	sc->ifmedia.ifm_media = IFM_ETHER | IFM_10_T | IFM_HDX;
 
 	if (bmsr & PHY_BMSR_10BTHALF) {
 		if (bootverbose)
 			device_printf(sc->my_dev,
 			    "10Mbps half-duplex mode supported\n");
 		ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_10_T | IFM_HDX,
 		    0, NULL);
 		ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_10_T, 0, NULL);
 	}
 	if (bmsr & PHY_BMSR_10BTFULL) {
 		if (bootverbose)
 			device_printf(sc->my_dev,
 			    "10Mbps full-duplex mode supported\n");
 
 		ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_10_T | IFM_FDX,
 		    0, NULL);
 		sc->ifmedia.ifm_media = IFM_ETHER | IFM_10_T | IFM_FDX;
 	}
 	if (bmsr & PHY_BMSR_100BTXHALF) {
 		if (bootverbose)
 			device_printf(sc->my_dev,
 			    "100Mbps half-duplex mode supported\n");
 		if_setbaudrate(ifp, 100000000);
 		ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_100_TX, 0, NULL);
 		ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_100_TX | IFM_HDX,
 			    0, NULL);
 		sc->ifmedia.ifm_media = IFM_ETHER | IFM_100_TX | IFM_HDX;
 	}
 	if (bmsr & PHY_BMSR_100BTXFULL) {
 		if (bootverbose)
 			device_printf(sc->my_dev,
 			    "100Mbps full-duplex mode supported\n");
 		if_setbaudrate(ifp, 100000000);
 		ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_100_TX | IFM_FDX,
 		    0, NULL);
 		sc->ifmedia.ifm_media = IFM_ETHER | IFM_100_TX | IFM_FDX;
 	}
 	/* Some also support 100BaseT4. */
 	if (bmsr & PHY_BMSR_100BT4) {
 		if (bootverbose)
 			device_printf(sc->my_dev, "100baseT4 mode supported\n");
 		if_setbaudrate(ifp, 100000000);
 		ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_100_T4, 0, NULL);
 		sc->ifmedia.ifm_media = IFM_ETHER | IFM_100_T4;
 #ifdef FORCE_AUTONEG_TFOUR
 		if (bootverbose)
 			device_printf(sc->my_dev,
 			    "forcing on autoneg support for BT4\n");
 		ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_AUTO, 0, NULL);
 		sc->ifmedia.ifm_media = IFM_ETHER | IFM_AUTO;
 #endif
 	}
 #if 0				/* this version did not support 1000M, */
 	if (sc->my_pinfo->my_vid == MarvellPHYID0) {
 		if (bootverbose)
 			device_printf(sc->my_dev,
 			    "1000Mbps half-duplex mode supported\n");
 
 		if_setbaudrate(ifp, 1000000000);
 		ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_1000_T, 0, NULL);
 		ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_1000_T | IFM_HDX,
 		    0, NULL);
 		if (bootverbose)
 			device_printf(sc->my_dev,
 			    "1000Mbps full-duplex mode supported\n");
 		if_setbaudrate(ifp, 1000000000);
 		ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_1000_T | IFM_FDX,
 		    0, NULL);
 		sc->ifmedia.ifm_media = IFM_ETHER | IFM_1000_T | IFM_FDX;
 	}
 #endif
 	/* Autoneg, when available, wins over any fixed mode chosen above. */
 	if (bmsr & PHY_BMSR_CANAUTONEG) {
 		if (bootverbose)
 			device_printf(sc->my_dev, "autoneg supported\n");
 		ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_AUTO, 0, NULL);
 		sc->ifmedia.ifm_media = IFM_ETHER | IFM_AUTO;
 	}
 	return;
 }
 
 /*
  * Set speed and duplex mode.
  *
  * Any autonegotiation session in progress is cancelled first.  The
  * speed, duplex, autoneg and loopback bits of PHY_BMCR are then cleared
  * and rebuilt from the ifmedia word 'media'; the result is written to
  * the PHY and passed to my_setcfg().  The softc lock must be held.
  */
 static void
 my_setmode_mii(struct my_softc * sc, int media)
 {
 	u_int16_t       bmcr;
 
 	MY_LOCK_ASSERT(sc);
 
 	/* An autoneg session still running has to be stopped first. */
 	if (sc->my_autoneg) {
 		device_printf(sc->my_dev, "canceling autoneg session\n");
 		callout_stop(&sc->my_autoneg_timer);
 		sc->my_autoneg = sc->my_want_auto = 0;
 		bmcr = my_phy_readreg(sc, PHY_BMCR);
 		bmcr &= ~PHY_BMCR_AUTONEGENBL;
 		my_phy_writereg(sc, PHY_BMCR, bmcr);
 	}
 	device_printf(sc->my_dev, "selecting MII, ");
 	bmcr = my_phy_readreg(sc, PHY_BMCR);
 	bmcr &= ~(PHY_BMCR_AUTONEGENBL | PHY_BMCR_SPEEDSEL | PHY_BMCR_1000 |
 		  PHY_BMCR_DUPLEX | PHY_BMCR_LOOPBK);
 
 #if 0				/* this version did not support 1000M, */
 	if (IFM_SUBTYPE(media) == IFM_1000_T) {
 		printf("1000Mbps/T4, half-duplex\n");
 		bmcr &= ~PHY_BMCR_SPEEDSEL;
 		bmcr &= ~PHY_BMCR_DUPLEX;
 		bmcr |= PHY_BMCR_1000;
 	}
 #endif
 	/* Speed selection by media subtype. */
 	switch (IFM_SUBTYPE(media)) {
 	case IFM_100_T4:
 		printf("100Mbps/T4, half-duplex\n");
 		bmcr |= PHY_BMCR_SPEEDSEL;
 		bmcr &= ~PHY_BMCR_DUPLEX;
 		break;
 	case IFM_100_TX:
 		printf("100Mbps, ");
 		bmcr |= PHY_BMCR_SPEEDSEL;
 		break;
 	case IFM_10_T:
 		printf("10Mbps, ");
 		bmcr &= ~PHY_BMCR_SPEEDSEL;
 		break;
 	default:
 		break;
 	}
 
 	/* Duplex selection from the global media options. */
 	if ((media & IFM_GMASK) != IFM_FDX) {
 		printf("half duplex\n");
 		bmcr &= ~PHY_BMCR_DUPLEX;
 	} else {
 		printf("full duplex\n");
 		bmcr |= PHY_BMCR_DUPLEX;
 	}
 
 	my_phy_writereg(sc, PHY_BMCR, bmcr);
 	my_setcfg(sc, bmcr);
 }
 
 /*
  * The Myson manual states that in order to fiddle with the 'full-duplex' and
  * '100Mbps' bits in the netconfig register, we first have to put the
  * transmit and/or receive logic in the idle state.
  *
  * 'bmcr' is a PHY BMCR value; its 1000, speed-select and duplex bits decide
  * which of MY_PS1000, MY_PS10 and MY_FD end up set in MY_TCRRCR.  Asserts
  * that the softc lock is held.
  */
 static void
 my_setcfg(struct my_softc * sc, int bmcr)
 {
 	int             i, restart = 0;
 
 	MY_LOCK_ASSERT(sc);
 	/*
 	 * If either the transmitter or the receiver is enabled, disable both
 	 * and poll (up to MY_TIMEOUT iterations, 10us apart) until the
 	 * run bits clear.
 	 */
 	if (CSR_READ_4(sc, MY_TCRRCR) & (MY_TE | MY_RE)) {
 		restart = 1;
 		MY_CLRBIT(sc, MY_TCRRCR, (MY_TE | MY_RE));
 		for (i = 0; i < MY_TIMEOUT; i++) {
 			DELAY(10);
 			if (!(CSR_READ_4(sc, MY_TCRRCR) &
 			    (MY_TXRUN | MY_RXRUN)))
 				break;
 		}
 		/* A timeout is only reported; configuration continues. */
 		if (i == MY_TIMEOUT)
 			device_printf(sc->my_dev,
 			    "failed to force tx and rx to idle \n");
 	}
 	/* Clear both speed bits, then set at most one of them. */
 	MY_CLRBIT(sc, MY_TCRRCR, MY_PS1000);
 	MY_CLRBIT(sc, MY_TCRRCR, MY_PS10);
 	if (bmcr & PHY_BMCR_1000)
 		MY_SETBIT(sc, MY_TCRRCR, MY_PS1000);
 	else if (!(bmcr & PHY_BMCR_SPEEDSEL))
 		MY_SETBIT(sc, MY_TCRRCR, MY_PS10);
 	if (bmcr & PHY_BMCR_DUPLEX)
 		MY_SETBIT(sc, MY_TCRRCR, MY_FD);
 	else
 		MY_CLRBIT(sc, MY_TCRRCR, MY_FD);
 	/* Both enable bits are set here, even if only one was set on entry. */
 	if (restart)
 		MY_SETBIT(sc, MY_TCRRCR, MY_TE | MY_RE);
 	return;
 }
 
 /*
  * Issue a software reset by setting MY_SWR in MY_BCR and poll, in 10us
  * steps for at most MY_TIMEOUT iterations, until the bit reads back clear.
  * A timeout is only reported.  Asserts that the softc lock is held.
  */
 static void
 my_reset(struct my_softc * sc)
 {
 	int    tries = 0;
 
 	MY_LOCK_ASSERT(sc);
 	MY_SETBIT(sc, MY_BCR, MY_SWR);
 	while (tries < MY_TIMEOUT) {
 		DELAY(10);
 		if ((CSR_READ_4(sc, MY_BCR) & MY_SWR) == 0)
 			break;
 		tries++;
 	}
 	if (tries == MY_TIMEOUT)
 		device_printf(sc->my_dev, "reset never completed!\n");
 
 	/* Give the chip a moment to settle before anyone touches it again. */
 	DELAY(1000);
 }
 
 /*
  * Probe for a Myson chip.  Walk the my_devs table (terminated by an entry
  * with a NULL name) looking for the device's PCI vendor/device ID pair.  On
  * a match, set the device description, remember the matching entry in
  * my_info_tmp and return BUS_PROBE_DEFAULT; otherwise return ENXIO.
  */
 static int
 my_probe(device_t dev)
 {
 	struct my_type *cand;
 
 	for (cand = my_devs; cand->my_name != NULL; cand++) {
 		if (pci_get_vendor(dev) != cand->my_vid)
 			continue;
 		if (pci_get_device(dev) != cand->my_did)
 			continue;
 		device_set_desc(dev, cand->my_name);
 		my_info_tmp = cand;
 		return (BUS_PROBE_DEFAULT);
 	}
 	return (ENXIO);
 }
 
 /*
  * Attach the interface. Allocate softc structures, do ifmedia setup and
  * ethernet/BPF attach.
  *
  * Returns 0 on success.  On failure an errno value is returned after the
  * labels at the end of the function release, in reverse order, whatever had
  * been acquired up to the point of failure.
  */
 static int
 my_attach(device_t dev)
 {
 	int             i;
 	u_char          eaddr[ETHER_ADDR_LEN];
 	u_int32_t       iobase;
 	struct my_softc *sc;
 	if_t		ifp;
 	int             media = IFM_ETHER | IFM_100_TX | IFM_FDX;
 	unsigned int    round;
 	caddr_t         roundptr;
 	struct my_type *p;
 	u_int16_t       phy_vid, phy_did, phy_sts = 0;
 	int             rid, error = 0;
 
 	sc = device_get_softc(dev);
 	sc->my_dev = dev;
 	mtx_init(&sc->my_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
 	    MTX_DEF);
 	/* Both callouts are associated with the softc mutex. */
 	callout_init_mtx(&sc->my_autoneg_timer, &sc->my_mtx, 0);
 	callout_init_mtx(&sc->my_watchdog, &sc->my_mtx, 0);
 
 	/*
 	 * Map control/status registers.
 	 */
 	pci_enable_busmaster(dev);
 
 	/*
 	 * my_info_tmp is the device table entry recorded by my_probe().
 	 * NOTE(review): it is a single file-level variable, so this presumably
 	 * relies on attach following the probe of the same device -- confirm.
 	 */
 	if (my_info_tmp->my_did == MTD800ID) {
 		iobase = pci_read_config(dev, MY_PCI_LOIO, 4);
 		if (iobase & 0x300)
 			MY_USEIOSPACE = 0;
 	}
 
 	rid = MY_RID;
 	sc->my_res = bus_alloc_resource_any(dev, MY_RES, &rid, RF_ACTIVE);
 
 	if (sc->my_res == NULL) {
 		device_printf(dev, "couldn't map ports/memory\n");
 		error = ENXIO;
 		goto destroy_mutex;
 	}
 	sc->my_btag = rman_get_bustag(sc->my_res);
 	sc->my_bhandle = rman_get_bushandle(sc->my_res);
 
 	rid = 0;
 	sc->my_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 					    RF_SHAREABLE | RF_ACTIVE);
 
 	if (sc->my_irq == NULL) {
 		device_printf(dev, "couldn't map interrupt\n");
 		error = ENXIO;
 		goto release_io;
 	}
 
 	sc->my_info = my_info_tmp;
 
 	/* Reset the adapter. */
 	MY_LOCK(sc);
 	my_reset(sc);
 	MY_UNLOCK(sc);
 
 	/*
 	 * Get station address
 	 */
 	for (i = 0; i < ETHER_ADDR_LEN; ++i)
 		eaddr[i] = CSR_READ_1(sc, MY_PAR0 + i);
 
 	/*
 	 * Allocate the descriptor list area with 8 spare bytes so that the
 	 * start can be moved forward to an 8-byte boundary below.  The
 	 * unadjusted pointer is kept in my_ldata_ptr for free().
 	 */
 	sc->my_ldata_ptr = malloc(sizeof(struct my_list_data) + 8,
 				  M_DEVBUF, M_NOWAIT);
 	if (sc->my_ldata_ptr == NULL) {
 		device_printf(dev, "no memory for list buffers!\n");
 		error = ENXIO;
 		goto release_irq;
 	}
 	sc->my_ldata = (struct my_list_data *) sc->my_ldata_ptr;
 	round = (uintptr_t)sc->my_ldata_ptr & 0xF;
 	roundptr = sc->my_ldata_ptr;
 	/* Step forward one byte at a time until the address is 8-aligned. */
 	for (i = 0; i < 8; i++) {
 		if (round % 8) {
 			round++;
 			roundptr++;
 		} else
 			break;
 	}
 	sc->my_ldata = (struct my_list_data *) roundptr;
 	bzero(sc->my_ldata, sizeof(struct my_list_data));
 
 	ifp = sc->my_ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		device_printf(dev, "can not if_alloc()\n");
 		error = ENOSPC;
 		goto free_ldata;
 	}
 	if_setsoftc(ifp, sc);
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
 	if_setioctlfn(ifp, my_ioctl);
 	if_setstartfn(ifp, my_start);
 	if_setinitfn(ifp, my_init);
 	if_setbaudrate(ifp, 10000000);
 	if_setsendqlen(ifp, ifqmaxlen);
 	if_setsendqready(ifp);
 
 	/*
 	 * Pick the PHY description.  The MTD803 uses the first my_phys entry
 	 * without probing; for every other chip the PHY addresses are scanned
 	 * for one whose BMSR reads as something other than 0 or 0xffff.
 	 */
 	if (sc->my_info->my_did == MTD803ID)
 		sc->my_pinfo = my_phys;
 	else {
 		if (bootverbose)
 			device_printf(dev, "probing for a PHY\n");
 		MY_LOCK(sc);
 		for (i = MY_PHYADDR_MIN; i < MY_PHYADDR_MAX + 1; i++) {
 			if (bootverbose)
 				device_printf(dev, "checking address: %d\n", i);
 			sc->my_phy_addr = i;
 			phy_sts = my_phy_readreg(sc, PHY_BMSR);
 			if ((phy_sts != 0) && (phy_sts != 0xffff))
 				break;
 			else
 				phy_sts = 0;
 		}
 		if (phy_sts) {
 			phy_vid = my_phy_readreg(sc, PHY_VENID);
 			phy_did = my_phy_readreg(sc, PHY_DEVID);
 			if (bootverbose) {
 				device_printf(dev, "found PHY at address %d, ",
 				    sc->my_phy_addr);
 				printf("vendor id: %x device id: %x\n",
 				    phy_vid, phy_did);
 			}
 			/* Match on vendor ID only; a zero ID ends the table. */
 			p = my_phys;
 			while (p->my_vid) {
 				if (phy_vid == p->my_vid) {
 					sc->my_pinfo = p;
 					break;
 				}
 				p++;
 			}
 			/*
 			 * Presumably my_pinfo is still NULL here when nothing
 			 * matched because the softc starts out zeroed --
 			 * TODO confirm.
 			 */
 			if (sc->my_pinfo == NULL)
 				sc->my_pinfo = &my_phys[PHY_UNKNOWN];
 			if (bootverbose)
 				device_printf(dev, "PHY type: %s\n",
 				       sc->my_pinfo->my_name);
 		} else {
 			MY_UNLOCK(sc);
 			device_printf(dev, "MII without any phy!\n");
 			error = ENXIO;
 			goto free_if;
 		}
 		MY_UNLOCK(sc);
 	}
 
 	/* Do ifmedia setup. */
 	ifmedia_init(&sc->ifmedia, 0, my_ifmedia_upd, my_ifmedia_sts);
 	MY_LOCK(sc);
 	my_getmode_mii(sc);
 	my_autoneg_mii(sc, MY_FLAG_FORCEDELAY, 1);
 	/* Remember the chosen media, then leave the chip stopped. */
 	media = sc->ifmedia.ifm_media;
 	my_stop(sc);
 	MY_UNLOCK(sc);
 	ifmedia_set(&sc->ifmedia, media);
 
 	ether_ifattach(ifp, eaddr);
 
 	/* The interrupt is hooked up last, after the interface is attached. */
 	error = bus_setup_intr(dev, sc->my_irq, INTR_TYPE_NET | INTR_MPSAFE,
 			       NULL, my_intr, sc, &sc->my_intrhand);
 
 	if (error) {
 		device_printf(dev, "couldn't set up irq\n");
 		goto detach_if;
 	}
 	 
 	return (0);
 
 	/* Error unwind: each label releases one resource and falls through. */
 detach_if:
 	ether_ifdetach(ifp);
 free_if:
 	if_free(ifp);
 free_ldata:
 	free(sc->my_ldata_ptr, M_DEVBUF);
 release_irq:
 	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->my_irq);
 release_io:
 	bus_release_resource(dev, MY_RES, MY_RID, sc->my_res);
 destroy_mutex:
 	mtx_destroy(&sc->my_mtx);
 	return (error);
 }
 
 /*
  * Detach the interface: unhook it from the network stack, stop the chip,
  * tear down the interrupt handler, wait for both callouts to finish, and
  * release the ifnet, the descriptor memory, the bus resources and the mutex.
  * Always returns 0.
  *
  * NOTE(review): the ifmedia entries set up during attach are not removed
  * here -- confirm whether an ifmedia_removeall() call is needed.
  */
 static int
 my_detach(device_t dev)
 {
 	struct my_softc *sc;
 	if_t		ifp;
 
 	sc = device_get_softc(dev);
 	ifp = sc->my_ifp;
 	ether_ifdetach(ifp);
 	MY_LOCK(sc);
 	my_stop(sc);
 	MY_UNLOCK(sc);
 	bus_teardown_intr(dev, sc->my_irq, sc->my_intrhand);
 	/* my_stop() only stopped the callouts; drain them without the lock. */
 	callout_drain(&sc->my_watchdog);
 	callout_drain(&sc->my_autoneg_timer);
 
 	if_free(ifp);
 	/* Free the original allocation, not the aligned my_ldata pointer. */
 	free(sc->my_ldata_ptr, M_DEVBUF);
 
 	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->my_irq);
 	bus_release_resource(dev, MY_RES, MY_RID, sc->my_res);
 	mtx_destroy(&sc->my_mtx);
 	return (0);
 }
 
 /*
  * Initialize the transmit descriptors.
  */
 static int
 my_list_tx_init(struct my_softc * sc)
 {
 	struct my_chain_data *cd;
 	struct my_list_data *ld;
 	int             i;
 
 	MY_LOCK_ASSERT(sc);
 	cd = &sc->my_cdata;
 	ld = sc->my_ldata;
 	for (i = 0; i < MY_TX_LIST_CNT; i++) {
 		cd->my_tx_chain[i].my_ptr = &ld->my_tx_list[i];
 		if (i == (MY_TX_LIST_CNT - 1))
 			cd->my_tx_chain[i].my_nextdesc = &cd->my_tx_chain[0];
 		else
 			cd->my_tx_chain[i].my_nextdesc =
 			    &cd->my_tx_chain[i + 1];
 	}
 	cd->my_tx_free = &cd->my_tx_chain[0];
 	cd->my_tx_tail = cd->my_tx_head = NULL;
 	return (0);
 }
 
 /*
  * Initialize the RX descriptors and allocate mbufs for them. Note that we
  * arrange the descriptors in a closed ring, so that the last descriptor
  * points back to the first.
  *
  * Must be called with the softc lock held.  Returns 0 on success, or
  * ENOBUFS if my_newbuf() could not supply a buffer; the lock is still held
  * on return in both cases.  On failure, buffers already attached to earlier
  * descriptors are left in place for the caller to release.
  */
 static int
 my_list_rx_init(struct my_softc * sc)
 {
 	struct my_chain_data *cd;
 	struct my_list_data *ld;
 	int             i;
 
 	MY_LOCK_ASSERT(sc);
 	cd = &sc->my_cdata;
 	ld = sc->my_ldata;
 	for (i = 0; i < MY_RX_LIST_CNT; i++) {
 		cd->my_rx_chain[i].my_ptr =
 		    (struct my_desc *) & ld->my_rx_list[i];
 		/*
 		 * Do not drop the lock on failure: the caller acquired it
 		 * and goes on to call my_stop(), which asserts it, before
 		 * the lock is finally released further up the call chain.
 		 */
 		if (my_newbuf(sc, &cd->my_rx_chain[i]) == ENOBUFS)
 			return (ENOBUFS);
 		/* Link both the software chain and the hardware descriptor. */
 		if (i == (MY_RX_LIST_CNT - 1)) {
 			cd->my_rx_chain[i].my_nextdesc = &cd->my_rx_chain[0];
 			ld->my_rx_list[i].my_next = vtophys(&ld->my_rx_list[0]);
 		} else {
 			cd->my_rx_chain[i].my_nextdesc =
 			    &cd->my_rx_chain[i + 1];
 			ld->my_rx_list[i].my_next =
 			    vtophys(&ld->my_rx_list[i + 1]);
 		}
 	}
 	cd->my_rx_head = &cd->my_rx_chain[0];
 	return (0);
 }
 
 /*
  * Initialize an RX descriptor and attach an MBUF cluster.
  *
  * Allocates a packet-header mbuf with a cluster, records it in the chain
  * entry 'c', and fills in the hardware descriptor; the status word is
  * written last and marks the descriptor as owned by the NIC.  Returns 0 on
  * success or ENOBUFS when either allocation fails, in which case 'c' is
  * left unmodified.  Asserts that the softc lock is held.
  */
 static int
 my_newbuf(struct my_softc * sc, struct my_chain_onefrag * c)
 {
 	struct mbuf    *fresh = NULL;
 
 	MY_LOCK_ASSERT(sc);
 	MGETHDR(fresh, M_NOWAIT, MT_DATA);
 	if (fresh == NULL)
 		goto nomem;
 	if (MCLGET(fresh, M_NOWAIT) == 0)
 		goto nomem;
 
 	c->my_mbuf = fresh;
 	c->my_ptr->my_data = vtophys(mtod(fresh, caddr_t));
 	c->my_ptr->my_ctl = (MCLBYTES - 1) << MY_RBSShift;
 	c->my_ptr->my_status = MY_OWNByNIC;
 	return (0);
 
 nomem:
 	/* Shared failure path: report, then drop the header mbuf if any. */
 	device_printf(sc->my_dev,
 	    "no memory for rx list -- packet dropped!\n");
 	if (fresh != NULL)
 		m_freem(fresh);
 	return (ENOBUFS);
 }
 
 /*
  * A frame has been uploaded: pass the resulting mbuf chain up to the higher
  * level protocols.
  *
  * Walks the receive ring from my_rx_head, handling every descriptor the NIC
  * no longer owns.  Called with the softc lock held; the lock is dropped
  * around each if_input() call and re-taken afterwards.
  */
 static void
 my_rxeof(struct my_softc * sc)
 {
 	struct ether_header *eh;
 	struct mbuf    *m;
 	if_t		ifp;
 	struct my_chain_onefrag *cur_rx;
 	int             total_len = 0;
 	u_int32_t       rxstat;
 
 	MY_LOCK_ASSERT(sc);
 	ifp = sc->my_ifp;
 	while (!((rxstat = sc->my_cdata.my_rx_head->my_ptr->my_status)
 	    & MY_OWNByNIC)) {
 		/* Claim this descriptor and advance the head right away. */
 		cur_rx = sc->my_cdata.my_rx_head;
 		sc->my_cdata.my_rx_head = cur_rx->my_nextdesc;
 
 		if (rxstat & MY_ES) {	/* error summary: give up this rx pkt */
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			cur_rx->my_ptr->my_status = MY_OWNByNIC;
 			continue;
 		}
 		/* No errors; receive the packet. */
 		total_len = (rxstat & MY_FLNGMASK) >> MY_FLNGShift;
 		total_len -= ETHER_CRC_LEN;
 
 		if (total_len < MINCLSIZE) {
 			/*
 			 * Short frame: copy it out with m_devget() and hand
 			 * the descriptor, with its existing cluster, straight
 			 * back to the NIC.
 			 */
 			m = m_devget(mtod(cur_rx->my_mbuf, char *),
 			    total_len, 0, ifp, NULL);
 			cur_rx->my_ptr->my_status = MY_OWNByNIC;
 			if (m == NULL) {
 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 				continue;
 			}
 		} else {
 			m = cur_rx->my_mbuf;
 			/*
 			 * Try to conjure up a new mbuf cluster. If that
 			 * fails, it means we have an out of memory condition
 			 * and should leave the buffer in place and continue.
 			 * This will result in a lost packet, but there's
 			 * little else we can do in this situation.
 			 */
 			if (my_newbuf(sc, cur_rx) == ENOBUFS) {
 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 				cur_rx->my_ptr->my_status = MY_OWNByNIC;
 				continue;
 			}
 			m->m_pkthdr.rcvif = ifp;
 			m->m_pkthdr.len = m->m_len = total_len;
 		}
 		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 		eh = mtod(m, struct ether_header *);
 #if NBPFILTER > 0
 		/*
 		 * Handle BPF listeners. Let the BPF user see the packet, but
 		 * don't pass it up to the ether_input() layer unless it's a
 		 * broadcast packet, multicast packet, matches our ethernet
 		 * address or the interface is in promiscuous mode.
 		 */
-		if (bpf_peers_present(if_getbpf(ifp))) {
+		if (bpf_peers_present_if(ifp)) {
 			bpf_mtap_if(ifp, m);
 			if (if_getflags(ifp) & IFF_PROMISC &&
 			    (bcmp(eh->ether_dhost, if_getlladdr(sc->my_ifp),
 				ETHER_ADDR_LEN) &&
 			     (eh->ether_dhost[0] & 1) == 0)) {
 				m_freem(m);
 				continue;
 			}
 		}
 #endif
 		/* Drop the lock while the frame is handed to the stack. */
 		MY_UNLOCK(sc);
 		if_input(ifp, m);
 		MY_LOCK(sc);
 	}
 	return;
 }
 
 /*
  * A frame was downloaded to the chip. It's safe for us to clean up the list
  * buffers.
  *
  * Walks the transmit chain from my_tx_head, freeing the mbuf of every
  * descriptor the NIC has finished with and updating the output and
  * collision counters.  Asserts that the softc lock is held.
  */
 static void
 my_txeof(struct my_softc * sc)
 {
 	struct my_chain *cur_tx;
 	if_t		ifp;
 
 	MY_LOCK_ASSERT(sc);
 	ifp = sc->my_ifp;
 	/* Clear the timeout timer. */
 	sc->my_timer = 0;
 	if (sc->my_cdata.my_tx_head == NULL) {
 		return;
 	}
 	/*
 	 * Go through our tx list and free mbufs for those frames that have
 	 * been transmitted.
 	 */
 	while (sc->my_cdata.my_tx_head->my_mbuf != NULL) {
 		u_int32_t       txstat;
 
 		cur_tx = sc->my_cdata.my_tx_head;
 		txstat = MY_TXSTATUS(cur_tx);
 		/* Stop at the first descriptor still pending or not yet sent. */
 		if ((txstat & MY_OWNByNIC) || txstat == MY_UNSENT)
 			break;
 		/*
 		 * When MY_Enhanced is not set in MY_TCRRCR, errors and
 		 * collisions are taken from the descriptor status word.
 		 */
 		if (!(CSR_READ_4(sc, MY_TCRRCR) & MY_Enhanced)) {
 			if (txstat & MY_TXERR) {
 				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 				if (txstat & MY_EC) /* excessive collision */
 					if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1);
 				if (txstat & MY_LC)	/* late collision */
 					if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1);
 			}
 			if_inc_counter(ifp, IFCOUNTER_COLLISIONS,
 			    (txstat & MY_NCRMASK) >> MY_NCRShift);
 		}
 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 		m_freem(cur_tx->my_mbuf);
 		cur_tx->my_mbuf = NULL;
 		/* Head caught up with tail: the chain is now empty. */
 		if (sc->my_cdata.my_tx_head == sc->my_cdata.my_tx_tail) {
 			sc->my_cdata.my_tx_head = NULL;
 			sc->my_cdata.my_tx_tail = NULL;
 			break;
 		}
 		sc->my_cdata.my_tx_head = cur_tx->my_nextdesc;
 	}
 	/* With MY_Enhanced set, the collision count is read from MY_TSR. */
 	if (CSR_READ_4(sc, MY_TCRRCR) & MY_Enhanced) {
 		if_inc_counter(ifp, IFCOUNTER_COLLISIONS, (CSR_READ_4(sc, MY_TSR) & MY_NCRMask));
 	}
 	return;
 }
 
 /*
  * TX 'end of channel' interrupt handler.
  *
  * With nothing left on the transmit chain, clear IFF_DRV_OACTIVE and start
  * a deferred autonegotiation if one was requested.  Otherwise, if the head
  * descriptor is still marked MY_UNSENT, hand it to the NIC, re-arm the
  * watchdog timer and issue a transmit poll demand.  Asserts that the softc
  * lock is held.
  */
 static void
 my_txeoc(struct my_softc * sc)
 {
 	struct my_chain *pending;
 	if_t		ifp;
 
 	MY_LOCK_ASSERT(sc);
 	ifp = sc->my_ifp;
 	sc->my_timer = 0;
 	pending = sc->my_cdata.my_tx_head;
 
 	if (pending == NULL) {
 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
 		sc->my_cdata.my_tx_tail = NULL;
 		if (sc->my_want_auto)
 			my_autoneg_mii(sc, MY_FLAG_SCHEDDELAY, 1);
 		return;
 	}
 	if (MY_TXOWN(pending) != MY_UNSENT)
 		return;
 
 	MY_TXOWN(pending) = MY_OWNByNIC;
 	sc->my_timer = 5;
 	CSR_WRITE_4(sc, MY_TXPDR, 0xFFFFFFFF);
 }
 
 /*
  * Interrupt handler.  'arg' is the softc registered with bus_setup_intr().
  *
  * Takes the softc lock, masks the chip's interrupts, then repeatedly reads
  * and acknowledges MY_ISR, dispatching to the receive/transmit handlers
  * until none of the MY_INTRS bits remain set.  Interrupts are unmasked
  * again before returning, and transmission is restarted if the send queue
  * is not empty.
  */
 static void
 my_intr(void *arg)
 {
 	struct my_softc *sc;
 	if_t		ifp;
 	u_int32_t       status;
 
 	sc = arg;
 	MY_LOCK(sc);
 	ifp = sc->my_ifp;
 	/* Nothing is read or acknowledged while the interface is down. */
 	if (!(if_getflags(ifp) & IFF_UP)) {
 		MY_UNLOCK(sc);
 		return;
 	}
 	/* Disable interrupts. */
 	CSR_WRITE_4(sc, MY_IMR, 0x00000000);
 
 	for (;;) {
 		status = CSR_READ_4(sc, MY_ISR);
 		status &= MY_INTRS;
 		/* Write the pending bits back to MY_ISR before handling them. */
 		if (status)
 			CSR_WRITE_4(sc, MY_ISR, status);
 		else
 			break;
 
 		if (status & MY_RI)	/* receive interrupt */
 			my_rxeof(sc);
 
 		if ((status & MY_RBU) || (status & MY_RxErr)) {
 			/* rx buffer unavailable or rx error */
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 #ifdef foo
 			my_stop(sc);
 			my_reset(sc);
 			my_init_locked(sc);
 #endif
 		}
 		if (status & MY_TI)	/* tx interrupt */
 			my_txeof(sc);
 		if (status & MY_ETI)	/* tx early interrupt */
 			my_txeof(sc);
 		if (status & MY_TBU)	/* tx buffer unavailable */
 			my_txeoc(sc);
 
 #if 0				/* 90/1/18 delete */
 		if (status & MY_FBE) {
 			my_reset(sc);
 			my_init_locked(sc);
 		}
 #endif
 	}
 
 	/* Re-enable interrupts. */
 	CSR_WRITE_4(sc, MY_IMR, MY_INTRS);
 	if (!if_sendq_empty(ifp))
 		my_start_locked(ifp);
 	MY_UNLOCK(sc);
 	return;
 }
 
 /*
  * Encapsulate an mbuf chain in a descriptor by coupling the mbuf data
  * pointers to the fragment pointers.
  */
 static int
 my_encap(struct my_softc * sc, struct my_chain * c, struct mbuf * m_head)
 {
 	struct my_desc *f = NULL;
 	int             total_len;
 	struct mbuf    *m, *m_new = NULL;
 
 	MY_LOCK_ASSERT(sc);
 	/* calculate the total tx pkt length */
 	total_len = 0;
 	for (m = m_head; m != NULL; m = m->m_next)
 		total_len += m->m_len;
 	/*
 	 * Start packing the mbufs in this chain into the fragment pointers.
 	 * Stop when we run out of fragments or hit the end of the mbuf
 	 * chain.
 	 */
 	m = m_head;
 	MGETHDR(m_new, M_NOWAIT, MT_DATA);
 	if (m_new == NULL) {
 		device_printf(sc->my_dev, "no memory for tx list");
 		return (1);
 	}
 	if (m_head->m_pkthdr.len > MHLEN) {
 		if (!(MCLGET(m_new, M_NOWAIT))) {
 			m_freem(m_new);
 			device_printf(sc->my_dev, "no memory for tx list");
 			return (1);
 		}
 	}
 	m_copydata(m_head, 0, m_head->m_pkthdr.len, mtod(m_new, caddr_t));
 	m_new->m_pkthdr.len = m_new->m_len = m_head->m_pkthdr.len;
 	m_freem(m_head);
 	m_head = m_new;
 	f = &c->my_ptr->my_frag[0];
 	f->my_status = 0;
 	f->my_data = vtophys(mtod(m_new, caddr_t));
 	total_len = m_new->m_len;
 	f->my_ctl = MY_TXFD | MY_TXLD | MY_CRCEnable | MY_PADEnable;
 	f->my_ctl |= total_len << MY_PKTShift;	/* pkt size */
 	f->my_ctl |= total_len;	/* buffer size */
 	/* 89/12/29 add, for mtd891 *//* [ 89? ] */
 	if (sc->my_info->my_did == MTD891ID)
 		f->my_ctl |= MY_ETIControl | MY_RetryTxLC;
 	c->my_mbuf = m_head;
 	c->my_lastdesc = 0;
 	MY_TXNEXT(c) = vtophys(&c->my_nextdesc->my_ptr->my_frag[0]);
 	return (0);
 }
 
 /*
  * Transmit entry point installed with if_setstartfn().  It simply runs
  * my_start_locked() with the softc lock held.
  */
 static void
 my_start(if_t ifp)
 {
 	struct my_softc *softc = if_getsoftc(ifp);
 
 	MY_LOCK(softc);
 	my_start_locked(ifp);
 	MY_UNLOCK(softc);
 }
 
 /*
  * Main transmit routine; asserts that the softc lock is held.
  *
  * Dequeues packets from the interface send queue while free transmit
  * descriptors remain, encapsulates each one with my_encap(), and then hands
  * the whole batch to the chip.  Ownership of the first descriptor of the
  * batch is given to the NIC last, after the rest of the chain is set up.
  *
  * If my_encap() fails, the packet is dropped and counted as an output
  * error, its descriptor stays on the free list, and whatever was
  * encapsulated before it is still sent.
  */
 static void
 my_start_locked(if_t ifp)
 {
 	struct my_softc *sc;
 	struct mbuf    *m_head = NULL;
 	struct my_chain *cur_tx = NULL, *next_tx, *start_tx;
 
 	sc = if_getsoftc(ifp);
 	MY_LOCK_ASSERT(sc);
 	/* Defer transmission while autonegotiation is running. */
 	if (sc->my_autoneg) {
 		sc->my_tx_pend = 1;
 		return;
 	}
 	/*
 	 * Check for an available queue slot. If there are none, punt.
 	 */
 	if (sc->my_cdata.my_tx_free->my_mbuf != NULL) {
 		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
 		return;
 	}
 	start_tx = sc->my_cdata.my_tx_free;
 	while (sc->my_cdata.my_tx_free->my_mbuf == NULL) {
 		m_head = if_dequeue(ifp);
 		if (m_head == NULL)
 			break;
 
 		/*
 		 * Pack the data into the next free descriptor.  my_encap()
 		 * leaves both the descriptor and the mbuf chain untouched
 		 * when it fails, so the chain must be freed here and the
 		 * descriptor must not be consumed.
 		 */
 		next_tx = sc->my_cdata.my_tx_free;
 		if (my_encap(sc, next_tx, m_head) != 0) {
 			m_freem(m_head);
 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 			break;
 		}
 
 		/* Success: take the descriptor off the free list. */
 		cur_tx = next_tx;
 		sc->my_cdata.my_tx_free = cur_tx->my_nextdesc;
 
 		if (cur_tx != start_tx)
 			MY_TXOWN(cur_tx) = MY_OWNByNIC;
 #if NBPFILTER > 0
 		/*
 		 * If there's a BPF listener, bounce a copy of this frame to
 		 * him.
 		 */
 		BPF_MTAP(ifp, cur_tx->my_mbuf);
 #endif
 	}
 	/*
 	 * If there are no packets queued, bail.
 	 */
 	if (cur_tx == NULL) {
 		return;
 	}
 	/*
 	 * Place the request for the upload interrupt in the last descriptor
 	 * in the chain. This way, if we're chaining several packets at once,
 	 * we'll only get an interrupt once for the whole chain rather than
 	 * once for each packet.
 	 */
 	MY_TXCTL(cur_tx) |= MY_TXIC;
 	cur_tx->my_ptr->my_frag[0].my_ctl |= MY_TXIC;
 	sc->my_cdata.my_tx_tail = cur_tx;
 	if (sc->my_cdata.my_tx_head == NULL)
 		sc->my_cdata.my_tx_head = start_tx;
 	/* Hand over the first descriptor only now that the chain is built. */
 	MY_TXOWN(start_tx) = MY_OWNByNIC;
 	CSR_WRITE_4(sc, MY_TXPDR, 0xFFFFFFFF);	/* tx polling demand */
 
 	/*
 	 * Set a timeout in case the chip goes out to lunch.
 	 */
 	sc->my_timer = 5;
 	return;
 }
 
 /*
  * Init entry point installed with if_setinitfn(); 'xsc' is the softc.
  * Runs my_init_locked() with the softc lock held.
  */
 static void
 my_init(void *xsc)
 {
 	struct my_softc *softc;
 
 	softc = xsc;
 	MY_LOCK(softc);
 	my_init_locked(softc);
 	MY_UNLOCK(softc);
 }
 
 /*
  * Bring the interface up: stop and reset the chip, program the bus and
  * tx/rx configuration, rebuild the descriptor rings, apply the promiscuous,
  * broadcast and multicast settings, enable interrupts and the
  * receiver/transmitter, and start the watchdog callout.  Does nothing while
  * an autonegotiation session is in progress.  Asserts that the softc lock
  * is held.
  */
 static void
 my_init_locked(struct my_softc *sc)
 {
 	if_t		ifp = sc->my_ifp;
 	u_int16_t       phy_bmcr = 0;
 
 	MY_LOCK_ASSERT(sc);
 	if (sc->my_autoneg) {
 		return;
 	}
 	/* Save the PHY's BMCR so it can be written back after the reset. */
 	if (sc->my_pinfo != NULL)
 		phy_bmcr = my_phy_readreg(sc, PHY_BMCR);
 	/*
 	 * Cancel pending I/O and free all RX/TX buffers.
 	 */
 	my_stop(sc);
 	my_reset(sc);
 
 	/*
 	 * Set cache alignment and burst length.
 	 */
 #if 0				/* 89/9/1 modify,  */
 	CSR_WRITE_4(sc, MY_BCR, MY_RPBLE512);
 	CSR_WRITE_4(sc, MY_TCRRCR, MY_TFTSF);
 #endif
 	CSR_WRITE_4(sc, MY_BCR, MY_PBL8);
 	CSR_WRITE_4(sc, MY_TCRRCR, MY_TFTSF | MY_RBLEN | MY_RPBLE512);
 	/*
 	 * 89/12/29 add, for mtd891,
 	 */
 	if (sc->my_info->my_did == MTD891ID) {
 		MY_SETBIT(sc, MY_BCR, MY_PROG);
 		MY_SETBIT(sc, MY_TCRRCR, MY_Enhanced);
 	}
 	/* Apply the speed/duplex bits of the saved BMCR value to the MAC. */
 	my_setcfg(sc, phy_bmcr);
 	/* Init circular RX list. */
 	if (my_list_rx_init(sc) == ENOBUFS) {
 		device_printf(sc->my_dev, "init failed: no memory for rx buffers\n");
 		my_stop(sc);
 		return;
 	}
 	/* Init TX descriptors. */
 	my_list_tx_init(sc);
 
 	/* If we want promiscuous mode, set the allframes bit. */
 	if (if_getflags(ifp) & IFF_PROMISC)
 		MY_SETBIT(sc, MY_TCRRCR, MY_PROM);
 	else
 		MY_CLRBIT(sc, MY_TCRRCR, MY_PROM);
 
 	/*
 	 * Set capture broadcast bit to capture broadcast frames.
 	 */
 	if (if_getflags(ifp) & IFF_BROADCAST)
 		MY_SETBIT(sc, MY_TCRRCR, MY_AB);
 	else
 		MY_CLRBIT(sc, MY_TCRRCR, MY_AB);
 
 	/*
 	 * Program the multicast filter, if necessary.
 	 */
 	my_setmulti(sc);
 
 	/*
 	 * Load the address of the RX list.
 	 */
 	MY_CLRBIT(sc, MY_TCRRCR, MY_RE);
 	CSR_WRITE_4(sc, MY_RXLBA, vtophys(&sc->my_ldata->my_rx_list[0]));
 
 	/*
 	 * Enable interrupts.
 	 */
 	CSR_WRITE_4(sc, MY_IMR, MY_INTRS);
 	CSR_WRITE_4(sc, MY_ISR, 0xFFFFFFFF);
 
 	/* Enable receiver and transmitter. */
 	MY_SETBIT(sc, MY_TCRRCR, MY_RE);
 	/* The transmitter is kept disabled while the TX list base is loaded. */
 	MY_CLRBIT(sc, MY_TCRRCR, MY_TE);
 	CSR_WRITE_4(sc, MY_TXLBA, vtophys(&sc->my_ldata->my_tx_list[0]));
 	MY_SETBIT(sc, MY_TCRRCR, MY_TE);
 
 	/* Restore state of BMCR */
 	if (sc->my_pinfo != NULL)
 		my_phy_writereg(sc, PHY_BMCR, phy_bmcr);
 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
 	if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
 
 	/* First watchdog tick in hz ticks; my_watchdog() re-arms itself. */
 	callout_reset(&sc->my_watchdog, hz, my_watchdog, sc);
 	return;
 }
 
 /*
  * Set media options.
  *
  * ifmedia change callback.  Returns EINVAL unless the selected media word
  * is of Ethernet type; otherwise starts autonegotiation for IFM_AUTO, or
  * forces the requested mode with my_setmode_mii(), and returns 0.  The
  * softc lock is held for the duration.
  */
 
 static int
 my_ifmedia_upd(if_t ifp)
 {
 	struct my_softc *softc = if_getsoftc(ifp);
 	int             error = 0;
 
 	MY_LOCK(softc);
 	if (IFM_TYPE(softc->ifmedia.ifm_media) != IFM_ETHER)
 		error = EINVAL;
 	else if (IFM_SUBTYPE(softc->ifmedia.ifm_media) == IFM_AUTO)
 		my_autoneg_mii(softc, MY_FLAG_SCHEDDELAY, 1);
 	else
 		my_setmode_mii(softc, softc->ifmedia.ifm_media);
 	MY_UNLOCK(softc);
 	return (error);
 }
 
 /*
  * Report current media status.
  *
  * ifmedia status callback; fills in ifmr->ifm_active.  When autonegotiation
  * is disabled in the PHY's BMCR, the speed and duplex are taken from the
  * BMCR bits.  Otherwise the result is the best mode present in both our
  * advertisement (ANAR) and the link partner's abilities (LPAR).  The softc
  * lock is held for the duration.
  */
 
 static void
 my_ifmedia_sts(if_t ifp, struct ifmediareq * ifmr)
 {
 	struct my_softc *sc;
 	u_int16_t advert = 0, ability = 0;
 
 	sc = if_getsoftc(ifp);
 	MY_LOCK(sc);
 	ifmr->ifm_active = IFM_ETHER;
 	/* Forced mode: each test below re-reads BMCR from the PHY. */
 	if (!(my_phy_readreg(sc, PHY_BMCR) & PHY_BMCR_AUTONEGENBL)) {
 #if 0				/* this version did not support 1000M, */
 		if (my_phy_readreg(sc, PHY_BMCR) & PHY_BMCR_1000)
 			ifmr->ifm_active = IFM_ETHER | IFM_1000TX;
 #endif
 		if (my_phy_readreg(sc, PHY_BMCR) & PHY_BMCR_SPEEDSEL)
 			ifmr->ifm_active = IFM_ETHER | IFM_100_TX;
 		else
 			ifmr->ifm_active = IFM_ETHER | IFM_10_T;
 		if (my_phy_readreg(sc, PHY_BMCR) & PHY_BMCR_DUPLEX)
 			ifmr->ifm_active |= IFM_FDX;
 		else
 			ifmr->ifm_active |= IFM_HDX;
 
 		MY_UNLOCK(sc);
 		return;
 	}
 	ability = my_phy_readreg(sc, PHY_LPAR);
 	advert = my_phy_readreg(sc, PHY_ANAR);
 
 #if 0				/* this version did not support 1000M, */
 	if (sc->my_pinfo->my_vid = MarvellPHYID0) {
 		ability2 = my_phy_readreg(sc, PHY_1000SR);
 		if (ability2 & PHY_1000SR_1000BTXFULL) {
 			advert = 0;
 			ability = 0;
 	  		ifmr->ifm_active = IFM_ETHER|IFM_1000_T|IFM_FDX;
 	  	} else if (ability & PHY_1000SR_1000BTXHALF) {
 			advert = 0;
 			ability = 0;
 			ifmr->ifm_active = IFM_ETHER|IFM_1000_T|IFM_HDX;
 		}
 	}
 #endif
 	/*
 	 * Pick the first mode, in the order tested, that both sides list.
 	 * If none matches, ifm_active keeps the value assigned above.
 	 */
 	if (advert & PHY_ANAR_100BT4 && ability & PHY_ANAR_100BT4)
 		ifmr->ifm_active = IFM_ETHER | IFM_100_T4;
 	else if (advert & PHY_ANAR_100BTXFULL && ability & PHY_ANAR_100BTXFULL)
 		ifmr->ifm_active = IFM_ETHER | IFM_100_TX | IFM_FDX;
 	else if (advert & PHY_ANAR_100BTXHALF && ability & PHY_ANAR_100BTXHALF)
 		ifmr->ifm_active = IFM_ETHER | IFM_100_TX | IFM_HDX;
 	else if (advert & PHY_ANAR_10BTFULL && ability & PHY_ANAR_10BTFULL)
 		ifmr->ifm_active = IFM_ETHER | IFM_10_T | IFM_FDX;
 	else if (advert & PHY_ANAR_10BTHALF && ability & PHY_ANAR_10BTHALF)
 		ifmr->ifm_active = IFM_ETHER | IFM_10_T | IFM_HDX;
 	MY_UNLOCK(sc);
 	return;
 }
 
 /*
  * Interface ioctl handler.
  *
  * SIOCSIFFLAGS (re)initializes the chip when the interface is marked up and
  * stops it when it is marked down but still running; SIOCADDMULTI and
  * SIOCDELMULTI reprogram the multicast filter; the media requests are
  * passed to ifmedia_ioctl(); everything else goes to ether_ioctl().
  * Returns 0 or the error reported by the delegated handler.
  */
 static int
 my_ioctl(if_t ifp, u_long command, caddr_t data)
 {
 	struct my_softc *softc = if_getsoftc(ifp);
 	int             error = 0;
 
 	switch (command) {
 	case SIOCSIFFLAGS:
 		MY_LOCK(softc);
 		if ((if_getflags(ifp) & IFF_UP) != 0) {
 			my_init_locked(softc);
 		} else if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
 			my_stop(softc);
 		}
 		MY_UNLOCK(softc);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		MY_LOCK(softc);
 		my_setmulti(softc);
 		MY_UNLOCK(softc);
 		break;
 	case SIOCGIFMEDIA:
 	case SIOCSIFMEDIA:
 		error = ifmedia_ioctl(ifp, (struct ifreq *) data,
 		    &softc->ifmedia, command);
 		break;
 	default:
 		error = ether_ioctl(ifp, command, data);
 		break;
 	}
 	return (error);
 }
 
 /*
  * Watchdog callout handler; 'arg' is the softc.  Re-arms itself every hz
  * ticks and asserts that the softc lock is held.
  *
  * sc->my_timer is a countdown in watchdog ticks, where 0 means disarmed.
  * When it reaches zero the transmit is treated as hung: the event is counted
  * and logged, the chip is stopped, reset and re-initialized, and
  * transmission is restarted if packets are queued.
  */
 static void
 my_watchdog(void *arg)
 {
 	struct my_softc *sc;
 	if_t		ifp;
 
 	sc = arg;
 	MY_LOCK_ASSERT(sc);
 	callout_reset(&sc->my_watchdog, hz, my_watchdog, sc);
 	/* Disarmed, or armed but not yet expired: nothing more to do. */
 	if (sc->my_timer == 0 || --sc->my_timer > 0)
 		return;
 
 	ifp = sc->my_ifp;
 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	if_printf(ifp, "watchdog timeout\n");
 	/* Add a hint when the PHY reports that the link is down. */
 	if (!(my_phy_readreg(sc, PHY_BMSR) & PHY_BMSR_LINKSTAT))
 		if_printf(ifp, "no carrier - transceiver cable problem?\n");
 	my_stop(sc);
 	my_reset(sc);
 	my_init_locked(sc);
 	if (!if_sendq_empty(ifp))
 		my_start_locked(ifp);
 }
 
 /*
  * Stop the adapter and free any mbufs allocated to the RX and TX lists.
  *
  * Also stops both callouts, masks the chip's interrupts, clears the
  * descriptor list base registers, zeroes both hardware descriptor lists and
  * clears IFF_DRV_RUNNING and IFF_DRV_OACTIVE.  Asserts that the softc lock
  * is held.
  */
 static void
 my_stop(struct my_softc * sc)
 {
 	int    i;
 	if_t   ifp;
 
 	MY_LOCK_ASSERT(sc);
 	ifp = sc->my_ifp;
 
 	callout_stop(&sc->my_autoneg_timer);
 	callout_stop(&sc->my_watchdog);
 
 	/* Disable rx/tx first, then mask interrupts and drop the list bases. */
 	MY_CLRBIT(sc, MY_TCRRCR, (MY_RE | MY_TE));
 	CSR_WRITE_4(sc, MY_IMR, 0x00000000);
 	CSR_WRITE_4(sc, MY_TXLBA, 0x00000000);
 	CSR_WRITE_4(sc, MY_RXLBA, 0x00000000);
 
 	/*
 	 * Free data in the RX lists.
 	 */
 	for (i = 0; i < MY_RX_LIST_CNT; i++) {
 		if (sc->my_cdata.my_rx_chain[i].my_mbuf != NULL) {
 			m_freem(sc->my_cdata.my_rx_chain[i].my_mbuf);
 			sc->my_cdata.my_rx_chain[i].my_mbuf = NULL;
 		}
 	}
 	bzero((char *)&sc->my_ldata->my_rx_list,
 	    sizeof(sc->my_ldata->my_rx_list));
 	/*
 	 * Free the TX list buffers.
 	 */
 	for (i = 0; i < MY_TX_LIST_CNT; i++) {
 		if (sc->my_cdata.my_tx_chain[i].my_mbuf != NULL) {
 			m_freem(sc->my_cdata.my_tx_chain[i].my_mbuf);
 			sc->my_cdata.my_tx_chain[i].my_mbuf = NULL;
 		}
 	}
 	bzero((char *)&sc->my_ldata->my_tx_list,
 	    sizeof(sc->my_ldata->my_tx_list));
 	if_setdrvflagbits(ifp, 0, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE));
 	return;
 }
 
 /*
  * Shutdown method: quiesce the chip so that no stray DMA is left running
  * across a reboot.  Calls my_stop() under the softc lock and always
  * returns 0.
  */
 static int
 my_shutdown(device_t dev)
 {
 	struct my_softc *softc = device_get_softc(dev);
 
 	MY_LOCK(softc);
 	my_stop(softc);
 	MY_UNLOCK(softc);
 	return (0);
 }
diff --git a/sys/dev/usb/usb_pf.c b/sys/dev/usb/usb_pf.c
index 43e819684857..4da59419a7c6 100644
--- a/sys/dev/usb/usb_pf.c
+++ b/sys/dev/usb/usb_pf.c
@@ -1,539 +1,537 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1990, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from the Stanford/CMU enet packet filter,
  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
  * Berkeley Laboratory.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifdef USB_GLOBAL_INCLUDE_FILE
 #include USB_GLOBAL_INCLUDE_FILE
 #else
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
 #include <sys/fcntl.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/if_clone.h>
 #include <net/bpf.h>
 #include <sys/sysctl.h>
 #include <net/route.h>
 
 #include <dev/usb/usb.h>
 #include <dev/usb/usbdi.h>
 #include <dev/usb/usb_busdma.h>
 #include <dev/usb/usb_controller.h>
 #include <dev/usb/usb_core.h>
 #include <dev/usb/usb_process.h>
 #include <dev/usb/usb_device.h>
 #include <dev/usb/usb_bus.h>
 #include <dev/usb/usb_pf.h>
 #include <dev/usb/usb_transfer.h>
 #endif			/* USB_GLOBAL_INCLUDE_FILE */
 
 static void usbpf_init(void *);
 static void usbpf_uninit(void *);
 static int usbpf_ioctl(if_t, u_long, caddr_t);
 static int usbpf_clone_match(struct if_clone *, const char *);
 static int usbpf_clone_create(struct if_clone *, char *, size_t,
 	    struct ifc_data *, if_t *);
 static int usbpf_clone_destroy(struct if_clone *, if_t, uint32_t);
 static struct usb_bus *usbpf_ifname2ubus(const char *);
 static uint32_t usbpf_aggregate_xferflags(struct usb_xfer_flags *);
 static uint32_t usbpf_aggregate_status(struct usb_xfer_flags_int *);
 static int usbpf_xfer_frame_is_read(struct usb_xfer *, uint32_t);
 static uint32_t usbpf_xfer_precompute_size(struct usb_xfer *, int);
 
 static struct if_clone *usbpf_cloner;
 static const char usbusname[] = "usbus";
 
 SYSINIT(usbpf_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, usbpf_init, NULL);
 SYSUNINIT(usbpf_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, usbpf_uninit, NULL);
 
 static void
 usbpf_init(void *arg)
 {
 	struct if_clone_addreq req = {
 		.match_f = usbpf_clone_match,
 		.create_f = usbpf_clone_create,
 		.destroy_f = usbpf_clone_destroy,
 	};
 
 	usbpf_cloner = ifc_attach_cloner(usbusname, &req);
 }
 
 /*
  * SYSUNINIT hook: detach the cloner, then walk every device of the
  * "usbus" devclass and destroy any tap interface still attached to it.
  */
 static void
 usbpf_uninit(void *arg)
 {
 	devclass_t usbus_dc;
 	device_t *devs;
 	struct usb_bus *bus;
 	int ndevs;
 	int idx;
 
 	if_clone_detach(usbpf_cloner);
 
 	usbus_dc = devclass_find(usbusname);
 	if (usbus_dc == NULL)
 		return;
 	if (devclass_get_devices(usbus_dc, &devs, &ndevs) != 0)
 		return;
 
 	for (idx = 0; idx < ndevs; idx++) {
 		bus = device_get_softc(devs[idx]);
 		if (bus == NULL || bus->ifp == NULL)
 			continue;
 		usbpf_clone_destroy(usbpf_cloner, bus->ifp, 0);
 	}
 
 	/* The device array from devclass_get_devices() is ours to free. */
 	free(devs, M_TEMP);
 }
 
 /*
  * Interface ioctl handler.  The tap interface accepts no configuration,
  * so every request is rejected with EINVAL.
  */
 static int
 usbpf_ioctl(if_t ifp, u_long cmd, caddr_t data)
 {
 
 	return (EINVAL);
 }
 
 /*
  * Translate an interface name of the form "usbus<unit>" into the softc
  * of the matching device in the "usbus" devclass.  Returns NULL when the
  * prefix does not match, the unit cannot be parsed or is negative, or no
  * such devclass/device exists.
  */
 static struct usb_bus *
 usbpf_ifname2ubus(const char *ifname)
 {
 	devclass_t usbus_dc;
 	device_t busdev;
 	int unit;
 
 	if (strncmp(ifname, usbusname, sizeof(usbusname) - 1) != 0)
 		return (NULL);
 	if (ifc_name2unit(ifname, &unit) != 0 || unit < 0)
 		return (NULL);
 
 	usbus_dc = devclass_find(usbusname);
 	if (usbus_dc == NULL)
 		return (NULL);
 
 	busdev = devclass_get_device(usbus_dc, unit);
 	if (busdev == NULL)
 		return (NULL);
 
 	return (device_get_softc(busdev));
 }
 
 /*
  * Cloner match callback: returns 1 only when the name resolves to a USB
  * bus that does not yet have a tap interface, 0 otherwise.
  */
 static int
 usbpf_clone_match(struct if_clone *ifc, const char *name)
 {
 	struct usb_bus *bus = usbpf_ifname2ubus(name);
 
 	if (bus != NULL && bus->ifp == NULL)
 		return (1);
 
 	return (0);
 }
 
 /*
  * Cloner create callback: allocate and attach the tap interface for the
  * USB bus named by "name", bring it up and attach BPF with DLT_USB.
  *
  * Returns 0 and stores the new interface in *ifpp on success.  Returns
  * the ifc_name2unit()/ifc_alloc_unit() error, EINVAL for a negative
  * unit, 1 when the bus is unknown or already has an interface, or
  * ENOSPC when if_alloc() fails.
  */
 static int
 usbpf_clone_create(struct if_clone *ifc, char *name, size_t len,
     struct ifc_data *ifd, if_t *ifpp)
 {
 	struct usb_bus *ubus;
 	if_t ifp;
 	int unit;
 	int error;
 
 	error = ifc_name2unit(name, &unit);
 	if (error != 0)
 		return (error);
 	if (unit < 0)
 		return (EINVAL);
 
 	/* The name must map to a bus that has no tap interface yet. */
 	ubus = usbpf_ifname2ubus(name);
 	if (ubus == NULL || ubus->ifp != NULL)
 		return (1);
 
 	error = ifc_alloc_unit(ifc, &unit);
 	if (error != 0) {
 		device_printf(ubus->parent,
 		    "usbpf: Could not allocate instance\n");
 		return (error);
 	}
 
 	ifp = if_alloc(IFT_USB);
 	ubus->ifp = ifp;
 	if (ifp == NULL) {
 		/* Give the unit number back before bailing out. */
 		ifc_free_unit(ifc, unit);
 		device_printf(ubus->parent,
 		    "usbpf: Could not allocate instance\n");
 		return (ENOSPC);
 	}
 
 	if_setsoftc(ifp, ubus);
 	if_initname(ifp, usbusname, unit);
 	if_setname(ifp, name);
 	if_setioctlfn(ifp, usbpf_ioctl);
 	if_attach(ifp);
 	if_setflagbits(ifp, IFF_UP, 0);
 	rt_ifmsg(ifp, IFF_UP);
 
 	/*
 	 * XXX According to the specification of DLT_USB, it indicates
 	 * packets beginning with USB setup header. But not sure all
 	 * packets would be.
 	 */
 	bpfattach(ifp, DLT_USB, USBPF_HDR_LEN);
 	*ifpp = ifp;
 
 	return (0);
 }
 
 /*
  * Cloner destroy callback: unhook the interface from its USB bus, then
  * detach BPF, detach and free the ifnet and release the unit number.
  * Always returns 0.
  */
 static int
 usbpf_clone_destroy(struct if_clone *ifc, if_t ifp, uint32_t flags)
 {
 	struct usb_bus *ubus = if_getsoftc(ifp);
 	int unit = if_getdunit(ifp);
 
 	/*
 	 * Take the USB bus lock while clearing the "ifp" pointer so
 	 * that it is not cleared in the middle of a TAP operation.
 	 */
 	USB_BUS_LOCK(ubus);
 	ubus->ifp = NULL;
 	USB_BUS_UNLOCK(ubus);
 
 	bpfdetach(ifp);
 	if_detach(ifp);
 	if_free(ifp);
 	ifc_free_unit(ifc, unit);
 
 	return (0);
 }
 
 /*
  * Bus attach hook.  Only logs a message (when booting verbosely); the
  * tap interface itself is created through the cloner.
  */
 void
 usbpf_attach(struct usb_bus *ubus)
 {
 
 	if (!bootverbose)
 		return;
 	device_printf(ubus->parent, "usbpf: Attached\n");
 }
 
 /*
  * Bus detach hook: destroy the tap interface if one is still attached
  * to this bus, then log a message when booting verbosely.
  */
 void
 usbpf_detach(struct usb_bus *ubus)
 {
 
 	if (ubus->ifp != NULL) {
 		usbpf_clone_destroy(usbpf_cloner, ubus->ifp, 0);
 	}
 	if (bootverbose) {
 		device_printf(ubus->parent, "usbpf: Detached\n");
 	}
 }
 
 /*
  * Fold the individual usb_xfer_flags fields into a single word of
  * USBPF_FLAG_* bits; a bit is set when the matching field equals 1.
  */
 static uint32_t
 usbpf_aggregate_xferflags(struct usb_xfer_flags *flags)
 {
 	uint32_t bits = 0;
 
 	bits |= (flags->force_short_xfer == 1) ?
 	    USBPF_FLAG_FORCE_SHORT_XFER : 0;
 	bits |= (flags->short_xfer_ok == 1) ?
 	    USBPF_FLAG_SHORT_XFER_OK : 0;
 	bits |= (flags->short_frames_ok == 1) ?
 	    USBPF_FLAG_SHORT_FRAMES_OK : 0;
 	bits |= (flags->pipe_bof == 1) ?
 	    USBPF_FLAG_PIPE_BOF : 0;
 	bits |= (flags->proxy_buffer == 1) ?
 	    USBPF_FLAG_PROXY_BUFFER : 0;
 	bits |= (flags->ext_buffer == 1) ?
 	    USBPF_FLAG_EXT_BUFFER : 0;
 	bits |= (flags->manual_status == 1) ?
 	    USBPF_FLAG_MANUAL_STATUS : 0;
 	bits |= (flags->no_pipe_ok == 1) ?
 	    USBPF_FLAG_NO_PIPE_OK : 0;
 	bits |= (flags->stall_pipe == 1) ?
 	    USBPF_FLAG_STALL_PIPE : 0;
 
 	return (bits);
 }
 
 /*
  * Fold the individual usb_xfer_flags_int fields into a single word of
  * USBPF_STATUS_* bits; a bit is set when the matching field equals 1.
  * The busdma related bits are only reported when USB_HAVE_BUSDMA is set.
  */
 static uint32_t
 usbpf_aggregate_status(struct usb_xfer_flags_int *flags)
 {
 	uint32_t bits = 0;
 
 	bits |= (flags->open == 1) ?
 	    USBPF_STATUS_OPEN : 0;
 	bits |= (flags->transferring == 1) ?
 	    USBPF_STATUS_TRANSFERRING : 0;
 	bits |= (flags->did_dma_delay == 1) ?
 	    USBPF_STATUS_DID_DMA_DELAY : 0;
 	bits |= (flags->did_close == 1) ?
 	    USBPF_STATUS_DID_CLOSE : 0;
 	bits |= (flags->draining == 1) ?
 	    USBPF_STATUS_DRAINING : 0;
 	bits |= (flags->started == 1) ?
 	    USBPF_STATUS_STARTED : 0;
 	bits |= (flags->bandwidth_reclaimed == 1) ?
 	    USBPF_STATUS_BW_RECLAIMED : 0;
 	bits |= (flags->control_xfr == 1) ?
 	    USBPF_STATUS_CONTROL_XFR : 0;
 	bits |= (flags->control_hdr == 1) ?
 	    USBPF_STATUS_CONTROL_HDR : 0;
 	bits |= (flags->control_act == 1) ?
 	    USBPF_STATUS_CONTROL_ACT : 0;
 	bits |= (flags->control_stall == 1) ?
 	    USBPF_STATUS_CONTROL_STALL : 0;
 	bits |= (flags->short_frames_ok == 1) ?
 	    USBPF_STATUS_SHORT_FRAMES_OK : 0;
 	bits |= (flags->short_xfer_ok == 1) ?
 	    USBPF_STATUS_SHORT_XFER_OK : 0;
 #if USB_HAVE_BUSDMA
 	bits |= (flags->bdma_enable == 1) ?
 	    USBPF_STATUS_BDMA_ENABLE : 0;
 	bits |= (flags->bdma_no_post_sync == 1) ?
 	    USBPF_STATUS_BDMA_NO_POST_SYNC : 0;
 	bits |= (flags->bdma_setup == 1) ?
 	    USBPF_STATUS_BDMA_SETUP : 0;
 #endif
 	bits |= (flags->isochronous_xfr == 1) ?
 	    USBPF_STATUS_ISOCHRONOUS_XFR : 0;
 	bits |= (flags->curr_dma_set == 1) ?
 	    USBPF_STATUS_CURR_DMA_SET : 0;
 	bits |= (flags->can_cancel_immed == 1) ?
 	    USBPF_STATUS_CAN_CANCEL_IMMED : 0;
 	bits |= (flags->doing_callback == 1) ?
 	    USBPF_STATUS_DOING_CALLBACK : 0;
 
 	return (bits);
 }
 
 /*
  * Tell whether the given frame of a transfer is in the read direction.
  * Frame 0 of a control transfer that carries the control header is a
  * special case decided by the USB mode; every other frame follows
  * USB_GET_DATA_ISREAD().
  */
 static int
 usbpf_xfer_frame_is_read(struct usb_xfer *xfer, uint32_t frame)
 {
 
 	/* Ordinary frame: use the transfer's data direction. */
 	if ((frame != 0) || (xfer->flags_int.control_xfr == 0) ||
 	    (xfer->flags_int.control_hdr == 0))
 		return (USB_GET_DATA_ISREAD(xfer));
 
 	/*
 	 * Control header frame: the device controller writes it to
 	 * memory (read), the host controller reads it from memory.
 	 */
 	if (xfer->flags_int.usb_mode == USB_MODE_DEVICE)
 		return (1);
 	return (0);
 }
 
 /*
  * Compute the total size of the BPF record for a transfer: the packet
  * header, one frame header per frame, plus the aligned data length of
  * every frame whose data is captured.  Write frames carry data on
  * submit; read frames carry data on any other tap type.
  */
 static uint32_t
 usbpf_xfer_precompute_size(struct usb_xfer *xfer, int type)
 {
 	uint32_t nframes;
 	uint32_t total;
 	uint32_t idx;
 	int submit;
 
 	submit = (type == USBPF_XFERTAP_SUBMIT);
 	if (submit)
 		nframes = xfer->nframes;
 	else
 		nframes = xfer->aframes;
 
 	total = USBPF_HDR_LEN + (USBPF_FRAME_HDR_LEN * nframes);
 
 	/* Add the payload of each frame whose data will follow. */
 	for (idx = 0; idx != nframes; idx++) {
 		int isread = (usbpf_xfer_frame_is_read(xfer, idx) != 0);
 
 		if (isread != submit)
 			total += USBPF_FRAME_ALIGN(xfer->frlengths[idx]);
 	}
 	return (total);
 }
 
 /*
  * Tap a USB transfer into BPF.
  *
  * Builds one contiguous record in a temporary buffer: the usbpf packet
  * header, then for each frame a frame header optionally followed by the
  * zero-padded frame data, and hands it to bpf_tap_if().  Returns early
  * without tapping when the bus has no interface or no BPF peers, or
  * when the M_NOWAIT allocation fails.
  */
 void
 usbpf_xfertap(struct usb_xfer *xfer, int type)
 {
 	struct usb_bus *bus;
 	struct usbpf_pkthdr *up;
 	struct usbpf_framehdr *uf;
 	usb_frlength_t offset;
 	uint32_t totlen;
 	uint32_t frame;
 	uint32_t temp;
 	uint32_t nframes;
 	uint32_t x;
 	uint8_t *buf;
 	uint8_t *ptr;
 
 	bus = xfer->xroot->bus;
 
 	/* sanity checks */
-	if (bus->ifp == NULL || if_getbpf(bus->ifp) == NULL)
-		return;
-	if (!bpf_peers_present(if_getbpf(bus->ifp)))
+	if (bus->ifp == NULL || !bpf_peers_present_if(bus->ifp))
 		return;
 
 	/* total record size, including all headers and padded data */
 	totlen = usbpf_xfer_precompute_size(xfer, type);
 
 	/* same frame count selection as usbpf_xfer_precompute_size() */
 	if (type == USBPF_XFERTAP_SUBMIT)
 		nframes = xfer->nframes;
 	else
 		nframes = xfer->aframes;
 
 	/*
 	 * XXX TODO XXX
 	 *
 	 * When BPF supports it we could pass a fragmented array of
 	 * buffers avoiding the data copy operation here.
 	 */
 	buf = ptr = malloc(totlen, M_TEMP, M_NOWAIT);
 	if (buf == NULL) {
 		device_printf(bus->parent, "usbpf: Out of memory\n");
 		return;
 	}
 
 	up = (struct usbpf_pkthdr *)ptr;
 	ptr += USBPF_HDR_LEN;
 
 	/* fill out header */
 	temp = device_get_unit(bus->bdev);
 	up->up_totlen = htole32(totlen);
 	up->up_busunit = htole32(temp);
 	up->up_address = xfer->xroot->udev->device_index;
 	if (xfer->flags_int.usb_mode == USB_MODE_DEVICE)
 		up->up_mode = USBPF_MODE_DEVICE;
 	else
 		up->up_mode = USBPF_MODE_HOST;
 	up->up_type = type;
 	up->up_xfertype = xfer->endpoint->edesc->bmAttributes & UE_XFERTYPE;
 	temp = usbpf_aggregate_xferflags(&xfer->flags);
 	up->up_flags = htole32(temp);
 	temp = usbpf_aggregate_status(&xfer->flags_int);
 	up->up_status = htole32(temp);
 	temp = xfer->error;
 	up->up_error = htole32(temp);
 	temp = xfer->interval;
 	up->up_interval = htole32(temp);
 	up->up_frames = htole32(nframes);
 	temp = xfer->max_packet_size;
 	up->up_packet_size = htole32(temp);
 	temp = xfer->max_packet_count;
 	up->up_packet_count = htole32(temp);
 	temp = xfer->endpointno;
 	up->up_endpoint = htole32(temp);
 	up->up_speed = xfer->xroot->udev->speed;
 
 	/* clear reserved area */
 	memset(up->up_reserved, 0, sizeof(up->up_reserved));
 
 	/* init offset and frame */
 	offset = 0;
 	frame = 0;
 
 	/* iterate all the USB frames and copy data, if any */
 	for (x = 0; x != nframes; x++) {
 		uint32_t length;
 		int isread;
 
 		/* get length */
 		length = xfer->frlengths[x];
 
 		/* get frame header pointer */
 		uf = (struct usbpf_framehdr *)ptr;
 		ptr += USBPF_FRAME_HDR_LEN;
 
 		/* fill out packet header */
 		uf->length = htole32(length);
 		uf->flags = 0;
 
 		/* get information about data read/write */
 		isread = usbpf_xfer_frame_is_read(xfer, x);
 
 		/*
 		 * check if we need to copy any data: read frames carry
 		 * data except on submit, write frames only on submit
 		 */
 		if (isread) {
 			if (type == USBPF_XFERTAP_SUBMIT)
 				length = 0;
 			else {
 				uf->flags |= htole32(
 				    USBPF_FRAMEFLAG_DATA_FOLLOWS);
 			}
 		} else {
 			if (type != USBPF_XFERTAP_SUBMIT)
 				length = 0;
 			else {
 				uf->flags |= htole32(
 				    USBPF_FRAMEFLAG_DATA_FOLLOWS);
 			}
 		}
 
 		/* check if data is read direction */
 		if (isread)
 			uf->flags |= htole32(USBPF_FRAMEFLAG_READ);
 
 		/* copy USB data, if any */
 		if (length != 0) {
 			/* copy data */
 			usbd_copy_out(&xfer->frbuffers[frame],
 			    offset, ptr, length);
 
 			/* align length */
 			temp = USBPF_FRAME_ALIGN(length);
 
 			/* zero pad */
 			if (temp != length)
 				memset(ptr + length, 0, temp - length);
 
 			ptr += temp;
 		}
 
 		/*
 		 * isochronous transfers advance the offset and keep the
 		 * frbuffers index; all others step to the next frbuffer
 		 */
 		if (xfer->flags_int.isochronous_xfr) {
 			offset += usbd_xfer_old_frame_length(xfer, x);
 		} else {
 			frame ++;
 		}
 	}
 
 	bpf_tap_if(bus->ifp, buf, totlen);
 
 	free(buf, M_TEMP);
 }
diff --git a/sys/net/bpf.c b/sys/net/bpf.c
index 8ca6e941e646..96420b709911 100644
--- a/sys/net/bpf.c
+++ b/sys/net/bpf.c
@@ -1,3209 +1,3223 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1990, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * Copyright (c) 2019 Andrey V. Elsukov <ae@FreeBSD.org>
  *
  * This code is derived from the Stanford/CMU enet packet filter,
  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
  * Berkeley Laboratory.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
  */
 
 #include <sys/cdefs.h>
 #include "opt_bpf.h"
 #include "opt_ddb.h"
 #include "opt_netgraph.h"
 
 #include <sys/param.h>
 #include <sys/conf.h>
 #include <sys/eventhandler.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/time.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/signalvar.h>
 #include <sys/filio.h>
 #include <sys/sockio.h>
 #include <sys/ttycom.h>
 #include <sys/uio.h>
 #include <sys/sysent.h>
 #include <sys/systm.h>
 
 #include <sys/event.h>
 #include <sys/file.h>
 #include <sys/poll.h>
 #include <sys/proc.h>
 
 #include <sys/socket.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_private.h>
 #include <net/if_vlan_var.h>
 #include <net/if_dl.h>
 #include <net/bpf.h>
 #include <net/bpf_buffer.h>
 #ifdef BPF_JITTER
 #include <net/bpf_jitter.h>
 #endif
 #include <net/bpf_zerocopy.h>
 #include <net/bpfdesc.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 
 #include <net80211/ieee80211_freebsd.h>
 
 #include <security/mac/mac_framework.h>
 
 MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
 
 static struct bpf_if_ext dead_bpf_if = {
 	.bif_dlist = CK_LIST_HEAD_INITIALIZER()
 };
 
 /*
  * Per-interface, per-DLT BPF attachment.  The public part (bif_ext) must
  * stay the first member; this is enforced by a CTASSERT on its offset.
  */
 struct bpf_if {
 	/* Shorthands so that bif_next/bif_dlist reach into bif_ext. */
 #define	bif_next	bif_ext.bif_next
 #define	bif_dlist	bif_ext.bif_dlist
 	struct bpf_if_ext bif_ext;	/* public members */
 	u_int		bif_dlt;	/* link layer type */
 	u_int		bif_hdrlen;	/* length of link header */
 	struct bpfd_list bif_wlist;	/* writer-only list */
 	struct ifnet	*bif_ifp;	/* corresponding interface */
 	struct bpf_if	**bif_bpf;	/* Pointer to pointer to us */
 	volatile u_int	bif_refcnt;	/* see bpfif_ref()/bpfif_rele() */
 	struct epoch_context epoch_ctx;	/* for deferred bpfif_free() */
 };
 
 CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);
 
 /*
  * Header placed in front of filter program storage.  Allocated by
  * bpf_program_buffer_alloc() with the payload following the header and
  * released through bpf_program_buffer_free().
  */
 struct bpf_program_buffer {
 	struct epoch_context	epoch_ctx;	/* for deferred free */
 #ifdef BPF_JITTER
 	bpf_jit_filter		*func;		/* destroyed on free if set */
 #endif
 	void			*buffer[0];	/* trailing payload */
 };
 
 #if defined(DEV_BPF) || defined(NETGRAPH_BPF)
 
 #define PRINET  26			/* interruptible */
 #define BPF_PRIO_MAX	7
 
 #define	SIZEOF_BPF_HDR(type)	\
     (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))
 
 #ifdef COMPAT_FREEBSD32
 #include <sys/mount.h>
 #include <compat/freebsd32/freebsd32.h>
 #define BPF_ALIGNMENT32 sizeof(int32_t)
 #define	BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32)
 
 #ifndef BURN_BRIDGES
 /*
  * 32-bit version of structure prepended to each packet.  We use this header
  * instead of the standard one for 32-bit streams.  We mark a stream as
  * 32-bit the first time we see a 32-bit compat ioctl request.
  */
 struct bpf_hdr32 {
 	struct timeval32 bh_tstamp;	/* time stamp */
 	uint32_t	bh_caplen;	/* length of captured portion */
 	uint32_t	bh_datalen;	/* original length of packet */
 	uint16_t	bh_hdrlen;	/* length of bpf header (this struct
 					   plus alignment padding) */
 };
 #endif
 
 struct bpf_program32 {
 	u_int bf_len;
 	uint32_t bf_insns;
 };
 
 struct bpf_dltlist32 {
 	u_int	bfl_len;
 	u_int	bfl_list;
 };
 
 #define	BIOCSETF32	_IOW('B', 103, struct bpf_program32)
 #define	BIOCSRTIMEOUT32	_IOW('B', 109, struct timeval32)
 #define	BIOCGRTIMEOUT32	_IOR('B', 110, struct timeval32)
 #define	BIOCGDLTLIST32	_IOWR('B', 121, struct bpf_dltlist32)
 #define	BIOCSETWF32	_IOW('B', 123, struct bpf_program32)
 #define	BIOCSETFNR32	_IOW('B', 130, struct bpf_program32)
 #endif
 
 #define BPF_LOCK()	   sx_xlock(&bpf_sx)
 #define BPF_UNLOCK()		sx_xunlock(&bpf_sx)
 #define BPF_LOCK_ASSERT()	sx_assert(&bpf_sx, SA_XLOCKED)
 /*
  * bpf_iflist is a list of BPF interface structures, each corresponding to a
  * specific DLT. The same network interface might have several BPF interface
  * structures registered by different layers in the stack (i.e., 802.11
  * frames, ethernet frames, etc).
  */
 CK_LIST_HEAD(bpf_iflist, bpf_if);
 static struct bpf_iflist bpf_iflist;
 static struct sx	bpf_sx;		/* bpf global lock */
 static int		bpf_bpfd_cnt;
 
 static void	bpfif_ref(struct bpf_if *);
 static void	bpfif_rele(struct bpf_if *);
 
 static void	bpfd_ref(struct bpf_d *);
 static void	bpfd_rele(struct bpf_d *);
 static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
 static void	bpf_detachd(struct bpf_d *);
 static void	bpf_detachd_locked(struct bpf_d *, bool);
 static void	bpfd_free(epoch_context_t);
 static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
 		    struct sockaddr *, int *, struct bpf_d *);
 static int	bpf_setif(struct bpf_d *, struct ifreq *);
 static void	bpf_timed_out(void *);
 static __inline void
 		bpf_wakeup(struct bpf_d *);
 static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
 		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
 		    struct bintime *);
 static void	reset_d(struct bpf_d *);
 static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
 static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
 static int	bpf_setdlt(struct bpf_d *, u_int);
 static void	filt_bpfdetach(struct knote *);
 static int	filt_bpfread(struct knote *, long);
 static int	filt_bpfwrite(struct knote *, long);
 static void	bpf_drvinit(void *);
 static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
 
 SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "bpf sysctl");
 int bpf_maxinsns = BPF_MAXINSNS;
 SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
     &bpf_maxinsns, 0, "Maximum bpf program instructions");
 static int bpf_zerocopy_enable = 0;
 SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
     &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
 static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
     bpf_stats_sysctl, "bpf statistics portal");
 
 VNET_DEFINE_STATIC(int, bpf_optimize_writers) = 0;
 #define	V_bpf_optimize_writers VNET(bpf_optimize_writers)
 SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RWTUN,
     &VNET_NAME(bpf_optimize_writers), 0,
     "Do not send packets until BPF program is set");
 
 static	d_open_t	bpfopen;
 static	d_read_t	bpfread;
 static	d_write_t	bpfwrite;
 static	d_ioctl_t	bpfioctl;
 static	d_poll_t	bpfpoll;
 static	d_kqfilter_t	bpfkqfilter;
 
 static struct cdevsw bpf_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_open =	bpfopen,
 	.d_read =	bpfread,
 	.d_write =	bpfwrite,
 	.d_ioctl =	bpfioctl,
 	.d_poll =	bpfpoll,
 	.d_name =	"bpf",
 	.d_kqfilter =	bpfkqfilter,
 };
 
 static struct filterops bpfread_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_bpfdetach,
 	.f_event = filt_bpfread,
 };
 
 static struct filterops bpfwrite_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_bpfdetach,
 	.f_event = filt_bpfwrite,
 };
 
 /*
  * LOCKING MODEL USED BY BPF
  *
  * Locks:
  * 1) global lock (BPF_LOCK). Sx, used to protect some global counters,
  * every bpf_iflist changes, serializes ioctl access to bpf descriptors.
  * 2) Descriptor lock. Mutex, used to protect BPF buffers and various
  * structure fields used by bpf_*tap* code.
  *
  * Lock order: global lock, then descriptor lock.
  *
  * There are several possible consumers:
  *
  * 1. The kernel registers interface pointer with bpfattach().
  * Each call allocates new bpf_if structure, references ifnet pointer
  * and links bpf_if into bpf_iflist chain. This is protected with global
  * lock.
  *
  * 2. A userland application uses ioctl() calls on a bpf_d descriptor.
  * All such calls are serialized with the global lock. BPF filters can be
  * changed, but the pointer to the old filter will be freed using
  * NET_EPOCH_CALL(). Thus it should be safe for bpf_tap/bpf_mtap* code to
  * access filter pointers, even if a change happens during bpf_tap
  * execution. Destroying a bpf_d descriptor is also done using
  * NET_EPOCH_CALL().
  *
  * 3. An userland application can write packets into bpf_d descriptor.
  * There we need to be sure, that ifnet won't disappear during bpfwrite().
  *
  * 4. The kernel invokes bpf_tap/bpf_mtap* functions. The access to
  * bif_dlist is protected with net_epoch_preempt section. So, it should
  * be safe to make access to bpf_d descriptor inside the section.
  *
  * 5. The kernel invokes bpfdetach() on interface destroying. All lists
  * are modified with global lock held and actual free() is done using
  * NET_EPOCH_CALL().
  */
 
 /*
  * Epoch callback: recover the bpf_if from its embedded epoch context,
  * drop the interface reference it holds and free it.
  */
 static void
 bpfif_free(epoch_context_t ctx)
 {
 	struct bpf_if *bif = __containerof(ctx, struct bpf_if, epoch_ctx);
 
 	if_rele(bif->bif_ifp);
 	free(bif, M_BPF);
 }
 
 /* Take an additional reference on a bpf_if. */
 static void
 bpfif_ref(struct bpf_if *bp)
 {
 	refcount_acquire(&bp->bif_refcnt);
 }
 
 /*
  * Drop a reference on a bpf_if; when the last one goes away, schedule
  * bpfif_free() through NET_EPOCH_CALL().
  */
 static void
 bpfif_rele(struct bpf_if *bp)
 {
 
 	if (refcount_release(&bp->bif_refcnt)) {
 		NET_EPOCH_CALL(bpfif_free, &bp->epoch_ctx);
 	}
 }
 
 /* Take an additional reference on a bpf descriptor. */
 static void
 bpfd_ref(struct bpf_d *d)
 {
 	refcount_acquire(&d->bd_refcnt);
 }
 
 /*
  * Drop a reference on a bpf descriptor; when the last one goes away,
  * schedule bpfd_free() through NET_EPOCH_CALL().
  */
 static void
 bpfd_rele(struct bpf_d *d)
 {
 
 	if (refcount_release(&d->bd_refcnt)) {
 		NET_EPOCH_CALL(bpfd_free, &d->epoch_ctx);
 	}
 }
 
 /*
  * Allocate a bpf_program_buffer header followed by "size" bytes of
  * payload from M_BPF, passing "flags" straight to malloc().
  */
 static struct bpf_program_buffer*
 bpf_program_buffer_alloc(size_t size, int flags)
 {
 	struct bpf_program_buffer *pb;
 
 	pb = malloc(sizeof(struct bpf_program_buffer) + size, M_BPF, flags);
 	return (pb);
 }
 
 /*
  * Epoch callback: recover the program buffer from its embedded epoch
  * context, destroy the JIT filter if one is attached (BPF_JITTER only)
  * and free the buffer.
  */
 static void
 bpf_program_buffer_free(epoch_context_t ctx)
 {
 	struct bpf_program_buffer *pb;
 
 	pb = __containerof(ctx, struct bpf_program_buffer, epoch_ctx);
 #ifdef BPF_JITTER
 	if (pb->func != NULL) {
 		bpf_destroy_jit_filter(pb->func);
 	}
 #endif
 	free(pb, M_BPF);
 }
 
 /*
  * Wrapper functions for various buffering methods.  If the set of buffer
  * modes expands, we will probably want to introduce a switch data structure
  * similar to protosw, etc.
  */
 /*
  * Append "len" bytes from "src" at "offset" in "buf", dispatching on the
  * descriptor's buffer mode.  Zero-copy appends also bump bd_zcopy.
  * Panics on an unknown buffer mode.
  */
 static void
 bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
     u_int len)
 {
 
 	BPFD_LOCK_ASSERT(d);
 
 	switch (d->bd_bufmode) {
 	case BPF_BUFMODE_BUFFER:
 		bpf_buffer_append_bytes(d, buf, offset, src, len);
 		break;
 
 	case BPF_BUFMODE_ZBUF:
 		counter_u64_add(d->bd_zcopy, 1);
 		bpf_zerocopy_append_bytes(d, buf, offset, src, len);
 		break;
 
 	default:
 		panic("bpf_buf_append_bytes");
 	}
 }
 
 /*
  * Append "len" bytes of the mbuf data in "src" at "offset" in "buf",
  * dispatching on the descriptor's buffer mode.  Zero-copy appends also
  * bump bd_zcopy.  Panics on an unknown buffer mode.
  */
 static void
 bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
     u_int len)
 {
 
 	BPFD_LOCK_ASSERT(d);
 
 	switch (d->bd_bufmode) {
 	case BPF_BUFMODE_BUFFER:
 		bpf_buffer_append_mbuf(d, buf, offset, src, len);
 		break;
 
 	case BPF_BUFMODE_ZBUF:
 		counter_u64_add(d->bd_zcopy, 1);
 		bpf_zerocopy_append_mbuf(d, buf, offset, src, len);
 		break;
 
 	default:
 		panic("bpf_buf_append_mbuf");
 	}
 }
 
 /*
  * Called when the free buffer is re-assigned.  Only the zero-copy mode
  * needs to be told; plain buffer mode has nothing to do.  Panics on an
  * unknown buffer mode.
  */
 static void
 bpf_buf_reclaimed(struct bpf_d *d)
 {
 
 	BPFD_LOCK_ASSERT(d);
 
 	if (d->bd_bufmode == BPF_BUFMODE_BUFFER)
 		return;
 	if (d->bd_bufmode == BPF_BUFMODE_ZBUF) {
 		bpf_zerocopy_buf_reclaimed(d);
 		return;
 	}
 	panic("bpf_buf_reclaimed");
 }
 
 /*
  * Ask the buffer model whether a held buffer may be made free.  Only the
  * zero-copy mode can say so; returns (1) if the buffer can be discarded
  * and (0) if it cannot, which is also the answer for every other mode.
  */
 static int
 bpf_canfreebuf(struct bpf_d *d)
 {
 
 	BPFD_LOCK_ASSERT(d);
 
 	if (d->bd_bufmode == BPF_BUFMODE_ZBUF)
 		return (bpf_zerocopy_canfreebuf(d));
 
 	return (0);
 }
 
 /*
  * Let the buffer model declare the current store buffer immutable,
  * regardless of the appearance of space.  Returns (1) if the buffer is
  * writable and (0) if not; modes other than zero-copy are always
  * writable.
  */
 static int
 bpf_canwritebuf(struct bpf_d *d)
 {
 
 	BPFD_LOCK_ASSERT(d);
 
 	if (d->bd_bufmode == BPF_BUFMODE_ZBUF)
 		return (bpf_zerocopy_canwritebuf(d));
 
 	return (1);
 }
 
 /*
  * Tell the buffer model that a write to the store buffer ended in a
  * dropped packet, so the buffer may be considered full.  Only the
  * zero-copy mode is notified.
  */
 static void
 bpf_buffull(struct bpf_d *d)
 {
 
 	BPFD_LOCK_ASSERT(d);
 
 	if (d->bd_bufmode == BPF_BUFMODE_ZBUF) {
 		bpf_zerocopy_buffull(d);
 	}
 }
 
 /*
  * Tell the buffer model that a buffer has moved into the hold position.
  * Only the zero-copy mode is notified.
  */
 void
 bpf_bufheld(struct bpf_d *d)
 {
 
 	BPFD_LOCK_ASSERT(d);
 
 	if (d->bd_bufmode == BPF_BUFMODE_ZBUF) {
 		bpf_zerocopy_bufheld(d);
 	}
 }
 
 /*
  * Release the descriptor's buffers through the routine matching its
  * buffer mode.  Panics on an unknown buffer mode.
  */
 static void
 bpf_free(struct bpf_d *d)
 {
 
 	switch (d->bd_bufmode) {
 	case BPF_BUFMODE_BUFFER:
 		bpf_buffer_free(d);
 		break;
 
 	case BPF_BUFMODE_ZBUF:
 		bpf_zerocopy_free(d);
 		break;
 
 	default:
 		panic("bpf_buf_free");
 	}
 }
 
 /*
  * uiomove wrapper: only the plain buffer mode supports it, every other
  * mode gets EOPNOTSUPP.
  */
 static int
 bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
 {
 
 	if (d->bd_bufmode == BPF_BUFMODE_BUFFER)
 		return (bpf_buffer_uiomove(d, buf, len, uio));
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Buffer length ioctl wrapper: only the plain buffer mode supports it,
  * every other mode gets EOPNOTSUPP.
  */
 static int
 bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
 {
 
 	if (d->bd_bufmode == BPF_BUFMODE_BUFFER)
 		return (bpf_buffer_ioctl_sblen(d, i));
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Zero-copy "getzmax" ioctl wrapper: EOPNOTSUPP unless the descriptor
  * is in zero-copy buffer mode.
  */
 static int
 bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
 {
 
 	if (d->bd_bufmode == BPF_BUFMODE_ZBUF)
 		return (bpf_zerocopy_ioctl_getzmax(td, d, i));
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Zero-copy "rotzbuf" ioctl wrapper: EOPNOTSUPP unless the descriptor
  * is in zero-copy buffer mode.
  */
 static int
 bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
 {
 
 	if (d->bd_bufmode == BPF_BUFMODE_ZBUF)
 		return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Zero-copy "setzbuf" ioctl wrapper: EOPNOTSUPP unless the descriptor
  * is in zero-copy buffer mode.
  */
 static int
 bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
 {
 
 	if (d->bd_bufmode == BPF_BUFMODE_ZBUF)
 		return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * General BPF functions.
  */
 /*
  * Copy a packet written by userland (uio) into a freshly allocated mbuf
  * and derive the link-level sockaddr for it.
  *
  * On success returns 0, stores the mbuf in *mp and the link header
  * length in *hdrlen.  On failure returns an errno value: EIO for an
  * unsupported linktype or a failed allocation, EMSGSIZE for a bad
  * length, EPERM when the write filter rejects the packet, EINVAL for an
  * oversized 802.11 parameter header, or the uiomove() error.  On the
  * "bad" path the mbuf is freed even though *mp was already set.
  */
 static int
 bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
     struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
 {
 	const struct ieee80211_bpf_params *p;
 	struct ether_header *eh;
 	struct mbuf *m;
 	int error;
 	int len;
 	int hlen;
 	int slen;
 
 	/*
 	 * Build a sockaddr based on the data link layer type.
 	 * We do this at this level because the ethernet header
 	 * is copied directly into the data field of the sockaddr.
 	 * In the case of SLIP, there is no header and the packet
 	 * is forwarded as is.
 	 * Also, we are careful to leave room at the front of the mbuf
 	 * for the link level header.
 	 */
 	switch (linktype) {
 	case DLT_SLIP:
 		sockp->sa_family = AF_INET;
 		hlen = 0;
 		break;
 
 	case DLT_EN10MB:
 		sockp->sa_family = AF_UNSPEC;
 		/* XXX Would MAXLINKHDR be better? */
 		hlen = ETHER_HDR_LEN;
 		break;
 
 	case DLT_FDDI:
 		sockp->sa_family = AF_IMPLINK;
 		hlen = 0;
 		break;
 
 	case DLT_RAW:
 		sockp->sa_family = AF_UNSPEC;
 		hlen = 0;
 		break;
 
 	case DLT_NULL:
 		/*
 		 * null interface types require a 4 byte pseudo header which
 		 * corresponds to the address family of the packet.
 		 */
 		sockp->sa_family = AF_UNSPEC;
 		hlen = 4;
 		break;
 
 	case DLT_ATM_RFC1483:
 		/*
 		 * en atm driver requires 4-byte atm pseudo header.
 		 * though it isn't standard, vpi:vci needs to be
 		 * specified anyway.
 		 */
 		sockp->sa_family = AF_UNSPEC;
 		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
 		break;
 
 	case DLT_PPP:
 		sockp->sa_family = AF_UNSPEC;
 		hlen = 4;	/* This should match PPP_HDRLEN */
 		break;
 
 	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
 		sockp->sa_family = AF_IEEE80211;
 		hlen = 0;
 		break;
 
 	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
 		sockp->sa_family = AF_IEEE80211;
 		sockp->sa_len = 12;	/* XXX != 0 */
 		hlen = sizeof(struct ieee80211_bpf_params);
 		break;
 
 	default:
 		return (EIO);
 	}
 
 	/* The write must hold the link header and fit within the MTU. */
 	len = uio->uio_resid;
 	if (len < hlen || len - hlen > ifp->if_mtu)
 		return (EMSGSIZE);
 
 	/*
 	 * Allocate a packet-header mbuf large enough for the whole write
 	 * with m_get3().
 	 * NOTE(review): this comment used to say m_getjcl is used because
 	 * m_get2 fails for len >= MJUMPAGESIZE; the code calls m_get3.
 	 */
 	m = m_get3(len, M_WAITOK, MT_DATA, M_PKTHDR);
 	if (m == NULL)
 		return (EIO);
 	m->m_pkthdr.len = m->m_len = len;
 	*mp = m;
 
 	error = uiomove(mtod(m, u_char *), len, uio);
 	if (error)
 		goto bad;
 
 	/* A zero result from the write filter rejects the packet. */
 	slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
 	if (slen == 0) {
 		error = EPERM;
 		goto bad;
 	}
 
 	/* Check for multicast destination */
 	switch (linktype) {
 	case DLT_EN10MB:
 		eh = mtod(m, struct ether_header *);
 		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
 			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
 			    ETHER_ADDR_LEN) == 0)
 				m->m_flags |= M_BCAST;
 			else
 				m->m_flags |= M_MCAST;
 		}
 		/* Unless "header complete" is set, stamp our own source. */
 		if (d->bd_hdrcmplt == 0) {
 			memcpy(eh->ether_shost, IF_LLADDR(ifp),
 			    sizeof(eh->ether_shost));
 		}
 		break;
 	}
 
 	/*
 	 * Make room for link header, and copy it to sockaddr
 	 */
 	if (hlen != 0) {
 		if (sockp->sa_family == AF_IEEE80211) {
 			/*
 			 * Collect true length from the parameter header
 			 * NB: sockp is known to be zero'd so if we do a
 			 *     short copy unspecified parameters will be
 			 *     zero.
 			 * NB: packet may not be aligned after stripping
 			 *     bpf params
 			 * XXX check ibp_vers
 			 */
 			p = mtod(m, const struct ieee80211_bpf_params *);
 			hlen = p->ibp_len;
 			if (hlen > sizeof(sockp->sa_data)) {
 				error = EINVAL;
 				goto bad;
 			}
 		}
 		bcopy(mtod(m, const void *), sockp->sa_data, hlen);
 	}
 	*hdrlen = hlen;
 
 	return (0);
 bad:
 	m_freem(m);
 	return (error);
 }
 
 /*
  * Bind descriptor d to the bpf interface bp.  A previous binding, if any,
  * is dropped first; d is then linked onto one of bp's descriptor lists and
  * its buffers and counters are cleared with reset_d().
  *
  * The caller must hold the global BPF lock.
  */
 static void
 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 {
 	int writer_only;
 
 	BPF_LOCK_ASSERT();
 
 	/*
 	 * Sample the sysctl exactly once and keep the answer in a local,
 	 * so that both decisions below agree even if the knob is changed
 	 * while we are running.
 	 */
 	writer_only = (V_bpf_optimize_writers || d->bd_writer);
 
 	/* Let go of the interface we are currently bound to, if any. */
 	if (d->bd_bif != NULL) {
 		bpf_detachd_locked(d, false);
 	}
 
 	BPFD_LOCK(d);
 
 	/* The descriptor keeps a reference on bp while it points at it. */
 	bpfif_ref(bp);
 	d->bd_bif = bp;
 
 	/*
 	 * Many BPF consumers only ever inject raw packets (dhcpd and cdpd
 	 * are typical), so a descriptor may be parked on the writers-only
 	 * list and joins the active listeners later, once a filter is set.
 	 */
 	if (writer_only == 0) {
 		CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
 	} else {
 		CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
 		/*
 		 * bd_writer is counted down by filter-set operations.
 		 * pcap_open_live() issues a first BIOCSETF merely to set
 		 * the snap length and the application usually installs its
 		 * real filter afterwards, hence the starting value of 2.
 		 */
 		d->bd_writer = 2;
 	}
 
 	reset_d(d);
 
 	/* Trigger EVFILT_WRITE events. */
 	bpf_wakeup(d);
 
 	BPFD_UNLOCK(d);
 	bpf_bpfd_cnt++;
 
 	CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
 	    __func__, d->bd_pid, d->bd_writer ? "writer" : "active");
 
 	/* Only descriptors placed on the active list are announced. */
 	if (writer_only == 0) {
 		EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
 	}
 }
 
 /*
  * Decide whether installing the read filter fcode (flen instructions) via
  * ioctl cmd should move descriptor d out of write-only mode.
  *
  * Returns 1 when the caller should upgrade d, 0 otherwise.  As a side
  * effect, d->bd_writer is decremented when the request looks like the
  * snaplen-only filter that pcap_open_live() installs.
  */
 static int
 bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode,
     int flen)
 {
 	int is_snap, need_upgrade;
 
 	/* Nothing to do if d is already a reader or the filter is empty. */
 	if (d->bd_writer == 0 || fcode == NULL)
 		return (0);
 
 	/*
 	 * A one-instruction "return constant" program set with BIOCSETF is
 	 * what pcap_bpf.c:pcap_open_live() uses to configure the snap
 	 * length.  The constant (.k) is deliberately not inspected:
 	 * pcap_open_live() always passes a non-zero value, but k=0 (deny
 	 * ALL) is treated the same way, i.e. it does not cause an immediate
 	 * upgrade either.
 	 */
 	is_snap = (cmd == BIOCSETF && flen == 1 &&
 	    fcode[0].code == (BPF_RET | BPF_K)) ? 1 : 0;
 
 	if (is_snap != 0) {
 		/*
 		 * The first snaplen-style BIOCSETF only consumes one count.
 		 * Once the counter reaches zero a snaplen filter has been
 		 * seen before, so this one is probably a catch-all filter
 		 * and the descriptor is upgraded.
 		 */
 		need_upgrade = (--d->bd_writer == 0) ? 1 : 0;
 	} else {
 		/*
 		 * A real filter program: the consumer is probably driving
 		 * bpf directly, so upgrade right away.
 		 */
 		need_upgrade = 1;
 	}
 
 	CTR5(KTR_NET,
 	    "%s: filter function set by pid %d, "
 	    "bd_writer counter %d, snap %d upgrade %d",
 	    __func__, d->bd_pid, d->bd_writer,
 	    is_snap, need_upgrade);
 
 	return (need_upgrade);
 }
 
 /*
  * Detach descriptor d from its interface.  This is the unlocked entry
  * point: it takes the global BPF lock around bpf_detachd_locked() and
  * passes detached_ifp = false.
  */
 static void
 bpf_detachd(struct bpf_d *d)
 {
 
 	BPF_LOCK();
 	bpf_detachd_locked(d, false);
 	BPF_UNLOCK();
 }
 
 /*
  * Detach descriptor d from the interface it is bound to; the caller must
  * hold the global BPF lock.  Does nothing if d is not attached.
  *
  * detached_ifp is true when the caller indicates that the ifnet itself is
  * being detached: sleepers on d are then woken up, and no attempt is made
  * to switch promiscuous mode off on the interface.
  */
 static void
 bpf_detachd_locked(struct bpf_d *d, bool detached_ifp)
 {
 	struct bpf_if *bp;
 	struct ifnet *ifp;
 	int error, writer;
 
 	BPF_LOCK_ASSERT();
 	CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
 
 	/* Not attached to anything: nothing to undo. */
 	bp = d->bd_bif;
 	if (bp == NULL)
 		return;
 
 	BPFD_LOCK(d);
 	/* Unlink d from whichever interface list it is on. */
 	CK_LIST_REMOVE(d, bd_next);
 	/* Remember bd_writer and the ifnet for use after unlocking. */
 	writer = d->bd_writer;
 	ifp = bp->bif_ifp;
 	d->bd_bif = NULL;
 	if (detached_ifp) {
 		/*
 		 * Notify the descriptor that it is detached, so that any
 		 * sleepers wake up and get ENXIO.
 		 */
 		bpf_wakeup(d);
 	}
 	BPFD_UNLOCK(d);
 	bpf_bpfd_cnt--;
 
 	/* The tracking event fires only if bd_writer was zero. */
 	if (writer == 0) {
 		EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
 	}
 
 	/*
 	 * If this descriptor had requested promiscuous mode and the ifnet
 	 * is not going away, turn it back off.
 	 */
 	if (d->bd_promisc && !detached_ifp) {
 		d->bd_promisc = 0;
 		CURVNET_SET(ifp->if_vnet);
 		error = ifpromisc(ifp, 0);
 		CURVNET_RESTORE();
 		if (error != 0 && error != ENXIO) {
 			/*
 			 * ENXIO can happen if a pccard is unplugged.
 			 * Anything else means we managed to put the driver
 			 * into promiscuous mode but cannot take it out
 			 * again, which is really wrong.
 			 */
 			if_printf(bp->bif_ifp,
 				"bpf_detach: ifpromisc failed (%d)\n", error);
 		}
 	}
 
 	/* Release the reference held on bp while d was bound to it. */
 	bpfif_rele(bp);
 }
 
 /*
  * Destructor for a bpf descriptor; data is the struct bpf_d pointer.
  *
  * Closes the descriptor: stops any pending read timeout, clears signal
  * ownership, detaches from the interface, tears down the select/knote
  * state and finally drops a reference on the descriptor with bpfd_rele().
  * (Buffer deallocation is not visible here; presumably it happens when the
  * last reference goes away -- confirm in bpfd_rele().)
  */
 static void
 bpf_dtor(void *data)
 {
 	struct bpf_d *d = data;
 
 	/* Cancel a pending read timeout and return to the idle state. */
 	BPFD_LOCK(d);
 	if (d->bd_state == BPF_WAITING)
 		callout_stop(&d->bd_callout);
 	d->bd_state = BPF_IDLE;
 	BPFD_UNLOCK(d);
 	/* Done outside the descriptor lock. */
 	funsetown(&d->bd_sigio);
 	/* bpf_detachd() takes the global BPF lock itself. */
 	bpf_detachd(d);
 #ifdef MAC
 	mac_bpfdesc_destroy(d);
 #endif /* MAC */
 	seldrain(&d->bd_sel);
 	knlist_destroy(&d->bd_sel.si_note);
 	/* Unlike callout_stop() above, this is issued without bd_lock held. */
 	callout_drain(&d->bd_callout);
 	bpfd_rele(d);
 }
 
 /*
  * Open a bpf device: allocate a zeroed descriptor, register it as the
  * per-open private data (with bpf_dtor() as its destructor) and set up its
  * default state.
  *
  * Returns 0 on success, or the error reported by devfs_set_cdevpriv().
  */
 /* ARGSUSED */
 static int
 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
 	struct bpf_d *d;
 	int error;
 
 	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
 
 	error = devfs_set_cdevpriv(d, bpf_dtor);
 	if (error != 0) {
 		/* Registration failed, so the descriptor is still ours. */
 		free(d, M_BPF);
 		return (error);
 	}
 
 	/* Per-descriptor statistics counters. */
 	d->bd_rcount = counter_u64_alloc(M_WAITOK);
 	d->bd_dcount = counter_u64_alloc(M_WAITOK);
 	d->bd_fcount = counter_u64_alloc(M_WAITOK);
 	d->bd_wcount = counter_u64_alloc(M_WAITOK);
 	d->bd_wfcount = counter_u64_alloc(M_WAITOK);
 	d->bd_wdcount = counter_u64_alloc(M_WAITOK);
 	d->bd_zcopy = counter_u64_alloc(M_WAITOK);
 
 	/*
 	 * For historical reasons the buffer routines get a one-time
 	 * initialization call here, even though no particular buffer
 	 * method has been committed to yet.
 	 */
 	bpf_buffer_init(d);
 
 	/* Not opened for reading: start out in writer mode. */
 	if (!(flags & FREAD)) {
 		d->bd_writer = 2;
 	}
 
 	/* Defaults for a freshly opened descriptor. */
 	d->bd_hbuf_in_use = 0;
 	d->bd_bufmode = BPF_BUFMODE_BUFFER;
 	d->bd_sig = SIGIO;
 	d->bd_direction = BPF_D_INOUT;
 	refcount_init(&d->bd_refcnt, 1);
 	BPF_PID_REFRESH(d, td);
 #ifdef MAC
 	mac_bpfdesc_init(d);
 	mac_bpfdesc_create(td->td_ucred, d);
 #endif
 
 	/* The callout and the knote list both use the descriptor mutex. */
 	mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
 	callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
 	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
 
 	/* Disable VLAN pcp tagging. */
 	d->bd_pcp = 0;
 
 	return (0);
 }
 
 /*
  * bpfread - read the next chunk of packets from the descriptor's buffers.
  *
  * The request must be exactly d->bd_bufsize bytes long (EINVAL otherwise)
  * and the descriptor must be in BPF_BUFMODE_BUFFER mode (EOPNOTSUPP
  * otherwise).  Unless data is already in the hold buffer, the call sleeps
  * until the store buffer can be rotated in, the read timeout fires or a
  * signal arrives.
  *
  * Returns 0 on success (possibly with no data after a timeout), ENXIO if
  * the descriptor is not attached to an interface, EWOULDBLOCK for a
  * non-blocking read with nothing to return, the sleep error when
  * interrupted, or the error from bpf_uiomove().
  */
 static	int
 bpfread(struct cdev *dev, struct uio *uio, int ioflag)
 {
 	struct bpf_d *d;
 	int error;
 	int non_block;
 	int timed_out;
 
 	error = devfs_get_cdevpriv((void **)&d);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Restrict the application to using a buffer of the same size
 	 * as the kernel buffers.
 	 */
 	if (uio->uio_resid != d->bd_bufsize)
 		return (EINVAL);
 
 	non_block = ((ioflag & O_NONBLOCK) != 0);
 
 	BPFD_LOCK(d);
 	BPF_PID_REFRESH_CUR(d);
 	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
 		BPFD_UNLOCK(d);
 		return (EOPNOTSUPP);
 	}
 	/*
 	 * Stop a pending timeout, remember whether one had already fired
 	 * (sampled before the state is reset) and go back to idle.
 	 */
 	if (d->bd_state == BPF_WAITING)
 		callout_stop(&d->bd_callout);
 	timed_out = (d->bd_state == BPF_TIMED_OUT);
 	d->bd_state = BPF_IDLE;
 	/* Wait (interruptibly) until nobody else is using the hold buffer. */
 	while (d->bd_hbuf_in_use) {
 		error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
 		    PRINET|PCATCH, "bd_hbuf", 0);
 		if (error != 0) {
 			BPFD_UNLOCK(d);
 			return (error);
 		}
 	}
 	/*
 	 * If the hold buffer is empty, then do a timed sleep, which
 	 * ends when the timeout expires or when enough packets
 	 * have arrived to fill the store buffer.
 	 */
 	while (d->bd_hbuf == NULL) {
 		if (d->bd_slen != 0) {
 			/*
 			 * A packet(s) either arrived since the previous
 			 * read or arrived while we were asleep.
 			 */
 			if (d->bd_immediate || non_block || timed_out) {
 				/*
 				 * Rotate the buffers and return what's here
 				 * if we are in immediate mode, non-blocking
 				 * flag is set, or this descriptor timed out.
 				 */
 				ROTATE_BUFFERS(d);
 				break;
 			}
 		}
 
 		/*
 		 * No data is available, check to see if the bpf device
 		 * is still pointed at a real interface.  If not, return
 		 * ENXIO so that the userland process knows to rebind
 		 * it before using it again.
 		 */
 		if (d->bd_bif == NULL) {
 			BPFD_UNLOCK(d);
 			return (ENXIO);
 		}
 
 		if (non_block) {
 			BPFD_UNLOCK(d);
 			return (EWOULDBLOCK);
 		}
 		/* Sleep on the descriptor for at most bd_rtout ticks. */
 		error = msleep(d, &d->bd_lock, PRINET|PCATCH,
 		     "bpf", d->bd_rtout);
 		if (error == EINTR || error == ERESTART) {
 			BPFD_UNLOCK(d);
 			return (error);
 		}
 		if (error == EWOULDBLOCK) {
 			/*
 			 * On a timeout, return what's in the buffer,
 			 * which may be nothing.  If there is something
 			 * in the store buffer, we can rotate the buffers.
 			 */
 			if (d->bd_hbuf)
 				/*
 				 * We filled up the buffer in between
 				 * getting the timeout and arriving
 				 * here, so we don't need to rotate.
 				 */
 				break;
 
 			if (d->bd_slen == 0) {
 				BPFD_UNLOCK(d);
 				return (0);
 			}
 			ROTATE_BUFFERS(d);
 			break;
 		}
 	}
 	/*
 	 * At this point, we know we have something in the hold slot.
 	 * Mark the hold buffer busy so it stays put while the lock is
 	 * dropped for the copy-out below.
 	 */
 	d->bd_hbuf_in_use = 1;
 	BPFD_UNLOCK(d);
 
 	/*
 	 * Move data from hold buffer into user space.
 	 * We know the entire buffer is transferred since
 	 * we checked above that the read buffer is bpf_bufsize bytes.
 	 *
 	 * We do not have to worry about simultaneous reads because
 	 * we waited for sole access to the hold buffer above.
 	 */
 	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
 
 	/*
 	 * Hand the hold buffer back as the free buffer, clear the busy
 	 * flag and wake anybody sleeping on it.
 	 */
 	BPFD_LOCK(d);
 	KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
 	d->bd_fbuf = d->bd_hbuf;
 	d->bd_hbuf = NULL;
 	d->bd_hlen = 0;
 	bpf_buf_reclaimed(d);
 	d->bd_hbuf_in_use = 0;
 	wakeup(&d->bd_hbuf_in_use);
 	BPFD_UNLOCK(d);
 
 	return (error);
 }
 
 /*
  * If there are processes sleeping on this descriptor, wake them up.
  */
 static __inline void
 bpf_wakeup(struct bpf_d *d)
 {
 
 	BPFD_LOCK_ASSERT(d);
 	if (d->bd_state == BPF_WAITING) {
 		callout_stop(&d->bd_callout);
 		d->bd_state = BPF_IDLE;
 	}
 	wakeup(d);
 	if (d->bd_async && d->bd_sig && d->bd_sigio)
 		pgsigio(&d->bd_sigio, d->bd_sig, 0);
 
 	selwakeuppri(&d->bd_sel, PRINET);
 	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
 }
 
 /*
  * Callout handler for the read timeout; arg is the struct bpf_d.
  *
  * Moves a waiting descriptor to BPF_TIMED_OUT and, if the store buffer
  * holds data, wakes up its waiters.  Runs with the descriptor lock held.
  */
 static void
 bpf_timed_out(void *arg)
 {
 	struct bpf_d *d;
 
 	d = (struct bpf_d *)arg;
 	BPFD_LOCK_ASSERT(d);
 
 	/* Ignore the call if the callout was rescheduled or stopped. */
 	if (callout_pending(&d->bd_callout) ||
 	    !callout_active(&d->bd_callout))
 		return;
 
 	/* Only a descriptor that is still waiting can time out. */
 	if (d->bd_state != BPF_WAITING)
 		return;
 
 	d->bd_state = BPF_TIMED_OUT;
 	if (d->bd_slen != 0) {
 		bpf_wakeup(d);
 	}
 }
 
 /*
  * Report whether descriptor d has data that a reader could collect now.
  *
  * Returns 1 if the hold buffer is non-empty and cannot be freed, or if the
  * store buffer is non-empty while the descriptor is in immediate mode or
  * has timed out; returns 0 otherwise.  The descriptor lock must be held.
  */
 static int
 bpf_ready(struct bpf_d *d)
 {
 	int ready;
 
 	BPFD_LOCK_ASSERT(d);
 
 	ready = 0;
 	if (!bpf_canfreebuf(d) && d->bd_hlen != 0) {
 		/* Data is sitting in the hold buffer. */
 		ready = 1;
 	} else if (d->bd_slen != 0 &&
 	    (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)) {
 		/* The store buffer has data and need not fill up first. */
 		ready = 1;
 	}
 
 	return (ready);
 }
 
 /*
  * bpfwrite - inject one packet, supplied by the writer, on the interface
  * the descriptor is attached to.
  *
  * The data is copied into an mbuf by bpf_movein() and handed to the
  * interface's if_output routine.  When feedback mode is enabled, a copy of
  * the packet is also passed to if_input after a successful send.
  *
  * Returns 0 on success, ENXIO if the descriptor is not (or is no longer)
  * attached, ENETDOWN if the interface is not up, or the error from
  * bpf_movein() or if_output.
  */
 static int
 bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
 {
 	struct route ro;
 	struct sockaddr dst;
 	struct epoch_tracker et;
 	struct bpf_if *bp;
 	struct bpf_d *d;
 	struct ifnet *ifp;
 	struct mbuf *m, *mc;
 	int error, hlen;
 
 	error = devfs_get_cdevpriv((void **)&d);
 	if (error != 0)
 		return (error);
 
 	NET_EPOCH_ENTER(et);
 	BPFD_LOCK(d);
 	BPF_PID_REFRESH_CUR(d);
 	/* Every write attempt is counted, whether or not it succeeds. */
 	counter_u64_add(d->bd_wcount, 1);
 	if ((bp = d->bd_bif) == NULL) {
 		error = ENXIO;
 		goto out_locked;
 	}
 
 	ifp = bp->bif_ifp;
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		error = ENETDOWN;
 		goto out_locked;
 	}
 
 	/*
 	 * An empty write takes the out_locked path with error still 0:
 	 * it returns success but is counted in bd_wdcount there.
 	 */
 	if (uio->uio_resid == 0)
 		goto out_locked;
 
 	bzero(&dst, sizeof(dst));
 	m = NULL;
 	hlen = 0;
 
 	/*
 	 * Take extra reference, unlock d and exit from epoch section,
 	 * since bpf_movein() can sleep.
 	 */
 	bpfd_ref(d);
 	NET_EPOCH_EXIT(et);
 	BPFD_UNLOCK(d);
 
 	error = bpf_movein(uio, (int)bp->bif_dlt, ifp,
 	    &m, &dst, &hlen, d);
 
 	if (error != 0) {
 		counter_u64_add(d->bd_wdcount, 1);
 		bpfd_rele(d);
 		return (error);
 	}
 
 	BPFD_LOCK(d);
 	/*
 	 * Check that descriptor is still attached to the interface.
 	 * This can happen on bpfdetach(). To avoid access to detached
 	 * ifnet, free mbuf and return ENXIO.
 	 */
 	if (d->bd_bif == NULL) {
 		counter_u64_add(d->bd_wdcount, 1);
 		BPFD_UNLOCK(d);
 		bpfd_rele(d);
 		m_freem(m);
 		return (ENXIO);
 	}
 	counter_u64_add(d->bd_wfcount, 1);
 	if (d->bd_hdrcmplt)
 		dst.sa_family = pseudo_AF_HDRCMPLT;
 
 	if (d->bd_feedback) {
 		/* Best effort: a failed duplication just skips the feedback. */
 		mc = m_dup(m, M_NOWAIT);
 		if (mc != NULL)
 			mc->m_pkthdr.rcvif = ifp;
 		/* Set M_PROMISC for outgoing packets to be discarded. */
 		if (d->bd_direction == BPF_D_INOUT)
 			m->m_flags |= M_PROMISC;
 	} else
 		mc = NULL;
 
 	/*
 	 * Strip the hlen header bytes from the front of the mbuf; when
 	 * hlen is non-zero they are passed on through ro.ro_prepend below.
 	 */
 	m->m_pkthdr.len -= hlen;
 	m->m_len -= hlen;
 	m->m_data += hlen;	/* XXX */
 
 	CURVNET_SET(ifp->if_vnet);
 #ifdef MAC
 	mac_bpfdesc_create_mbuf(d, m);
 	if (mc != NULL)
 		mac_bpfdesc_create_mbuf(d, mc);
 #endif
 
 	/* Hand the link header saved in dst.sa_data to if_output. */
 	bzero(&ro, sizeof(ro));
 	if (hlen != 0) {
 		ro.ro_prepend = (u_char *)&dst.sa_data;
 		ro.ro_plen = hlen;
 		ro.ro_flags = RT_HAS_HEADER;
 	}
 
 	if (d->bd_pcp != 0)
 		vlan_set_pcp(m, d->bd_pcp);
 
 	/* Avoid possible recursion on BPFD_LOCK(). */
 	NET_EPOCH_ENTER(et);
 	BPFD_UNLOCK(d);
 	error = (*ifp->if_output)(ifp, m, &dst, &ro);
 	if (error)
 		counter_u64_add(d->bd_wdcount, 1);
 
 	/* Feed the copy back in only if the send succeeded. */
 	if (mc != NULL) {
 		if (error == 0)
 			(*ifp->if_input)(ifp, mc);
 		else
 			m_freem(mc);
 	}
 	NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 	bpfd_rele(d);
 	return (error);
 
 	/*
 	 * Early exit while the epoch and the descriptor lock are still held
 	 * and before the extra reference has been taken.
 	 */
 out_locked:
 	counter_u64_add(d->bd_wdcount, 1);
 	NET_EPOCH_EXIT(et);
 	BPFD_UNLOCK(d);
 	return (error);
 }
 
 /*
  * Reset descriptor d: flush its packet buffers and zero all of its
  * counters.  Flushing is straightforward for kernel-only buffers, but with
  * zero-copy buffers we must not write to (or rotate) a buffer that
  * userspace currently owns, hence the checks below.  Ideally this logic
  * would live in the buffer code rather than here.
  *
  * The descriptor lock must be held; it may be dropped temporarily while
  * sleeping for the hold buffer to become idle.
  */
 static void
 reset_d(struct bpf_d *d)
 {
 
 	BPFD_LOCK_ASSERT(d);
 
 	/* Wait until no one is using the hold buffer. */
 	while (d->bd_hbuf_in_use) {
 		mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET,
 		    "bd_hbuf", 0);
 	}
 
 	/* Give the hold buffer back, unless zero-copy says we may not. */
 	if (d->bd_hbuf != NULL &&
 	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
 		d->bd_fbuf = d->bd_hbuf;
 		d->bd_hbuf = NULL;
 		d->bd_hlen = 0;
 		bpf_buf_reclaimed(d);
 	}
 
 	/* Drop whatever has accumulated in the store buffer. */
 	if (bpf_canwritebuf(d)) {
 		d->bd_slen = 0;
 	}
 
 	/* Start all statistics over from zero. */
 	counter_u64_zero(d->bd_rcount);
 	counter_u64_zero(d->bd_dcount);
 	counter_u64_zero(d->bd_fcount);
 	counter_u64_zero(d->bd_wcount);
 	counter_u64_zero(d->bd_wfcount);
 	counter_u64_zero(d->bd_wdcount);
 	counter_u64_zero(d->bd_zcopy);
 }
 
 /*
  * ioctl handler for bpf descriptors.  Commands handled:
  *
  *  FIONREAD		Check for read packet available.
  *  BIOCGBLEN		Get buffer len [for read()].
  *  BIOCSBLEN		Set buffer len.
  *  BIOCSETF		Set read filter.
  *  BIOCSETFNR		Set read filter without resetting descriptor.
  *  BIOCSETWF		Set write filter.
  *  BIOCFLUSH		Flush read packet buffer.
  *  BIOCPROMISC		Put interface into promiscuous mode.
  *  BIOCGDLT		Get link layer type.
  *  BIOCGDLTLIST	Get list of supported link layer types.
  *  BIOCSDLT		Set link layer type.
  *  BIOCGETIF		Get interface name.
  *  BIOCSETIF		Set interface.
  *  BIOCSRTIMEOUT	Set read timeout.
  *  BIOCGRTIMEOUT	Get read timeout.
  *  BIOCGSTATS		Get packet stats.
  *  BIOCIMMEDIATE	Set immediate mode.
  *  BIOCVERSION		Get filter language version.
  *  BIOCGHDRCMPLT	Get "header already complete" flag
  *  BIOCSHDRCMPLT	Set "header already complete" flag
  *  BIOCGDIRECTION	Get packet direction flag
  *  BIOCSDIRECTION	Set packet direction flag
  *  BIOCGTSTAMP		Get time stamp format and resolution.
  *  BIOCSTSTAMP		Set time stamp format and resolution.
  *  BIOCLOCK		Set "locked" flag
  *  BIOCFEEDBACK	Set packet feedback mode.
  *  BIOCSRSIG		Set receive signal.
  *  BIOCGRSIG		Get receive signal.
  *  BIOCSETZBUF		Set current zero-copy buffer locations.
  *  BIOCGETZMAX		Get maximum zero-copy buffer size.
  *  BIOCROTZBUF		Force rotation of zero-copy buffer
  *  BIOCSETBUFMODE	Set buffer mode.
  *  BIOCGETBUFMODE	Get current buffer mode.
  *  BIOCSETVLANPCP	Set VLAN PCP tag.
  *  FIONBIO, FIOASYNC, FIOSETOWN, FIOGETOWN, TIOCSPGRP, TIOCGPGRP
  *			Generic non-blocking / async / ownership controls.
  *
  * addr points at the kernel copy of the ioctl argument.  Returns 0 on
  * success, EPERM if the descriptor is locked and cmd is not one of the
  * commands permitted in that state, EINVAL for an unknown command or a bad
  * argument, EBUSY from BIOCSETBUFMODE once buffers or an interface are
  * set up, or whatever error the per-command helper reports.
  */
 /* ARGSUSED */
 static	int
 bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
     struct thread *td)
 {
 	struct bpf_d *d;
 	int error;
 
 	error = devfs_get_cdevpriv((void **)&d);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Refresh PID associated with this descriptor, and cancel any
 	 * pending read timeout.
 	 */
 	BPFD_LOCK(d);
 	BPF_PID_REFRESH(d, td);
 	if (d->bd_state == BPF_WAITING)
 		callout_stop(&d->bd_callout);
 	d->bd_state = BPF_IDLE;
 	BPFD_UNLOCK(d);
 
 	/*
 	 * Once BIOCLOCK has been issued only the commands listed here are
 	 * still accepted; everything else fails with EPERM.
 	 */
 	if (d->bd_locked == 1) {
 		switch (cmd) {
 		case BIOCGBLEN:
 		case BIOCFLUSH:
 		case BIOCGDLT:
 		case BIOCGDLTLIST:
 #ifdef COMPAT_FREEBSD32
 		case BIOCGDLTLIST32:
 #endif
 		case BIOCGETIF:
 		case BIOCGRTIMEOUT:
 #if defined(COMPAT_FREEBSD32) && defined(__amd64__)
 		case BIOCGRTIMEOUT32:
 #endif
 		case BIOCGSTATS:
 		case BIOCVERSION:
 		case BIOCGRSIG:
 		case BIOCGHDRCMPLT:
 		case BIOCSTSTAMP:
 		case BIOCFEEDBACK:
 		case FIONREAD:
 		case BIOCLOCK:
 		case BIOCSRTIMEOUT:
 #if defined(COMPAT_FREEBSD32) && defined(__amd64__)
 		case BIOCSRTIMEOUT32:
 #endif
 		case BIOCIMMEDIATE:
 		case TIOCGPGRP:
 		case BIOCROTZBUF:
 			break;
 		default:
 			return (EPERM);
 		}
 	}
 #ifdef COMPAT_FREEBSD32
 	/*
 	 * If we see a 32-bit compat ioctl, mark the stream as 32-bit so
 	 * that it will get 32-bit packet headers.
 	 */
 	switch (cmd) {
 	case BIOCSETF32:
 	case BIOCSETFNR32:
 	case BIOCSETWF32:
 	case BIOCGDLTLIST32:
 	case BIOCGRTIMEOUT32:
 	case BIOCSRTIMEOUT32:
 		if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
 			BPFD_LOCK(d);
 			d->bd_compat32 = 1;
 			BPFD_UNLOCK(d);
 		}
 	}
 #endif
 
 	/*
 	 * Every exit below this point must be paired with
 	 * CURVNET_RESTORE(), including the early returns.
 	 */
 	CURVNET_SET(TD_TO_VNET(td));
 	switch (cmd) {
 	default:
 		error = EINVAL;
 		break;
 
 	/*
 	 * Check for read packet available.
 	 */
 	case FIONREAD:
 		{
 			int n;
 
 			BPFD_LOCK(d);
 			n = d->bd_slen;
 			while (d->bd_hbuf_in_use)
 				mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
 				    PRINET, "bd_hbuf", 0);
 			if (d->bd_hbuf)
 				n += d->bd_hlen;
 			BPFD_UNLOCK(d);
 
 			*(int *)addr = n;
 			break;
 		}
 
 	/*
 	 * Get buffer len [for read()].
 	 */
 	case BIOCGBLEN:
 		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_bufsize;
 		BPFD_UNLOCK(d);
 		break;
 
 	/*
 	 * Set buffer length.
 	 */
 	case BIOCSBLEN:
 		error = bpf_ioctl_sblen(d, (u_int *)addr);
 		break;
 
 	/*
 	 * Set link layer read filter.
 	 */
 	case BIOCSETF:
 	case BIOCSETFNR:
 	case BIOCSETWF:
 #ifdef COMPAT_FREEBSD32
 	case BIOCSETF32:
 	case BIOCSETFNR32:
 	case BIOCSETWF32:
 #endif
 		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
 		break;
 
 	/*
 	 * Flush read packet buffer.
 	 */
 	case BIOCFLUSH:
 		BPFD_LOCK(d);
 		reset_d(d);
 		BPFD_UNLOCK(d);
 		break;
 
 	/*
 	 * Put interface into promiscuous mode.
 	 */
 	case BIOCPROMISC:
 		BPF_LOCK();
 		if (d->bd_bif == NULL) {
 			/*
 			 * No interface attached yet.
 			 */
 			error = EINVAL;
 		} else if (d->bd_promisc == 0) {
 			error = ifpromisc(d->bd_bif->bif_ifp, 1);
 			if (error == 0)
 				d->bd_promisc = 1;
 		}
 		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Get current data link type.
 	 */
 	case BIOCGDLT:
 		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			*(u_int *)addr = d->bd_bif->bif_dlt;
 		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Get a list of supported data link types.
 	 */
 #ifdef COMPAT_FREEBSD32
 	case BIOCGDLTLIST32:
 		{
 			struct bpf_dltlist32 *list32;
 			struct bpf_dltlist dltlist;
 
 			/* Widen the 32-bit request, then copy the count back. */
 			list32 = (struct bpf_dltlist32 *)addr;
 			dltlist.bfl_len = list32->bfl_len;
 			dltlist.bfl_list = PTRIN(list32->bfl_list);
 			BPF_LOCK();
 			if (d->bd_bif == NULL)
 				error = EINVAL;
 			else {
 				error = bpf_getdltlist(d, &dltlist);
 				if (error == 0)
 					list32->bfl_len = dltlist.bfl_len;
 			}
 			BPF_UNLOCK();
 			break;
 		}
 #endif
 
 	case BIOCGDLTLIST:
 		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
 		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Set data link type.
 	 */
 	case BIOCSDLT:
 		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			error = bpf_setdlt(d, *(u_int *)addr);
 		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Get interface name.
 	 */
 	case BIOCGETIF:
 		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else {
 			struct ifnet *const ifp = d->bd_bif->bif_ifp;
 			struct ifreq *const ifr = (struct ifreq *)addr;
 
 			strlcpy(ifr->ifr_name, ifp->if_xname,
 			    sizeof(ifr->ifr_name));
 		}
 		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Set interface.
 	 */
 	case BIOCSETIF:
 		{
 			int alloc_buf, size;
 
 			/*
 			 * Behavior here depends on the buffering model.  If
 			 * we're using kernel memory buffers, then we can
 			 * allocate them here.  If we're using zero-copy,
 			 * then the user process must have registered buffers
 			 * by the time we get here.
 			 */
 			alloc_buf = 0;
 			BPFD_LOCK(d);
 			if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
 			    d->bd_sbuf == NULL)
 				alloc_buf = 1;
 			BPFD_UNLOCK(d);
 			if (alloc_buf) {
 				size = d->bd_bufsize;
 				error = bpf_buffer_ioctl_sblen(d, &size);
 				if (error != 0)
 					break;
 			}
 			BPF_LOCK();
 			error = bpf_setif(d, (struct ifreq *)addr);
 			BPF_UNLOCK();
 			break;
 		}
 
 	/*
 	 * Set read timeout.
 	 */
 	case BIOCSRTIMEOUT:
 #if defined(COMPAT_FREEBSD32) && defined(__amd64__)
 	case BIOCSRTIMEOUT32:
 #endif
 		{
 			struct timeval *tv = (struct timeval *)addr;
 #if defined(COMPAT_FREEBSD32)
 			struct timeval32 *tv32;
 			struct timeval tv64;
 
 			if (cmd == BIOCSRTIMEOUT32) {
 				tv32 = (struct timeval32 *)addr;
 				tv = &tv64;
 				tv->tv_sec = tv32->tv_sec;
 				tv->tv_usec = tv32->tv_usec;
 			} else
 #endif
 				tv = (struct timeval *)addr;
 
 			/*
 			 * Subtract 1 tick from tvtohz() since this isn't
 			 * a one-shot timer.
 			 */
 			if ((error = itimerfix(tv)) == 0)
 				d->bd_rtout = tvtohz(tv) - 1;
 			break;
 		}
 
 	/*
 	 * Get read timeout.
 	 */
 	case BIOCGRTIMEOUT:
 #if defined(COMPAT_FREEBSD32) && defined(__amd64__)
 	case BIOCGRTIMEOUT32:
 #endif
 		{
 			struct timeval *tv;
 #if defined(COMPAT_FREEBSD32) && defined(__amd64__)
 			struct timeval32 *tv32;
 			struct timeval tv64;
 
 			if (cmd == BIOCGRTIMEOUT32)
 				tv = &tv64;
 			else
 #endif
 				tv = (struct timeval *)addr;
 
 			/* bd_rtout is kept in ticks; convert back. */
 			tv->tv_sec = d->bd_rtout / hz;
 			tv->tv_usec = (d->bd_rtout % hz) * tick;
 #if defined(COMPAT_FREEBSD32) && defined(__amd64__)
 			if (cmd == BIOCGRTIMEOUT32) {
 				tv32 = (struct timeval32 *)addr;
 				tv32->tv_sec = tv->tv_sec;
 				tv32->tv_usec = tv->tv_usec;
 			}
 #endif
 
 			break;
 		}
 
 	/*
 	 * Get packet stats.
 	 */
 	case BIOCGSTATS:
 		{
 			struct bpf_stat *bs = (struct bpf_stat *)addr;
 
 			/* XXXCSJP overflow */
 			bs->bs_recv = (u_int)counter_u64_fetch(d->bd_rcount);
 			bs->bs_drop = (u_int)counter_u64_fetch(d->bd_dcount);
 			break;
 		}
 
 	/*
 	 * Set immediate mode.
 	 */
 	case BIOCIMMEDIATE:
 		BPFD_LOCK(d);
 		d->bd_immediate = *(u_int *)addr;
 		BPFD_UNLOCK(d);
 		break;
 
 	/*
 	 * Get filter language version.
 	 */
 	case BIOCVERSION:
 		{
 			struct bpf_version *bv = (struct bpf_version *)addr;
 
 			bv->bv_major = BPF_MAJOR_VERSION;
 			bv->bv_minor = BPF_MINOR_VERSION;
 			break;
 		}
 
 	/*
 	 * Get "header already complete" flag
 	 */
 	case BIOCGHDRCMPLT:
 		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_hdrcmplt;
 		BPFD_UNLOCK(d);
 		break;
 
 	/*
 	 * Set "header already complete" flag
 	 */
 	case BIOCSHDRCMPLT:
 		BPFD_LOCK(d);
 		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
 		BPFD_UNLOCK(d);
 		break;
 
 	/*
 	 * Get packet direction flag
 	 */
 	case BIOCGDIRECTION:
 		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_direction;
 		BPFD_UNLOCK(d);
 		break;
 
 	/*
 	 * Set packet direction flag
 	 */
 	case BIOCSDIRECTION:
 		{
 			u_int	direction;
 
 			direction = *(u_int *)addr;
 			switch (direction) {
 			case BPF_D_IN:
 			case BPF_D_INOUT:
 			case BPF_D_OUT:
 				BPFD_LOCK(d);
 				d->bd_direction = direction;
 				BPFD_UNLOCK(d);
 				break;
 			default:
 				error = EINVAL;
 			}
 		}
 		break;
 
 	/*
 	 * Get packet timestamp format and resolution.
 	 */
 	case BIOCGTSTAMP:
 		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_tstamp;
 		BPFD_UNLOCK(d);
 		break;
 
 	/*
 	 * Set packet timestamp format and resolution.
 	 */
 	case BIOCSTSTAMP:
 		{
 			u_int	func;
 
 			func = *(u_int *)addr;
 			if (BPF_T_VALID(func))
 				d->bd_tstamp = func;
 			else
 				error = EINVAL;
 		}
 		break;
 
 	/*
 	 * Set packet feedback mode.
 	 */
 	case BIOCFEEDBACK:
 		BPFD_LOCK(d);
 		d->bd_feedback = *(u_int *)addr;
 		BPFD_UNLOCK(d);
 		break;
 
 	/*
 	 * Set "locked" flag; nothing in this function clears it again.
 	 */
 	case BIOCLOCK:
 		BPFD_LOCK(d);
 		d->bd_locked = 1;
 		BPFD_UNLOCK(d);
 		break;
 
 	case FIONBIO:		/* Non-blocking I/O */
 		break;
 
 	case FIOASYNC:		/* Send signal on receive packets */
 		BPFD_LOCK(d);
 		d->bd_async = *(int *)addr;
 		BPFD_UNLOCK(d);
 		break;
 
 	case FIOSETOWN:
 		/*
 		 * XXX: Add some sort of locking here?
 		 * fsetown() can sleep.
 		 */
 		error = fsetown(*(int *)addr, &d->bd_sigio);
 		break;
 
 	case FIOGETOWN:
 		BPFD_LOCK(d);
 		*(int *)addr = fgetown(&d->bd_sigio);
 		BPFD_UNLOCK(d);
 		break;
 
 	/* This is deprecated, FIOSETOWN should be used instead. */
 	case TIOCSPGRP:
 		error = fsetown(-(*(int *)addr), &d->bd_sigio);
 		break;
 
 	/* This is deprecated, FIOGETOWN should be used instead. */
 	case TIOCGPGRP:
 		*(int *)addr = -fgetown(&d->bd_sigio);
 		break;
 
 	case BIOCSRSIG:		/* Set receive signal */
 		{
 			u_int sig;
 
 			sig = *(u_int *)addr;
 
 			if (sig >= NSIG)
 				error = EINVAL;
 			else {
 				BPFD_LOCK(d);
 				d->bd_sig = sig;
 				BPFD_UNLOCK(d);
 			}
 			break;
 		}
 	case BIOCGRSIG:		/* Get receive signal */
 		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_sig;
 		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCGETBUFMODE:
 		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_bufmode;
 		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCSETBUFMODE:
 		/*
 		 * Allow the buffering mode to be changed as long as we
 		 * haven't yet committed to a particular mode.  Our
 		 * definition of commitment, for now, is whether or not a
 		 * buffer has been allocated or an interface attached, since
 		 * that's the point where things get tricky.
 		 */
 		switch (*(u_int *)addr) {
 		case BPF_BUFMODE_BUFFER:
 			break;
 
 		case BPF_BUFMODE_ZBUF:
 			if (bpf_zerocopy_enable)
 				break;
 			/* FALLTHROUGH */
 
 		default:
 			CURVNET_RESTORE();
 			return (EINVAL);
 		}
 
 		BPFD_LOCK(d);
 		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
 		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
 			BPFD_UNLOCK(d);
 			CURVNET_RESTORE();
 			return (EBUSY);
 		}
 		d->bd_bufmode = *(u_int *)addr;
 		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCGETZMAX:
 		error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
 		break;
 
 	case BIOCSETZBUF:
 		error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
 		break;
 
 	case BIOCROTZBUF:
 		error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
 		break;
 
 	/*
 	 * Set VLAN PCP tag.
 	 */
 	case BIOCSETVLANPCP:
 		{
 			u_int pcp;
 
 			/*
 			 * pcp is unsigned, so only the upper bound needs
 			 * to be checked.
 			 */
 			pcp = *(u_int *)addr;
 			if (pcp > BPF_PRIO_MAX) {
 				error = EINVAL;
 				break;
 			}
 			d->bd_pcp = pcp;
 			break;
 		}
 	}
 	CURVNET_RESTORE();
 	return (error);
 }
 
 /*
  * Set d's packet filter program to fp. If this file already has a filter,
  * free it and replace it. Returns EINVAL for bogus requests.
  *
  * cmd selects which filter is replaced: BIOCSETWF installs the write
  * filter, any other command the read filter; BIOCSETF additionally resets
  * the descriptor.  A program of length zero removes the filter.
  *
  * Note we use global lock here to serialize bpf_setf() and bpf_setif()
  * calls.
  */
 static int
 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
 {
 #ifdef COMPAT_FREEBSD32
 	struct bpf_program fp_swab;
 	struct bpf_program32 *fp32;
 #endif
 	struct bpf_program_buffer *fcode;
 	struct bpf_insn *filter;
 #ifdef BPF_JITTER
 	bpf_jit_filter *jfunc;
 #endif
 	size_t size;
 	u_int flen;
 	bool track_event;
 
 #ifdef COMPAT_FREEBSD32
 	/*
 	 * Convert a 32-bit program descriptor into the native layout and
 	 * map the command onto its native equivalent.  BIOCSETFNR32 is left
 	 * as is; the code below only compares cmd against BIOCSETF and
 	 * BIOCSETWF.
 	 */
 	switch (cmd) {
 	case BIOCSETF32:
 	case BIOCSETWF32:
 	case BIOCSETFNR32:
 		fp32 = (struct bpf_program32 *)fp;
 		fp_swab.bf_len = fp32->bf_len;
 		fp_swab.bf_insns =
 		    (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
 		fp = &fp_swab;
 		switch (cmd) {
 		case BIOCSETF32:
 			cmd = BIOCSETF;
 			break;
 		case BIOCSETWF32:
 			cmd = BIOCSETWF;
 			break;
 		}
 		break;
 	}
 #endif
 
 	filter = NULL;
 #ifdef BPF_JITTER
 	jfunc = NULL;
 #endif
 	/*
 	 * Check new filter validness before acquiring any locks.
 	 * Allocate memory for new filter, if needed.
 	 */
 	flen = fp->bf_len;
 	if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
 		return (EINVAL);
 	size = flen * sizeof(*fp->bf_insns);
 	if (size > 0) {
 		/* We're setting up new filter. Copy and check actual data. */
 		fcode = bpf_program_buffer_alloc(size, M_WAITOK);
 		filter = (struct bpf_insn *)fcode->buffer;
 		if (copyin(fp->bf_insns, filter, size) != 0 ||
 		    !bpf_validate(filter, flen)) {
 			free(fcode, M_BPF);
 			return (EINVAL);
 		}
 #ifdef BPF_JITTER
 		if (cmd != BIOCSETWF) {
 			/*
 			 * Filter is copied inside fcode and is
 			 * perfectly valid.
 			 */
 			jfunc = bpf_jitter(filter, flen);
 		}
 #endif
 	}
 
 	track_event = false;
 	/*
 	 * From here on fcode refers to the OLD program buffer (if any) that
 	 * is being replaced; the new program is reachable through filter.
 	 */
 	fcode = NULL;
 
 	BPF_LOCK();
 	BPFD_LOCK(d);
 	/* Set up new filter. */
 	if (cmd == BIOCSETWF) {
 		if (d->bd_wfilter != NULL) {
 			fcode = __containerof((void *)d->bd_wfilter,
 			    struct bpf_program_buffer, buffer);
 #ifdef BPF_JITTER
 			fcode->func = NULL;
 #endif
 		}
 		d->bd_wfilter = filter;
 	} else {
 		if (d->bd_rfilter != NULL) {
 			fcode = __containerof((void *)d->bd_rfilter,
 			    struct bpf_program_buffer, buffer);
 #ifdef BPF_JITTER
 			/* Stash the old JIT function with the old buffer. */
 			fcode->func = d->bd_bfilter;
 #endif
 		}
 		d->bd_rfilter = filter;
 #ifdef BPF_JITTER
 		d->bd_bfilter = jfunc;
 #endif
 		if (cmd == BIOCSETF)
 			reset_d(d);
 
 		if (bpf_check_upgrade(cmd, d, filter, flen) != 0) {
 			/*
 			 * Filter can be set several times without
 			 * specifying interface. In this case just mark d
 			 * as reader.
 			 */
 			d->bd_writer = 0;
 			if (d->bd_bif != NULL) {
 				/*
 				 * Remove descriptor from writers-only list
 				 * and add it to active readers list.
 				 */
 				CK_LIST_REMOVE(d, bd_next);
 				CK_LIST_INSERT_HEAD(&d->bd_bif->bif_dlist,
 				    d, bd_next);
 				CTR2(KTR_NET,
 				    "%s: upgrade required by pid %d",
 				    __func__, d->bd_pid);
 				track_event = true;
 			}
 		}
 	}
 	BPFD_UNLOCK(d);
 
 	/*
 	 * The old program is not freed in place; its release is handed to
 	 * NET_EPOCH_CALL().
 	 */
 	if (fcode != NULL)
 		NET_EPOCH_CALL(bpf_program_buffer_free, &fcode->epoch_ctx);
 
 	/* Announce the upgrade while the global BPF lock is still held. */
 	if (track_event)
 		EVENTHANDLER_INVOKE(bpf_track,
 		    d->bd_bif->bif_ifp, d->bd_bif->bif_dlt, 1);
 
 	BPF_UNLOCK();
 	return (0);
 }
 
 /*
  * Bind descriptor d to the interface named in ifr->ifr_name.
  *
  * Returns ENXIO when ifunit() finds no such interface or the interface has
  * no bpf attachment, EINVAL when the descriptor has no store buffer yet,
  * and 0 otherwise.  If d is already attached to the requested bpf_if its
  * state is only reset; otherwise it is passed to bpf_attachd().
  * The global BPF lock must be held.
  */
 static int
 bpf_setif(struct bpf_d *d, struct ifreq *ifr)
 {
 	struct ifnet *ifp;
 	struct bpf_if *target;
 
 	BPF_LOCK_ASSERT();
 
 	ifp = ifunit(ifr->ifr_name);
 	if (ifp == NULL)
 		return (ENXIO);
 	target = ifp->if_bpf;
 	if (target == NULL)
 		return (ENXIO);
 
 	/* Only the two known buffer modes are legal here. */
 	if (d->bd_bufmode != BPF_BUFMODE_BUFFER &&
 	    d->bd_bufmode != BPF_BUFMODE_ZBUF)
 		panic("bpf_setif: bufmode %d", d->bd_bufmode);
 
 	/* The buffer is expected to be allocated already. */
 	if (d->bd_sbuf == NULL)
 		return (EINVAL);
 
 	if (target == d->bd_bif) {
 		/* Same attachment as before: just reset the descriptor. */
 		BPFD_LOCK(d);
 		reset_d(d);
 		BPFD_UNLOCK(d);
 	} else {
 		bpf_attachd(d, target);
 	}
 	return (0);
 }
 
 /*
  * Support for select() and poll() system calls.
  *
  * Write events are always reported as ready.  Read events are reported
  * when bpf_ready() says so; otherwise the thread is recorded so that a
  * selwakeup() can be done later, and the read timeout is started if one
  * is configured and the descriptor is idle.
  */
 static int
 bpfpoll(struct cdev *dev, int events, struct thread *td)
 {
 	struct bpf_d *d;
 	int rdevents, revents;
 
 	/* No private data or no interface: report everything requested. */
 	if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
 		return (events &
 		    (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
 
 	rdevents = events & (POLLIN | POLLRDNORM);
 	revents = events & (POLLOUT | POLLWRNORM);
 
 	BPFD_LOCK(d);
 	/* Refresh PID associated with this descriptor. */
 	BPF_PID_REFRESH(d, td);
 	if (rdevents != 0) {
 		if (bpf_ready(d)) {
 			revents |= rdevents;
 		} else {
 			selrecord(td, &d->bd_sel);
 			/* Start the read timeout if necessary. */
 			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
 				callout_reset(&d->bd_callout, d->bd_rtout,
 				    bpf_timed_out, d);
 				d->bd_state = BPF_WAITING;
 			}
 		}
 	}
 	BPFD_UNLOCK(d);
 
 	return (revents);
 }
 
 /*
  * Support for kevent() system call.  Register EVFILT_READ and EVFILT_WRITE
  * filters and reject all others.  Returns 0 on success and 1 when the
  * descriptor cannot be found or the filter type is not supported.
  */
 int
 bpfkqfilter(struct cdev *dev, struct knote *kn)
 {
 	struct bpf_d *d;
 
 	if (devfs_get_cdevpriv((void **)&d) != 0)
 		return (1);
 
 	/* Pick the filter operations matching the requested event type. */
 	if (kn->kn_filter == EVFILT_READ)
 		kn->kn_fop = &bpfread_filtops;
 	else if (kn->kn_filter == EVFILT_WRITE)
 		kn->kn_fop = &bpfwrite_filtops;
 	else
 		return (1);
 
 	BPFD_LOCK(d);
 	/* Refresh PID associated with this descriptor. */
 	BPF_PID_REFRESH_CUR(d);
 	kn->kn_hook = d;
 	knlist_add(&d->bd_sel.si_note, kn, 1);
 	BPFD_UNLOCK(d);
 
 	return (0);
 }
 
 /* kqueue detach hook: remove kn from the descriptor's knote list. */
 static void
 filt_bpfdetach(struct knote *kn)
 {
 	struct bpf_d *d;
 
 	d = (struct bpf_d *)kn->kn_hook;
 	knlist_remove(&d->bd_sel.si_note, kn, 0);
 }
 
 /*
  * kqueue read filter.  Returns the result of bpf_ready(); when data is
  * ready kn_data is set to the number of readable bytes, otherwise the read
  * timeout is started if one is configured and the descriptor is idle.
  * The descriptor lock must be held.
  */
 static int
 filt_bpfread(struct knote *kn, long hint)
 {
 	struct bpf_d *d;
 	int ready;
 
 	d = (struct bpf_d *)kn->kn_hook;
 	BPFD_LOCK_ASSERT(d);
 
 	ready = bpf_ready(d);
 	if (!ready) {
 		if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
 			callout_reset(&d->bd_callout, d->bd_rtout,
 			    bpf_timed_out, d);
 			d->bd_state = BPF_WAITING;
 		}
 		return (ready);
 	}
 
 	kn->kn_data = d->bd_slen;
 	/*
 	 * Ignore the hold buffer if it is being copied to user space.
 	 */
 	if (d->bd_hbuf && !d->bd_hbuf_in_use)
 		kn->kn_data += d->bd_hlen;
 
 	return (ready);
 }
 
 /*
  * kqueue write filter.  A descriptor attached to an interface is always
  * writable and reports the interface MTU in kn_data; a detached one is
  * never writable.  The descriptor lock must be held.
  */
 static int
 filt_bpfwrite(struct knote *kn, long hint)
 {
 	struct bpf_d *d;
 
 	d = (struct bpf_d *)kn->kn_hook;
 	BPFD_LOCK_ASSERT(d);
 
 	if (d->bd_bif != NULL) {
 		kn->kn_data = d->bd_bif->bif_ifp->if_mtu;
 		return (1);
 	}
 
 	kn->kn_data = 0;
 	return (0);
 }
 
 /*
  * Timestamp quality levels.  The tap routines compare these numerically
  * (a higher value is treated as a better timestamp), so the ordering of
  * the values matters.
  */
 #define	BPF_TSTAMP_NONE		0
 #define	BPF_TSTAMP_FAST		1
 #define	BPF_TSTAMP_NORMAL	2
 #define	BPF_TSTAMP_EXTERN	3
 
 /*
  * Map a descriptor's timestamp type to the quality level it asks for:
  * none, fast, or normal.
  */
 static int
 bpf_ts_quality(int tstype)
 {
 	int quality;
 
 	if (tstype == BPF_T_NONE)
 		quality = BPF_TSTAMP_NONE;
 	else if ((tstype & BPF_T_FAST) != 0)
 		quality = BPF_TSTAMP_FAST;
 	else
 		quality = BPF_TSTAMP_NORMAL;
 
 	return (quality);
 }
 
 static int
 bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
 {
 	struct timespec ts;
 	struct m_tag *tag;
 	int quality;
 
 	quality = bpf_ts_quality(tstype);
 	if (quality == BPF_TSTAMP_NONE)
 		return (quality);
 
 	if (m != NULL) {
 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) {
 			mbuf_tstmp2timespec(m, &ts);
 			timespec2bintime(&ts, bt);
 			return (BPF_TSTAMP_EXTERN);
 		}
 		tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
 		if (tag != NULL) {
 			*bt = *(struct bintime *)(tag + 1);
 			return (BPF_TSTAMP_EXTERN);
 		}
 	}
 	if (quality == BPF_TSTAMP_NORMAL)
 		binuptime(bt);
 	else
 		getbinuptime(bt);
 
 	return (quality);
 }
 
 /*
  * Incoming linkage from device drivers.  Process the packet pkt, of length
  * pktlen, which is stored in a contiguous buffer.  The packet is parsed
  * by each process' filter, and if accepted, stashed into the corresponding
  * buffer.
  *
  * The descriptor list is walked inside a network epoch section; the
  * per-descriptor lock is taken only for descriptors whose filter accepted
  * the packet.
  */
 void
 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 {
 	struct epoch_tracker et;
 	struct bintime bt;
 	struct bpf_d *d;
 #ifdef BPF_JITTER
 	bpf_jit_filter *bf;
 #endif
 	u_int slen;
 	int gottime;
 
 	/* Quality of the timestamp currently held in bt (none yet). */
 	gottime = BPF_TSTAMP_NONE;
 	NET_EPOCH_ENTER(et);
 	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		counter_u64_add(d->bd_rcount, 1);
 		/*
 		 * NB: We don't call BPF_CHECK_DIRECTION() here since there
 		 * is no way for the caller to indicate to us whether this
 		 * packet is inbound or outbound. In the bpf_mtap() routines,
 		 * we use the interface pointers on the mbuf to figure it out.
 		 */
 #ifdef BPF_JITTER
 		/* Use the JIT-compiled filter when enabled and available. */
 		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
 		if (bf != NULL)
 			slen = (*(bf->func))(pkt, pktlen, pktlen);
 		else
 #endif
 		/* With BPF_JITTER this statement is the else branch above. */
 		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
 		if (slen != 0) {
 			/*
 			 * Filter matches. Take the descriptor lock.
 			 */
 			BPFD_LOCK(d);
 			counter_u64_add(d->bd_fcount, 1);
 			/*
 			 * bt is shared by all descriptors; fetch a new
 			 * timestamp only when this descriptor asks for a
 			 * better quality than the one already obtained.
 			 */
 			if (gottime < bpf_ts_quality(d->bd_tstamp))
 				gottime = bpf_gettime(&bt, d->bd_tstamp,
 				    NULL);
 #ifdef MAC
 			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
 #endif
 				catchpacket(d, pkt, pktlen, slen,
 				    bpf_append_bytes, &bt);
 			BPFD_UNLOCK(d);
 		}
 	}
 	NET_EPOCH_EXIT(et);
 }
 
 /*
  * Tap a contiguous packet on ifp's bpf attachment, but only when
  * bpf_peers_present() reports listeners.
  */
 void
 bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen)
 {
 	if (!bpf_peers_present(ifp->if_bpf))
 		return;
 	bpf_tap(ifp->if_bpf, pkt, pktlen);
 }
 
 /*
  * Evaluates to true when descriptor d must skip a packet: d wants only
  * inbound packets (BPF_D_IN) but the packet's receive interface r is not
  * the tapped interface i, or d wants only outbound packets (BPF_D_OUT) but
  * r is the tapped interface.
  */
 #define	BPF_CHECK_DIRECTION(d, r, i)				\
 	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
 	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
 
 /*
  * Incoming linkage from device drivers, when packet is in an mbuf chain.
  * Locking model is explained in bpf_tap().
  *
  * Descriptors whose direction setting excludes this packet are skipped
  * before any counter is touched.
  */
 void
 bpf_mtap(struct bpf_if *bp, struct mbuf *m)
 {
 	struct epoch_tracker et;
 	struct bintime bt;
 	struct bpf_d *d;
 #ifdef BPF_JITTER
 	bpf_jit_filter *bf;
 #endif
 	u_int pktlen, slen;
 	int gottime;
 
 	/* Skip outgoing duplicate packets. */
 	if ((m->m_flags & M_PROMISC) != 0 && m_rcvif(m) == NULL) {
 		m->m_flags &= ~M_PROMISC;
 		return;
 	}
 
 	pktlen = m_length(m, NULL);
 	/* Quality of the timestamp currently held in bt (none yet). */
 	gottime = BPF_TSTAMP_NONE;
 
 	NET_EPOCH_ENTER(et);
 	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		if (BPF_CHECK_DIRECTION(d, m_rcvif(m), bp->bif_ifp))
 			continue;
 		counter_u64_add(d->bd_rcount, 1);
 #ifdef BPF_JITTER
 		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
 		/* XXX We cannot handle multiple mbufs. */
 		if (bf != NULL && m->m_next == NULL)
 			slen = (*(bf->func))(mtod(m, u_char *), pktlen,
 			    pktlen);
 		else
 #endif
 		/*
 		 * With BPF_JITTER this statement is the else branch above.
 		 * The mbuf itself is passed as the packet with a buffer
 		 * length of 0.
 		 */
 		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
 		if (slen != 0) {
 			BPFD_LOCK(d);
 
 			counter_u64_add(d->bd_fcount, 1);
 			/*
 			 * bt is shared by all descriptors; fetch a new
 			 * timestamp only when this descriptor asks for a
 			 * better quality than the one already obtained.
 			 */
 			if (gottime < bpf_ts_quality(d->bd_tstamp))
 				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
 #ifdef MAC
 			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
 #endif
 				catchpacket(d, (u_char *)m, pktlen, slen,
 				    bpf_append_mbuf, &bt);
 			BPFD_UNLOCK(d);
 		}
 	}
 	NET_EPOCH_EXIT(et);
 }
 
 /*
  * Tap an mbuf chain on ifp's bpf attachment, but only when
  * bpf_peers_present() reports listeners.
  */
 void
 bpf_mtap_if(if_t ifp, struct mbuf *m)
 {
 	if (!bpf_peers_present(ifp->if_bpf))
 		return;
 
 	M_ASSERTVALID(m);
 	bpf_mtap(ifp->if_bpf, m);
 }
 
 /*
  * Incoming linkage from device drivers, when packet is in
  * an mbuf chain and to be prepended by a contiguous header.
  *
  * data/dlen describe the header; it is presented to the filter and to
  * catchpacket() as a temporary on-stack mbuf linked in front of m.
  */
 void
 bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
 {
 	struct epoch_tracker et;
 	struct bintime bt;
 	struct mbuf mb;
 	struct bpf_d *d;
 	u_int pktlen, slen;
 	int gottime;
 
 	/* Skip outgoing duplicate packets. */
 	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
 		m->m_flags &= ~M_PROMISC;
 		return;
 	}
 
 	pktlen = m_length(m, NULL);
 	/*
 	 * Craft on-stack mbuf suitable for passing to bpf_filter.
 	 * Note that we cut corners here; we only setup what's
 	 * absolutely needed--this mbuf should never go anywhere else.
 	 */
 	mb.m_flags = 0;
 	mb.m_next = m;
 	mb.m_data = data;
 	mb.m_len = dlen;
 	/* The reported packet length includes the prepended header. */
 	pktlen += dlen;
 
 	/* Quality of the timestamp currently held in bt (none yet). */
 	gottime = BPF_TSTAMP_NONE;
 
 	NET_EPOCH_ENTER(et);
 	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
 			continue;
 		counter_u64_add(d->bd_rcount, 1);
 		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
 		if (slen != 0) {
 			BPFD_LOCK(d);
 
 			counter_u64_add(d->bd_fcount, 1);
 			/*
 			 * The timestamp lookup uses the real mbuf m, not
 			 * the on-stack header mbuf.
 			 */
 			if (gottime < bpf_ts_quality(d->bd_tstamp))
 				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
 #ifdef MAC
 			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
 #endif
 				catchpacket(d, (u_char *)&mb, pktlen, slen,
 				    bpf_append_mbuf, &bt);
 			BPFD_UNLOCK(d);
 		}
 	}
 	NET_EPOCH_EXIT(et);
 }
 
 /*
  * Tap an mbuf chain with a prepended header on ifp's bpf attachment, but
  * only when bpf_peers_present() reports listeners.
  */
 void
 bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m)
 {
 	if (!bpf_peers_present(ifp->if_bpf))
 		return;
 
 	M_ASSERTVALID(m);
 	bpf_mtap2(ifp->if_bpf, data, dlen, m);
 }
 
 #undef	BPF_CHECK_DIRECTION
 #undef	BPF_TSTAMP_NONE
 #undef	BPF_TSTAMP_FAST
 #undef	BPF_TSTAMP_NORMAL
 #undef	BPF_TSTAMP_EXTERN
 
 /*
  * Compute the length of the bpf header that precedes each captured packet
  * for descriptor d.
  *
  * The header structure depends on the timestamp format (and on 32-bit
  * compat mode).  The sum of the link-level header length and the bpf
  * header size is rounded up to a word boundary, and the link-level header
  * length is then subtracted again, so the returned value is the bpf header
  * size plus any padding that rounding introduced.
  */
 static int
 bpf_hdrlen(struct bpf_d *d)
 {
 	int hdrlen;
 
 	hdrlen = d->bd_bif->bif_hdrlen;
 #ifndef BURN_BRIDGES
 	/* No timestamp or microtime format: use the old-style header. */
 	if (d->bd_tstamp == BPF_T_NONE ||
 	    BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME)
 #ifdef COMPAT_FREEBSD32
 		if (d->bd_compat32)
 			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32);
 		else
 #endif
 			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr);
 	else
 #endif
 		/* All other formats use the extended header. */
 		hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr);
 #ifdef COMPAT_FREEBSD32
 	if (d->bd_compat32)
 		hdrlen = BPF_WORDALIGN32(hdrlen);
 	else
 #endif
 		hdrlen = BPF_WORDALIGN(hdrlen);
 
 	/* Report only the bpf part; the link header is not ours. */
 	return (hdrlen - d->bd_bif->bif_hdrlen);
 }
 
 static void
 bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype)
 {
 	struct bintime bt2, boottimebin;
 	struct timeval tsm;
 	struct timespec tsn;
 
 	if ((tstype & BPF_T_MONOTONIC) == 0) {
 		bt2 = *bt;
 		getboottimebin(&boottimebin);
 		bintime_add(&bt2, &boottimebin);
 		bt = &bt2;
 	}
 	switch (BPF_T_FORMAT(tstype)) {
 	case BPF_T_MICROTIME:
 		bintime2timeval(bt, &tsm);
 		ts->bt_sec = tsm.tv_sec;
 		ts->bt_frac = tsm.tv_usec;
 		break;
 	case BPF_T_NANOTIME:
 		bintime2timespec(bt, &tsn);
 		ts->bt_sec = tsn.tv_sec;
 		ts->bt_frac = tsn.tv_nsec;
 		break;
 	case BPF_T_BINTIME:
 		ts->bt_sec = bt->sec;
 		ts->bt_frac = bt->frac;
 		break;
 	}
 }
 
 /*
  * Move the packet data from interface memory (pkt) into the
  * store buffer.  "cpfn" is the routine called to do the actual data
  * transfer.  bpf_append_bytes is passed in to copy contiguous chunks, while
  * bpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
  * pkt is really an mbuf.
  *
  * pktlen is the full packet length and snaplen the number of bytes the
  * filter asked to keep; bt is the timestamp to record (only read when the
  * descriptor's timestamp type is not BPF_T_NONE).  The descriptor lock
  * must be held.  Packets that cannot be stored are counted in bd_dcount.
  */
 static void
 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
     void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
     struct bintime *bt)
 {
 	static char zeroes[BPF_ALIGNMENT];
 	struct bpf_xhdr hdr;
 #ifndef BURN_BRIDGES
 	struct bpf_hdr hdr_old;
 #ifdef COMPAT_FREEBSD32
 	struct bpf_hdr32 hdr32_old;
 #endif
 #endif
 	int caplen, curlen, hdrlen, pad, totlen;
 	int do_wakeup = 0;
 	int do_timestamp;
 	int tstype;
 
 	BPFD_LOCK_ASSERT(d);
 	if (d->bd_bif == NULL) {
 		/* Descriptor was detached in concurrent thread */
 		counter_u64_add(d->bd_dcount, 1);
 		return;
 	}
 
 	/*
 	 * Detect whether user space has released a buffer back to us, and if
 	 * so, move it from being a hold buffer to a free buffer.  This may
 	 * not be the best place to do it (for example, we might only want to
 	 * run this check if we need the space), but for now it's a reliable
 	 * spot to do it.
 	 */
 	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
 		d->bd_fbuf = d->bd_hbuf;
 		d->bd_hbuf = NULL;
 		d->bd_hlen = 0;
 		bpf_buf_reclaimed(d);
 	}
 
 	/*
 	 * Figure out how many bytes to move.  If the packet is
 	 * greater or equal to the snapshot length, transfer that
 	 * much.  Otherwise, transfer the whole packet (unless
 	 * we hit the buffer size limit).
 	 */
 	hdrlen = bpf_hdrlen(d);
 	totlen = hdrlen + min(snaplen, pktlen);
 	if (totlen > d->bd_bufsize)
 		totlen = d->bd_bufsize;
 
 	/*
 	 * Round up the end of the previous packet to the next longword.
 	 *
 	 * Drop the packet if there's no room and no hope of room
 	 * If the packet would overflow the storage buffer or the storage
 	 * buffer is considered immutable by the buffer model, try to rotate
 	 * the buffer and wakeup pending processes.
 	 */
 #ifdef COMPAT_FREEBSD32
 	if (d->bd_compat32)
 		curlen = BPF_WORDALIGN32(d->bd_slen);
 	else
 #endif
 		curlen = BPF_WORDALIGN(d->bd_slen);
 	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
 		if (d->bd_fbuf == NULL) {
 			/*
 			 * There's no room in the store buffer, and no
 			 * prospect of room, so drop the packet.  Notify the
 			 * buffer model.
 			 */
 			bpf_buffull(d);
 			counter_u64_add(d->bd_dcount, 1);
 			return;
 		}
 		KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use"));
 		ROTATE_BUFFERS(d);
 		do_wakeup = 1;
 		/* The packet goes at the start of the new store buffer. */
 		curlen = 0;
 	} else {
 		if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
 			/*
 			 * Immediate mode is set, or the read timeout has
 			 * already expired during a select call.  A packet
 			 * arrived, so the reader should be woken up.
 			 */
 			do_wakeup = 1;
 		}
 		/* Bytes between the previous packet's end and curlen. */
 		pad = curlen - d->bd_slen;
 		KASSERT(pad >= 0 && pad <= sizeof(zeroes),
 		    ("%s: invalid pad byte count %d", __func__, pad));
 		if (pad > 0) {
 			/* Zero pad bytes. */
 			bpf_append_bytes(d, d->bd_sbuf, d->bd_slen, zeroes,
 			    pad);
 		}
 	}
 
 	/* Number of packet bytes actually captured. */
 	caplen = totlen - hdrlen;
 	tstype = d->bd_tstamp;
 	do_timestamp = tstype != BPF_T_NONE;
 #ifndef BURN_BRIDGES
 	/*
 	 * No timestamp or microtime format: emit the old-style header
 	 * (32-bit variant for compat descriptors) and skip the extended
 	 * header below.
 	 */
 	if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) {
 		struct bpf_ts ts;
 		if (do_timestamp)
 			bpf_bintime2ts(bt, &ts, tstype);
 #ifdef COMPAT_FREEBSD32
 		if (d->bd_compat32) {
 			bzero(&hdr32_old, sizeof(hdr32_old));
 			if (do_timestamp) {
 				hdr32_old.bh_tstamp.tv_sec = ts.bt_sec;
 				hdr32_old.bh_tstamp.tv_usec = ts.bt_frac;
 			}
 			hdr32_old.bh_datalen = pktlen;
 			hdr32_old.bh_hdrlen = hdrlen;
 			hdr32_old.bh_caplen = caplen;
 			bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old,
 			    sizeof(hdr32_old));
 			goto copy;
 		}
 #endif
 		bzero(&hdr_old, sizeof(hdr_old));
 		if (do_timestamp) {
 			hdr_old.bh_tstamp.tv_sec = ts.bt_sec;
 			hdr_old.bh_tstamp.tv_usec = ts.bt_frac;
 		}
 		hdr_old.bh_datalen = pktlen;
 		hdr_old.bh_hdrlen = hdrlen;
 		hdr_old.bh_caplen = caplen;
 		bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old,
 		    sizeof(hdr_old));
 		goto copy;
 	}
 #endif
 
 	/*
 	 * Append the bpf header.  Note we append the actual header size, but
 	 * move forward the length of the header plus padding.
 	 */
 	bzero(&hdr, sizeof(hdr));
 	if (do_timestamp)
 		bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype);
 	hdr.bh_datalen = pktlen;
 	hdr.bh_hdrlen = hdrlen;
 	hdr.bh_caplen = caplen;
 	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));
 
 	/*
 	 * Copy the packet data into the store buffer and update its length.
 	 */
 #ifndef BURN_BRIDGES
 copy:
 #endif
 	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen);
 	d->bd_slen = curlen + totlen;
 
 	/* Wake the reader only after the packet is fully stored. */
 	if (do_wakeup)
 		bpf_wakeup(d);
 }
 
 /*
  * Free buffers currently in use by a descriptor.
  * Called on close.
  *
  * ctx is the epoch context embedded in the descriptor; the descriptor is
  * recovered from it.  Besides the buffers this releases the read and write
  * filter programs, the descriptor lock, all per-descriptor counters and
  * finally the descriptor itself.
  */
 static void
 bpfd_free(epoch_context_t ctx)
 {
 	struct bpf_d *d;
 	struct bpf_program_buffer *p;
 
 	/*
 	 * We don't need to lock out interrupts since this descriptor has
 	 * been detached from its interface and it yet hasn't been marked
 	 * free.
 	 */
 	d = __containerof(ctx, struct bpf_d, epoch_ctx);
 	bpf_free(d);
 	if (d->bd_rfilter != NULL) {
 		/* The filter lives inside a bpf_program_buffer. */
 		p = __containerof((void *)d->bd_rfilter,
 		    struct bpf_program_buffer, buffer);
 #ifdef BPF_JITTER
 		/* Hand the JIT-compiled filter over with the buffer. */
 		p->func = d->bd_bfilter;
 #endif
 		bpf_program_buffer_free(&p->epoch_ctx);
 	}
 	if (d->bd_wfilter != NULL) {
 		p = __containerof((void *)d->bd_wfilter,
 		    struct bpf_program_buffer, buffer);
 #ifdef BPF_JITTER
 		/* No JIT-compiled function is recorded for write filters. */
 		p->func = NULL;
 #endif
 		bpf_program_buffer_free(&p->epoch_ctx);
 	}
 
 	mtx_destroy(&d->bd_lock);
 	counter_u64_free(d->bd_rcount);
 	counter_u64_free(d->bd_dcount);
 	counter_u64_free(d->bd_fcount);
 	counter_u64_free(d->bd_wcount);
 	counter_u64_free(d->bd_wfcount);
 	counter_u64_free(d->bd_wdcount);
 	counter_u64_free(d->bd_zcopy);
 	free(d, M_BPF);
 }
 
 /*
  * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
  * fixed size of the link header (variable length headers not yet supported).
  *
  * Convenience wrapper around bpfattach2() that stores the new attachment
  * in ifp->if_bpf.
  */
 void
 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
 {
 
 	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
 }
 
 /*
  * Attach an interface to bpf.  ifp is a pointer to the structure
  * defining the interface to be attached, dlt is the link layer type,
  * and hdrlen is the fixed size of the link header (variable length
  * headers are not yet supported).
  *
  * driverp is where the pointer to the new bpf_if is stored; it must be
  * NULL on entry.  The new bpf_if starts with one reference and is linked
  * onto the global interface list.
  */
 void
 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen,
     struct bpf_if **driverp)
 {
 	struct bpf_if *bp;
 
 	KASSERT(*driverp == NULL,
 	    ("bpfattach2: driverp already initialized"));
 
 	bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO);
 
 	CK_LIST_INIT(&bp->bif_dlist);
 	CK_LIST_INIT(&bp->bif_wlist);
 	bp->bif_ifp = ifp;
 	bp->bif_dlt = dlt;
 	bp->bif_hdrlen = hdrlen;
 	/* Remember where the driver keeps its pointer to us. */
 	bp->bif_bpf = driverp;
 	refcount_init(&bp->bif_refcnt, 1);
 	*driverp = bp;
 	/*
 	 * Reference ifnet pointer, so it won't be freed until
 	 * we release it.
 	 */
 	if_ref(ifp);
 	BPF_LOCK();
 	CK_LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
 	BPF_UNLOCK();
 
 	if (bootverbose && IS_DEFAULT_VNET(curvnet))
 		if_printf(ifp, "bpf attached\n");
 }
 
 #ifdef VIMAGE
 /*
  * When moving interfaces between vnet instances we need a way to
  * query the dlt and hdrlen before detach so we can re-attch the if_bpf
  * after the vmove.  We unfortunately have no device driver infrastructure
  * to query the interface for these values after creation/attach, thus
  * add this as a workaround.
  */
 /*
  * Report the link layer type and header length recorded in bp.
  * Either output pointer may be NULL when the caller does not want that
  * value.  Returns ENXIO when bp is NULL and 0 otherwise.
  */
 int
 bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen)
 {
 
 	if (bp == NULL)
 		return (ENXIO);
 
 	/* With both outputs NULL nothing is stored and 0 is returned. */
 	if (bif_hdrlen != NULL)
 		*bif_hdrlen = bp->bif_hdrlen;
 	if (bif_dlt != NULL)
 		*bif_dlt = bp->bif_dlt;
 
 	return (0);
 }
 #endif
 
 /*
  * Detach bpf from an interface.  Every bpf_if that references ifp is
  * unlinked from the global list, the driver's pointer to it is redirected
  * to dead_bpf_if, all descriptors on its reader and writer-only lists are
  * detached, and the list's reference on the bpf_if is dropped.
  * Notify each descriptor as it's detached so that any sleepers wake up
  * and get ENXIO.
  */
 void
 bpfdetach(struct ifnet *ifp)
 {
 	struct bpf_if *bp, *next_bp;
 	struct bpf_d *d;
 
 	BPF_LOCK();
 	CK_LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, next_bp) {
 		/* Only attachments belonging to ifp are of interest. */
 		if (bp->bif_ifp != ifp)
 			continue;
 
 		CK_LIST_REMOVE(bp, bif_next);
 		*bp->bif_bpf = (struct bpf_if *)&dead_bpf_if;
 
 		CTR4(KTR_NET,
 		    "%s: sheduling free for encap %d (%p) for if %p",
 		    __func__, bp->bif_dlt, bp, ifp);
 
 		/* Detach common descriptors */
 		for (d = CK_LIST_FIRST(&bp->bif_dlist); d != NULL;
 		    d = CK_LIST_FIRST(&bp->bif_dlist))
 			bpf_detachd_locked(d, true);
 
 		/* Detach writer-only descriptors */
 		for (d = CK_LIST_FIRST(&bp->bif_wlist); d != NULL;
 		    d = CK_LIST_FIRST(&bp->bif_wlist))
 			bpf_detachd_locked(d, true);
 
 		bpfif_rele(bp);
 	}
 	BPF_UNLOCK();
 }
 
+/*
+ * Wrapper that returns true when bpf_peers_present() reports a positive
+ * value for ifp's bpf attachment.
+ */
+bool
+bpf_peers_present_if(struct ifnet *ifp)
+{
+
+	return (bpf_peers_present(ifp->if_bpf) > 0);
+}
+
 /*
  * Get a list of available data link types of the interface d is attached
  * to.
  *
  * With bfl->bfl_list NULL only the number of types is stored in bfl_len.
  * Otherwise the types are copied out to bfl_list and bfl_len is set to the
  * number copied; ENOMEM is returned when bfl_len is too small to hold
  * them, or the copyout() error if that fails.  The global BPF lock must
  * be held.
  */
 static int
 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
 {
 	struct ifnet *ifp;
 	struct bpf_if *bp;
 	u_int *dlts;
 	int count, error, filled;
 
 	BPF_LOCK_ASSERT();
 
 	ifp = d->bd_bif->bif_ifp;
 
 	/* First pass: count the attachments of this interface. */
 	count = 0;
 	CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		if (bp->bif_ifp == ifp)
 			count++;
 	}
 
 	if (bfl->bfl_list == NULL) {
 		bfl->bfl_len = count;
 		return (0);
 	}
 	if (count > bfl->bfl_len)
 		return (ENOMEM);
 
 	/* Second pass: collect the link types and copy them out. */
 	dlts = malloc(count * sizeof(u_int), M_TEMP, M_WAITOK);
 	filled = 0;
 	CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		if (bp->bif_ifp == ifp)
 			dlts[filled++] = bp->bif_dlt;
 	}
 	error = copyout(dlts, bfl->bfl_list, sizeof(u_int) * filled);
 	free(dlts, M_TEMP);
 	bfl->bfl_len = filled;
 
 	return (error);
 }
 
 /*
  * Set the data link type of a BPF instance.
  *
  * Returns 0 when d already uses dlt or was re-attached to the bpf_if of
  * the same interface with that type, and EINVAL when the interface has no
  * such attachment.  If the descriptor had promiscuous mode set before
  * re-attaching, it is requested again; a failure to do so is only logged.
  * The global BPF lock must be held.
  */
 static int
 bpf_setdlt(struct bpf_d *d, u_int dlt)
 {
 	struct bpf_if *bp;
 	struct ifnet *ifp;
 	int error, was_promisc;
 
 	BPF_LOCK_ASSERT();
 	MPASS(d->bd_bif != NULL);
 
 	/*
 	 * It is safe to check bd_bif without BPFD_LOCK, it can not be
 	 * changed while we hold global lock.
 	 */
 	if (d->bd_bif->bif_dlt == dlt)
 		return (0);
 
 	/* Look for an attachment of the same interface with this type. */
 	ifp = d->bd_bif->bif_ifp;
 	CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		if (bp->bif_dlt == dlt && bp->bif_ifp == ifp)
 			break;
 	}
 	if (bp == NULL)
 		return (EINVAL);
 
 	was_promisc = d->bd_promisc;
 	bpf_attachd(d, bp);
 	if (!was_promisc)
 		return (0);
 
 	error = ifpromisc(bp->bif_ifp, 1);
 	if (error == 0) {
 		d->bd_promisc = 1;
 	} else {
 		if_printf(bp->bif_ifp, "%s: ifpromisc failed (%d)\n",
 		    __func__, error);
 	}
 	return (0);
 }
 
 static void
 bpf_drvinit(void *unused)
 {
 	struct cdev *dev;
 
 	sx_init(&bpf_sx, "bpf global lock");
 	CK_LIST_INIT(&bpf_iflist);
 
 	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
 	/* For compatibility */
 	make_dev_alias(dev, "bpf0");
 }
 
 /*
  * Zero out the various packet counters associated with all of the bpf
  * descriptors.  At some point, we will probably want to get a bit more
  * granular and allow the user to specify descriptors to be zeroed.
  */
 static void
 bpf_zero_counters(void)
 {
 	struct bpf_if *bp;
 	struct bpf_d *bd;
 
 	BPF_LOCK();
 	/*
 	 * We are protected by global lock here, interfaces and
 	 * descriptors can not be deleted while we hold it.
 	 */
 	CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
 			counter_u64_zero(bd->bd_rcount);
 			counter_u64_zero(bd->bd_dcount);
 			counter_u64_zero(bd->bd_fcount);
 			counter_u64_zero(bd->bd_wcount);
 			counter_u64_zero(bd->bd_wfcount);
 			counter_u64_zero(bd->bd_zcopy);
 		}
 	}
 	BPF_UNLOCK();
 }
 
 /*
  * Fill filter statistics: snapshot descriptor bd into the exported
  * structure d.  d is zeroed first, so fields not set here read as zero.
  * The global BPF lock must be held.
  */
 static void
 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
 {
 
 	BPF_LOCK_ASSERT();
 
 	bzero(d, sizeof(*d));
 	d->bd_structsize = sizeof(*d);
 
 	/* Identity of the descriptor. */
 	d->bd_pid = bd->bd_pid;
 	strlcpy(d->bd_ifname,
 	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
 
 	/* Configuration. */
 	d->bd_immediate = bd->bd_immediate;
 	d->bd_promisc = bd->bd_promisc;
 	d->bd_hdrcmplt = bd->bd_hdrcmplt;
 	d->bd_direction = bd->bd_direction;
 	d->bd_feedback = bd->bd_feedback;
 	d->bd_async = bd->bd_async;
 	d->bd_sig = bd->bd_sig;
 	d->bd_locked = bd->bd_locked;
 	d->bd_bufmode = bd->bd_bufmode;
 
 	/* Buffer state. */
 	d->bd_slen = bd->bd_slen;
 	d->bd_hlen = bd->bd_hlen;
 	d->bd_bufsize = bd->bd_bufsize;
 
 	/* Packet counters. */
 	d->bd_rcount = counter_u64_fetch(bd->bd_rcount);
 	d->bd_dcount = counter_u64_fetch(bd->bd_dcount);
 	d->bd_fcount = counter_u64_fetch(bd->bd_fcount);
 	d->bd_wcount = counter_u64_fetch(bd->bd_wcount);
 	d->bd_wdcount = counter_u64_fetch(bd->bd_wdcount);
 	d->bd_wfcount = counter_u64_fetch(bd->bd_wfcount);
 	d->bd_zcopy = counter_u64_fetch(bd->bd_zcopy);
 }
 
 /*
  * Handle `netstat -B' stats request
  *
  * A write request carrying an all-zero struct xbpf_d resets the counters
  * of the descriptors; any other written data is rejected with EINVAL.
  * A read request without a buffer reports the space needed; with a buffer
  * it returns one struct xbpf_d per descriptor, writer-only descriptors of
  * each interface first.  Requires the PRIV_NET_BPF privilege.
  */
 static int
 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	static const struct xbpf_d zerostats;
 	struct xbpf_d *xbdbuf, *xbd, tempstats;
 	int index, error;
 	struct bpf_if *bp;
 	struct bpf_d *bd;
 
 	/*
 	 * XXX This is not technically correct. It is possible for non
 	 * privileged users to open bpf devices. It would make sense
 	 * if the users who opened the devices were able to retrieve
 	 * the statistics for them, too.
 	 */
 	error = priv_check(req->td, PRIV_NET_BPF);
 	if (error)
 		return (error);
 	/*
 	 * Check to see if the user is requesting that the counters be
 	 * zeroed out.  Explicitly check that the supplied data is zeroed,
 	 * as we aren't allowing the user to set the counters currently.
 	 */
 	if (req->newptr != NULL) {
 		if (req->newlen != sizeof(tempstats))
 			return (EINVAL);
 		memset(&tempstats, 0, sizeof(tempstats));
 		error = SYSCTL_IN(req, &tempstats, sizeof(tempstats));
 		if (error)
 			return (error);
 		if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0)
 			return (EINVAL);
 		bpf_zero_counters();
 		return (0);
 	}
 	/* Size probe: report how much space the full listing needs. */
 	if (req->oldptr == NULL)
 		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
 	if (bpf_bpfd_cnt == 0)
 		return (SYSCTL_OUT(req, 0, 0));
 	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
 	BPF_LOCK();
 	/*
 	 * The buffer was sized from the caller's oldlen; check under the
 	 * lock that it can hold an entry for every descriptor.
 	 */
 	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
 		BPF_UNLOCK();
 		free(xbdbuf, M_BPF);
 		return (ENOMEM);
 	}
 	index = 0;
 	CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		/* Send writers-only first */
 		CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
 			xbd = &xbdbuf[index++];
 			bpfstats_fill_xbpf(xbd, bd);
 		}
 		CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
 			xbd = &xbdbuf[index++];
 			bpfstats_fill_xbpf(xbd, bd);
 		}
 	}
 	BPF_UNLOCK();
 	/* Copy out only the entries actually filled in. */
 	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
 	free(xbdbuf, M_BPF);
 	return (error);
 }
 
 SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL);
 
 #else /* !DEV_BPF && !NETGRAPH_BPF */
 
 /*
  * NOP stubs to allow bpf-using drivers to load and function.
  *
  * A 'better' implementation would allow the core bpf functionality
  * to be loaded at runtime.
  */
 
 /* Stub used when bpf is not compiled in: the packet is ignored. */
 void
 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 {
 }
 
 /* Stub used when bpf is not compiled in: the packet is ignored. */
 void
 bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen)
 {
 }
 
 /* Stub used when bpf is not compiled in: the mbuf is left untouched. */
 void
 bpf_mtap(struct bpf_if *bp, struct mbuf *m)
 {
 }
 
 /* Stub used when bpf is not compiled in: the mbuf is left untouched. */
 void
 bpf_mtap_if(if_t ifp, struct mbuf *m)
 {
 }
 
 /* Stub used when bpf is not compiled in: header and mbuf are ignored. */
 void
 bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
 {
 }
 
 /* Stub used when bpf is not compiled in: header and mbuf are ignored. */
 void
 bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m)
 {
 }
 
 /*
  * Stub used when bpf is not compiled in: forwards to the stub
  * bpfattach2() with ifp->if_bpf as the driver pointer.
  */
 void
 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
 {
 
 	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
 }
 
 /*
  * Stub used when bpf is not compiled in: no bpf_if is allocated; the
  * driver's pointer is set to the shared dead_bpf_if placeholder instead.
  */
 void
 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
 {
 
 	*driverp = (struct bpf_if *)&dead_bpf_if;
 }
 
 /* Stub used when bpf is not compiled in: there is nothing to detach. */
 void
 bpfdetach(struct ifnet *ifp)
 {
 }
 
+/* Stub used when bpf is not compiled in: always reports no peers. */
+bool
+bpf_peers_present_if(struct ifnet *ifp)
+{
+	return (false);
+}
+
 /*
  * Stub used when bpf is not compiled in: no program is run and the
  * largest u_int value is returned for every packet.
  */
 u_int
 bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
 {
 
 	return ((u_int)-1);	/* "no filter" behaviour */
 }
 
 /*
  * Stub used when bpf is not compiled in: every program is reported as
  * invalid.
  */
 int
 bpf_validate(const struct bpf_insn *f, int len)
 {
 
 	return (0);		/* false */
 }
 
 #endif /* !DEV_BPF && !NETGRAPH_BPF */
 
 #ifdef DDB
 /*
  * Debugger helper: print the address of bpf_if followed by a selection of
  * its fields, one per line.  A NULL pointer prints nothing.
  */
 static void
 bpf_show_bpf_if(struct bpf_if *bpf_if)
 {
 
 	if (bpf_if == NULL)
 		return;
 	db_printf("%p:\n", bpf_if);
 /*
  * BPF_DB_PRINTF prints member e of bpf_if with format f;
  * BPF_DB_PRINTF_RAW prints an arbitrary expression e, labelled with its
  * source text.
  */
 #define	BPF_DB_PRINTF(f, e)	db_printf("   %s = " f "\n", #e, bpf_if->e);
 #define	BPF_DB_PRINTF_RAW(f, e)	db_printf("   %s = " f "\n", #e, e);
 	/* bif_ext.bif_next */
 	/* bif_ext.bif_dlist */
 	BPF_DB_PRINTF("%#x", bif_dlt);
 	BPF_DB_PRINTF("%u", bif_hdrlen);
 	/* bif_wlist */
 	BPF_DB_PRINTF("%p", bif_ifp);
 	BPF_DB_PRINTF("%p", bif_bpf);
 	BPF_DB_PRINTF_RAW("%u", refcount_load(&bpf_if->bif_refcnt));
 }
 
 /*
  * "show bpf_if <addr>" debugger command: dump the bpf_if at the given
  * address, or print a usage message when no address was supplied.
  */
 DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
 {
 
 	if (have_addr) {
 		bpf_show_bpf_if((struct bpf_if *)addr);
 		return;
 	}
 
 	db_printf("usage: show bpf_if <struct bpf_if *>\n");
 }
 #endif
diff --git a/sys/net/bpf.h b/sys/net/bpf.h
index 924dea5fc9f4..31968445aac1 100644
--- a/sys/net/bpf.h
+++ b/sys/net/bpf.h
@@ -1,467 +1,468 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1990, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from the Stanford/CMU enet packet filter,
  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
  * Berkeley Laboratory.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *      @(#)bpf.h	8.1 (Berkeley) 6/10/93
  *	@(#)bpf.h	1.34 (LBL)     6/16/96
  */
 
 #ifndef _NET_BPF_H_
 #define _NET_BPF_H_
 
 #include <sys/_eventhandler.h>
 #include <sys/ck.h>
 #include <net/dlt.h>
 
 /* BSD style release date */
 #define	BPF_RELEASE 199606
 
 typedef	int32_t	  bpf_int32;
 typedef	u_int32_t bpf_u_int32;
 typedef	int64_t	  bpf_int64;
 typedef	u_int64_t bpf_u_int64;
 struct ifnet;
 
 /*
  * Alignment macros.  BPF_WORDALIGN rounds up to the next multiple of
  * BPF_ALIGNMENT.
  */
 #define BPF_ALIGNMENT sizeof(long)
 #define BPF_WORDALIGN(x) (((x)+(BPF_ALIGNMENT-1))&~(BPF_ALIGNMENT-1))
 
 #define BPF_MAXINSNS 512
 #define BPF_MAXBUFSIZE 0x80000
 #define BPF_MINBUFSIZE 32
 
 /*
  *  Structure for BIOCSETF.
  */
 struct bpf_program {
 	u_int bf_len;
 	struct bpf_insn *bf_insns;
 };
 
 /*
  * Struct returned by BIOCGSTATS.
  */
 struct bpf_stat {
 	u_int bs_recv;		/* number of packets received */
 	u_int bs_drop;		/* number of packets dropped */
 };
 
 /*
  * Struct return by BIOCVERSION.  This represents the version number of
  * the filter language described by the instruction encodings below.
  * bpf understands a program iff kernel_major == filter_major &&
  * kernel_minor >= filter_minor, that is, if the value returned by the
  * running kernel has the same major number and a minor number equal
  * equal to or less than the filter being downloaded.  Otherwise, the
  * results are undefined, meaning an error may be returned or packets
  * may be accepted haphazardly.
  * It has nothing to do with the source code version.
  */
 struct bpf_version {
 	u_short bv_major;
 	u_short bv_minor;
 };
 /* Current version number of filter architecture. */
 #define BPF_MAJOR_VERSION 1
 #define BPF_MINOR_VERSION 1
 
 /*
  * Historically, BPF has supported a single buffering model, first using mbuf
  * clusters in kernel, and later using malloc(9) buffers in kernel.  We now
  * support multiple buffering modes, which may be queried and set using
  * BIOCGETBUFMODE and BIOCSETBUFMODE.  So as to avoid handling the complexity
  * of changing modes while sniffing packets, the mode becomes fixed once an
  * interface has been attached to the BPF descriptor.
  */
 #define	BPF_BUFMODE_BUFFER	1	/* Kernel buffers with read(). */
 #define	BPF_BUFMODE_ZBUF	2	/* Zero-copy buffers. */
 
 /*-
  * Struct used by BIOCSETZBUF, BIOCROTZBUF: describes up to two zero-copy
  * buffer as used by BPF.
  */
 struct bpf_zbuf {
 	void	*bz_bufa;	/* Location of 'a' zero-copy buffer. */
 	void	*bz_bufb;	/* Location of 'b' zero-copy buffer. */
 	size_t	 bz_buflen;	/* Size of zero-copy buffers. */
 };
 
 #define	BIOCGBLEN	_IOR('B', 102, u_int)
 #define	BIOCSBLEN	_IOWR('B', 102, u_int)
 #define	BIOCSETF	_IOW('B', 103, struct bpf_program)
 #define	BIOCFLUSH	_IO('B', 104)
 #define	BIOCPROMISC	_IO('B', 105)
 #define	BIOCGDLT	_IOR('B', 106, u_int)
 #define	BIOCGETIF	_IOR('B', 107, struct ifreq)
 #define	BIOCSETIF	_IOW('B', 108, struct ifreq)
 #define	BIOCSRTIMEOUT	_IOW('B', 109, struct timeval)
 #define	BIOCGRTIMEOUT	_IOR('B', 110, struct timeval)
 #define	BIOCGSTATS	_IOR('B', 111, struct bpf_stat)
 #define	BIOCIMMEDIATE	_IOW('B', 112, u_int)
 #define	BIOCVERSION	_IOR('B', 113, struct bpf_version)
 #define	BIOCGRSIG	_IOR('B', 114, u_int)
 #define	BIOCSRSIG	_IOW('B', 115, u_int)
 #define	BIOCGHDRCMPLT	_IOR('B', 116, u_int)
 #define	BIOCSHDRCMPLT	_IOW('B', 117, u_int)
 #define	BIOCGDIRECTION	_IOR('B', 118, u_int)
 #define	BIOCSDIRECTION	_IOW('B', 119, u_int)
 #define	BIOCSDLT	_IOW('B', 120, u_int)
 #define	BIOCGDLTLIST	_IOWR('B', 121, struct bpf_dltlist)
 #define	BIOCLOCK	_IO('B', 122)
 #define	BIOCSETWF	_IOW('B', 123, struct bpf_program)
 #define	BIOCFEEDBACK	_IOW('B', 124, u_int)
 #define	BIOCGETBUFMODE	_IOR('B', 125, u_int)
 #define	BIOCSETBUFMODE	_IOW('B', 126, u_int)
 #define	BIOCGETZMAX	_IOR('B', 127, size_t)
 #define	BIOCROTZBUF	_IOR('B', 128, struct bpf_zbuf)
 #define	BIOCSETZBUF	_IOW('B', 129, struct bpf_zbuf)
 #define	BIOCSETFNR	_IOW('B', 130, struct bpf_program)
 #define	BIOCGTSTAMP	_IOR('B', 131, u_int)
 #define	BIOCSTSTAMP	_IOW('B', 132, u_int)
 #define	BIOCSETVLANPCP	_IOW('B', 133, u_int)
 
 /* Obsolete */
 #define	BIOCGSEESENT	BIOCGDIRECTION
 #define	BIOCSSEESENT	BIOCSDIRECTION
 
 /* Packet directions */
 enum bpf_direction {
 	BPF_D_IN,	/* See incoming packets */
 	BPF_D_INOUT,	/* See incoming and outgoing packets */
 	BPF_D_OUT	/* See outgoing packets */
 };
 
 /* Time stamping functions */
 #define	BPF_T_MICROTIME		0x0000
 #define	BPF_T_NANOTIME		0x0001
 #define	BPF_T_BINTIME		0x0002
 #define	BPF_T_NONE		0x0003
 #define	BPF_T_FORMAT_MASK	0x0003
 #define	BPF_T_NORMAL		0x0000
 #define	BPF_T_FAST		0x0100
 #define	BPF_T_MONOTONIC		0x0200
 #define	BPF_T_MONOTONIC_FAST	(BPF_T_FAST | BPF_T_MONOTONIC)
 #define	BPF_T_FLAG_MASK		0x0300
 #define	BPF_T_FORMAT(t)		((t) & BPF_T_FORMAT_MASK)
 #define	BPF_T_FLAG(t)		((t) & BPF_T_FLAG_MASK)
 #define	BPF_T_VALID(t)						\
     ((t) == BPF_T_NONE || (BPF_T_FORMAT(t) != BPF_T_NONE &&	\
     ((t) & ~(BPF_T_FORMAT_MASK | BPF_T_FLAG_MASK)) == 0))
 
 #define	BPF_T_MICROTIME_FAST		(BPF_T_MICROTIME | BPF_T_FAST)
 #define	BPF_T_NANOTIME_FAST		(BPF_T_NANOTIME | BPF_T_FAST)
 #define	BPF_T_BINTIME_FAST		(BPF_T_BINTIME | BPF_T_FAST)
 #define	BPF_T_MICROTIME_MONOTONIC	(BPF_T_MICROTIME | BPF_T_MONOTONIC)
 #define	BPF_T_NANOTIME_MONOTONIC	(BPF_T_NANOTIME | BPF_T_MONOTONIC)
 #define	BPF_T_BINTIME_MONOTONIC		(BPF_T_BINTIME | BPF_T_MONOTONIC)
 #define	BPF_T_MICROTIME_MONOTONIC_FAST	(BPF_T_MICROTIME | BPF_T_MONOTONIC_FAST)
 #define	BPF_T_NANOTIME_MONOTONIC_FAST	(BPF_T_NANOTIME | BPF_T_MONOTONIC_FAST)
 #define	BPF_T_BINTIME_MONOTONIC_FAST	(BPF_T_BINTIME | BPF_T_MONOTONIC_FAST)
 
 /*
  * Structure prepended to each packet.
  */
 struct bpf_ts {
 	bpf_int64	bt_sec;		/* seconds */
 	bpf_u_int64	bt_frac;	/* fraction */
 };
 struct bpf_xhdr {
 	struct bpf_ts	bh_tstamp;	/* time stamp */
 	bpf_u_int32	bh_caplen;	/* length of captured portion */
 	bpf_u_int32	bh_datalen;	/* original length of packet */
 	u_short		bh_hdrlen;	/* length of bpf header (this struct
 					   plus alignment padding) */
 };
 /* Obsolete */
 struct bpf_hdr {
 	struct timeval	bh_tstamp;	/* time stamp */
 	bpf_u_int32	bh_caplen;	/* length of captured portion */
 	bpf_u_int32	bh_datalen;	/* original length of packet */
 	u_short		bh_hdrlen;	/* length of bpf header (this struct
 					   plus alignment padding) */
 };
 #ifdef _KERNEL
 #define	MTAG_BPF		0x627066
 #define	MTAG_BPF_TIMESTAMP	0
 #endif
 
 /*
  * When using zero-copy BPF buffers, a shared memory header is present
  * allowing the kernel BPF implementation and user process to synchronize
  * without using system calls.  This structure defines that header.  When
  * accessing these fields, appropriate atomic operation and memory barriers
  * are required in order not to see stale or out-of-order data; see bpf(4)
  * for reference code to access these fields from userspace.
  *
  * The layout of this structure is critical, and must not be changed; if must
  * fit in a single page on all architectures.
  */
 struct bpf_zbuf_header {
 	volatile u_int	bzh_kernel_gen;	/* Kernel generation number. */
 	volatile u_int	bzh_kernel_len;	/* Length of data in the buffer. */
 	volatile u_int	bzh_user_gen;	/* User generation number. */
 	u_int _bzh_pad[5];
 };
 
 /*
  * The instruction encodings.
  *
  * Please inform tcpdump-workers@lists.tcpdump.org if you use any
  * of the reserved values, so that we can note that they're used
  * (and perhaps implement it in the reference BPF implementation
  * and encourage its implementation elsewhere).
  */
 
 /*
  * The upper 8 bits of the opcode aren't used. BSD/OS used 0x8000.
  */
 
 /* instruction classes */
 #define BPF_CLASS(code) ((code) & 0x07)
 #define		BPF_LD		0x00
 #define		BPF_LDX		0x01
 #define		BPF_ST		0x02
 #define		BPF_STX		0x03
 #define		BPF_ALU		0x04
 #define		BPF_JMP		0x05
 #define		BPF_RET		0x06
 #define		BPF_MISC	0x07
 
 /* ld/ldx fields */
 #define BPF_SIZE(code)	((code) & 0x18)
 #define		BPF_W		0x00
 #define		BPF_H		0x08
 #define		BPF_B		0x10
 /*				0x18	reserved; used by BSD/OS */
 #define BPF_MODE(code)	((code) & 0xe0)
 #define		BPF_IMM 	0x00
 #define		BPF_ABS		0x20
 #define		BPF_IND		0x40
 #define		BPF_MEM		0x60
 #define		BPF_LEN		0x80
 #define		BPF_MSH		0xa0
 /*				0xc0	reserved; used by BSD/OS */
 /*				0xe0	reserved; used by BSD/OS */
 
 /* alu/jmp fields */
 #define BPF_OP(code)	((code) & 0xf0)
 #define		BPF_ADD		0x00
 #define		BPF_SUB		0x10
 #define		BPF_MUL		0x20
 #define		BPF_DIV		0x30
 #define		BPF_OR		0x40
 #define		BPF_AND		0x50
 #define		BPF_LSH		0x60
 #define		BPF_RSH		0x70
 #define		BPF_NEG		0x80
 #define		BPF_MOD		0x90
 #define		BPF_XOR		0xa0
 /*				0xb0	reserved */
 /*				0xc0	reserved */
 /*				0xd0	reserved */
 /*				0xe0	reserved */
 /*				0xf0	reserved */
 
 #define		BPF_JA		0x00
 #define		BPF_JEQ		0x10
 #define		BPF_JGT		0x20
 #define		BPF_JGE		0x30
 #define		BPF_JSET	0x40
 /*				0x50	reserved; used on BSD/OS */
 /*				0x60	reserved */
 /*				0x70	reserved */
 /*				0x80	reserved */
 /*				0x90	reserved */
 /*				0xa0	reserved */
 /*				0xb0	reserved */
 /*				0xc0	reserved */
 /*				0xd0	reserved */
 /*				0xe0	reserved */
 /*				0xf0	reserved */
 #define BPF_SRC(code)	((code) & 0x08)
 #define		BPF_K		0x00
 #define		BPF_X		0x08
 
 /* ret - BPF_K and BPF_X also apply */
 #define BPF_RVAL(code)	((code) & 0x18)
 #define		BPF_A		0x10
 /*				0x18	reserved */
 
 /* misc */
 #define BPF_MISCOP(code) ((code) & 0xf8)
 #define		BPF_TAX		0x00
 /*				0x08	reserved */
 /*				0x10	reserved */
 /*				0x18	reserved */
 /* #define	BPF_COP		0x20	NetBSD "coprocessor" extensions */
 /*				0x28	reserved */
 /*				0x30	reserved */
 /*				0x38	reserved */
 /* #define	BPF_COPX	0x40	NetBSD "coprocessor" extensions */
 /*					also used on BSD/OS */
 /*				0x48	reserved */
 /*				0x50	reserved */
 /*				0x58	reserved */
 /*				0x60	reserved */
 /*				0x68	reserved */
 /*				0x70	reserved */
 /*				0x78	reserved */
 #define		BPF_TXA		0x80
 /*				0x88	reserved */
 /*				0x90	reserved */
 /*				0x98	reserved */
 /*				0xa0	reserved */
 /*				0xa8	reserved */
 /*				0xb0	reserved */
 /*				0xb8	reserved */
 /*				0xc0	reserved; used on BSD/OS */
 /*				0xc8	reserved */
 /*				0xd0	reserved */
 /*				0xd8	reserved */
 /*				0xe0	reserved */
 /*				0xe8	reserved */
 /*				0xf0	reserved */
 /*				0xf8	reserved */
 
 /*
  * The instruction data structure.
  */
 struct bpf_insn {
 	u_short		code;
 	u_char		jt;
 	u_char		jf;
 	bpf_u_int32	k;
 };
 
 /*
  * Macros for insn array initializers.
  */
 #define BPF_STMT(code, k) { (u_short)(code), 0, 0, k }
 #define BPF_JUMP(code, k, jt, jf) { (u_short)(code), jt, jf, k }
 
 /*
  * Structure to retrieve available DLTs for the interface.
  */
 struct bpf_dltlist {
 	u_int	bfl_len;	/* number of bfd_list array */
 	u_int	*bfl_list;	/* array of DLTs */
 };
 
 #ifdef _KERNEL
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_BPF);
 #endif
 #ifdef SYSCTL_DECL
 SYSCTL_DECL(_net_bpf);
 #endif
 
 /*
  * Rotate the packet buffers in descriptor d.  Move the store buffer into the
  * hold slot, and the free buffer into the store slot.  Zero the length of the
  * new store buffer.  Descriptor lock should be held.  One must be careful to
  * not rotate the buffers twice, i.e. if fbuf != NULL.
  */
 #define	ROTATE_BUFFERS(d)	do {					\
 	(d)->bd_hbuf = (d)->bd_sbuf;					\
 	(d)->bd_hlen = (d)->bd_slen;					\
 	(d)->bd_sbuf = (d)->bd_fbuf;					\
 	(d)->bd_slen = 0;						\
 	(d)->bd_fbuf = NULL;						\
 	bpf_bufheld(d);							\
 } while (0)
 
 /*
  * Descriptor associated with each attached hardware interface.
  * Part of this structure is exposed to external callers to speed up
  * bpf_peers_present() calls.
  */
 struct bpf_if;
 CK_LIST_HEAD(bpfd_list, bpf_d);
 
 struct bpf_if_ext {
 	CK_LIST_ENTRY(bpf_if)	bif_next;	/* list of all interfaces */
 	struct bpfd_list	bif_dlist;	/* descriptor list */
 };
 
 void	 bpf_bufheld(struct bpf_d *d);
 int	 bpf_validate(const struct bpf_insn *, int);
 void	 bpf_tap(struct bpf_if *, u_char *, u_int);
 void	 bpf_tap_if(struct ifnet *, u_char *, u_int);
 void	 bpf_mtap(struct bpf_if *, struct mbuf *);
 void	 bpf_mtap_if(struct ifnet *, struct mbuf *);
 void	 bpf_mtap2(struct bpf_if *, void *, u_int, struct mbuf *);
 void	 bpf_mtap2_if(struct ifnet *, void *, u_int, struct mbuf *);
 void	 bpfattach(struct ifnet *, u_int, u_int);
 void	 bpfattach2(struct ifnet *, u_int, u_int, struct bpf_if **);
 void	 bpfdetach(struct ifnet *);
+bool	 bpf_peers_present_if(struct ifnet *);
 #ifdef VIMAGE
 int	 bpf_get_bp_params(struct bpf_if *, u_int *, u_int *);
 #endif
 
 void	 bpfilterattach(int);
 u_int	 bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int);
 
 static __inline int
 bpf_peers_present(struct bpf_if *bpf)
 {
 	struct bpf_if_ext *ext;
 
 	ext = (struct bpf_if_ext *)bpf;
 	if (!CK_LIST_EMPTY(&ext->bif_dlist))
 		return (1);
 	return (0);
 }
 
 #define	BPF_TAP(_ifp,_pkt,_pktlen)				\
 		bpf_tap_if((_ifp), (_pkt), (_pktlen))
 #define	BPF_MTAP(_ifp,_m) 					\
 	bpf_mtap_if((_ifp), (_m))
 #define	BPF_MTAP2(_ifp,_data,_dlen,_m) 				\
 	bpf_mtap2_if((_ifp), (_data), (_dlen), (_m))
 #endif
 
 /*
  * Number of scratch memory words (for BPF_LD|BPF_MEM and BPF_ST).
  */
 #define BPF_MEMWORDS 16
 
 /* BPF attach/detach events */
 typedef void (*bpf_track_fn)(void *, struct ifnet *, int /* dlt */,
     int /* 1 =>'s attach */);
 EVENTHANDLER_DECLARE(bpf_track, bpf_track_fn);
 
 #endif /* _NET_BPF_H_ */