Index: head/sys/dev/virtio/network/if_vtnet.c
===================================================================
--- head/sys/dev/virtio/network/if_vtnet.c	(revision 284347)
+++ head/sys/dev/virtio/network/if_vtnet.c	(revision 284348)
@@ -1,3958 +1,3962 @@
 /*-
  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /* Driver for VirtIO network devices. */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/eventhandler.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/random.h>
 #include <sys/sglist.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/taskqueue.h>
 #include <sys/smp.h>
 #include <machine/smp.h>
 
 #include <vm/uma.h>
 
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_media.h>
 #include <net/if_vlan_var.h>
 
 #include <net/bpf.h>
 
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/udp.h>
 #include <netinet/tcp.h>
 #include <netinet/sctp.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
 #include <sys/bus.h>
 #include <sys/rman.h>
 
 #include <dev/virtio/virtio.h>
 #include <dev/virtio/virtqueue.h>
 #include <dev/virtio/network/virtio_net.h>
 #include <dev/virtio/network/if_vtnetvar.h>
 
 #include "virtio_if.h"
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 static int	vtnet_modevent(module_t, int, void *);
 
 static int	vtnet_probe(device_t);
 static int	vtnet_attach(device_t);
 static int	vtnet_detach(device_t);
 static int	vtnet_suspend(device_t);
 static int	vtnet_resume(device_t);
 static int	vtnet_shutdown(device_t);
 static int	vtnet_attach_completed(device_t);
 static int	vtnet_config_change(device_t);
 
 static void	vtnet_negotiate_features(struct vtnet_softc *);
 static void	vtnet_setup_features(struct vtnet_softc *);
 static int	vtnet_init_rxq(struct vtnet_softc *, int);
 static int	vtnet_init_txq(struct vtnet_softc *, int);
 static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
 static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
 static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
 static void	vtnet_free_rx_filters(struct vtnet_softc *);
 static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
 static int	vtnet_setup_interface(struct vtnet_softc *);
 static int	vtnet_change_mtu(struct vtnet_softc *, int);
 static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t);
 static uint64_t	vtnet_get_counter(struct ifnet *, ift_counter);
 
 static int	vtnet_rxq_populate(struct vtnet_rxq *);
 static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
 static struct mbuf *
 		vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **);
 static int	vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
 		    struct mbuf *, int);
 static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
 static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
 static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
 static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
 		     struct virtio_net_hdr *);
 static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
 static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
 static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
 static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
 		    struct virtio_net_hdr *);
 static int	vtnet_rxq_eof(struct vtnet_rxq *);
 static void	vtnet_rx_vq_intr(void *);
 static void	vtnet_rxq_tq_intr(void *, int);
 
 static int	vtnet_txq_below_threshold(struct vtnet_txq *);
 static int	vtnet_txq_notify(struct vtnet_txq *);
 static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
 static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
 		    int *, int *, int *);
 static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
 		    int, struct virtio_net_hdr *);
 static struct mbuf *
 		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
 		    struct virtio_net_hdr *);
 static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
 		    struct vtnet_tx_header *);
 static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **);
 #ifdef VTNET_LEGACY_TX
 static void	vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
 static void	vtnet_start(struct ifnet *);
 #else
 static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
 static int	vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
 static void	vtnet_txq_tq_deferred(void *, int);
 #endif
 static void	vtnet_txq_start(struct vtnet_txq *);
 static void	vtnet_txq_tq_intr(void *, int);
 static int	vtnet_txq_eof(struct vtnet_txq *);
 static void	vtnet_tx_vq_intr(void *);
 static void	vtnet_tx_start_all(struct vtnet_softc *);
 
 #ifndef VTNET_LEGACY_TX
 static void	vtnet_qflush(struct ifnet *);
 #endif
 
 static int	vtnet_watchdog(struct vtnet_txq *);
 static void	vtnet_accum_stats(struct vtnet_softc *,
 		    struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
 static void	vtnet_tick(void *);
 
 static void	vtnet_start_taskqueues(struct vtnet_softc *);
 static void	vtnet_free_taskqueues(struct vtnet_softc *);
 static void	vtnet_drain_taskqueues(struct vtnet_softc *);
 
 static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
 static void	vtnet_stop_rendezvous(struct vtnet_softc *);
 static void	vtnet_stop(struct vtnet_softc *);
 static int	vtnet_virtio_reinit(struct vtnet_softc *);
 static void	vtnet_init_rx_filters(struct vtnet_softc *);
 static int	vtnet_init_rx_queues(struct vtnet_softc *);
 static int	vtnet_init_tx_queues(struct vtnet_softc *);
 static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
 static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
 static int	vtnet_reinit(struct vtnet_softc *);
 static void	vtnet_init_locked(struct vtnet_softc *);
 static void	vtnet_init(void *);
 
 static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
 static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
 		    struct sglist *, int, int);
 static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
 static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
 static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
 static int	vtnet_set_promisc(struct vtnet_softc *, int);
 static int	vtnet_set_allmulti(struct vtnet_softc *, int);
 static void	vtnet_attach_disable_promisc(struct vtnet_softc *);
 static void	vtnet_rx_filter(struct vtnet_softc *);
 static void	vtnet_rx_filter_mac(struct vtnet_softc *);
 static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
 static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
 static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
 static void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
 static void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
 
 static int	vtnet_is_link_up(struct vtnet_softc *);
 static void	vtnet_update_link_status(struct vtnet_softc *);
 static int	vtnet_ifmedia_upd(struct ifnet *);
 static void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
 static void	vtnet_get_hwaddr(struct vtnet_softc *);
 static void	vtnet_set_hwaddr(struct vtnet_softc *);
 static void	vtnet_vlan_tag_remove(struct mbuf *);
 static void	vtnet_set_rx_process_limit(struct vtnet_softc *);
 static void	vtnet_set_tx_intr_threshold(struct vtnet_softc *);
 
 static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
 		    struct sysctl_oid_list *, struct vtnet_rxq *);
 static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
 		    struct sysctl_oid_list *, struct vtnet_txq *);
 static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
 static void	vtnet_setup_sysctl(struct vtnet_softc *);
 
 static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
 static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
 static int	vtnet_txq_enable_intr(struct vtnet_txq *);
 static void	vtnet_txq_disable_intr(struct vtnet_txq *);
 static void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
 static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
 static void	vtnet_enable_interrupts(struct vtnet_softc *);
 static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
 static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
 static void	vtnet_disable_interrupts(struct vtnet_softc *);
 
 static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);
 
 /*
  * Tunables.
  *
  * Global loader tunables. The *_disable and mq_max_pairs values are handed
  * to vtnet_tunable_int() as the fallback value during feature negotiation
  * and setup.
  */
 /* Non-zero masks out Tx/Rx checksum offload and, with it, TSO and LRO. */
 static int vtnet_csum_disable = 0;
 TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
 /* Non-zero masks out VTNET_TSO_FEATURES. */
 static int vtnet_tso_disable = 0;
 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
 /* Non-zero masks out VTNET_LRO_FEATURES. */
 static int vtnet_lro_disable = 0;
 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
 /* Non-zero masks out VIRTIO_NET_F_MQ (only consulted without VTNET_LEGACY_TX). */
 static int vtnet_mq_disable = 0;
 TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
 /* When greater than zero, caps the number of queue pairs that are set up. */
 static int vtnet_mq_max_pairs = 0;
 TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
 /* NOTE(review): presumably consumed by vtnet_set_rx_process_limit() - confirm. */
 static int vtnet_rx_process_limit = 512;
 TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
 
 /* UMA zone of struct vtnet_tx_header; created and destroyed by vtnet_modevent(). */
 static uma_zone_t vtnet_tx_header_zone;
 
 /*
  * Printable names for the VirtIO network feature bits. The table is handed
  * to virtio_set_feature_desc() in vtnet_attach() and is terminated by the
  * { 0, NULL } entry.
  */
 static struct virtio_feature_desc vtnet_feature_desc[] = {
 	{ VIRTIO_NET_F_CSUM,		"TxChecksum"	},
 	{ VIRTIO_NET_F_GUEST_CSUM,	"RxChecksum"	},
 	{ VIRTIO_NET_F_MAC,		"MacAddress"	},
 	{ VIRTIO_NET_F_GSO,		"TxAllGSO"	},
 	{ VIRTIO_NET_F_GUEST_TSO4,	"RxTSOv4"	},
 	{ VIRTIO_NET_F_GUEST_TSO6,	"RxTSOv6"	},
 	{ VIRTIO_NET_F_GUEST_ECN,	"RxECN"		},
 	{ VIRTIO_NET_F_GUEST_UFO,	"RxUFO"		},
 	{ VIRTIO_NET_F_HOST_TSO4,	"TxTSOv4"	},
 	{ VIRTIO_NET_F_HOST_TSO6,	"TxTSOv6"	},
 	{ VIRTIO_NET_F_HOST_ECN,	"TxTSOECN"	},
 	{ VIRTIO_NET_F_HOST_UFO,	"TxUFO"		},
 	{ VIRTIO_NET_F_MRG_RXBUF,	"MrgRxBuf"	},
 	{ VIRTIO_NET_F_STATUS,		"Status"	},
 	{ VIRTIO_NET_F_CTRL_VQ,		"ControlVq"	},
 	{ VIRTIO_NET_F_CTRL_RX,		"RxMode"	},
 	{ VIRTIO_NET_F_CTRL_VLAN,	"VLanFilter"	},
 	{ VIRTIO_NET_F_CTRL_RX_EXTRA,	"RxModeExtra"	},
 	{ VIRTIO_NET_F_GUEST_ANNOUNCE,	"GuestAnnounce"	},
 	{ VIRTIO_NET_F_MQ,		"Multiqueue"	},
 	{ VIRTIO_NET_F_CTRL_MAC_ADDR,	"SetMacAddress"	},
 
 	{ 0, NULL }
 };
 
 /*
  * Method table binding the generic device interface and the VirtIO bus
  * callbacks to this driver's handlers.
  */
 static device_method_t vtnet_methods[] = {
 	/* Device methods. */
 	DEVMETHOD(device_probe,			vtnet_probe),
 	DEVMETHOD(device_attach,		vtnet_attach),
 	DEVMETHOD(device_detach,		vtnet_detach),
 	DEVMETHOD(device_suspend,		vtnet_suspend),
 	DEVMETHOD(device_resume,		vtnet_resume),
 	DEVMETHOD(device_shutdown,		vtnet_shutdown),
 
 	/* VirtIO methods. */
 	DEVMETHOD(virtio_attach_completed,	vtnet_attach_completed),
 	DEVMETHOD(virtio_config_change,		vtnet_config_change),
 
 	DEVMETHOD_END
 };
 
 #ifdef DEV_NETMAP
 #include <dev/netmap/if_vtnet_netmap.h>
 #endif /* DEV_NETMAP */
 
 /* Driver description: name, method table and per-instance softc size. */
 static driver_t vtnet_driver = {
 	"vtnet",
 	vtnet_methods,
 	sizeof(struct vtnet_softc)
 };
 static devclass_t vtnet_devclass;
 
 /*
  * Register the driver on both the virtio_mmio and virtio_pci buses, with
  * vtnet_modevent() as the module event handler, and declare the dependency
  * on the virtio module.
  */
 DRIVER_MODULE(vtnet, virtio_mmio, vtnet_driver, vtnet_devclass,
     vtnet_modevent, 0);
 DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
     vtnet_modevent, 0);
 MODULE_VERSION(vtnet, 1);
 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
 
 /*
  * Module event handler; it owns the lifetime of the Tx header UMA zone.
  *
  * MOD_LOAD creates the zone. MOD_QUIESCE and MOD_UNLOAD both return EBUSY
  * while uma_zone_get_cur() reports a non-zero count for the zone; MOD_UNLOAD
  * additionally destroys it. MOD_SHUTDOWN is accepted without doing anything
  * and every other event type yields EOPNOTSUPP.
  */
 static int
 vtnet_modevent(module_t mod, int type, void *unused)
 {
 
 	if (type == MOD_LOAD) {
 		vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
 		    sizeof(struct vtnet_tx_header),
 		    NULL, NULL, NULL, NULL, 0, 0);
 		return (0);
 	}
 
 	if (type == MOD_QUIESCE || type == MOD_UNLOAD) {
 		/* Refuse while Tx headers are still outstanding. */
 		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
 			return (EBUSY);
 
 		if (type == MOD_UNLOAD) {
 			uma_zdestroy(vtnet_tx_header_zone);
 			vtnet_tx_header_zone = NULL;
 		}
 		return (0);
 	}
 
 	if (type == MOD_SHUTDOWN)
 		return (0);
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Probe: claim the device only when its VirtIO device type is
  * VIRTIO_ID_NETWORK. Returns BUS_PROBE_DEFAULT on a match, ENXIO otherwise.
  */
 static int
 vtnet_probe(device_t dev)
 {
 
 	if (virtio_get_device_type(dev) == VIRTIO_ID_NETWORK) {
 		device_set_desc(dev, "VirtIO Networking Adapter");
 		return (BUS_PROBE_DEFAULT);
 	}
 
 	return (ENXIO);
 }
 
 /*
  * Attach a device instance: initialise the core lock and tick callout,
  * set up sysctls and features, allocate Rx filters, queues and virtqueues,
  * create the network interface, hook up the interrupts and finally start
  * the taskqueues.
  *
  * Any failure is reported with device_printf() and unwound through
  * vtnet_detach(). Returns 0 on success or the error of the failing step.
  */
 static int
 vtnet_attach(device_t dev)
 {
 	struct vtnet_softc *sc;
 	int error;
 
 	sc = device_get_softc(dev);
 	sc->vtnet_dev = dev;
 
 	/* Make the feature names available to the VirtIO bus. */
 	virtio_set_feature_desc(dev, vtnet_feature_desc);
 
 	VTNET_CORE_LOCK_INIT(sc);
 	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
 
 	vtnet_setup_sysctl(sc);
 	vtnet_setup_features(sc);
 
 	if ((error = vtnet_alloc_rx_filters(sc)) != 0) {
 		device_printf(dev, "cannot allocate Rx filters\n");
 		goto out;
 	}
 
 	if ((error = vtnet_alloc_rxtx_queues(sc)) != 0) {
 		device_printf(dev, "cannot allocate queues\n");
 		goto out;
 	}
 
 	if ((error = vtnet_alloc_virtqueues(sc)) != 0) {
 		device_printf(dev, "cannot allocate virtqueues\n");
 		goto out;
 	}
 
 	if ((error = vtnet_setup_interface(sc)) != 0) {
 		device_printf(dev, "cannot setup interface\n");
 		goto out;
 	}
 
 	if ((error = virtio_setup_intr(dev, INTR_TYPE_NET)) != 0) {
 		device_printf(dev, "cannot setup virtqueue interrupts\n");
 		/* BMV: This will crash if during boot! */
 		ether_ifdetach(sc->vtnet_ifp);
 		goto out;
 	}
 
 #ifdef DEV_NETMAP
 	vtnet_netmap_attach(sc);
 #endif /* DEV_NETMAP */
 
 	vtnet_start_taskqueues(sc);
 
 out:
 	/* On failure let detach release whatever was set up so far. */
 	if (error != 0)
 		vtnet_detach(dev);
 
 	return (error);
 }
 
 /*
  * Tear down a device instance and release its resources. Besides being the
  * device_detach method, this is also called by vtnet_attach() on its failure
  * path, which is why most steps are guarded by NULL / attached checks.
  * Always returns 0.
  */
 static int
 vtnet_detach(device_t dev)
 {
 	struct vtnet_softc *sc;
 	struct ifnet *ifp;
 
 	sc = device_get_softc(dev);
 	ifp = sc->vtnet_ifp;
 
 	/* Only a fully attached device is stopped, drained and detached. */
 	if (device_is_attached(dev)) {
 		VTNET_CORE_LOCK(sc);
 		vtnet_stop(sc);
 		VTNET_CORE_UNLOCK(sc);
 
 		callout_drain(&sc->vtnet_tick_ch);
 		vtnet_drain_taskqueues(sc);
 
 		ether_ifdetach(ifp);
 	}
 
 	/*
 	 * NOTE(review): ifp is not NULL-checked here; it looks like it can
 	 * be NULL when attach failed before vtnet_setup_interface() ran -
 	 * confirm netmap_detach() tolerates that.
 	 */
 #ifdef DEV_NETMAP
 	netmap_detach(ifp);
 #endif /* DEV_NETMAP */
 
 	vtnet_free_taskqueues(sc);
 
 	/* Drop the VLAN event handlers if they were registered. */
 	if (sc->vtnet_vlan_attach != NULL) {
 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
 		sc->vtnet_vlan_attach = NULL;
 	}
 	if (sc->vtnet_vlan_detach != NULL) {
 		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
 		sc->vtnet_vlan_detach = NULL;
 	}
 
 	ifmedia_removeall(&sc->vtnet_media);
 
 	if (ifp != NULL) {
 		if_free(ifp);
 		sc->vtnet_ifp = NULL;
 	}
 
 	vtnet_free_rxtx_queues(sc);
 	vtnet_free_rx_filters(sc);
 
 	if (sc->vtnet_ctrl_vq != NULL)
 		vtnet_free_ctrl_vq(sc);
 
 	VTNET_CORE_LOCK_DESTROY(sc);
 
 	return (0);
 }
 
 /*
  * Suspend: stop the device under the core lock and mark the softc with
  * VTNET_FLAG_SUSPENDED. Always returns 0.
  */
 static int
 vtnet_suspend(device_t dev)
 {
 	struct vtnet_softc *sc = device_get_softc(dev);
 
 	VTNET_CORE_LOCK(sc);
 	vtnet_stop(sc);
 	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
 	VTNET_CORE_UNLOCK(sc);
 
 	return (0);
 }
 
 /*
  * Resume: under the core lock, reinitialise the device if the interface is
  * administratively up (IFF_UP) and then clear VTNET_FLAG_SUSPENDED.
  * Always returns 0.
  */
 static int
 vtnet_resume(device_t dev)
 {
 	struct vtnet_softc *sc = device_get_softc(dev);
 	struct ifnet *ifp = sc->vtnet_ifp;
 
 	VTNET_CORE_LOCK(sc);
 	if ((ifp->if_flags & IFF_UP) != 0)
 		vtnet_init_locked(sc);
 	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
 	VTNET_CORE_UNLOCK(sc);
 
 	return (0);
 }
 
 /*
  * Shutdown: identical to a suspend, except that no resume is expected to
  * follow. Returns whatever vtnet_suspend() returns.
  */
 static int
 vtnet_shutdown(device_t dev)
 {
 	int error;
 
 	/* Everything that has to happen here is already done by suspend. */
 	error = vtnet_suspend(dev);
 
 	return (error);
 }
 
 /*
  * VirtIO attach_completed callback: hands the softc to
  * vtnet_attach_disable_promisc(). Always returns 0.
  */
 static int
 vtnet_attach_completed(device_t dev)
 {
 	struct vtnet_softc *sc;
 
 	sc = device_get_softc(dev);
 	vtnet_attach_disable_promisc(sc);
 
 	return (0);
 }
 
 /*
  * VirtIO config_change callback: refresh the link status under the core
  * lock and, if the link is active afterwards, kick all transmit queues.
  * Always returns 0.
  */
 static int
 vtnet_config_change(device_t dev)
 {
 	struct vtnet_softc *sc = device_get_softc(dev);
 
 	VTNET_CORE_LOCK(sc);
 	vtnet_update_link_status(sc);
 	if (sc->vtnet_link_active)
 		vtnet_tx_start_all(sc);
 	VTNET_CORE_UNLOCK(sc);
 
 	return (0);
 }
 
 /*
  * Negotiate the feature set with the host.
  *
  * The requested set is VTNET_FEATURES minus whatever the tunables disable.
  * If LRO was negotiated without mergeable Rx buffers, LRO is either kept in
  * the VTNET_FLAG_LRO_NOMRG mode (indirect descriptors available) or dropped
  * by negotiating a second time without VTNET_LRO_FEATURES.
  */
 static void
 vtnet_negotiate_features(struct vtnet_softc *sc)
 {
 	device_t dev;
 	uint64_t disabled, wanted;
 
 	dev = sc->vtnet_dev;
 	disabled = 0;
 
 	/*
 	 * TSO and LRO depend on their checksum offload counterpart, so
 	 * disabling checksumming takes both of them out as well.
 	 */
 	if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
 		disabled |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
 		disabled |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
 	}
 	if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
 		disabled |= VTNET_TSO_FEATURES;
 	if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
 		disabled |= VTNET_LRO_FEATURES;
 #ifndef VTNET_LEGACY_TX
 	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
 		disabled |= VIRTIO_NET_F_MQ;
 #else
 	disabled |= VIRTIO_NET_F_MQ;
 #endif
 
 	wanted = VTNET_FEATURES & ~disabled;
 	sc->vtnet_features = virtio_negotiate_features(dev, wanted);
 
 	/* Nothing more to do unless we ended up with LRO but no MRG_RXBUF. */
 	if (!virtio_with_feature(dev, VTNET_LRO_FEATURES) ||
 	    virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) != 0)
 		return;
 
 	/*
 	 * LRO without mergeable buffers needs special handling: each
 	 * receive buffer has to be big enough for the maximum TCP packet
 	 * plus the Ethernet header and the VirtIO header, which can take up
 	 * to 34 descriptors with MCLBYTES clusters. That is only workable
 	 * with indirect descriptors; without them the virtqueue would hold
 	 * very few receive buffers, so LRO is turned off instead.
 	 */
 	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
 		sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
 		return;
 	}
 
 	device_printf(dev,
 	    "LRO disabled due to both mergeable buffers and "
 	    "indirect descriptors not negotiated\n");
 
 	wanted &= ~VTNET_LRO_FEATURES;
 	sc->vtnet_features = virtio_negotiate_features(dev, wanted);
 }
 
 static void
 vtnet_setup_features(struct vtnet_softc *sc)
 {
 	device_t dev;
 	int max_pairs, max;
 
 	dev = sc->vtnet_dev;
 
 	vtnet_negotiate_features(sc);
 
 	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
 		sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
 	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
 		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
 
 	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
 		/* This feature should always be negotiated. */
 		sc->vtnet_flags |= VTNET_FLAG_MAC;
 	}
 
 	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
 		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	} else
 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
 
 	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
 		sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS;
 	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
 		sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS;
 	else
 		sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS;
 
 	if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
 	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
 	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
 		sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS;
 	else
 		sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS;
 
 	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
 		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
 
 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
 			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
 			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
 			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
 	}
 
 	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
 	    sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
 		max_pairs = virtio_read_dev_config_2(dev,
 		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
 		if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
 		    max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
 			max_pairs = 1;
 	} else
 		max_pairs = 1;
 
 	if (max_pairs > 1) {
 		/*
 		 * Limit the maximum number of queue pairs to the number of
 		 * CPUs or the configured maximum. The actual number of
 		 * queues that get used may be less.
 		 */
 		max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
 		if (max > 0 && max_pairs > max)
 			max_pairs = max;
 		if (max_pairs > mp_ncpus)
 			max_pairs = mp_ncpus;
 		if (max_pairs > VTNET_MAX_QUEUE_PAIRS)
 			max_pairs = VTNET_MAX_QUEUE_PAIRS;
 		if (max_pairs > 1)
 			sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
 	}
 
 	sc->vtnet_max_vq_pairs = max_pairs;
 }
 
 /*
  * Initialise receive queue 'id': name, mutex, back pointers, scatter/gather
  * list, interrupt task and taskqueue. Returns 0, or ENOMEM when the sglist
  * or the taskqueue cannot be allocated.
  */
 static int
 vtnet_init_rxq(struct vtnet_softc *sc, int id)
 {
 	struct vtnet_rxq *rxq;
 
 	rxq = sc->vtnet_rxqs + id;
 
 	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
 	    device_get_nameunit(sc->vtnet_dev), id);
 	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
 
 	rxq->vtnrx_sc = sc;
 	rxq->vtnrx_id = id;
 
 	rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
 	if (rxq->vtnrx_sg == NULL)
 		return (ENOMEM);
 
 	TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
 	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
 	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);
 	if (rxq->vtnrx_tq == NULL)
 		return (ENOMEM);
 
 	return (0);
 }
 
 /*
  * Initialise transmit queue 'id': name, mutex, back pointers, scatter/gather
  * list, the buf_ring and deferred task (unless VTNET_LEGACY_TX), interrupt
  * task and taskqueue. Returns 0, or ENOMEM when an allocation fails.
  */
 static int
 vtnet_init_txq(struct vtnet_softc *sc, int id)
 {
 	struct vtnet_txq *txq;
 
 	txq = sc->vtnet_txqs + id;
 
 	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
 	    device_get_nameunit(sc->vtnet_dev), id);
 	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
 
 	txq->vtntx_sc = sc;
 	txq->vtntx_id = id;
 
 	txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
 	if (txq->vtntx_sg == NULL)
 		return (ENOMEM);
 
 #ifndef VTNET_LEGACY_TX
 	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
 	    M_NOWAIT, &txq->vtntx_mtx);
 	if (txq->vtntx_br == NULL)
 		return (ENOMEM);
 
 	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
 #endif
 	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
 	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
 	    taskqueue_thread_enqueue, &txq->vtntx_tq);
 
 	return (txq->vtntx_tq == NULL ? ENOMEM : 0);
 }
 
 static int
 vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
 {
 	int i, npairs, error;
 
 	npairs = sc->vtnet_max_vq_pairs;
 
 	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
 	    M_NOWAIT | M_ZERO);
 	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
 	    M_NOWAIT | M_ZERO);
 	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
 		return (ENOMEM);
 
 	for (i = 0; i < npairs; i++) {
 		error = vtnet_init_rxq(sc, i);
 		if (error)
 			return (error);
 		error = vtnet_init_txq(sc, i);
 		if (error)
 			return (error);
 	}
 
 	vtnet_setup_queue_sysctl(sc);
 
 	return (0);
 }
 
 /*
  * Release what vtnet_init_rxq() set up for one receive queue: the sglist
  * and the mutex. Each step is guarded, so a partially initialised queue
  * is fine.
  */
 static void
 vtnet_destroy_rxq(struct vtnet_rxq *rxq)
 {
 
 	/* Invalidate the back references first. */
 	rxq->vtnrx_id = -1;
 	rxq->vtnrx_sc = NULL;
 
 	if (rxq->vtnrx_sg != NULL) {
 		sglist_free(rxq->vtnrx_sg);
 		rxq->vtnrx_sg = NULL;
 	}
 
 	if (mtx_initialized(&rxq->vtnrx_mtx))
 		mtx_destroy(&rxq->vtnrx_mtx);
 }
 
 /*
  * Release what vtnet_init_txq() set up for one transmit queue: the sglist,
  * the buf_ring (unless VTNET_LEGACY_TX) and the mutex. Each step is guarded,
  * so a partially initialised queue is fine.
  */
 static void
 vtnet_destroy_txq(struct vtnet_txq *txq)
 {
 
 	/* Invalidate the back references first. */
 	txq->vtntx_id = -1;
 	txq->vtntx_sc = NULL;
 
 	if (txq->vtntx_sg != NULL) {
 		sglist_free(txq->vtntx_sg);
 		txq->vtntx_sg = NULL;
 	}
 
 #ifndef VTNET_LEGACY_TX
 	if (txq->vtntx_br != NULL) {
 		buf_ring_free(txq->vtntx_br, M_DEVBUF);
 		txq->vtntx_br = NULL;
 	}
 #endif
 
 	if (mtx_initialized(&txq->vtntx_mtx))
 		mtx_destroy(&txq->vtntx_mtx);
 }
 
 /*
  * Destroy every Rx and Tx queue and free the two queue arrays. Either array
  * may be NULL, in which case it is skipped.
  */
 static void
 vtnet_free_rxtx_queues(struct vtnet_softc *sc)
 {
 	int npairs, q;
 
 	npairs = sc->vtnet_max_vq_pairs;
 
 	if (sc->vtnet_rxqs != NULL) {
 		for (q = 0; q < npairs; q++)
 			vtnet_destroy_rxq(sc->vtnet_rxqs + q);
 		free(sc->vtnet_rxqs, M_DEVBUF);
 		sc->vtnet_rxqs = NULL;
 	}
 
 	if (sc->vtnet_txqs != NULL) {
 		for (q = 0; q < npairs; q++)
 			vtnet_destroy_txq(sc->vtnet_txqs + q);
 		free(sc->vtnet_txqs, M_DEVBUF);
 		sc->vtnet_txqs = NULL;
 	}
 }
 
 /*
  * Allocate the zeroed MAC filter table (when VTNET_FLAG_CTRL_RX is set) and
  * the zeroed VLAN filter bitmap (when VTNET_FLAG_VLAN_FILTER is set).
  * Returns 0 on success or ENOMEM.
  */
 static int
 vtnet_alloc_rx_filters(struct vtnet_softc *sc)
 {
 
 	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) != 0) {
 		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
 		    M_DEVBUF, M_NOWAIT | M_ZERO);
 		if (sc->vtnet_mac_filter == NULL)
 			return (ENOMEM);
 	}
 
 	if ((sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) != 0) {
 		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
 		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
 		if (sc->vtnet_vlan_filter == NULL)
 			return (ENOMEM);
 	}
 
 	return (0);
 }
 
 /*
  * Free the MAC and VLAN filter tables, if allocated, and clear the
  * pointers.
  */
 static void
 vtnet_free_rx_filters(struct vtnet_softc *sc)
 {
 
 	if (sc->vtnet_mac_filter != NULL) {
 		free(sc->vtnet_mac_filter, M_DEVBUF);
 		sc->vtnet_mac_filter = NULL;
 	}
 
 	if (sc->vtnet_vlan_filter != NULL) {
 		free(sc->vtnet_vlan_filter, M_DEVBUF);
 		sc->vtnet_vlan_filter = NULL;
 	}
 }
 
 static int
 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
 {
 	device_t dev;
 	struct vq_alloc_info *info;
 	struct vtnet_rxq *rxq;
 	struct vtnet_txq *txq;
 	int i, idx, flags, nvqs, error;
 
 	dev = sc->vtnet_dev;
 	flags = 0;
 
 	nvqs = sc->vtnet_max_vq_pairs * 2;
 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
 		nvqs++;
 
 	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
 	if (info == NULL)
 		return (ENOMEM);
 
 	for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) {
 		rxq = &sc->vtnet_rxqs[i];
 		VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
 		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
 		    "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);
 
 		txq = &sc->vtnet_txqs[i];
 		VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
 		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
 		    "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
 	}
 
 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
 		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
 		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
 	}
 
 	/*
 	 * Enable interrupt binding if this is multiqueue. This only matters
 	 * when per-vq MSIX is available.
 	 */
 	if (sc->vtnet_flags & VTNET_FLAG_MULTIQ)
 		flags |= 0;
 
 	error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
 	free(info, M_TEMP);
 
 	return (error);
 }
 
 /*
  * Allocate and configure the ifnet for this device: entry points, media,
  * MAC address, Ethernet attachment and the capabilities derived from the
  * negotiated VirtIO features.
  *
  * Returns 0 on success, or ENOSPC when the ifnet cannot be allocated.
  */
 static int
 vtnet_setup_interface(struct vtnet_softc *sc)
 {
 	device_t dev;
 	struct ifnet *ifp;
 
 	dev = sc->vtnet_dev;
 
 	ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		device_printf(dev, "cannot allocate ifnet structure\n");
 		return (ENOSPC);
 	}
 
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	ifp->if_baudrate = IF_Gbps(10);	/* Approx. */
 	ifp->if_softc = sc;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_init = vtnet_init;
 	ifp->if_ioctl = vtnet_ioctl;
 	ifp->if_get_counter = vtnet_get_counter;
 	/*
 	 * Transmit entry points: if_transmit/if_qflush by default, or the
 	 * legacy if_start path with a send queue sized from Tx virtqueue 0.
 	 */
 #ifndef VTNET_LEGACY_TX
 	ifp->if_transmit = vtnet_txq_mq_start;
 	ifp->if_qflush = vtnet_qflush;
 #else
 	struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
 	ifp->if_start = vtnet_start;
 	IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
 	ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
 	IFQ_SET_READY(&ifp->if_snd);
 #endif
 
 	/* A single fixed media type is advertised. */
 	ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
 	    vtnet_ifmedia_sts);
 	ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
 	ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
 
 	/* Read (or generate) the MAC address for the adapter. */
 	vtnet_get_hwaddr(sc);
 
 	ether_ifattach(ifp, sc->vtnet_hwaddr);
 
 	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
 		ifp->if_capabilities |= IFCAP_LINKSTATE;
 
 	/* Tell the upper layer(s) we support long frames. */
 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
 	ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
 
 	/* Tx checksum offload, and the TSO variants that depend on it. */
 	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
 		ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;
 
 		if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) {
 			ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
 			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
 		} else {
 			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
 				ifp->if_capabilities |= IFCAP_TSO4;
 			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
 				ifp->if_capabilities |= IFCAP_TSO6;
 			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
 				sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
 		}
 
 		if (ifp->if_capabilities & IFCAP_TSO)
 			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
 	}
 
 	/* Rx checksum offload, and LRO when a guest TSO feature is present. */
 	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
 		ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
 
 		if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
 		    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
 			ifp->if_capabilities |= IFCAP_LRO;
 	}
 
 	if (ifp->if_capabilities & IFCAP_HWCSUM) {
 		/*
 		 * VirtIO does not support VLAN tagging, but we can fake
 		 * it by inserting and removing the 802.1Q header during
 		 * transmit and receive. We are then able to do checksum
 		 * offloading of VLAN frames.
 		 */
 		ifp->if_capabilities |=
 		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
 	}
 
 	/* Everything collected so far is enabled by default. */
 	ifp->if_capenable = ifp->if_capabilities;
 
 	/*
 	 * Capabilities after here are not enabled by default.
 	 */
 
 	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
 		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
 
 		/* These handlers are deregistered again in vtnet_detach(). */
 		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
 		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
 	}
 
 	vtnet_set_rx_process_limit(sc);
 	vtnet_set_tx_intr_threshold(sc);
 
 	return (0);
 }
 
 /*
  * Apply a new MTU and pick the Rx cluster size that matches it; the device
  * is reinitialised if it is currently running.
  *
  * Returns 0 on success. EINVAL is returned when new_mtu lies outside
  * [ETHERMIN, VTNET_MAX_MTU], or when mergeable Rx buffers are not in use
  * and the resulting frame would not fit into a 9K jumbo cluster.
  */
 static int
 vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
 {
 	struct ifnet *ifp;
 	int cluster, framelen;
 
 	ifp = sc->vtnet_ifp;
 
 	if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
 		return (EINVAL);
 
 	/* VirtIO header + Ethernet/VLAN header + payload. */
 	framelen = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) +
 	    new_mtu;
 
 	/*
 	 * Choose the receive cluster size from the frame size. Frames that
 	 * do not fit a standard cluster use page-sized clusters with
 	 * mergeable buffers, and otherwise must fit a 9K jumbo cluster.
 	 */
 	if (framelen <= MCLBYTES)
 		cluster = MCLBYTES;
 	else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0)
 		cluster = MJUMPAGESIZE;
 	else if (framelen <= MJUM9BYTES)
 		cluster = MJUM9BYTES;
 	else
 		return (EINVAL);
 
 	ifp->if_mtu = new_mtu;
 	sc->vtnet_rx_new_clsize = cluster;
 
 	/* Clear RUNNING before calling the init routine on a live device. */
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 		vtnet_init_locked(sc);
 	}
 
 	return (0);
 }
 
 /*
  * Interface ioctl handler.
  *
  * Handles MTU, interface flag, multicast, media and capability requests
  * itself and passes everything else on to ether_ioctl(). The core lock is
  * taken per case and must not be held on return. Returns 0 or an errno.
  *
  * This diff changes the SIOCSIFFLAGS case for devices without
  * VTNET_FLAG_CTRL_RX: instead of always failing with ENOTSUP when
  * IFF_PROMISC or IFF_ALLMULTI changed, the new side forces IFF_PROMISC on
  * and fails only when IFF_ALLMULTI changed.
  */
 static int
 vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct vtnet_softc *sc;
 	struct ifreq *ifr;
 	int reinit, mask, error;
 
 	sc = ifp->if_softc;
 	ifr = (struct ifreq *) data;
 	error = 0;
 
 	switch (cmd) {
 	case SIOCSIFMTU:
 		if (ifp->if_mtu != ifr->ifr_mtu) {
 			VTNET_CORE_LOCK(sc);
 			error = vtnet_change_mtu(sc, ifr->ifr_mtu);
 			VTNET_CORE_UNLOCK(sc);
 		}
 		break;
 
 	case SIOCSIFFLAGS:
 		VTNET_CORE_LOCK(sc);
 		/* Down: stop if running. Up and running: sync Rx mode. Else init. */
 		if ((ifp->if_flags & IFF_UP) == 0) {
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 				vtnet_stop(sc);
 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 			if ((ifp->if_flags ^ sc->vtnet_if_flags) &
 			    (IFF_PROMISC | IFF_ALLMULTI)) {
 				if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
 					vtnet_rx_filter(sc);
-				else
-					error = ENOTSUP;
+				else {
+					ifp->if_flags |= IFF_PROMISC;
+					if ((ifp->if_flags ^ sc->vtnet_if_flags)
+					    & IFF_ALLMULTI)
+						error = ENOTSUP;
+				}
 			}
 		} else
 			vtnet_init_locked(sc);
 
 		/* Remember the flags only when the request was honoured. */
 		if (error == 0)
 			sc->vtnet_if_flags = ifp->if_flags;
 		VTNET_CORE_UNLOCK(sc);
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		/* Without CTRL_RX there is no MAC filter to reprogram. */
 		if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
 			break;
 		VTNET_CORE_LOCK(sc);
 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 			vtnet_rx_filter_mac(sc);
 		VTNET_CORE_UNLOCK(sc);
 		break;
 
 	case SIOCSIFMEDIA:
 	case SIOCGIFMEDIA:
 		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
 		break;
 
 	case SIOCSIFCAP:
 		VTNET_CORE_LOCK(sc);
 		/* 'mask' holds the capability bits the caller wants toggled. */
 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 
 		if (mask & IFCAP_TXCSUM)
 			ifp->if_capenable ^= IFCAP_TXCSUM;
 		if (mask & IFCAP_TXCSUM_IPV6)
 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
 		if (mask & IFCAP_TSO4)
 			ifp->if_capenable ^= IFCAP_TSO4;
 		if (mask & IFCAP_TSO6)
 			ifp->if_capenable ^= IFCAP_TSO6;
 
 		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
 		    IFCAP_VLAN_HWFILTER)) {
 			/* These Rx features require us to renegotiate. */
 			reinit = 1;
 
 			if (mask & IFCAP_RXCSUM)
 				ifp->if_capenable ^= IFCAP_RXCSUM;
 			if (mask & IFCAP_RXCSUM_IPV6)
 				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
 			if (mask & IFCAP_LRO)
 				ifp->if_capenable ^= IFCAP_LRO;
 			if (mask & IFCAP_VLAN_HWFILTER)
 				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
 		} else
 			reinit = 0;
 
 		if (mask & IFCAP_VLAN_HWTSO)
 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
 		if (mask & IFCAP_VLAN_HWTAGGING)
 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 
 		/* Clear RUNNING before calling the init routine on a live device. */
 		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 			vtnet_init_locked(sc);
 		}
 
 		VTNET_CORE_UNLOCK(sc);
 		VLAN_CAPABILITIES(ifp);
 
 		break;
 
 	default:
 		error = ether_ioctl(ifp, cmd, data);
 		break;
 	}
 
 	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
 
 	return (error);
 }
 
 static int
 vtnet_rxq_populate(struct vtnet_rxq *rxq)
 {
 	struct virtqueue *vq;
 	int nbufs, error;
 
 	vq = rxq->vtnrx_vq;
 	error = ENOSPC;
 
 	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
 		error = vtnet_rxq_new_buf(rxq);
 		if (error)
 			break;
 	}
 
 	if (nbufs > 0) {
 		virtqueue_notify(vq);
 		/*
 		 * EMSGSIZE signifies the virtqueue did not have enough
 		 * entries available to hold the last mbuf. This is not
 		 * an error.
 		 */
 		if (error == EMSGSIZE)
 			error = 0;
 	}
 
 	return (error);
 }
 
 /*
  * Drain every mbuf still held by the receive virtqueue and free it.
  * The queue is asserted to be empty afterwards.
  */
 static void
 vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
 {
 	struct virtqueue *vq = rxq->vtnrx_vq;
 	struct mbuf *m;
 	int cursor = 0;
 
 	for (;;) {
 		m = virtqueue_drain(vq, &cursor);
 		if (m == NULL)
 			break;
 		m_freem(m);
 	}
 
 	KASSERT(virtqueue_empty(vq),
 	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
 }
 
 /*
  * Allocate a chain of nbufs receive clusters, each vtnet_rx_clsize bytes
  * long with m_len set to the full cluster size. Only the first mbuf
  * carries a packet header. When m_tailp is not NULL it receives the last
  * mbuf of the chain. Returns the head of the chain, or NULL (after
  * counting the failure and freeing any partial chain) when an
  * allocation fails.
  */
 static struct mbuf *
 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
 {
 	struct mbuf *head, *last, *next;
 	int remaining, size;
 
 	size = sc->vtnet_rx_clsize;
 
 	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
 	    ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
 
 	head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
 	if (head == NULL)
 		goto nomem;
 	head->m_len = size;
 
 	/* Append the remaining nbufs - 1 clusters, if any. */
 	last = head;
 	for (remaining = nbufs; remaining > 1; remaining--) {
 		next = m_getjcl(M_NOWAIT, MT_DATA, 0, size);
 		if (next == NULL)
 			goto nomem;
 
 		next->m_len = size;
 		last->m_next = next;
 		last = next;
 	}
 
 	if (m_tailp != NULL)
 		*m_tailp = last;
 
 	return (head);
 
 nomem:
 	sc->vtnet_stats.mbuf_alloc_failed++;
 	m_freem(head);
 
 	return (NULL);
 }
 
 /*
  * Slow path for when LRO without mergeable buffers is negotiated.
  *
  * m0 is the received mbuf chain and len0 the frame length computed by
  * the caller. The mbufs holding frame data are trimmed to that length
  * and a replacement chain with the same number of mbufs is enqueued on
  * the receive queue; unused trailing mbufs of m0 are moved onto the
  * replacement instead of being reallocated.
  *
  * Returns 0 on success, EMSGSIZE when the frame is longer than the
  * chain, ENOBUFS when the replacement cannot be allocated, or the error
  * from vtnet_rxq_enqueue_buf(). On the ENOBUFS and enqueue failure
  * paths the length of the last trimmed mbuf is reset to the cluster
  * size (and any moved mbufs are relinked) before returning.
  */
 static int
 vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
     int len0)
 {
 	struct vtnet_softc *sc;
 	struct mbuf *m, *m_prev;
 	struct mbuf *m_new, *m_tail;
 	int len, clsize, nreplace, error;
 
 	sc = rxq->vtnrx_sc;
 	clsize = sc->vtnet_rx_clsize;
 
 	m_prev = NULL;
 	m_tail = NULL;
 	nreplace = 0;
 
 	m = m0;
 	len = len0;
 
 	/*
 	 * Since these mbuf chains are so large, we avoid allocating an
 	 * entire replacement chain if possible. When the received frame
 	 * did not consume the entire chain, the unused mbufs are moved
 	 * to the replacement chain.
 	 */
 	while (len > 0) {
 		/*
 		 * Something is seriously wrong if we received a frame
 		 * larger than the chain. Drop it.
 		 */
 		if (m == NULL) {
 			sc->vtnet_stats.rx_frame_too_large++;
 			return (EMSGSIZE);
 		}
 
 		/* We always allocate the same cluster size. */
 		KASSERT(m->m_len == clsize,
 		    ("%s: mbuf size %d is not the cluster size %d",
 		    __func__, m->m_len, clsize));
 
 		/* Only the last mbuf holding data can end up shortened. */
 		m->m_len = MIN(m->m_len, len);
 		len -= m->m_len;
 
 		m_prev = m;
 		m = m->m_next;
 		nreplace++;
 	}
 
 	KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
 	    ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
 	    sc->vtnet_rx_nmbufs));
 
 	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
 	if (m_new == NULL) {
 		/* Undo the trim of the last data mbuf before bailing out. */
 		m_prev->m_len = clsize;
 		return (ENOBUFS);
 	}
 
 	/*
 	 * Move any unused mbufs from the received chain onto the end
 	 * of the new chain.
 	 */
 	if (m_prev->m_next != NULL) {
 		m_tail->m_next = m_prev->m_next;
 		m_prev->m_next = NULL;
 	}
 
 	error = vtnet_rxq_enqueue_buf(rxq, m_new);
 	if (error) {
 		/*
 		 * BAD! We could not enqueue the replacement mbuf chain. We
 		 * must restore the m0 chain to the original state if it was
 		 * modified so we can subsequently discard it.
 		 *
 		 * NOTE: The replacement is supposed to be an identical copy
 		 * to the one just dequeued so this is an unexpected error.
 		 */
 		sc->vtnet_stats.rx_enq_replacement_failed++;
 
 		/* Give the borrowed tail mbufs back to the m0 chain. */
 		if (m_tail->m_next != NULL) {
 			m_prev->m_next = m_tail->m_next;
 			m_tail->m_next = NULL;
 		}
 
 		m_prev->m_len = clsize;
 		m_freem(m_new);
 	}
 
 	return (error);
 }
 
 static int
 vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
 {
 	struct vtnet_softc *sc;
 	struct mbuf *m_new;
 	int error;
 
 	sc = rxq->vtnrx_sc;
 
 	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
 	    ("%s: chained mbuf without LRO_NOMRG", __func__));
 
 	if (m->m_next == NULL) {
 		/* Fast-path for the common case of just one mbuf. */
 		if (m->m_len < len)
 			return (EINVAL);
 
 		m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
 		if (m_new == NULL)
 			return (ENOBUFS);
 
 		error = vtnet_rxq_enqueue_buf(rxq, m_new);
 		if (error) {
 			/*
 			 * The new mbuf is suppose to be an identical
 			 * copy of the one just dequeued so this is an
 			 * unexpected error.
 			 */
 			m_freem(m_new);
 			sc->vtnet_stats.rx_enq_replacement_failed++;
 		} else
 			m->m_len = len;
 	} else
 		error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
 
 	return (error);
 }
 
 /*
  * Build a scatter/gather list for the receive mbuf (chain) m and enqueue
  * it on the receive virtqueue. The Rx queue lock must be held and the
  * first mbuf must span a full receive cluster. Returns the result of
  * virtqueue_enqueue().
  */
 static int
 vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
 {
 	struct vtnet_softc *sc = rxq->vtnrx_sc;
 	struct sglist *sg = rxq->vtnrx_sg;
 	struct vtnet_rx_header *rxhdr;
 	uint8_t *buf;
 	int skip, error;
 
 	VTNET_RXQ_LOCK_ASSERT(rxq);
 	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
 	    ("%s: chained mbuf without LRO_NOMRG", __func__));
 	KASSERT(m->m_len == sc->vtnet_rx_clsize,
 	    ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
 	     sc->vtnet_rx_clsize));
 
 	buf = mtod(m, uint8_t *);
 	skip = 0;
 
 	sglist_reset(sg);
 
 	/*
 	 * Without mergeable buffers the VirtIO header gets its own
 	 * segment and the frame data starts after the whole
 	 * vtnet_rx_header at the front of the mbuf.
 	 */
 	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
 		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
 		rxhdr = (struct vtnet_rx_header *) buf;
 		sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
 		skip = sizeof(struct vtnet_rx_header);
 	}
 
 	sglist_append(sg, buf + skip, m->m_len - skip);
 
 	/* Any further mbufs of the chain are appended whole. */
 	if (m->m_next != NULL) {
 		error = sglist_append_mbuf(sg, m->m_next);
 		MPASS(error == 0);
 	}
 
 	return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg));
 }
 
 static int
 vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
 {
 	struct vtnet_softc *sc;
 	struct mbuf *m;
 	int error;
 
 	sc = rxq->vtnrx_sc;
 
 	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
 	if (m == NULL)
 		return (ENOBUFS);
 
 	error = vtnet_rxq_enqueue_buf(rxq, m);
 	if (error)
 		m_freem(m);
 
 	return (error);
 }
 
 /*
  * Use the checksum offset in the VirtIO header to set the
  * correct CSUM_* flags.
  *
  * eth_type is the Ethernet type in host byte order and ip_start the
  * offset of the IP header within the frame. Returns 0 when the flags
  * were set on m, or 1 when the Ethernet type is not handled or the
  * checksum offsets fail the sanity checks.
  */
 static int
 vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
     uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
 {
 	struct vtnet_softc *sc;
 #if defined(INET) || defined(INET6)
 	/* Offset of the checksum field from the start of the frame. */
 	int offset = hdr->csum_start + hdr->csum_offset;
 #endif
 
 	sc = rxq->vtnrx_sc;
 
 	/* Only do a basic sanity check on the offset. */
 	switch (eth_type) {
 #if defined(INET)
 	case ETHERTYPE_IP:
 		if (__predict_false(offset < ip_start + sizeof(struct ip)))
 			return (1);
 		break;
 #endif
 #if defined(INET6)
 	case ETHERTYPE_IPV6:
 		if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
 			return (1);
 		break;
 #endif
 	default:
 		sc->vtnet_stats.rx_csum_bad_ethtype++;
 		return (1);
 	}
 
 	/*
 	 * Use the offset to determine the appropriate CSUM_* flags. This is
 	 * a bit dirty, but we can get by with it since the checksum offsets
 	 * happen to be different. We assume the host does not do IPv4
 	 * header checksum offloading.
 	 */
 	switch (hdr->csum_offset) {
 	case offsetof(struct udphdr, uh_sum):
 	case offsetof(struct tcphdr, th_sum):
 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 		m->m_pkthdr.csum_data = 0xFFFF;
 		break;
 	case offsetof(struct sctphdr, checksum):
 		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
 		break;
 	default:
 		sc->vtnet_stats.rx_csum_bad_offset++;
 		return (1);
 	}
 
 	return (0);
 }
 
 /*
  * Determine the CSUM_* flags by parsing the received frame.
  *
  * eth_type is the Ethernet type in host byte order and ip_start the
  * offset of the IP header within the frame. The IP header and the
  * transport header must both lie within the first mbuf. Returns 0 when
  * the frame was parsed (no flags are set for transport protocols other
  * than TCP, UDP and SCTP), or 1 when the Ethernet type is not handled
  * or a header is not fully contained in the first mbuf.
  */
 static int
 vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
     uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
 {
 	struct vtnet_softc *sc;
 	int offset, proto;
 
 	sc = rxq->vtnrx_sc;
 
 	/* Find the transport protocol and the offset of its header. */
 	switch (eth_type) {
 #if defined(INET)
 	case ETHERTYPE_IP: {
 		struct ip *ip;
 		if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
 			return (1);
 		ip = (struct ip *)(m->m_data + ip_start);
 		proto = ip->ip_p;
 		offset = ip_start + (ip->ip_hl << 2);
 		break;
 	}
 #endif
 #if defined(INET6)
 	case ETHERTYPE_IPV6:
 		if (__predict_false(m->m_len < ip_start +
 		    sizeof(struct ip6_hdr)))
 			return (1);
 		offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
 		if (__predict_false(offset < 0))
 			return (1);
 		break;
 #endif
 	default:
 		sc->vtnet_stats.rx_csum_bad_ethtype++;
 		return (1);
 	}
 
 	switch (proto) {
 	case IPPROTO_TCP:
 		if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
 			return (1);
 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 		m->m_pkthdr.csum_data = 0xFFFF;
 		break;
 	case IPPROTO_UDP:
 		if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
 			return (1);
 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 		m->m_pkthdr.csum_data = 0xFFFF;
 		break;
 	case IPPROTO_SCTP:
 		if (__predict_false(m->m_len < offset + sizeof(struct sctphdr)))
 			return (1);
 		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
 		break;
 	default:
 		/*
 		 * For the remaining protocols, FreeBSD does not support
 		 * checksum offloading, so the checksum will be recomputed.
 		 */
 #if 0
 		/* Disabled debugging aid; arguments match the format. */
 		if_printf(sc->vtnet_ifp, "cksum offload of unsupported "
 		    "protocol eth_type=%#x proto=%d csum_start=%d "
 		    "csum_offset=%d\n", eth_type, proto,
 		    hdr->csum_start, hdr->csum_offset);
 #endif
 		break;
 	}
 
 	return (0);
 }
 
 /*
  * Translate the VirtIO checksum information into CSUM_* flags on m.
  *
  * The VirtIO header only provides the offset of the checksum, which is
  * all Linux needs, but FreeBSD wants to know which protocol was
  * checksummed, so the frame has to be inspected a bit. It would be
  * nice if VirtIO gave us the L4 protocol or if FreeBSD could accept the
  * offsets and let the stack figure it out.
  *
  * Returns the result of the offset-based or parse-based helper: 0 on
  * success, non-zero otherwise.
  */
 static int
 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
     struct virtio_net_hdr *hdr)
 {
 	struct ether_header *eh;
 	struct ether_vlan_header *evh;
 	uint16_t eth_type;
 	int ip_start;
 
 	eh = mtod(m, struct ether_header *);
 	eth_type = ntohs(eh->ether_type);
 	ip_start = sizeof(struct ether_header);
 
 	if (eth_type == ETHERTYPE_VLAN) {
 		/* BMV: We should handle nested VLAN tags too. */
 		evh = mtod(m, struct ether_vlan_header *);
 		eth_type = ntohs(evh->evl_proto);
 		ip_start = sizeof(struct ether_vlan_header);
 	}
 
 	if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) != 0)
 		return (vtnet_rxq_csum_by_offset(rxq, m, eth_type, ip_start,
 		    hdr));
 
 	return (vtnet_rxq_csum_by_parse(rxq, m, eth_type, ip_start, hdr));
 }
 
 /*
  * Dequeue and requeue up to nbufs - 1 further buffers from the receive
  * virtqueue, stopping early when the queue has nothing more to dequeue.
  */
 static void
 vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
 {
 	struct mbuf *m;
 
 	for (nbufs--; nbufs > 0; nbufs--) {
 		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
 		if (m == NULL)
 			return;
 		vtnet_rxq_discard_buf(rxq, m);
 	}
 }
 
 /*
  * Give a discarded receive mbuf back to the receive virtqueue. Since
  * the mbuf was just dequeued there should always be room for it, so a
  * failure is only asserted on.
  */
 static void
 vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
 {
 	int error = vtnet_rxq_enqueue_buf(rxq, m);
 
 	KASSERT(error == 0,
 	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
 }
 
 /*
  * Dequeue the remaining nbufs - 1 buffers of a frame that spans several
  * receive buffers and append them to m_head, adding each buffer's length
  * to the packet header length. A new receive buffer is posted for every
  * buffer taken.
  *
  * Returns 0 on success. On failure the partially assembled chain is
  * freed and 1 is returned; m_head must not be used afterwards.
  */
 static int
 vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
 {
 	struct vtnet_softc *sc;
 	struct virtqueue *vq;
 	struct mbuf *m, *m_tail;
 	int len;
 
 	sc = rxq->vtnrx_sc;
 	vq = rxq->vtnrx_vq;
 	m_tail = m_head;
 
 	while (--nbufs > 0) {
 		m = virtqueue_dequeue(vq, &len);
 		if (m == NULL) {
 			rxq->vtnrx_stats.vrxs_ierrors++;
 			goto fail;
 		}
 
 		if (vtnet_rxq_new_buf(rxq) != 0) {
 			/*
 			 * No replacement available: requeue this buffer and
 			 * the rest of the frame's buffers.
 			 */
 			rxq->vtnrx_stats.vrxs_iqdrops++;
 			vtnet_rxq_discard_buf(rxq, m);
 			if (nbufs > 1)
 				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
 			goto fail;
 		}
 
 		/* Never trust a length larger than the buffer itself. */
 		if (m->m_len < len)
 			len = m->m_len;
 
 		m->m_len = len;
 		/* Only the head of the chain keeps its packet header. */
 		m->m_flags &= ~M_PKTHDR;
 
 		m_head->m_pkthdr.len += len;
 		m_tail->m_next = m;
 		m_tail = m;
 	}
 
 	return (0);
 
 fail:
 	sc->vtnet_stats.rx_mergeable_failed++;
 	m_freem(m_head);
 
 	return (1);
 }
 
 /*
  * Finish a received frame and pass it to the interface's input routine.
  * hdr is the caller's copy of the frame's VirtIO header. The Rx queue
  * lock is released around the if_input call and reacquired afterwards.
  */
 static void
 vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
     struct virtio_net_hdr *hdr)
 {
 	struct ifnet *ifp = rxq->vtnrx_sc->vtnet_ifp;
 	struct ether_header *eh;
 
 	if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) {
 		eh = mtod(m, struct ether_header *);
 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 			vtnet_vlan_tag_remove(m);
 			/*
 			 * The 802.1Q header is gone, so the checksum
 			 * start offset moves up by its length.
 			 */
 			if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) != 0)
 				hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
 		}
 	}
 
 	/* Tag the frame with the queue it arrived on. */
 	m->m_pkthdr.flowid = rxq->vtnrx_id;
 	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
 
 	/*
 	 * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
 	 * distinction that Linux does. Need to reevaluate if performing
 	 * offloading for the NEEDS_CSUM case is really appropriate.
 	 */
 	if ((hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
 	    VIRTIO_NET_HDR_F_DATA_VALID)) != 0) {
 		if (vtnet_rxq_csum(rxq, m, hdr) != 0)
 			rxq->vtnrx_stats.vrxs_csum_failed++;
 		else
 			rxq->vtnrx_stats.vrxs_csum++;
 	}
 
 	rxq->vtnrx_stats.vrxs_ipackets++;
 	rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
 
 	VTNET_RXQ_UNLOCK(rxq);
 	(*ifp->if_input)(ifp, m);
 	VTNET_RXQ_LOCK(rxq);
 }
 
 /*
  * Process completed receive buffers.
  *
  * At most vtnet_rx_process_limit buffers are dequeued per call. Each
  * valid frame gets a replacement buffer posted, has its VirtIO header
  * stripped and is handed to vtnet_rxq_input(). Must be called with the
  * Rx queue lock held; note that vtnet_rxq_input() drops and reacquires
  * that lock.
  *
  * Returns 0 when the loop ended with budget to spare and EAGAIN
  * otherwise. With DEV_NETMAP, FALSE is returned when netmap_rx_irq()
  * reports a non-zero result.
  */
 static int
 vtnet_rxq_eof(struct vtnet_rxq *rxq)
 {
 	struct virtio_net_hdr lhdr, *hdr;
 	struct vtnet_softc *sc;
 	struct ifnet *ifp;
 	struct virtqueue *vq;
 	struct mbuf *m;
 	struct virtio_net_hdr_mrg_rxbuf *mhdr;
 	int len, deq, nbufs, adjsz, count;
 
 	sc = rxq->vtnrx_sc;
 	vq = rxq->vtnrx_vq;
 	ifp = sc->vtnet_ifp;
 	hdr = &lhdr;
 	deq = 0;
 	count = sc->vtnet_rx_process_limit;
 
 	VTNET_RXQ_LOCK_ASSERT(rxq);
 
 #ifdef DEV_NETMAP
 	if (netmap_rx_irq(ifp, 0, &deq)) {
 		return (FALSE);
 	}
 #endif /* DEV_NETMAP */
 
 	while (count-- > 0) {
 		m = virtqueue_dequeue(vq, &len);
 		if (m == NULL)
 			break;
 		deq++;
 
 		/* Too short to hold a VirtIO and an Ethernet header. */
 		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
 			rxq->vtnrx_stats.vrxs_ierrors++;
 			vtnet_rxq_discard_buf(rxq, m);
 			continue;
 		}
 
 		if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
 			nbufs = 1;
 			adjsz = sizeof(struct vtnet_rx_header);
 			/*
 			 * Account for our pad inserted between the header
 			 * and the actual start of the frame.
 			 */
 			len += VTNET_RX_HEADER_PAD;
 		} else {
 			/* The header says how many buffers the frame spans. */
 			mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
 			nbufs = mhdr->num_buffers;
 			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 		}
 
 		/*
 		 * No replacement could be posted: drop the frame by
 		 * requeueing this buffer and any that belong with it.
 		 */
 		if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
 			rxq->vtnrx_stats.vrxs_iqdrops++;
 			vtnet_rxq_discard_buf(rxq, m);
 			if (nbufs > 1)
 				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
 			continue;
 		}
 
 		m->m_pkthdr.len = len;
 		m->m_pkthdr.rcvif = ifp;
 		m->m_pkthdr.csum_flags = 0;
 
 		if (nbufs > 1) {
 			/* Dequeue the rest of chain. */
 			if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
 				continue;
 		}
 
 		/*
 		 * Save copy of header before we strip it. For both mergeable
 		 * and non-mergeable, the header is at the beginning of the
 		 * mbuf data. We no longer need num_buffers, so always use a
 		 * regular header.
 		 *
 		 * BMV: Is this memcpy() expensive? We know the mbuf data is
 		 * still valid even after the m_adj().
 		 */
 		memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
 		m_adj(m, adjsz);
 
 		vtnet_rxq_input(rxq, m, hdr);
 
 		/* Must recheck after dropping the Rx lock. */
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
 	}
 
 	if (deq > 0)
 		virtqueue_notify(vq);
 
 	return (count > 0 ? 0 : EAGAIN);
 }
 
 /*
  * Receive virtqueue interrupt handler (judging by its name and use of
  * the queue's interrupt controls). Processes completed receive buffers
  * and re-enables the queue interrupt; when more work remains after a
  * few retries, the rest is deferred to the queue's taskqueue.
  */
 static void
 vtnet_rx_vq_intr(void *xrxq)
 {
 	struct vtnet_rxq *rxq = xrxq;
 	struct vtnet_softc *sc = rxq->vtnrx_sc;
 	struct ifnet *ifp = sc->vtnet_ifp;
 	int attempts, more;
 
 	if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
 		/*
 		 * Ignore this interrupt. Either this is a spurious interrupt
 		 * or multiqueue without per-VQ MSIX so every queue needs to
 		 * be polled (a brain dead configuration we could try harder
 		 * to avoid).
 		 */
 		vtnet_rxq_disable_intr(rxq);
 		return;
 	}
 
 	VTNET_RXQ_LOCK(rxq);
 
 	attempts = 0;
 	for (;;) {
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 			VTNET_RXQ_UNLOCK(rxq);
 			return;
 		}
 
 		more = vtnet_rxq_eof(rxq);
 		if (!more && vtnet_rxq_enable_intr(rxq) == 0) {
 			/* All caught up and the interrupt is armed again. */
 			VTNET_RXQ_UNLOCK(rxq);
 			return;
 		}
 
 		/* Enabling the interrupt failed; turn it back off. */
 		if (!more)
 			vtnet_rxq_disable_intr(rxq);
 
 		/*
 		 * This is an occasional condition or race (when !more),
 		 * so retry a few times before scheduling the taskqueue.
 		 */
 		if (attempts++ >= VTNET_INTR_DISABLE_RETRIES)
 			break;
 	}
 
 	VTNET_RXQ_UNLOCK(rxq);
 	rxq->vtnrx_stats.vrxs_rescheduled++;
 	taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
 }
 
 /*
  * Deferred receive processing (presumably the handler behind
  * vtnrx_intrtask - confirm where the task is initialized). Processes
  * completed receive buffers under the Rx queue lock and reschedules
  * itself when more work remains or the interrupt cannot be re-enabled.
  */
 static void
 vtnet_rxq_tq_intr(void *xrxq, int pending)
 {
 	struct vtnet_rxq *rxq = xrxq;
 	struct ifnet *ifp = rxq->vtnrx_sc->vtnet_ifp;
 	int more, resched;
 
 	VTNET_RXQ_LOCK(rxq);
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 		more = vtnet_rxq_eof(rxq);
 		resched = more;
 
 		/* Only try to re-arm the interrupt when caught up. */
 		if (!more && vtnet_rxq_enable_intr(rxq) != 0) {
 			vtnet_rxq_disable_intr(rxq);
 			resched = 1;
 		}
 
 		if (resched) {
 			rxq->vtnrx_stats.vrxs_rescheduled++;
 			taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
 		}
 	}
 
 	VTNET_RXQ_UNLOCK(rxq);
 }
 
 /*
  * Report whether the number of free entries in the transmit virtqueue
  * is at or below vtnet_tx_intr_thresh. Returns non-zero when it is.
  */
 static int
 vtnet_txq_below_threshold(struct vtnet_txq *txq)
 {
 	int nfree = virtqueue_nfree(txq->vtntx_vq);
 
 	return (nfree <= txq->vtntx_sc->vtnet_tx_intr_thresh);
 }
 
 /*
  * Notify the host of newly enqueued transmit frames, restart the
  * watchdog and enable the transmit interrupt. Returns 1 when the caller
  * should continue transmitting, 0 otherwise.
  */
 static int
 vtnet_txq_notify(struct vtnet_txq *txq)
 {
 	struct virtqueue *vq = txq->vtntx_vq;
 
 	txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
 	virtqueue_notify(vq);
 
 	if (vtnet_txq_enable_intr(txq) == 0)
 		return (0);
 
 	/*
 	 * Enabling the interrupt reported pending work, so drain the
 	 * frames completed since the last check. Nothing drained, or
 	 * still at or below the threshold: the caller need not retry.
 	 */
 	if (vtnet_txq_eof(txq) == 0)
 		return (0);
 	if (vtnet_txq_below_threshold(txq) != 0)
 		return (0);
 
 	/* Room was freed up; let the caller keep transmitting. */
 	virtqueue_disable_intr(vq);
 
 	return (1);
 }
 
 /*
  * Drain every transmit header still held by the transmit virtqueue,
  * freeing its mbuf and returning the header to its UMA zone. The queue
  * is asserted to be empty afterwards.
  */
 static void
 vtnet_txq_free_mbufs(struct vtnet_txq *txq)
 {
 	struct virtqueue *vq = txq->vtntx_vq;
 	struct vtnet_tx_header *txhdr;
 	int cursor = 0;
 
 	for (;;) {
 		txhdr = virtqueue_drain(vq, &cursor);
 		if (txhdr == NULL)
 			break;
 
 		m_freem(txhdr->vth_mbuf);
 		uma_zfree(vtnet_tx_header_zone, txhdr);
 	}
 
 	KASSERT(virtqueue_empty(vq),
 	    ("%s: mbufs remaining in tx queue %p", __func__, txq));
 }
 
 /*
  * BMV: Much of this can go away once we finally have offsets in
  * the mbuf packet header. Bug andre@.
  *
  * Parse the outgoing frame to find what the offload code needs:
  * *etype receives the Ethernet type (host byte order, looking past a
  * single VLAN tag), *proto the IP protocol and *start the offset of the
  * header following the IP header(s). Returns 0 on success or EINVAL for
  * an Ethernet type that is not handled.
  */
 static int
 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
     int *etype, int *proto, int *start)
 {
 	struct vtnet_softc *sc;
 	struct ether_vlan_header *evh;
 	int offset;
 
 	sc = txq->vtntx_sc;
 
 	evh = mtod(m, struct ether_vlan_header *);
 	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
 		/* BMV: We should handle nested VLAN tags too. */
 		*etype = ntohs(evh->evl_proto);
 		offset = sizeof(struct ether_vlan_header);
 	} else {
 		*etype = ntohs(evh->evl_encap_proto);
 		offset = sizeof(struct ether_header);
 	}
 
 	switch (*etype) {
 #if defined(INET)
 	case ETHERTYPE_IP: {
 		struct ip *ip, iphdr;
 		/*
 		 * Copy the IP header out when it is not fully contained
 		 * in the first mbuf.
 		 */
 		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
 			m_copydata(m, offset, sizeof(struct ip),
 			    (caddr_t) &iphdr);
 			ip = &iphdr;
 		} else
 			ip = (struct ip *)(m->m_data + offset);
 		*proto = ip->ip_p;
 		*start = offset + (ip->ip_hl << 2);
 		break;
 	}
 #endif
 #if defined(INET6)
 	case ETHERTYPE_IPV6:
 		*proto = -1;
 		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
 		/* Assert the network stack sent us a valid packet. */
 		KASSERT(*start > offset,
 		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
 		    *start, offset, *proto));
 		break;
 #endif
 	default:
 		sc->vtnet_stats.tx_csum_bad_ethtype++;
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 static int
 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
     int offset, struct virtio_net_hdr *hdr)
 {
 	static struct timeval lastecn;
 	static int curecn;
 	struct vtnet_softc *sc;
 	struct tcphdr *tcp, tcphdr;
 
 	sc = txq->vtntx_sc;
 
 	if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
 		m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
 		tcp = &tcphdr;
 	} else
 		tcp = (struct tcphdr *)(m->m_data + offset);
 
 	hdr->hdr_len = offset + (tcp->th_off << 2);
 	hdr->gso_size = m->m_pkthdr.tso_segsz;
 	hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
 	    VIRTIO_NET_HDR_GSO_TCPV6;
 
 	if (tcp->th_flags & TH_CWR) {
 		/*
 		 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
 		 * ECN support is not on a per-interface basis, but globally via
 		 * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
 		 */
 		if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
 			if (ppsratecheck(&lastecn, &curecn, 1))
 				if_printf(sc->vtnet_ifp,
 				    "TSO with ECN not negotiated with host\n");
 			return (ENOTSUP);
 		}
 		hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
 	}
 
 	txq->vtntx_stats.vtxs_tso++;
 
 	return (0);
 }
 
 /*
  * Set up checksum and TSO offload in the VirtIO header for the outgoing
  * frame m, based on the mbuf's csum_flags. Returns m on success. On
  * failure the mbuf is freed and NULL is returned.
  */
 static struct mbuf *
 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
     struct virtio_net_hdr *hdr)
 {
 	struct vtnet_softc *sc = txq->vtntx_sc;
 	int csum_flags, etype, l4_start, proto, want_csum;
 
 	csum_flags = m->m_pkthdr.csum_flags;
 
 	if (vtnet_txq_offload_ctx(txq, m, &etype, &proto, &l4_start) != 0)
 		goto drop;
 
 	/* Match the requested checksum offload to the IP version. */
 	if (etype == ETHERTYPE_IP)
 		want_csum = (csum_flags & VTNET_CSUM_OFFLOAD) != 0;
 	else if (etype == ETHERTYPE_IPV6)
 		want_csum = (csum_flags & VTNET_CSUM_OFFLOAD_IPV6) != 0;
 	else
 		want_csum = 0;
 
 	if (want_csum) {
 		/*
 		 * We could compare the IP protocol vs the CSUM_ flag too,
 		 * but that really should not be necessary.
 		 */
 		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
 		hdr->csum_start = l4_start;
 		hdr->csum_offset = m->m_pkthdr.csum_data;
 		txq->vtntx_stats.vtxs_csum++;
 	}
 
 	if ((csum_flags & CSUM_TSO) != 0) {
 		if (__predict_false(proto != IPPROTO_TCP)) {
 			/* Likely failed to correctly parse the mbuf. */
 			sc->vtnet_stats.tx_tso_not_tcp++;
 			goto drop;
 		}
 
 		KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
 		    ("%s: mbuf %p TSO without checksum offload %#x",
 		    __func__, m, csum_flags));
 
 		if (vtnet_txq_offload_tso(txq, m, etype, l4_start, hdr) != 0)
 			goto drop;
 	}
 
 	return (m);
 
 drop:
 	m_freem(m);
 	return (NULL);
 }
 
 /*
  * Enqueue the transmit header txhdr followed by the mbuf chain *m_head
  * on the transmit virtqueue. When the chain does not fit into the
  * scatter/gather list it is defragmented once and *m_head is updated.
  *
  * Returns the result of virtqueue_enqueue(). When defragmenting fails
  * or the defragmented chain still does not fit, the chain is freed,
  * *m_head is set to NULL and ENOBUFS is returned.
  */
 static int
 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
     struct vtnet_tx_header *txhdr)
 {
 	struct vtnet_softc *sc = txq->vtntx_sc;
 	struct virtqueue *vq = txq->vtntx_vq;
 	struct sglist *sg = txq->vtntx_sg;
 	struct mbuf *m = *m_head;
 	int error;
 
 	/* The VirtIO header always takes the first segment. */
 	sglist_reset(sg);
 	error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
 	KASSERT(error == 0 && sg->sg_nseg == 1,
 	    ("%s: error %d adding header to sglist", __func__, error));
 
 	if (sglist_append_mbuf(sg, m) != 0) {
 		m = m_defrag(m, M_NOWAIT);
 		if (m == NULL)
 			goto fail;
 
 		*m_head = m;
 		sc->vtnet_stats.tx_defragged++;
 
 		if (sglist_append_mbuf(sg, m) != 0)
 			goto fail;
 	}
 
 	txhdr->vth_mbuf = m;
 
 	return (virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0));
 
 fail:
 	sc->vtnet_stats.tx_defrag_failed++;
 	m_freem(*m_head);
 	*m_head = NULL;
 
 	return (ENOBUFS);
 }
 
 /*
  * Prepare the packet *m_head for transmission and enqueue it: allocate
  * a transmit header, insert a pending VLAN tag into the frame, set up
  * any requested offloads and hand the result to the virtqueue.
  *
  * Returns 0 on success or an errno value. On failure *m_head is NULL
  * when the mbuf was consumed; otherwise it still refers to the
  * (possibly modified) mbuf.
  */
 static int
 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head)
 {
 	struct vtnet_tx_header *txhdr;
 	struct virtio_net_hdr *hdr;
 	struct mbuf *m = *m_head;
 	int error;
 
 	M_ASSERTPKTHDR(m);
 
 	txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO);
 	if (txhdr == NULL) {
 		m_freem(m);
 		*m_head = NULL;
 		return (ENOMEM);
 	}
 
 	/*
 	 * Always use the non-mergeable header, regardless if the feature
 	 * was negotiated. For transmit, num_buffers is always zero. The
 	 * vtnet_hdr_size is used to enqueue the correct header size.
 	 */
 	hdr = &txhdr->vth_uhdr.hdr;
 
 	if ((m->m_flags & M_VLANTAG) != 0) {
 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
 		*m_head = m;
 		if (m == NULL) {
 			error = ENOBUFS;
 			goto fail;
 		}
 		m->m_flags &= ~M_VLANTAG;
 	}
 
 	if ((m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) != 0) {
 		m = vtnet_txq_offload(txq, m, hdr);
 		*m_head = m;
 		if (m == NULL) {
 			error = ENOBUFS;
 			goto fail;
 		}
 	}
 
 	error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
 	if (error != 0)
 		goto fail;
 
 	return (0);
 
 fail:
 	/* The header was not handed to the virtqueue; release it. */
 	uma_zfree(vtnet_tx_header_zone, txhdr);
 
 	return (error);
 }
 
 #ifdef VTNET_LEGACY_TX
 
 /*
  * Move packets from the interface send queue onto the transmit
  * virtqueue. The Tx queue lock must be held. Nothing is done unless the
  * interface is running and the link is active. When the host cannot be
  * brought up to date within VTNET_NOTIFY_RETRIES retries, the remaining
  * work is deferred to the queue's taskqueue.
  */
 static void
 vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
 {
 	struct vtnet_softc *sc = txq->vtntx_sc;
 	struct virtqueue *vq = txq->vtntx_vq;
 	struct mbuf *m0;
 	int attempts, queued;
 
 	VTNET_TXQ_LOCK_ASSERT(txq);
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 	    sc->vtnet_link_active == 0)
 		return;
 
 	/* Reclaim completed frames first to make room. */
 	vtnet_txq_eof(txq);
 
 	attempts = 0;
 	for (;;) {
 		queued = 0;
 
 		while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
 			if (virtqueue_full(vq))
 				break;
 
 			IFQ_DRV_DEQUEUE(&ifp->if_snd, m0);
 			if (m0 == NULL)
 				break;
 
 			if (vtnet_txq_encap(txq, &m0) != 0) {
 				/* Put an unconsumed mbuf back at the front. */
 				if (m0 != NULL)
 					IFQ_DRV_PREPEND(&ifp->if_snd, m0);
 				break;
 			}
 
 			queued++;
 			ETHER_BPF_MTAP(ifp, m0);
 		}
 
 		/* Done unless the notify asks us to keep transmitting. */
 		if (queued == 0 || vtnet_txq_notify(txq) == 0)
 			break;
 
 		if (attempts++ >= VTNET_NOTIFY_RETRIES) {
 			txq->vtntx_stats.vtxs_rescheduled++;
 			taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
 			break;
 		}
 	}
 }
 
 /*
  * Legacy transmit entry point: start transmission on the first transmit
  * queue while holding that queue's lock.
  */
 static void
 vtnet_start(struct ifnet *ifp)
 {
 	struct vtnet_softc *sc = ifp->if_softc;
 	struct vtnet_txq *txq = &sc->vtnet_txqs[0];
 
 	VTNET_TXQ_LOCK(txq);
 	vtnet_start_locked(txq, ifp);
 	VTNET_TXQ_UNLOCK(txq);
 }
 
 #else /* !VTNET_LEGACY_TX */
 
 /*
  * Queue m (when not NULL) on the transmit queue's buf_ring and, if the
  * interface is running with an active link, move packets from the ring
  * onto the transmit virtqueue. The Tx queue lock must be held.
  *
  * Returns the drbr_enqueue() error when m could not be queued, and 0
  * otherwise. When the host cannot be brought up to date within
  * VTNET_NOTIFY_RETRIES retries, the remaining work is deferred to the
  * queue's taskqueue.
  */
 static int
 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
 {
 	struct vtnet_softc *sc = txq->vtntx_sc;
 	struct virtqueue *vq = txq->vtntx_vq;
 	struct buf_ring *br = txq->vtntx_br;
 	struct ifnet *ifp = sc->vtnet_ifp;
 	int attempts, queued, ready, error;
 
 	VTNET_TXQ_LOCK_ASSERT(txq);
 
 	ready = (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 &&
 	    sc->vtnet_link_active != 0;
 
 	/* The packet is queued on the ring whether or not we can send. */
 	if (m != NULL) {
 		error = drbr_enqueue(ifp, br, m);
 		if (error != 0)
 			return (error);
 	}
 
 	if (!ready)
 		return (0);
 
 	/* Reclaim completed frames first to make room. */
 	vtnet_txq_eof(txq);
 
 	attempts = 0;
 	for (;;) {
 		queued = 0;
 
 		while ((m = drbr_peek(ifp, br)) != NULL) {
 			if (virtqueue_full(vq)) {
 				drbr_putback(ifp, br, m);
 				break;
 			}
 
 			if (vtnet_txq_encap(txq, &m) != 0) {
 				/*
 				 * A consumed mbuf leaves the ring; otherwise
 				 * the (possibly changed) mbuf goes back.
 				 */
 				if (m == NULL)
 					drbr_advance(ifp, br);
 				else
 					drbr_putback(ifp, br, m);
 				break;
 			}
 			drbr_advance(ifp, br);
 
 			queued++;
 			ETHER_BPF_MTAP(ifp, m);
 		}
 
 		/* Done unless the notify asks us to keep transmitting. */
 		if (queued == 0 || vtnet_txq_notify(txq) == 0)
 			break;
 
 		if (attempts++ >= VTNET_NOTIFY_RETRIES) {
 			txq->vtntx_stats.vtxs_rescheduled++;
 			taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
 			break;
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Multiqueue transmit entry point. Selects a transmit queue from the
  * mbuf's flowid when a hash type is set, or from the current CPU
  * otherwise. If the queue lock is free the packet is sent right away;
  * if not, it is placed on the queue's buf_ring and the deferred start
  * task is scheduled. Returns 0 or an errno value.
  */
 static int
 vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
 {
 	struct vtnet_softc *sc = ifp->if_softc;
 	struct vtnet_txq *txq;
 	int qidx, npairs, error;
 
 	npairs = sc->vtnet_act_vq_pairs;
 
 	/* check if flowid is set */
 	if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE)
 		qidx = curcpu % npairs;
 	else
 		qidx = m->m_pkthdr.flowid % npairs;
 
 	txq = &sc->vtnet_txqs[qidx];
 
 	if (VTNET_TXQ_TRYLOCK(txq) == 0) {
 		/* Lock is busy: queue the packet and let the task send it. */
 		error = drbr_enqueue(ifp, txq->vtntx_br, m);
 		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
 		return (error);
 	}
 
 	error = vtnet_txq_mq_start_locked(txq, m);
 	VTNET_TXQ_UNLOCK(txq);
 
 	return (error);
 }
 
 /*
  * Deferred transmit start (presumably the handler behind
  * vtntx_defrtask - confirm where the task is initialized). Sends
  * whatever is waiting on the queue's buf_ring while holding the Tx
  * queue lock.
  */
 static void
 vtnet_txq_tq_deferred(void *xtxq, int pending)
 {
 	struct vtnet_txq *txq = xtxq;
 	struct ifnet *ifp = txq->vtntx_sc->vtnet_ifp;
 
 	VTNET_TXQ_LOCK(txq);
 	if (drbr_empty(ifp, txq->vtntx_br) == 0)
 		vtnet_txq_mq_start_locked(txq, NULL);
 	VTNET_TXQ_UNLOCK(txq);
 }
 
 #endif /* VTNET_LEGACY_TX */
 
 /*
  * Start transmission on txq when packets are waiting, using whichever
  * transmit path (legacy send queue or buf_ring) the driver was built
  * with. The callees assert that the Tx queue lock is held.
  */
 static void
 vtnet_txq_start(struct vtnet_txq *txq)
 {
 	struct vtnet_softc *sc = txq->vtntx_sc;
 	struct ifnet *ifp = sc->vtnet_ifp;
 
 #ifdef VTNET_LEGACY_TX
 	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		return;
 	vtnet_start_locked(txq, ifp);
 #else
 	if (drbr_empty(ifp, txq->vtntx_br))
 		return;
 	vtnet_txq_mq_start_locked(txq, NULL);
 #endif
 }
 
 /*
  * Deferred transmit processing (presumably the handler behind
  * vtntx_intrtask - confirm where the task is initialized). While the
  * interface is running, reclaims completed frames and restarts
  * transmission under the Tx queue lock.
  */
 static void
 vtnet_txq_tq_intr(void *xtxq, int pending)
 {
 	struct vtnet_txq *txq = xtxq;
 	struct ifnet *ifp = txq->vtntx_sc->vtnet_ifp;
 
 	VTNET_TXQ_LOCK(txq);
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 		vtnet_txq_eof(txq);
 		vtnet_txq_start(txq);
 	}
 
 	VTNET_TXQ_UNLOCK(txq);
 }
 
 /*
  * Reclaim completed transmit frames: update the output statistics, free
  * each mbuf and return its transmit header to the UMA zone. The
  * watchdog is disarmed once the virtqueue is empty. The Tx queue lock
  * must be held. Returns the number of frames reclaimed.
  */
 static int
 vtnet_txq_eof(struct vtnet_txq *txq)
 {
 	struct virtqueue *vq = txq->vtntx_vq;
 	struct vtnet_tx_header *txhdr;
 	struct mbuf *m;
 	int completed = 0;
 
 	VTNET_TXQ_LOCK_ASSERT(txq);
 
 #ifdef DEV_NETMAP
 	if (netmap_tx_irq(txq->vtntx_sc->vtnet_ifp, txq->vtntx_id)) {
 		virtqueue_disable_intr(vq); // XXX luigi
 		return 0; // XXX or 1 ?
 	}
 #endif /* DEV_NETMAP */
 
 	for (;;) {
 		txhdr = virtqueue_dequeue(vq, NULL);
 		if (txhdr == NULL)
 			break;
 
 		m = txhdr->vth_mbuf;
 		completed++;
 
 		txq->vtntx_stats.vtxs_opackets++;
 		txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
 		if ((m->m_flags & M_MCAST) != 0)
 			txq->vtntx_stats.vtxs_omcasts++;
 
 		m_freem(m);
 		uma_zfree(vtnet_tx_header_zone, txhdr);
 	}
 
 	/* Nothing outstanding, so there is nothing left to time out. */
 	if (virtqueue_empty(vq))
 		txq->vtntx_watchdog = 0;
 
 	return (completed);
 }
 
 /*
  * Transmit virtqueue interrupt handler (judging by its name and use of
  * the queue's interrupt controls). While the interface is running,
  * reclaims completed frames and restarts transmission under the Tx
  * queue lock.
  */
 static void
 vtnet_tx_vq_intr(void *xtxq)
 {
 	struct vtnet_txq *txq = xtxq;
 	struct vtnet_softc *sc = txq->vtntx_sc;
 	struct ifnet *ifp = sc->vtnet_ifp;
 
 	if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
 		/*
 		 * Ignore this interrupt. Either this is a spurious interrupt
 		 * or multiqueue without per-VQ MSIX so every queue needs to
 		 * be polled (a brain dead configuration we could try harder
 		 * to avoid).
 		 */
 		vtnet_txq_disable_intr(txq);
 		return;
 	}
 
 	VTNET_TXQ_LOCK(txq);
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 		vtnet_txq_eof(txq);
 		vtnet_txq_start(txq);
 	}
 
 	VTNET_TXQ_UNLOCK(txq);
 }
 
 /*
  * Restart transmission on every active Tx queue, taking each queue's
  * lock in turn. The core lock must be held.
  */
 static void
 vtnet_tx_start_all(struct vtnet_softc *sc)
 {
 	struct vtnet_txq *q;
 	int qidx;
 
 	VTNET_CORE_LOCK_ASSERT(sc);
 
 	qidx = 0;
 	while (qidx < sc->vtnet_act_vq_pairs) {
 		q = &sc->vtnet_txqs[qidx];
 
 		VTNET_TXQ_LOCK(q);
 		vtnet_txq_start(q);
 		VTNET_TXQ_UNLOCK(q);
 
 		qidx++;
 	}
 }
 
 #ifndef VTNET_LEGACY_TX
 /*
  * Discard every frame waiting on the buf_ring of each active Tx queue,
  * then flush the interface queue via if_qflush().
  */
 static void
 vtnet_qflush(struct ifnet *ifp)
 {
 	struct vtnet_softc *sc;
 	struct vtnet_txq *txq;
 	struct mbuf *m;
 	int qidx;
 
 	sc = ifp->if_softc;
 
 	for (qidx = 0; qidx < sc->vtnet_act_vq_pairs; qidx++) {
 		txq = &sc->vtnet_txqs[qidx];
 
 		VTNET_TXQ_LOCK(txq);
 		for (;;) {
 			m = buf_ring_dequeue_sc(txq->vtntx_br);
 			if (m == NULL)
 				break;
 			m_freem(m);
 		}
 		VTNET_TXQ_UNLOCK(txq);
 	}
 
 	if_qflush(ifp);
 }
 #endif
 
 /*
  * Advance the Tx watchdog of one queue. Returns 1 (after logging) when
  * the countdown reaches zero, otherwise 0. A watchdog value of 0 means
  * the timer is not armed.
  */
 static int
 vtnet_watchdog(struct vtnet_txq *txq)
 {
 	struct ifnet *ifp;
 	int expired;
 
 	ifp = txq->vtntx_sc->vtnet_ifp;
 	expired = 0;
 
 	VTNET_TXQ_LOCK(txq);
 
 	/*
 	 * Completed frames are only drained when the watchdog is on its
 	 * last tick. Draining may free enough descriptors to transmit
 	 * queued frames, so try to start the queue again. The restarted
 	 * timer is decremented right away below, but the timeout is
 	 * generous enough that this should not matter.
 	 */
 	if (txq->vtntx_watchdog == 1 && vtnet_txq_eof(txq) != 0)
 		vtnet_txq_start(txq);
 
 	if (txq->vtntx_watchdog != 0) {
 		txq->vtntx_watchdog--;
 		if (txq->vtntx_watchdog == 0)
 			expired = 1;
 	}
 
 	VTNET_TXQ_UNLOCK(txq);
 
 	if (expired == 0)
 		return (0);
 
 	if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
 	return (1);
 }
 
 /*
  * Sum the per-queue Rx and Tx statistics over all allocated queue pairs
  * into the caller supplied accumulators, which are zeroed first.
  *
  * Every counter that consumers read from the accumulators must be summed
  * here: vtnet_get_counter() reports vrxs_ierrors and vtxs_omcasts, so
  * both are accumulated along with the rest.
  */
 static void
 vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc,
     struct vtnet_txq_stats *txacc)
 {
 
 	bzero(rxacc, sizeof(struct vtnet_rxq_stats));
 	bzero(txacc, sizeof(struct vtnet_txq_stats));
 
 	for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) {
 		struct vtnet_rxq_stats *rxst;
 		struct vtnet_txq_stats *txst;
 
 		rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
 		rxacc->vrxs_ipackets += rxst->vrxs_ipackets;
 		rxacc->vrxs_ibytes += rxst->vrxs_ibytes;
 		rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops;
 		rxacc->vrxs_ierrors += rxst->vrxs_ierrors;
 		rxacc->vrxs_csum += rxst->vrxs_csum;
 		rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed;
 		rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled;
 
 		txst = &sc->vtnet_txqs[i].vtntx_stats;
 		txacc->vtxs_opackets += txst->vtxs_opackets;
 		txacc->vtxs_obytes += txst->vtxs_obytes;
 		txacc->vtxs_omcasts += txst->vtxs_omcasts;
 		txacc->vtxs_csum += txst->vtxs_csum;
 		txacc->vtxs_tso += txst->vtxs_tso;
 		txacc->vtxs_rescheduled += txst->vtxs_rescheduled;
 	}
 }
 
 /*
  * Interface counter callback. Counters tracked in the per-queue
  * statistics are answered from their sum; everything else is passed to
  * if_get_counter_default().
  */
 static uint64_t
 vtnet_get_counter(if_t ifp, ift_counter cnt)
 {
 	struct vtnet_rxq_stats rxtotal;
 	struct vtnet_txq_stats txtotal;
 	struct vtnet_softc *sc;
 	uint64_t value;
 
 	sc = if_getsoftc(ifp);
 	vtnet_accum_stats(sc, &rxtotal, &txtotal);
 
 	switch (cnt) {
 	case IFCOUNTER_IPACKETS:
 		value = rxtotal.vrxs_ipackets;
 		break;
 	case IFCOUNTER_IQDROPS:
 		value = rxtotal.vrxs_iqdrops;
 		break;
 	case IFCOUNTER_IERRORS:
 		value = rxtotal.vrxs_ierrors;
 		break;
 	case IFCOUNTER_OPACKETS:
 		value = txtotal.vtxs_opackets;
 		break;
 #ifndef VTNET_LEGACY_TX
 	case IFCOUNTER_OBYTES:
 		value = txtotal.vtxs_obytes;
 		break;
 	case IFCOUNTER_OMCASTS:
 		value = txtotal.vtxs_omcasts;
 		break;
 #endif
 	default:
 		value = if_get_counter_default(ifp, cnt);
 		break;
 	}
 
 	return (value);
 }
 
 /*
  * Periodic callout: run the watchdog of every active Tx queue. If any
  * queue timed out the interface is reinitialized; otherwise the callout
  * is rescheduled for hz ticks later. The core lock must be held.
  */
 static void
 vtnet_tick(void *xsc)
 {
 	struct vtnet_softc *sc;
 	struct ifnet *ifp;
 	int qidx, timedout;
 
 	sc = xsc;
 	ifp = sc->vtnet_ifp;
 
 	VTNET_CORE_LOCK_ASSERT(sc);
 
 	/* Every queue is checked, even after one has already timed out. */
 	timedout = 0;
 	for (qidx = 0; qidx < sc->vtnet_act_vq_pairs; qidx++) {
 		if (vtnet_watchdog(&sc->vtnet_txqs[qidx]) != 0)
 			timedout = 1;
 	}
 
 	if (timedout == 0) {
 		callout_schedule(&sc->vtnet_tick_ch, hz);
 		return;
 	}
 
 	/* Clear RUNNING so vtnet_init_locked() performs a full reinit. */
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	vtnet_init_locked(sc);
 }
 
 /*
  * Start one thread for the Rx and the Tx taskqueue of every allocated
  * queue pair. Failures are reported but otherwise ignored.
  */
 static void
 vtnet_start_taskqueues(struct vtnet_softc *sc)
 {
 	struct vtnet_rxq *rxq;
 	struct vtnet_txq *txq;
 	device_t dev;
 	int qidx;
 
 	dev = sc->vtnet_dev;
 
 	/*
 	 * Recovering from an error here is very difficult. We cannot
 	 * easily fail: if this happens during boot, freeing the
 	 * taskqueues that did start would hang because the scheduler
 	 * isn't up yet.
 	 *
 	 * Most drivers just ignore the return value - it only fails
 	 * with ENOMEM so an error is not likely.
 	 */
 	for (qidx = 0; qidx < sc->vtnet_max_vq_pairs; qidx++) {
 		rxq = &sc->vtnet_rxqs[qidx];
 		if (taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
 		    "%s rxq %d", device_get_nameunit(dev),
 		    rxq->vtnrx_id) != 0) {
 			device_printf(dev, "failed to start rx taskq %d\n",
 			    rxq->vtnrx_id);
 		}
 
 		txq = &sc->vtnet_txqs[qidx];
 		if (taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
 		    "%s txq %d", device_get_nameunit(dev),
 		    txq->vtntx_id) != 0) {
 			device_printf(dev, "failed to start tx taskq %d\n",
 			    txq->vtntx_id);
 		}
 	}
 }
 
 /*
  * Free the Rx and Tx taskqueue of every allocated queue pair.
  *
  * Each taskqueue pointer is cleared after it is freed so that a later
  * call (or a later NULL check such as the one in
  * vtnet_drain_taskqueues()) does not touch a freed taskqueue.
  */
 static void
 vtnet_free_taskqueues(struct vtnet_softc *sc)
 {
 	struct vtnet_rxq *rxq;
 	struct vtnet_txq *txq;
 	int i;
 
 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
 		rxq = &sc->vtnet_rxqs[i];
 		if (rxq->vtnrx_tq != NULL) {
 			taskqueue_free(rxq->vtnrx_tq);
 			/*
 			 * Clear the taskqueue pointer, not the virtqueue
 			 * pointer: the virtqueue is not owned by this
 			 * function.
 			 */
 			rxq->vtnrx_tq = NULL;
 		}
 
 		txq = &sc->vtnet_txqs[i];
 		if (txq->vtntx_tq != NULL) {
 			taskqueue_free(txq->vtntx_tq);
 			txq->vtntx_tq = NULL;
 		}
 	}
 }
 
 /*
  * Wait for the interrupt tasks (and, in non-legacy builds, the deferred
  * Tx task) of every allocated queue pair to finish. Queues without a
  * taskqueue are skipped.
  */
 static void
 vtnet_drain_taskqueues(struct vtnet_softc *sc)
 {
 	struct vtnet_rxq *rxq;
 	struct vtnet_txq *txq;
 	int qidx;
 
 	for (qidx = 0; qidx < sc->vtnet_max_vq_pairs; qidx++) {
 		rxq = &sc->vtnet_rxqs[qidx];
 		txq = &sc->vtnet_txqs[qidx];
 
 		if (rxq->vtnrx_tq != NULL) {
 			taskqueue_drain(rxq->vtnrx_tq,
 			    &rxq->vtnrx_intrtask);
 		}
 
 		if (txq->vtntx_tq == NULL)
 			continue;
 
 		taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
 #ifndef VTNET_LEGACY_TX
 		taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
 #endif
 	}
 }
 
 /*
  * Release the mbufs held by the Rx and Tx queues of every active queue
  * pair. Nothing is done when netmap is running natively on the
  * interface.
  */
 static void
 vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
 {
 	int qidx;
 
 #ifdef DEV_NETMAP
 	if (nm_native_on(NA(sc->vtnet_ifp)))
 		return;
 #endif /* DEV_NETMAP */
 
 	for (qidx = 0; qidx < sc->vtnet_act_vq_pairs; qidx++) {
 		vtnet_rxq_free_mbufs(&sc->vtnet_rxqs[qidx]);
 		vtnet_txq_free_mbufs(&sc->vtnet_txqs[qidx]);
 	}
 }
 
 /*
  * Synchronize with any queue handler that may still be running by
  * briefly acquiring every per-queue lock.
  */
 static void
 vtnet_stop_rendezvous(struct vtnet_softc *sc)
 {
 	struct vtnet_rxq *rxq;
 	struct vtnet_txq *txq;
 	int qidx;
 
 	/*
 	 * Taking and dropping each per-queue mutex lets us know the stop
 	 * state is visible. Covering only the active queues should be
 	 * enough, but covering all of them costs little extra. Note the
 	 * core mutex is held here as well.
 	 */
 	qidx = 0;
 	while (qidx < sc->vtnet_max_vq_pairs) {
 		rxq = &sc->vtnet_rxqs[qidx];
 		VTNET_RXQ_LOCK(rxq);
 		VTNET_RXQ_UNLOCK(rxq);
 
 		txq = &sc->vtnet_txqs[qidx];
 		VTNET_TXQ_LOCK(txq);
 		VTNET_TXQ_UNLOCK(txq);
 
 		qidx++;
 	}
 }
 
 /*
  * Bring the interface down: clear the running flag and link state, stop
  * the tick callout, reset the device and release any mbufs still held
  * by the virtqueues. The core lock must be held.
  */
 static void
 vtnet_stop(struct vtnet_softc *sc)
 {
 	struct ifnet *ifp;
 	device_t dev;
 
 	ifp = sc->vtnet_ifp;
 	dev = sc->vtnet_dev;
 
 	VTNET_CORE_LOCK_ASSERT(sc);
 
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	sc->vtnet_link_active = 0;
 	callout_stop(&sc->vtnet_tick_ch);
 
 	/* Disabling interrupts is only advisory. */
 	vtnet_disable_interrupts(sc);
 
 	/*
 	 * Stopping the host adapter returns it to the pre-initialized
 	 * state; it generates no interrupts until it has been
 	 * reinitialized.
 	 */
 	virtio_stop(dev);
 	vtnet_stop_rendezvous(sc);
 
 	/* Release whatever mbufs remain in the virtqueues. */
 	vtnet_drain_rxtx_queues(sc);
 }
 
 /*
  * Renegotiate features with the host, dropping the receive-side features
  * that the interface supports but currently has disabled. Returns the
  * result of virtio_reinit().
  */
 static int
 vtnet_virtio_reinit(struct vtnet_softc *sc)
 {
 	struct ifnet *ifp;
 	device_t dev;
 	uint64_t features;
 	int error, mask;
 
 	ifp = sc->vtnet_ifp;
 	dev = sc->vtnet_dev;
 	features = sc->vtnet_features;
 
 	/* Receive checksum capabilities compiled into this kernel. */
 	mask = 0;
 #if defined(INET)
 	mask |= IFCAP_RXCSUM;
 #endif
 #if defined (INET6)
 	mask |= IFCAP_RXCSUM_IPV6;
 #endif
 
 	/*
 	 * Only receive features are removed from the negotiation.
 	 * Transmit features are disabled purely on our side through
 	 * if_capenable and if_hwassist.
 	 */
 
 	/*
 	 * VirtIO does not distinguish IPv4 from IPv6 checksum offload,
 	 * so it is negotiated only when both are enabled.
 	 */
 	if ((ifp->if_capabilities & mask) != 0 &&
 	    (ifp->if_capenable & mask) != mask)
 		features &= ~VIRTIO_NET_F_GUEST_CSUM;
 
 	if ((ifp->if_capabilities & IFCAP_LRO) != 0 &&
 	    (ifp->if_capenable & IFCAP_LRO) == 0)
 		features &= ~VTNET_LRO_FEATURES;
 
 	if ((ifp->if_capabilities & IFCAP_VLAN_HWFILTER) != 0 &&
 	    (ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
 		features &= ~VIRTIO_NET_F_CTRL_VLAN;
 
 	error = virtio_reinit(dev, features);
 	if (error != 0)
 		device_printf(dev, "virtio reinit error %d\n", error);
 
 	return (error);
 }
 
 /*
  * Reprogram the host receive filters: promiscuous/all-multicast modes
  * and the MAC table when CTRL_RX was negotiated, and the VLAN table when
  * hardware VLAN filtering is enabled.
  */
 static void
 vtnet_init_rx_filters(struct vtnet_softc *sc)
 {
 	struct ifnet *ifp;
 
 	ifp = sc->vtnet_ifp;
 
 	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) != 0) {
 		/* Promiscuous and all-multicast modes first ... */
 		vtnet_rx_filter(sc);
 		/* ... then the filtered MAC addresses. */
 		vtnet_rx_filter_mac(sc);
 	}
 
 	if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) != 0)
 		vtnet_rx_filter_vlan(sc);
 }
 
 /*
  * Select the receive cluster size and fill the Rx virtqueue of every
  * active queue pair. Returns 0 on success or the error reported by
  * vtnet_rxq_populate().
  */
 static int
 vtnet_init_rx_queues(struct vtnet_softc *sc)
 {
 	struct vtnet_rxq *rxq;
 	device_t dev;
 	int clsize, error, qidx;
 
 	dev = sc->vtnet_dev;
 
 	/*
 	 * Default to the standard 2K clusters unless a new cluster size
 	 * is pending (set via a MTU change); a pending size is consumed
 	 * here.
 	 *
 	 * BMV: It might make sense to use page sized clusters as
 	 * the default (depending on the features negotiated).
 	 */
 	clsize = MCLBYTES;
 	if (sc->vtnet_rx_new_clsize != 0) {
 		clsize = sc->vtnet_rx_new_clsize;
 		sc->vtnet_rx_new_clsize = 0;
 	}
 
 	sc->vtnet_rx_clsize = clsize;
 	sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize);
 
 	KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS ||
 	    sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
 	    ("%s: too many rx mbufs %d for %d segments", __func__,
 	    sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
 
 #ifdef DEV_NETMAP
 	if (vtnet_netmap_init_rx_buffers(sc))
 		return 0;
 #endif /* DEV_NETMAP */
 
 	for (qidx = 0; qidx < sc->vtnet_act_vq_pairs; qidx++) {
 		rxq = &sc->vtnet_rxqs[qidx];
 
 		/* The lock is held only to satisfy asserts. */
 		VTNET_RXQ_LOCK(rxq);
 		error = vtnet_rxq_populate(rxq);
 		VTNET_RXQ_UNLOCK(rxq);
 
 		if (error != 0) {
 			device_printf(dev,
 			    "cannot allocate mbufs for Rx queue %d\n", qidx);
 			return (error);
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Disarm the watchdog of every active Tx queue. Always returns 0.
  */
 static int
 vtnet_init_tx_queues(struct vtnet_softc *sc)
 {
 	int qidx;
 
 	for (qidx = 0; qidx < sc->vtnet_act_vq_pairs; qidx++)
 		sc->vtnet_txqs[qidx].vtntx_watchdog = 0;
 
 	return (0);
 }
 
 /*
  * Initialize the Rx queues and then the Tx queues, stopping at the first
  * failure. Returns 0 on success or the first error encountered.
  */
 static int
 vtnet_init_rxtx_queues(struct vtnet_softc *sc)
 {
 	int error;
 
 	error = vtnet_init_rx_queues(sc);
 	if (error == 0)
 		error = vtnet_init_tx_queues(sc);
 
 	return (error);
 }
 
 /*
  * Decide how many queue pairs are active. Without multiqueue exactly
  * one pair is used; with it, the host is asked to enable the maximum,
  * falling back to one pair if the request fails.
  */
 static void
 vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
 {
 	device_t dev;
 	int npairs;
 
 	dev = sc->vtnet_dev;
 
 	if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) != 0) {
 		/* BMV: Just use the maximum configured for now. */
 		npairs = sc->vtnet_max_vq_pairs;
 
 		if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
 			device_printf(dev,
 			    "cannot set active queue pairs to %d\n", npairs);
 			npairs = 1;
 		}
 	} else {
 		MPASS(sc->vtnet_max_vq_pairs == 1);
 		npairs = 1;
 	}
 
 	sc->vtnet_act_vq_pairs = npairs;
 }
 
 /*
  * Reprogram the device after a reset: MAC address, active queue pairs,
  * transmit offload flags, receive filters and the Rx/Tx queues. On
  * success interrupts are enabled and the interface is marked running.
  * Returns 0 or the error from vtnet_init_rxtx_queues().
  */
 static int
 vtnet_reinit(struct vtnet_softc *sc)
 {
 	struct ifnet *ifp;
 	int error;
 
 	ifp = sc->vtnet_ifp;
 
 	/* The interface's current link-level address becomes the MAC. */
 	bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
 	vtnet_set_hwaddr(sc);
 
 	vtnet_set_active_vq_pairs(sc);
 
 	/* Rebuild if_hwassist from the enabled transmit capabilities. */
 	ifp->if_hwassist = 0;
 	if ((ifp->if_capenable & IFCAP_TXCSUM) != 0)
 		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
 	if ((ifp->if_capenable & IFCAP_TXCSUM_IPV6) != 0)
 		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6;
 	if ((ifp->if_capenable & IFCAP_TSO4) != 0)
 		ifp->if_hwassist |= CSUM_IP_TSO;
 	if ((ifp->if_capenable & IFCAP_TSO6) != 0)
 		ifp->if_hwassist |= CSUM_IP6_TSO;
 
 	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) != 0)
 		vtnet_init_rx_filters(sc);
 
 	error = vtnet_init_rxtx_queues(sc);
 	if (error != 0)
 		return (error);
 
 	vtnet_enable_interrupts(sc);
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 
 	return (0);
 }
 
 /*
  * Initialize the interface unless it is already running. The device is
  * stopped, renegotiated with the host and reprogrammed; if either step
  * fails the device is stopped again. The core lock must be held.
  */
 static void
 vtnet_init_locked(struct vtnet_softc *sc)
 {
 	struct ifnet *ifp;
 	device_t dev;
 
 	ifp = sc->vtnet_ifp;
 	dev = sc->vtnet_dev;
 
 	VTNET_CORE_LOCK_ASSERT(sc);
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 		return;
 
 	vtnet_stop(sc);
 
 	/* Reinitialize with the host, then reprogram the device. */
 	if (vtnet_virtio_reinit(sc) != 0 || vtnet_reinit(sc) != 0) {
 		vtnet_stop(sc);
 		return;
 	}
 
 	virtio_reinit_complete(dev);
 
 	vtnet_update_link_status(sc);
 	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
 }
 
 /*
  * Unlocked entry point for initializing the interface: takes the core
  * lock around vtnet_init_locked(). With netmap compiled in, the netmap
  * attach is retried first if no adapter is present.
  */
 static void
 vtnet_init(void *xsc)
 {
 	struct vtnet_softc *softc = xsc;
 
 #ifdef DEV_NETMAP
 	if (!NA(softc->vtnet_ifp)) {
 		D("try to attach again");
 		vtnet_netmap_attach(softc);
 	}
 #endif /* DEV_NETMAP */
 
 	VTNET_CORE_LOCK(softc);
 	vtnet_init_locked(softc);
 	VTNET_CORE_UNLOCK(softc);
 }
 
 /*
  * Sanity check performed when tearing down the control virtqueue; this
  * only asserts and releases nothing itself.
  */
 static void
 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
 {
 	struct virtqueue *ctrl_vq;
 
 	ctrl_vq = sc->vtnet_ctrl_vq;
 
 	/*
 	 * Commands on the control virtqueue are polled to completion,
 	 * so it should already be empty.
 	 */
 	KASSERT(virtqueue_empty(ctrl_vq),
 	    ("%s: ctrl vq %p not empty", __func__, ctrl_vq));
 }
 
 /*
  * Submit one command on the control virtqueue and poll until the host
  * has consumed it. The core lock must be held.
  *
  * Nothing is submitted, and no error is returned, when the virtqueue is
  * not empty or the enqueue fails. The callers in this file preset their
  * ack byte to VIRTIO_NET_ERR, so they see that case as a failure.
  */
 static void
 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
     struct sglist *sg, int readable, int writable)
 {
 	struct virtqueue *ctrl_vq;
 
 	ctrl_vq = sc->vtnet_ctrl_vq;
 
 	VTNET_CORE_LOCK_ASSERT(sc);
 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
 	    ("%s: CTRL_VQ feature not negotiated", __func__));
 
 	if (!virtqueue_empty(ctrl_vq) ||
 	    virtqueue_enqueue(ctrl_vq, cookie, sg, readable, writable) != 0)
 		return;
 
 	/*
 	 * The command has most likely completed by the time the notify
 	 * returns, but poll for the response regardless.
 	 */
 	virtqueue_notify(ctrl_vq);
 	virtqueue_poll(ctrl_vq, NULL);
 }
 
 /*
  * Ask the host, through the control virtqueue, to set the MAC address
  * to 'hwaddr'. Returns 0 when the host acknowledges with VIRTIO_NET_OK
  * and EIO otherwise.
  */
 static int
 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
 {
 	struct virtio_net_ctrl_hdr hdr __aligned(2);
 	struct sglist_seg segs[3];
 	struct sglist sg;
 	uint8_t ack;
 	int error;
 
 	hdr.class = VIRTIO_NET_CTRL_MAC;
 	hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
 	/* Assume failure until the host overwrites the ack byte. */
 	ack = VIRTIO_NET_ERR;
 
 	/* Segments: header, address (both readable), ack (writable). */
 	sglist_init(&sg, 3, segs);
 	error = 0;
 	error |= sglist_append(&sg, &hdr, sizeof(hdr));
 	error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN);
 	error |= sglist_append(&sg, &ack, sizeof(ack));
 	KASSERT(error == 0 && sg.sg_nseg == 3,
 	    ("%s: error %d adding set MAC msg to sglist", __func__, error));
 
 	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
 
 	if (ack != VIRTIO_NET_OK)
 		return (EIO);
 	return (0);
 }
 
 /*
  * Ask the host, through the control virtqueue, to enable 'npairs'
  * virtqueue pairs. Returns 0 when the host acknowledges with
  * VIRTIO_NET_OK and EIO otherwise.
  */
 static int
 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
 {
 	struct sglist_seg segs[3];
 	struct sglist sg;
 	/* NOTE(review): purpose of the pad bytes is not stated here. */
 	struct {
 		struct virtio_net_ctrl_hdr hdr;
 		uint8_t pad1;
 		struct virtio_net_ctrl_mq mq;
 		uint8_t pad2;
 		uint8_t ack;
 	} s __aligned(2);
 	int error;
 
 	s.hdr.class = VIRTIO_NET_CTRL_MQ;
 	s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
 	s.mq.virtqueue_pairs = npairs;
 	/* Assume failure until the host overwrites the ack byte. */
 	s.ack = VIRTIO_NET_ERR;
 
 	/* Segments: header, payload (both readable), ack (writable). */
 	sglist_init(&sg, 3, segs);
 	error = 0;
 	error |= sglist_append(&sg, &s.hdr, sizeof(s.hdr));
 	error |= sglist_append(&sg, &s.mq, sizeof(s.mq));
 	error |= sglist_append(&sg, &s.ack, sizeof(s.ack));
 	KASSERT(error == 0 && sg.sg_nseg == 3,
 	    ("%s: error %d adding MQ message to sglist", __func__, error));
 
 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
 
 	if (s.ack != VIRTIO_NET_OK)
 		return (EIO);
 	return (0);
 }
 
 static int
 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
 {
 	struct sglist_seg segs[3];
 	struct sglist sg;
 	struct {
 		struct virtio_net_ctrl_hdr hdr;
 		uint8_t pad1;
 		uint8_t onoff;
 		uint8_t pad2;
 		uint8_t ack;
 	} s __aligned(2);
 	int error;
 
 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
 	    ("%s: CTRL_RX feature not negotiated", __func__));
 
 	s.hdr.class = VIRTIO_NET_CTRL_RX;
 	s.hdr.cmd = cmd;
 	s.onoff = !!on;
 	s.ack = VIRTIO_NET_ERR;
 
 	sglist_init(&sg, 3, segs);
 	error = 0;
 	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
 	error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
 	KASSERT(error == 0 && sg.sg_nseg == 3,
 	    ("%s: error %d adding Rx message to sglist", __func__, error));
 
 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
 
 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
 }
 
 static int
 vtnet_set_promisc(struct vtnet_softc *sc, int on)
 {
 
 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
 }
 
 static int
 vtnet_set_allmulti(struct vtnet_softc *sc, int on)
 {
 
 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
 }
 
 /*
  * For backwards compatibility the device starts out in promiscuous
  * mode. Try to turn that off at attach time; when it cannot be turned
  * off (no CTRL_RX support, or the command fails) record the fact by
  * setting IFF_PROMISC on the interface.
  */
 static void
 vtnet_attach_disable_promisc(struct vtnet_softc *sc)
 {
 	struct ifnet *ifp;
 
 	ifp = sc->vtnet_ifp;
 
 	VTNET_CORE_LOCK(sc);
 	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) != 0) {
 		if (vtnet_set_promisc(sc, 0) != 0) {
 			ifp->if_flags |= IFF_PROMISC;
 			device_printf(sc->vtnet_dev,
 			    "cannot disable default promiscuous mode\n");
 		}
 	} else
 		ifp->if_flags |= IFF_PROMISC;
 	VTNET_CORE_UNLOCK(sc);
 }
 
 /*
  * Push the interface's IFF_PROMISC and IFF_ALLMULTI settings to the
  * host, logging any command that fails. The core lock must be held.
  */
 static void
 vtnet_rx_filter(struct vtnet_softc *sc)
 {
 	struct ifnet *ifp;
 	device_t dev;
 
 	ifp = sc->vtnet_ifp;
 	dev = sc->vtnet_dev;
 
 	VTNET_CORE_LOCK_ASSERT(sc);
 
 	if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) {
 		device_printf(dev, "cannot %s promiscuous mode\n",
 		    ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
 	}
 
 	if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) {
 		device_printf(dev, "cannot %s all-multicast mode\n",
 		    ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
 	}
 }
 
 /*
  * Build the unicast and multicast MAC filter tables from the interface's
  * address lists and send them to the host with
  * VIRTIO_NET_CTRL_MAC_TABLE_SET. If either list holds more than
  * VTNET_MAX_MAC_ENTRIES addresses, the corresponding table is sent
  * empty and promiscuous (unicast) or all-multicast mode is enabled
  * instead. Requires the core lock and the CTRL_RX feature.
  */
 static void
 vtnet_rx_filter_mac(struct vtnet_softc *sc)
 {
 	struct virtio_net_ctrl_hdr hdr __aligned(2);
 	struct vtnet_mac_filter *filter;
 	struct sglist_seg segs[4];
 	struct sglist sg;
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct ifmultiaddr *ifma;
 	int ucnt, mcnt, promisc, allmulti, error;
 	uint8_t ack;
 
 	ifp = sc->vtnet_ifp;
 	filter = sc->vtnet_mac_filter;
 	ucnt = 0;
 	mcnt = 0;
 	promisc = 0;
 	allmulti = 0;
 
 	VTNET_CORE_LOCK_ASSERT(sc);
 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
 	    ("%s: CTRL_RX feature not negotiated", __func__));
 
 	/* Unicast MAC addresses: */
 	if_addr_rlock(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		/* Skip non link-level addresses and our own MAC address. */
 		if (ifa->ifa_addr->sa_family != AF_LINK)
 			continue;
 		else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
 		    sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
 			continue;
 		else if (ucnt == VTNET_MAX_MAC_ENTRIES) {
 			/* Table is full: fall back to promiscuous mode. */
 			promisc = 1;
 			break;
 		}
 
 		bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
 		    &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
 		ucnt++;
 	}
 	if_addr_runlock(ifp);
 
 	if (promisc != 0) {
 		filter->vmf_unicast.nentries = 0;
 		if_printf(ifp, "more than %d MAC addresses assigned, "
 		    "falling back to promiscuous mode\n",
 		    VTNET_MAX_MAC_ENTRIES);
 	} else
 		filter->vmf_unicast.nentries = ucnt;
 
 	/* Multicast MAC addresses: */
 	if_maddr_rlock(ifp);
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family != AF_LINK)
 			continue;
 		else if (mcnt == VTNET_MAX_MAC_ENTRIES) {
 			/* Table is full: fall back to all-multicast mode. */
 			allmulti = 1;
 			break;
 		}
 
 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
 		    &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
 		mcnt++;
 	}
 	if_maddr_runlock(ifp);
 
 	if (allmulti != 0) {
 		filter->vmf_multicast.nentries = 0;
 		if_printf(ifp, "more than %d multicast MAC addresses "
 		    "assigned, falling back to all-multicast mode\n",
 		    VTNET_MAX_MAC_ENTRIES);
 	} else
 		filter->vmf_multicast.nentries = mcnt;
 
 	/* Both tables overflowed, so there is no table worth sending. */
 	if (promisc != 0 && allmulti != 0)
 		goto out;
 
 	hdr.class = VIRTIO_NET_CTRL_MAC;
 	hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
 	/* Assume failure until the host overwrites the ack byte. */
 	ack = VIRTIO_NET_ERR;
 
 	/*
 	 * Segments: header, unicast table, multicast table, ack. Each
 	 * table is sent as its 32-bit entry count followed by only the
 	 * entries in use.
 	 */
 	sglist_init(&sg, 4, segs);
 	error = 0;
 	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
 	error |= sglist_append(&sg, &filter->vmf_unicast,
 	    sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
 	error |= sglist_append(&sg, &filter->vmf_multicast,
 	    sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
 	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
 	KASSERT(error == 0 && sg.sg_nseg == 4,
 	    ("%s: error %d adding MAC filter msg to sglist", __func__, error));
 
 	/* All segments but the last (the ack) are readable by the host. */
 	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
 
 	if (ack != VIRTIO_NET_OK)
 		if_printf(ifp, "error setting host MAC filter table\n");
 
 out:
 	/* Enable the fallback modes for whichever table overflowed. */
 	if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
 		if_printf(ifp, "cannot enable promiscuous mode\n");
 	if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
 		if_printf(ifp, "cannot enable all-multicast mode\n");
 }
 
 /*
  * Add ('add' != 0) or delete a VLAN tag in the host filter table through
  * the control virtqueue. Returns 0 when the host acknowledges with
  * VIRTIO_NET_OK and EIO otherwise.
  */
 static int
 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
 {
 	struct sglist_seg segs[3];
 	struct sglist sg;
 	/* NOTE(review): purpose of the pad bytes is not stated here. */
 	struct {
 		struct virtio_net_ctrl_hdr hdr;
 		uint8_t pad1;
 		uint16_t tag;
 		uint8_t pad2;
 		uint8_t ack;
 	} s __aligned(2);
 	int error;
 
 	s.hdr.class = VIRTIO_NET_CTRL_VLAN;
 	if (add)
 		s.hdr.cmd = VIRTIO_NET_CTRL_VLAN_ADD;
 	else
 		s.hdr.cmd = VIRTIO_NET_CTRL_VLAN_DEL;
 	s.tag = tag;
 	/* Assume failure until the host overwrites the ack byte. */
 	s.ack = VIRTIO_NET_ERR;
 
 	/* Segments: header, tag (both readable), ack (writable). */
 	sglist_init(&sg, 3, segs);
 	error = 0;
 	error |= sglist_append(&sg, &s.hdr, sizeof(s.hdr));
 	error |= sglist_append(&sg, &s.tag, sizeof(s.tag));
 	error |= sglist_append(&sg, &s.ack, sizeof(s.ack));
 	KASSERT(error == 0 && sg.sg_nseg == 3,
 	    ("%s: error %d adding VLAN message to sglist", __func__, error));
 
 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
 
 	if (s.ack != VIRTIO_NET_OK)
 		return (EIO);
 	return (0);
 }
 
 /*
  * Reprogram the host VLAN filter table from the driver's shadow bitmap
  * (sc->vtnet_vlan_filter), logging each tag that cannot be enabled.
  * Requires the core lock and the VLAN_FILTER feature.
  */
 static void
 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
 {
 	uint32_t w;
 	uint16_t tag;
 	int i, bit;
 
 	VTNET_CORE_LOCK_ASSERT(sc);
 	KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
 	    ("%s: VLAN_FILTER feature not negotiated", __func__));
 
 	/* Enable the filter for each configured VLAN. */
 	for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
 		w = sc->vtnet_vlan_filter[i];
 
 		/* Peel off the lowest set bit until the word is empty. */
 		while ((bit = ffs(w) - 1) != -1) {
 			/*
 			 * Shift an unsigned one: bit may be 31, and
 			 * shifting a signed 1 that far is undefined.
 			 */
 			w &= ~(1U << bit);
 			tag = sizeof(w) * CHAR_BIT * i + bit;
 
 			if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
 				device_printf(sc->vtnet_dev,
 				    "cannot enable VLAN %d filter\n", tag);
 			}
 		}
 	}
 }
 
 /*
  * Record the addition or removal of VLAN 'tag' in the driver's shadow
  * bitmap and, when hardware VLAN filtering is enabled, forward the
  * change to the host. Tags outside 1..4095 are ignored. Takes the core
  * lock.
  */
 static void
 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
 {
 	struct ifnet *ifp;
 	int idx, bit;
 
 	if (tag == 0 || tag > 4095)
 		return;
 
 	ifp = sc->vtnet_ifp;
 	/* 32 tags per bitmap word: word index and bit within the word. */
 	idx = (tag >> 5) & 0x7F;
 	bit = tag & 0x1F;
 
 	VTNET_CORE_LOCK(sc);
 
 	/*
 	 * Shift an unsigned one: bit may be 31, and shifting a signed 1
 	 * that far is undefined.
 	 */
 	if (add)
 		sc->vtnet_vlan_filter[idx] |= (1U << bit);
 	else
 		sc->vtnet_vlan_filter[idx] &= ~(1U << bit);
 
 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
 	    vtnet_exec_vlan_filter(sc, add, tag) != 0) {
 		device_printf(sc->vtnet_dev,
 		    "cannot %s VLAN %d %s the host filter table\n",
 		    add ? "add" : "remove", tag, add ? "to" : "from");
 	}
 
 	VTNET_CORE_UNLOCK(sc);
 }
 
 /*
  * VLAN registration hook: add 'tag' to the filter when the event is for
  * the interface owned by this softc ('arg').
  */
 static void
 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
 {
 
 	if (ifp->if_softc == arg)
 		vtnet_update_vlan_filter(arg, 1, tag);
 }
 
 /*
  * VLAN unregistration hook: remove 'tag' from the filter when the event
  * is for the interface owned by this softc ('arg').
  */
 static void
 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
 {
 
 	if (ifp->if_softc == arg)
 		vtnet_update_vlan_filter(arg, 0, tag);
 }
 
 /*
  * Report whether the link is up (non-zero) or down (zero). Without the
  * link state capability the link is assumed to be up; otherwise the
  * status field of the device configuration is read.
  */
 static int
 vtnet_is_link_up(struct vtnet_softc *sc)
 {
 	struct ifnet *ifp;
 	device_t dev;
 	uint16_t status;
 
 	ifp = sc->vtnet_ifp;
 	dev = sc->vtnet_dev;
 
 	if ((ifp->if_capabilities & IFCAP_LINKSTATE) != 0) {
 		status = virtio_read_dev_config_2(dev,
 		    offsetof(struct virtio_net_config, status));
 	} else
 		status = VIRTIO_NET_S_LINK_UP;
 
 	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
 }
 
 /*
  * Compare the current link state with the cached one and, if it has
  * changed, update the cache and notify the network stack. The core lock
  * must be held.
  */
 static void
 vtnet_update_link_status(struct vtnet_softc *sc)
 {
 	struct ifnet *ifp;
 	int up;
 
 	ifp = sc->vtnet_ifp;
 
 	VTNET_CORE_LOCK_ASSERT(sc);
 	up = vtnet_is_link_up(sc);
 
 	/* Only a transition triggers a notification. */
 	if (up != 0) {
 		if (sc->vtnet_link_active == 0) {
 			sc->vtnet_link_active = 1;
 			if_link_state_change(ifp, LINK_STATE_UP);
 		}
 	} else if (sc->vtnet_link_active != 0) {
 		sc->vtnet_link_active = 0;
 		if_link_state_change(ifp, LINK_STATE_DOWN);
 	}
 }
 
 /*
  * Media change callback. Only Ethernet media is accepted; nothing is
  * reconfigured. Returns 0, or EINVAL for any other media type.
  */
 static int
 vtnet_ifmedia_upd(struct ifnet *ifp)
 {
 	struct vtnet_softc *sc;
 
 	sc = ifp->if_softc;
 
 	if (IFM_TYPE(sc->vtnet_media.ifm_media) == IFM_ETHER)
 		return (0);
 
 	return (EINVAL);
 }
 
 /*
  * Media status callback: report Ethernet media, marked active with
  * VTNET_MEDIATYPE when the link is up and IFM_NONE otherwise.
  */
 static void
 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
 	struct vtnet_softc *sc;
 
 	sc = ifp->if_softc;
 
 	ifmr->ifm_status = IFM_AVALID;
 	ifmr->ifm_active = IFM_ETHER;
 
 	VTNET_CORE_LOCK(sc);
 	if (vtnet_is_link_up(sc) == 0)
 		ifmr->ifm_active |= IFM_NONE;
 	else {
 		ifmr->ifm_status |= IFM_ACTIVE;
 		ifmr->ifm_active |= VTNET_MEDIATYPE;
 	}
 	VTNET_CORE_UNLOCK(sc);
 }
 
 /*
  * Push sc->vtnet_hwaddr to the device: through the control virtqueue
  * when CTRL_MAC is available, otherwise byte by byte into the device
  * configuration when the MAC feature is available. With neither, nothing
  * is done.
  */
 static void
 vtnet_set_hwaddr(struct vtnet_softc *sc)
 {
 	device_t dev;
 	int off;
 
 	dev = sc->vtnet_dev;
 
 	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) != 0) {
 		if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
 			device_printf(dev, "unable to set MAC address\n");
 		return;
 	}
 
 	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0)
 		return;
 
 	for (off = 0; off < ETHER_ADDR_LEN; off++) {
 		virtio_write_dev_config_1(dev,
 		    offsetof(struct virtio_net_config, mac) + off,
 		    sc->vtnet_hwaddr[off]);
 	}
 }
 
 /*
  * Fill sc->vtnet_hwaddr: read it from the device configuration when the
  * MAC feature is available, otherwise generate a random address and
  * hand it to vtnet_set_hwaddr().
  */
 static void
 vtnet_get_hwaddr(struct vtnet_softc *sc)
 {
 	device_t dev;
 	int off;
 
 	dev = sc->vtnet_dev;
 
 	if ((sc->vtnet_flags & VTNET_FLAG_MAC) != 0) {
 		for (off = 0; off < ETHER_ADDR_LEN; off++) {
 			sc->vtnet_hwaddr[off] = virtio_read_dev_config_1(dev,
 			    offsetof(struct virtio_net_config, mac) + off);
 		}
 		return;
 	}
 
 	/*
 	 * No address from the host: make up a random locally
 	 * administered unicast one.
 	 *
 	 * Keeping the same MAC address across reboots would be nice,
 	 * but every host currently available seems to support the MAC
 	 * feature, so this isn't too important.
 	 */
 	sc->vtnet_hwaddr[0] = 0xB2;
 	arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
 	vtnet_set_hwaddr(sc);
 }
 
 /*
  * Move the 802.1Q tag of a received frame out of the packet data and
  * into the mbuf packet header, flagging the mbuf with M_VLANTAG.
  */
 static void
 vtnet_vlan_tag_remove(struct mbuf *m)
 {
 	struct ether_vlan_header *evh;
 	char *start;
 
 	evh = mtod(m, struct ether_vlan_header *);
 	start = (char *) evh;
 
 	m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
 	m->m_flags |= M_VLANTAG;
 
 	/*
 	 * Strip the 802.1Q header: slide the leading bytes of the
 	 * Ethernet header forward by the encapsulation length, then
 	 * trim that many bytes from the front of the mbuf.
 	 */
 	bcopy(start, start + ETHER_VLAN_ENCAP_LEN,
 	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
 	m_adj(m, ETHER_VLAN_ENCAP_LEN);
 }
 
 /*
  * Load the "rx_process_limit" tunable into the softc. A negative value
  * is stored as INT_MAX.
  */
 static void
 vtnet_set_rx_process_limit(struct vtnet_softc *sc)
 {
 	int value;
 
 	value = vtnet_tunable_int(sc, "rx_process_limit",
 	    vtnet_rx_process_limit);
 
 	sc->vtnet_rx_process_limit = (value < 0) ? INT_MAX : value;
 }
 
 /*
  * Compute the Tx interrupt threshold: a quarter of the Tx virtqueue
  * size, raised to vtnet_tx_nsegs when indirect descriptors are not in
  * use.
  */
 static void
 vtnet_set_tx_intr_threshold(struct vtnet_softc *sc)
 {
 	int vqsize, threshold;
 
 	/*
 	 * The Tx interrupt stays disabled until the queue free count
 	 * drops below the threshold. Completed frames are drained from
 	 * the Tx virtqueue before new frames are transmitted and in the
 	 * watchdog callout, which greatly reduces the frequency of Tx
 	 * interrupts at the cost of not freeing mbufs as quickly as
 	 * they otherwise would be.
 	 *
 	 * N.B. We assume all the Tx queues are the same size.
 	 */
 	vqsize = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq);
 	threshold = vqsize / 4;
 
 	/*
 	 * Without indirect descriptors, keep room for the largest
 	 * number of segments we handle.
 	 */
 	if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0) {
 		if (threshold < sc->vtnet_tx_nsegs)
 			threshold = sc->vtnet_tx_nsegs;
 	}
 
 	sc->vtnet_tx_intr_thresh = threshold;
 }
 
 /*
  * Create the "rxq<N>" sysctl node for one Rx queue under 'child' and
  * attach a read-only entry for each of the queue's statistics.
  */
 static void
 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
     struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
 {
 	struct vtnet_rxq_stats *st;
 	struct sysctl_oid_list *qlist;
 	struct sysctl_oid *qnode;
 	char name[16];
 
 	snprintf(name, sizeof(name), "rxq%d", rxq->vtnrx_id);
 	qnode = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, name,
 	    CTLFLAG_RD, NULL, "Receive Queue");
 	qlist = SYSCTL_CHILDREN(qnode);
 
 	st = &rxq->vtnrx_stats;
 
 	SYSCTL_ADD_UQUAD(ctx, qlist, OID_AUTO, "ipackets", CTLFLAG_RD,
 	    &st->vrxs_ipackets, "Receive packets");
 	SYSCTL_ADD_UQUAD(ctx, qlist, OID_AUTO, "ibytes", CTLFLAG_RD,
 	    &st->vrxs_ibytes, "Receive bytes");
 	SYSCTL_ADD_UQUAD(ctx, qlist, OID_AUTO, "iqdrops", CTLFLAG_RD,
 	    &st->vrxs_iqdrops, "Receive drops");
 	SYSCTL_ADD_UQUAD(ctx, qlist, OID_AUTO, "ierrors", CTLFLAG_RD,
 	    &st->vrxs_ierrors, "Receive errors");
 	SYSCTL_ADD_UQUAD(ctx, qlist, OID_AUTO, "csum", CTLFLAG_RD,
 	    &st->vrxs_csum, "Receive checksum offloaded");
 	SYSCTL_ADD_UQUAD(ctx, qlist, OID_AUTO, "csum_failed", CTLFLAG_RD,
 	    &st->vrxs_csum_failed, "Receive checksum offload failed");
 	SYSCTL_ADD_UQUAD(ctx, qlist, OID_AUTO, "rescheduled", CTLFLAG_RD,
 	    &st->vrxs_rescheduled,
 	    "Receive interrupt handler rescheduled");
 }
 
 /*
  * Create the "txq<N>" sysctl node for one Tx queue under 'child' and
  * attach a read-only entry for each of the queue's statistics.
  */
 static void
 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
     struct sysctl_oid_list *child, struct vtnet_txq *txq)
 {
 	struct vtnet_txq_stats *st;
 	struct sysctl_oid_list *qlist;
 	struct sysctl_oid *qnode;
 	char name[16];
 
 	snprintf(name, sizeof(name), "txq%d", txq->vtntx_id);
 	qnode = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, name,
 	    CTLFLAG_RD, NULL, "Transmit Queue");
 	qlist = SYSCTL_CHILDREN(qnode);
 
 	st = &txq->vtntx_stats;
 
 	SYSCTL_ADD_UQUAD(ctx, qlist, OID_AUTO, "opackets", CTLFLAG_RD,
 	    &st->vtxs_opackets, "Transmit packets");
 	SYSCTL_ADD_UQUAD(ctx, qlist, OID_AUTO, "obytes", CTLFLAG_RD,
 	    &st->vtxs_obytes, "Transmit bytes");
 	SYSCTL_ADD_UQUAD(ctx, qlist, OID_AUTO, "omcasts", CTLFLAG_RD,
 	    &st->vtxs_omcasts, "Transmit multicasts");
 	SYSCTL_ADD_UQUAD(ctx, qlist, OID_AUTO, "csum", CTLFLAG_RD,
 	    &st->vtxs_csum, "Transmit checksum offloaded");
 	SYSCTL_ADD_UQUAD(ctx, qlist, OID_AUTO, "tso", CTLFLAG_RD,
 	    &st->vtxs_tso, "Transmit segmentation offloaded");
 	SYSCTL_ADD_UQUAD(ctx, qlist, OID_AUTO, "rescheduled", CTLFLAG_RD,
 	    &st->vtxs_rescheduled,
 	    "Transmit interrupt handler rescheduled");
 }
 
 /*
  * Register the per-queue sysctl nodes for every allocated queue pair
  * under the device's sysctl tree.
  */
 static void
 vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
 {
 	struct sysctl_ctx_list *ctx;
 	struct sysctl_oid_list *parent;
 	device_t dev;
 	int qidx;
 
 	dev = sc->vtnet_dev;
 	ctx = device_get_sysctl_ctx(dev);
 	parent = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
 
 	for (qidx = 0; qidx < sc->vtnet_max_vq_pairs; qidx++) {
 		vtnet_setup_rxq_sysctl(ctx, parent, &sc->vtnet_rxqs[qidx]);
 		vtnet_setup_txq_sysctl(ctx, parent, &sc->vtnet_txqs[qidx]);
 	}
 }
 
 /*
  * Register the driver-wide statistics as read-only 64-bit sysctls.
  *
  * ctx   - sysctl context the new OIDs are added to.
  * child - OID list the new OIDs are attached under.
  * sc    - softc whose vtnet_stats fields back the sysctls.
  *
  * Before registering, the per-queue counters are summed by
  * vtnet_accum_stats() and copied into the matching vtnet_stats fields.
  * NOTE(review): that copy is done once, here; whether those six fields are
  * refreshed anywhere else is not visible from this function - confirm.
  */
 static void
 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
     struct sysctl_oid_list *child, struct vtnet_softc *sc)
 {
 	struct vtnet_statistics *stats;
 	struct vtnet_rxq_stats rxaccum;
 	struct vtnet_txq_stats txaccum;
 
 	vtnet_accum_stats(sc, &rxaccum, &txaccum);
 
 	/* Seed the aggregate counters from the accumulated queue totals. */
 	stats = &sc->vtnet_stats;
 	stats->rx_csum_offloaded = rxaccum.vrxs_csum;
 	stats->rx_csum_failed = rxaccum.vrxs_csum_failed;
 	stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
 	stats->tx_csum_offloaded = txaccum.vtxs_csum;
 	stats->tx_tso_offloaded = txaccum.vtxs_tso;
 	stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;
 
 	/* Allocation statistics. */
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
 	    CTLFLAG_RD, &stats->mbuf_alloc_failed,
 	    "Mbuf cluster allocation failures");
 
 	/* Receive path statistics. */
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
 	    CTLFLAG_RD, &stats->rx_frame_too_large,
 	    "Received frame larger than the mbuf chain");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
 	    CTLFLAG_RD, &stats->rx_enq_replacement_failed,
 	    "Enqueuing the replacement receive mbuf failed");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
 	    CTLFLAG_RD, &stats->rx_mergeable_failed,
 	    "Mergeable buffers receive failures");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
 	    CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
 	    "Received checksum offloaded buffer with unsupported "
 	    "Ethernet type");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
 	    CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
 	    "Received checksum offloaded buffer with incorrect IP protocol");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
 	    CTLFLAG_RD, &stats->rx_csum_bad_offset,
 	    "Received checksum offloaded buffer with incorrect offset");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
 	    CTLFLAG_RD, &stats->rx_csum_bad_proto,
 	    "Received checksum offloaded buffer with incorrect protocol");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
 	    CTLFLAG_RD, &stats->rx_csum_failed,
 	    "Received buffer checksum offload failed");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
 	    CTLFLAG_RD, &stats->rx_csum_offloaded,
 	    "Received buffer checksum offload succeeded");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
 	    CTLFLAG_RD, &stats->rx_task_rescheduled,
 	    "Times the receive interrupt task rescheduled itself");
 
 	/* Transmit path statistics. */
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
 	    CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
 	    "Aborted transmit of checksum offloaded buffer with unknown "
 	    "Ethernet type");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
 	    CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
 	    "Aborted transmit of TSO buffer with unknown Ethernet type");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
 	    CTLFLAG_RD, &stats->tx_tso_not_tcp,
 	    "Aborted transmit of TSO buffer with non TCP protocol");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
 	    CTLFLAG_RD, &stats->tx_defragged,
 	    "Transmit mbufs defragged");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
 	    CTLFLAG_RD, &stats->tx_defrag_failed,
 	    "Aborted transmit of buffer because defrag failed");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
 	    CTLFLAG_RD, &stats->tx_csum_offloaded,
 	    "Offloaded checksum of transmitted buffer");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
 	    CTLFLAG_RD, &stats->tx_tso_offloaded,
 	    "Segmentation offload of transmitted buffer");
 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
 	    CTLFLAG_RD, &stats->tx_task_rescheduled,
 	    "Times the transmit interrupt task rescheduled itself");
 }
 
 /*
  * Register the device-level sysctls: the maximum and active virtqueue pair
  * counts, followed by the driver-wide statistics.
  */
 static void
 vtnet_setup_sysctl(struct vtnet_softc *sc)
 {
 	struct sysctl_ctx_list *sysctx;
 	struct sysctl_oid_list *root;
 
 	sysctx = device_get_sysctl_ctx(sc->vtnet_dev);
 	root = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vtnet_dev));
 
 	SYSCTL_ADD_INT(sysctx, root, OID_AUTO, "max_vq_pairs", CTLFLAG_RD,
 	    &sc->vtnet_max_vq_pairs, 0,
 	    "Maximum number of supported virtqueue pairs");
 	SYSCTL_ADD_INT(sysctx, root, OID_AUTO, "act_vq_pairs", CTLFLAG_RD,
 	    &sc->vtnet_act_vq_pairs, 0,
 	    "Number of active virtqueue pairs");
 
 	vtnet_setup_stat_sysctl(sysctx, root, sc);
 }
 
 /*
  * Enable interrupts on the virtqueue behind a receive queue and hand back
  * whatever virtqueue_enable_intr() returns.
  */
 static int
 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
 {
 	struct virtqueue *vq;
 
 	vq = rxq->vtnrx_vq;
 
 	return (virtqueue_enable_intr(vq));
 }
 
 /* Disable interrupts on the virtqueue behind a receive queue. */
 static void
 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
 {
 	struct virtqueue *vq;
 
 	vq = rxq->vtnrx_vq;
 	virtqueue_disable_intr(vq);
 }
 
 /*
  * Conditionally arm the transmit queue interrupt.
  *
  * Returns 0 without touching the virtqueue when the queue is not below its
  * threshold; otherwise returns the result of postponing the interrupt with
  * VQ_POSTPONE_LONG.
  */
 static int
 vtnet_txq_enable_intr(struct vtnet_txq *txq)
 {
 
 	if (vtnet_txq_below_threshold(txq) == 0) {
 		/*
 		 * Enough free descriptors remain, so leave the Tx interrupt
 		 * off until the queue fills up further.
 		 */
 		return (0);
 	}
 
 	return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG));
 }
 
 /* Disable interrupts on the virtqueue behind a transmit queue. */
 static void
 vtnet_txq_disable_intr(struct vtnet_txq *txq)
 {
 	struct virtqueue *vq;
 
 	vq = txq->vtntx_vq;
 	virtqueue_disable_intr(vq);
 }
 
 /*
  * Enable interrupts on every active receive queue, in ascending queue
  * order.  The per-queue return values are discarded.
  */
 static void
 vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
 {
 	int q;
 
 	q = 0;
 	while (q < sc->vtnet_act_vq_pairs) {
 		vtnet_rxq_enable_intr(&sc->vtnet_rxqs[q]);
 		q++;
 	}
 }
 
 /*
  * Run vtnet_txq_enable_intr() on every active transmit queue, in ascending
  * queue order.  The per-queue return values are discarded.
  */
 static void
 vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
 {
 	int q;
 
 	q = 0;
 	while (q < sc->vtnet_act_vq_pairs) {
 		vtnet_txq_enable_intr(&sc->vtnet_txqs[q]);
 		q++;
 	}
 }
 
 /*
  * Enable interrupts for all active queue pairs: every receive queue first,
  * then every transmit queue.
  */
 static void
 vtnet_enable_interrupts(struct vtnet_softc *sc)
 {
 
 	vtnet_enable_rx_interrupts(sc);
 	vtnet_enable_tx_interrupts(sc);
 }
 
 /* Disable interrupts on every active receive queue, in ascending order. */
 static void
 vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
 {
 	int q;
 
 	q = 0;
 	while (q < sc->vtnet_act_vq_pairs) {
 		vtnet_rxq_disable_intr(&sc->vtnet_rxqs[q]);
 		q++;
 	}
 }
 
 /* Disable interrupts on every active transmit queue, in ascending order. */
 static void
 vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
 {
 	int q;
 
 	q = 0;
 	while (q < sc->vtnet_act_vq_pairs) {
 		vtnet_txq_disable_intr(&sc->vtnet_txqs[q]);
 		q++;
 	}
 }
 
 /*
  * Disable interrupts for all active queue pairs: every receive queue first,
  * then every transmit queue.
  */
 static void
 vtnet_disable_interrupts(struct vtnet_softc *sc)
 {
 
 	vtnet_disable_rx_interrupts(sc);
 	vtnet_disable_tx_interrupts(sc);
 }
 
 /*
  * Look up the per-device integer tunable "hw.vtnet.<unit>.<knob>".
  *
  * sc   - softc supplying the device (and therefore the unit number).
  * knob - trailing component of the tunable name.
  * def  - value returned when the fetch leaves it untouched.
  *
  * The name is formatted into a 64-byte buffer with snprintf().
  */
 static int
 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
 {
 	char name[64];
 	int unit, value;
 
 	unit = device_get_unit(sc->vtnet_dev);
 	snprintf(name, sizeof(name), "hw.vtnet.%d.%s", unit, knob);
 
 	value = def;
 	TUNABLE_INT_FETCH(name, &value);
 
 	return (value);
 }
Index: head/sys/net/if_bridge.c
===================================================================
--- head/sys/net/if_bridge.c	(revision 284347)
+++ head/sys/net/if_bridge.c	(revision 284348)
@@ -1,3571 +1,3572 @@
 /*	$NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $	*/
 
 /*
  * Copyright 2001 Wasabi Systems, Inc.
  * All rights reserved.
  *
  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed for the NetBSD Project by
  *	Wasabi Systems, Inc.
  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
  *    or promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
  */
 
 /*
  * Network interface bridge support.
  *
  * TODO:
  *
  *	- Currently only supports Ethernet-like interfaces (Ethernet,
  *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
  *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
  *	  consider heterogeneous bridges).
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/eventhandler.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/protosw.h>
 #include <sys/systm.h>
 #include <sys/jail.h>
 #include <sys/time.h>
 #include <sys/socket.h> /* for net/if.h */
 #include <sys/sockio.h>
 #include <sys/ctype.h>  /* string functions */
 #include <sys/kernel.h>
 #include <sys/random.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 #include <vm/uma.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 
 #include <net/bpf.h>
 #include <net/if.h>
 #include <net/if_clone.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/pfil.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_ifattach.h>
 #endif
 #if defined(INET) || defined(INET6)
 #include <netinet/ip_carp.h>
 #endif
 #include <machine/in_cksum.h>
 #include <netinet/if_ether.h>
 #include <net/bridgestp.h>
 #include <net/if_bridgevar.h>
 #include <net/if_llc.h>
 #include <net/if_vlan_var.h>
 
 #include <net/route.h>
 
 /*
  * Size of the route hash table.  Must be a power of two.
  * May be overridden at build time by pre-defining BRIDGE_RTHASH_SIZE.
  */
 #ifndef BRIDGE_RTHASH_SIZE
 #define	BRIDGE_RTHASH_SIZE		1024
 #endif
 
 /*
  * SIZE - 1; an all-ones low-bit mask only because the size is a power of
  * two.  Presumably used to reduce a hash value to a table index - the use
  * is outside this part of the file.
  */
 #define	BRIDGE_RTHASH_MASK		(BRIDGE_RTHASH_SIZE - 1)
 
 /*
  * Default maximum number of addresses to cache.
  * Copied into sc_brtmax when a bridge is created.
  */
 #ifndef BRIDGE_RTABLE_MAX
 #define	BRIDGE_RTABLE_MAX		2000
 #endif
 
 /*
  * Timeout (in seconds) for entries learned dynamically.
  * Copied into sc_brttimeout when a bridge is created.
  */
 #ifndef BRIDGE_RTABLE_TIMEOUT
 #define	BRIDGE_RTABLE_TIMEOUT		(20 * 60)	/* same as ARP */
 #endif
 
 /*
  * Number of seconds between walks of the route list.
  * Initial value of bridge_rtable_prune_period.
  */
 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
 #define	BRIDGE_RTABLE_PRUNE_PERIOD	(5 * 60)
 #endif
 
 /*
  * List of capabilities to possibly mask on the member interface.
  */
 #define	BRIDGE_IFCAPS_MASK		(IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM)
 
 /*
  * List of capabilities to strip
  */
 #define	BRIDGE_IFCAPS_STRIP		IFCAP_LRO
 
 /*
  * Bridge interface list entry.
  *
  * One entry describes one interface attached to a bridge.  Entries are
  * chained through bif_next onto a list headed in struct bridge_softc
  * (sc_iflist for members, sc_spanlist for span ports).
  */
 struct bridge_iflist {
 	LIST_ENTRY(bridge_iflist) bif_next;
 	struct ifnet		*bif_ifp;	/* member if */
 	struct bstp_port	bif_stp;	/* STP state */
 	uint32_t		bif_flags;	/* member if flags */
 	int			bif_savedcaps;	/* saved capabilities */
 	uint32_t		bif_addrmax;	/* max # of addresses */
 	uint32_t		bif_addrcnt;	/* cur. # of addresses */
 	uint32_t		bif_addrexceeded;/* # of address violations */
 };
 
 /*
  * Bridge route node.
  *
  * One learned or configured (address, vlan) -> member mapping.  Each node
  * carries two list linkages: brt_hash for a hash chain and brt_list for the
  * flat list (see sc_rthash / sc_rtlist in struct bridge_softc).
  */
 struct bridge_rtnode {
 	LIST_ENTRY(bridge_rtnode) brt_hash;	/* hash table linkage */
 	LIST_ENTRY(bridge_rtnode) brt_list;	/* list linkage */
 	struct bridge_iflist	*brt_dst;	/* destination if */
 	unsigned long		brt_expire;	/* expiration time */
 	uint8_t			brt_flags;	/* address flags */
 	uint8_t			brt_addr[ETHER_ADDR_LEN];
 	uint16_t		brt_vlan;	/* vlan id */
 };
 /* Shorthand for the ifnet of the node's destination member. */
 #define	brt_ifp			brt_dst->bif_ifp
 
 /*
  * Software state for each bridge.
  *
  * Allocated zeroed in bridge_clone_create() and linked onto the per-vnet
  * bridge list through sc_list.
  *
  * NOTE(review): sc_iflist_ref and sc_iflist_xcnt carry the same comment;
  * how the two counters differ is not visible in this part of the file -
  * confirm against the locking macros that use them.
  */
 struct bridge_softc {
 	struct ifnet		*sc_ifp;	/* make this an interface */
 	LIST_ENTRY(bridge_softc) sc_list;
 	struct mtx		sc_mtx;
 	struct cv		sc_cv;
 	uint32_t		sc_brtmax;	/* max # of addresses */
 	uint32_t		sc_brtcnt;	/* cur. # of addresses */
 	uint32_t		sc_brttimeout;	/* rt timeout in seconds */
 	struct callout		sc_brcallout;	/* bridge callout */
 	uint32_t		sc_iflist_ref;	/* refcount for sc_iflist */
 	uint32_t		sc_iflist_xcnt;	/* refcount for sc_iflist */
 	LIST_HEAD(, bridge_iflist) sc_iflist;	/* member interface list */
 	LIST_HEAD(, bridge_rtnode) *sc_rthash;	/* our forwarding table */
 	LIST_HEAD(, bridge_rtnode) sc_rtlist;	/* list version of above */
 	uint32_t		sc_rthash_key;	/* key for hash */
 	LIST_HEAD(, bridge_iflist) sc_spanlist;	/* span ports list */
 	struct bstp_state	sc_stp;		/* STP state */
 	uint32_t		sc_brtexceeded;	/* # of cache drops */
 	struct ifnet		*sc_ifaddr;	/* member mac copied from */
 	u_char			sc_defaddr[6];	/* Default MAC address */
 };
 
 /* Per-vnet mutex behind the BRIDGE_LIST_LOCK*() macros. */
 static VNET_DEFINE(struct mtx, bridge_list_mtx);
 #define	V_bridge_list_mtx	VNET(bridge_list_mtx)
 /* Tag from EVENTHANDLER_REGISTER() in bridge_modevent(); used to deregister. */
 static eventhandler_tag bridge_detach_cookie;
 
 /* Initialised from BRIDGE_RTABLE_PRUNE_PERIOD (seconds between list walks). */
 int	bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
 
 /* UMA zone for struct bridge_rtnode; created/destroyed in bridge_modevent(). */
 uma_zone_t bridge_rtnode_zone;
 
 /*
  * Forward declarations for the file-local functions.
  */
 
 /* Cloner entry points (registered in vnet_bridge_init()). */
 static int	bridge_clone_create(struct if_clone *, int, caddr_t);
 static void	bridge_clone_destroy(struct ifnet *);
 
 /* Interface methods and packet paths. */
 static int	bridge_ioctl(struct ifnet *, u_long, caddr_t);
 static void	bridge_mutecaps(struct bridge_softc *);
 static void	bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
 		    int);
 static void	bridge_ifdetach(void *arg __unused, struct ifnet *);
 static void	bridge_init(void *);
 static void	bridge_dummynet(struct mbuf *, struct ifnet *);
 static void	bridge_stop(struct ifnet *, int);
 static int	bridge_transmit(struct ifnet *, struct mbuf *);
 static void	bridge_qflush(struct ifnet *);
 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
 static int	bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *,
 		    struct rtentry *);
 static int	bridge_enqueue(struct bridge_softc *, struct ifnet *,
 		    struct mbuf *);
 static void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
 
 static void	bridge_forward(struct bridge_softc *, struct bridge_iflist *,
 		    struct mbuf *m);
 
 static void	bridge_timer(void *);
 
 static void	bridge_broadcast(struct bridge_softc *, struct ifnet *,
 		    struct mbuf *, int);
 static void	bridge_span(struct bridge_softc *, struct mbuf *);
 
 /* Route (address) table maintenance. */
 static int	bridge_rtupdate(struct bridge_softc *, const uint8_t *,
 		    uint16_t, struct bridge_iflist *, int, uint8_t);
 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
 		    uint16_t);
 static void	bridge_rttrim(struct bridge_softc *);
 static void	bridge_rtage(struct bridge_softc *);
 static void	bridge_rtflush(struct bridge_softc *, int);
 static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
 		    uint16_t);
 
 static void	bridge_rtable_init(struct bridge_softc *);
 static void	bridge_rtable_fini(struct bridge_softc *);
 
 static int	bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
 		    const uint8_t *, uint16_t);
 static int	bridge_rtnode_insert(struct bridge_softc *,
 		    struct bridge_rtnode *);
 static void	bridge_rtnode_destroy(struct bridge_softc *,
 		    struct bridge_rtnode *);
 /* Callbacks installed in bridge_ops for the spanning tree code. */
 static void	bridge_rtable_expire(struct ifnet *, int);
 static void	bridge_state_change(struct ifnet *, int);
 
 /* Member and span port bookkeeping. */
 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
 		    const char *name);
 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
 		    struct ifnet *ifp);
 static void	bridge_delete_member(struct bridge_softc *,
 		    struct bridge_iflist *, int);
 static void	bridge_delete_span(struct bridge_softc *,
 		    struct bridge_iflist *);
 
 /* Handlers dispatched through bridge_control_table[]. */
 static int	bridge_ioctl_add(struct bridge_softc *, void *);
 static int	bridge_ioctl_del(struct bridge_softc *, void *);
 static int	bridge_ioctl_gifflags(struct bridge_softc *, void *);
 static int	bridge_ioctl_sifflags(struct bridge_softc *, void *);
 static int	bridge_ioctl_scache(struct bridge_softc *, void *);
 static int	bridge_ioctl_gcache(struct bridge_softc *, void *);
 static int	bridge_ioctl_gifs(struct bridge_softc *, void *);
 static int	bridge_ioctl_rts(struct bridge_softc *, void *);
 static int	bridge_ioctl_saddr(struct bridge_softc *, void *);
 static int	bridge_ioctl_sto(struct bridge_softc *, void *);
 static int	bridge_ioctl_gto(struct bridge_softc *, void *);
 static int	bridge_ioctl_daddr(struct bridge_softc *, void *);
 static int	bridge_ioctl_flush(struct bridge_softc *, void *);
 static int	bridge_ioctl_gpri(struct bridge_softc *, void *);
 static int	bridge_ioctl_spri(struct bridge_softc *, void *);
 static int	bridge_ioctl_ght(struct bridge_softc *, void *);
 static int	bridge_ioctl_sht(struct bridge_softc *, void *);
 static int	bridge_ioctl_gfd(struct bridge_softc *, void *);
 static int	bridge_ioctl_sfd(struct bridge_softc *, void *);
 static int	bridge_ioctl_gma(struct bridge_softc *, void *);
 static int	bridge_ioctl_sma(struct bridge_softc *, void *);
 static int	bridge_ioctl_sifprio(struct bridge_softc *, void *);
 static int	bridge_ioctl_sifcost(struct bridge_softc *, void *);
 static int	bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
 static int	bridge_ioctl_addspan(struct bridge_softc *, void *);
 static int	bridge_ioctl_delspan(struct bridge_softc *, void *);
 static int	bridge_ioctl_gbparam(struct bridge_softc *, void *);
 static int	bridge_ioctl_grte(struct bridge_softc *, void *);
 static int	bridge_ioctl_gifsstp(struct bridge_softc *, void *);
 static int	bridge_ioctl_sproto(struct bridge_softc *, void *);
 static int	bridge_ioctl_stxhc(struct bridge_softc *, void *);
 /* Packet filter glue and sanity checks. */
 static int	bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
 		    int);
 static int	bridge_ip_checkbasic(struct mbuf **mp);
 #ifdef INET6
 static int	bridge_ip6_checkbasic(struct mbuf **mp);
 #endif /* INET6 */
 static int	bridge_fragment(struct ifnet *, struct mbuf *,
 		    struct ether_header *, int, struct llc *);
 /* Link state tracking. */
 static void	bridge_linkstate(struct ifnet *ifp);
 static void	bridge_linkcheck(struct bridge_softc *sc);
 
 /* Hook defined elsewhere; set and cleared in bridge_modevent(). */
 extern void (*bridge_linkstate_p)(struct ifnet *ifp);
 
 /*
  * VLAN id to associate with an mbuf: the id that EVL_VLANOFTAG() extracts
  * from the packet header's ether_vtag when M_VLANTAG is set in m_flags,
  * otherwise 1.
  * The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2)
  *
  * Both the argument and the whole expansion are parenthesized so the macro
  * behaves as a single expression wherever it is used (for example
  * "VLANTAGOF(m) + 1" or "VLANTAGOF(&mbuf)").  _m is evaluated exactly once.
  */
 #define	VLANTAGOF(_m)	\
     (((_m)->m_flags & M_VLANTAG) ?	\
     EVL_VLANOFTAG((_m)->m_pkthdr.ether_vtag) : 1)
 
 /*
  * Callbacks handed to the spanning tree code by bstp_attach() in
  * bridge_clone_create().
  */
 static struct bstp_cb_ops bridge_ops = {
 	.bcb_state = bridge_state_change,
 	.bcb_rtage = bridge_rtable_expire
 };
 
 /*
  * net.link.bridge sysctl node and its per-vnet integer knobs.  Each knob is
  * a static VNET variable with a V_ accessor macro.
  */
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
 
 /* only pass IP[46] packets when pfil is enabled */
 static VNET_DEFINE(int, pfil_onlyip) = 1;
 #define	V_pfil_onlyip	VNET(pfil_onlyip)
 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip,
     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_onlyip), 0,
     "Only pass IP packets when pfil is enabled");
 
 /* run pfil hooks on the bridge interface */
 static VNET_DEFINE(int, pfil_bridge) = 1;
 #define	V_pfil_bridge	VNET(pfil_bridge)
 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge,
     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_bridge), 0,
     "Packet filter on the bridge interface");
 
 /*
  * layer2 filter with ipfw
  * (no SYSCTL_INT here: the OID is registered further down with SYSCTL_PROC
  * and the sysctl_pfil_ipfw() handler)
  */
 static VNET_DEFINE(int, pfil_ipfw);
 #define	V_pfil_ipfw	VNET(pfil_ipfw)
 
 /* layer2 ARP filter with ipfw */
 static VNET_DEFINE(int, pfil_ipfw_arp);
 #define	V_pfil_ipfw_arp	VNET(pfil_ipfw_arp)
 SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp,
     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_ipfw_arp), 0,
     "Filter ARP packets through IPFW layer2");
 
 /* run pfil hooks on the member interface */
 static VNET_DEFINE(int, pfil_member) = 1;
 #define	V_pfil_member	VNET(pfil_member)
 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member,
     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_member), 0,
     "Packet filter on the member interface");
 
 /* run pfil hooks on the physical interface for locally destined packets */
 static VNET_DEFINE(int, pfil_local_phys);
 #define	V_pfil_local_phys	VNET(pfil_local_phys)
 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys,
     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_local_phys), 0,
     "Packet filter on the physical interface for locally destined packets");
 
 /* log STP state changes */
 static VNET_DEFINE(int, log_stp);
 #define	V_log_stp	VNET(log_stp)
 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp,
     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(log_stp), 0,
     "Log STP state changes");
 
 /* share MAC with first bridge member */
 static VNET_DEFINE(int, bridge_inherit_mac);
 #define	V_bridge_inherit_mac	VNET(bridge_inherit_mac)
 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(bridge_inherit_mac), 0,
     "Inherit MAC address from the first bridge member");
 
 /*
  * allow overlap of link-local scope zones of a bridge interface and its
  * member interfaces (off by default)
  */
 static VNET_DEFINE(int, allow_llz_overlap) = 0;
 #define	V_allow_llz_overlap	VNET(allow_llz_overlap)
 SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap,
     CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0,
     "Allow overlap of link-local scope "
     "zones of a bridge interface and the member interfaces");
 
 /*
  * One entry of the driver-specific ioctl dispatch table
  * (bridge_control_table[]).
  */
 struct bridge_control {
 	int	(*bc_func)(struct bridge_softc *, void *); /* handler */
 	int	bc_argsize;	/* exact size of the argument structure */
 	int	bc_flags;	/* BC_F_* flags below */
 };
 
 #define	BC_F_COPYIN		0x01	/* copy arguments in */
 #define	BC_F_COPYOUT		0x02	/* copy arguments out */
 #define	BC_F_SUSER		0x04	/* do super-user check */
 
 /*
  * Dispatch table for SIOCGDRVSPEC/SIOCSDRVSPEC.  bridge_ioctl() indexes it
  * directly with the caller-supplied ifd_cmd, so the ORDER of the entries is
  * part of the interface: do not reorder or insert in the middle.
  *
  * bridge_ioctl() also requires ifd_len to equal bc_argsize, rejects
  * SIOCGDRVSPEC for entries without BC_F_COPYOUT, and rejects SIOCSDRVSPEC
  * for entries with it.
  */
 const struct bridge_control bridge_control_table[] = {
 	{ bridge_ioctl_add,		sizeof(struct ifbreq),
 	  BC_F_COPYIN|BC_F_SUSER },
 	{ bridge_ioctl_del,		sizeof(struct ifbreq),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 	{ bridge_ioctl_gifflags,	sizeof(struct ifbreq),
 	  BC_F_COPYIN|BC_F_COPYOUT },
 	{ bridge_ioctl_sifflags,	sizeof(struct ifbreq),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 	{ bridge_ioctl_scache,		sizeof(struct ifbrparam),
 	  BC_F_COPYIN|BC_F_SUSER },
 	{ bridge_ioctl_gcache,		sizeof(struct ifbrparam),
 	  BC_F_COPYOUT },
 
 	{ bridge_ioctl_gifs,		sizeof(struct ifbifconf),
 	  BC_F_COPYIN|BC_F_COPYOUT },
 	{ bridge_ioctl_rts,		sizeof(struct ifbaconf),
 	  BC_F_COPYIN|BC_F_COPYOUT },
 
 	{ bridge_ioctl_saddr,		sizeof(struct ifbareq),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 	{ bridge_ioctl_sto,		sizeof(struct ifbrparam),
 	  BC_F_COPYIN|BC_F_SUSER },
 	{ bridge_ioctl_gto,		sizeof(struct ifbrparam),
 	  BC_F_COPYOUT },
 
 	{ bridge_ioctl_daddr,		sizeof(struct ifbareq),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 	{ bridge_ioctl_flush,		sizeof(struct ifbreq),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 	{ bridge_ioctl_gpri,		sizeof(struct ifbrparam),
 	  BC_F_COPYOUT },
 	{ bridge_ioctl_spri,		sizeof(struct ifbrparam),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 	{ bridge_ioctl_ght,		sizeof(struct ifbrparam),
 	  BC_F_COPYOUT },
 	{ bridge_ioctl_sht,		sizeof(struct ifbrparam),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 	{ bridge_ioctl_gfd,		sizeof(struct ifbrparam),
 	  BC_F_COPYOUT },
 	{ bridge_ioctl_sfd,		sizeof(struct ifbrparam),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 	{ bridge_ioctl_gma,		sizeof(struct ifbrparam),
 	  BC_F_COPYOUT },
 	{ bridge_ioctl_sma,		sizeof(struct ifbrparam),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 	{ bridge_ioctl_sifprio,		sizeof(struct ifbreq),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 	{ bridge_ioctl_sifcost,		sizeof(struct ifbreq),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 	{ bridge_ioctl_addspan,		sizeof(struct ifbreq),
 	  BC_F_COPYIN|BC_F_SUSER },
 	{ bridge_ioctl_delspan,		sizeof(struct ifbreq),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 	{ bridge_ioctl_gbparam,		sizeof(struct ifbropreq),
 	  BC_F_COPYOUT },
 
 	{ bridge_ioctl_grte,		sizeof(struct ifbrparam),
 	  BC_F_COPYOUT },
 
 	{ bridge_ioctl_gifsstp,		sizeof(struct ifbpstpconf),
 	  BC_F_COPYIN|BC_F_COPYOUT },
 
 	{ bridge_ioctl_sproto,		sizeof(struct ifbrparam),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 	{ bridge_ioctl_stxhc,		sizeof(struct ifbrparam),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 	{ bridge_ioctl_sifmaxaddr,	sizeof(struct ifbreq),
 	  BC_F_COPYIN|BC_F_SUSER },
 
 };
 /* Number of entries above; the upper bound checked against ifd_cmd. */
 const int bridge_control_table_size = nitems(bridge_control_table);
 
 /*
  * Per-vnet list of all bridge instances and the macros that lock it.
  * The (x) parameter of the lock macros is unused; callers in this file
  * invoke them with an empty argument list.
  */
 static VNET_DEFINE(LIST_HEAD(, bridge_softc), bridge_list);
 #define	V_bridge_list	VNET(bridge_list)
 #define	BRIDGE_LIST_LOCK_INIT(x)	mtx_init(&V_bridge_list_mtx,	\
 					    "if_bridge list", NULL, MTX_DEF)
 #define	BRIDGE_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_bridge_list_mtx)
 #define	BRIDGE_LIST_LOCK(x)		mtx_lock(&V_bridge_list_mtx)
 #define	BRIDGE_LIST_UNLOCK(x)		mtx_unlock(&V_bridge_list_mtx)
 
 /* Per-vnet interface cloner handle; set in vnet_bridge_init(). */
 static VNET_DEFINE(struct if_clone *, bridge_cloner);
 #define	V_bridge_cloner	VNET(bridge_cloner)
 
 /* Cloner name and the name prefix given to each bridge interface. */
 static const char bridge_name[] = "bridge";
 
 /*
  * Per-vnet initialisation: set up the bridge list lock, empty the bridge
  * list, and register the "bridge" interface cloner.
  */
 static void
 vnet_bridge_init(const void *unused __unused)
 {
 	struct if_clone *cloner;
 
 	BRIDGE_LIST_LOCK_INIT();
 	LIST_INIT(&V_bridge_list);
 
 	cloner = if_clone_simple(bridge_name, bridge_clone_create,
 	    bridge_clone_destroy, 0);
 	V_bridge_cloner = cloner;
 }
 VNET_SYSINIT(vnet_bridge_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_bridge_init, NULL);
 
 /*
  * Per-vnet teardown, mirroring vnet_bridge_init(): detach the interface
  * cloner, clear the saved handle, then destroy the bridge list lock.
  */
 static void
 vnet_bridge_uninit(const void *unused __unused)
 {
 
 	if_clone_detach(V_bridge_cloner);
 	V_bridge_cloner = NULL;
 	/* The lock goes last, after the cloner has been detached. */
 	BRIDGE_LIST_LOCK_DESTROY();
 }
 VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_bridge_uninit, NULL);
 
 /*
  * Module event handler.
  *
  * MOD_LOAD creates the route node zone, installs the bridge hooks and
  * registers for interface departure events; MOD_UNLOAD undoes each of
  * those.  Any other event type yields EOPNOTSUPP.  Returns 0 on success.
  */
 static int
 bridge_modevent(module_t mod, int type, void *data)
 {
 
 	if (type == MOD_LOAD) {
 		bridge_rtnode_zone = uma_zcreate("bridge_rtnode",
 		    sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL,
 		    UMA_ALIGN_PTR, 0);
 
 		/* Publish the entry points used by the rest of the stack. */
 		bridge_input_p = bridge_input;
 		bridge_output_p = bridge_output;
 		bridge_dn_p = bridge_dummynet;
 		bridge_linkstate_p = bridge_linkstate;
 
 		bridge_detach_cookie = EVENTHANDLER_REGISTER(
 		    ifnet_departure_event, bridge_ifdetach, NULL,
 		    EVENTHANDLER_PRI_ANY);
 		return (0);
 	}
 
 	if (type == MOD_UNLOAD) {
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 		    bridge_detach_cookie);
 		uma_zdestroy(bridge_rtnode_zone);
 
 		/* Withdraw the entry points again. */
 		bridge_input_p = NULL;
 		bridge_output_p = NULL;
 		bridge_dn_p = NULL;
 		bridge_linkstate_p = NULL;
 		return (0);
 	}
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Module glue: "if_bridge" is driven by bridge_modevent() and depends on
  * version 1 of the bridgestp module.
  */
 static moduledata_t bridge_mod = {
 	"if_bridge",
 	bridge_modevent,
 	0
 };
 
 DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1);
 
 /*
  * handler for net.link.bridge.ipfw
  */
 static int
 sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS)
 {
 	int enable = V_pfil_ipfw;
 	int error;
 
 	error = sysctl_handle_int(oidp, &enable, 0, req);
 	enable &= 1;
 
 	if (enable != V_pfil_ipfw) {
 		V_pfil_ipfw = enable;
 
 		/*
 		 * Disable pfil so that ipfw doesnt run twice, if the user
 		 * really wants both then they can re-enable pfil_bridge and/or
 		 * pfil_member. Also allow non-ip packets as ipfw can filter by
 		 * layer2 type.
 		 */
 		if (V_pfil_ipfw) {
 			V_pfil_onlyip = 0;
 			V_pfil_bridge = 0;
 			V_pfil_member = 0;
 		}
 	}
 
 	return (error);
 }
 SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET,
     &VNET_NAME(pfil_ipfw), 0, &sysctl_pfil_ipfw, "I",
     "Layer2 filter with IPFW");
 
 /*
  * bridge_clone_create:
  *
  *	Create a new bridge instance.
  *
  *	ifc and params are not referenced; unit becomes the interface unit
  *	number.  Returns 0 on success, or ENOSPC when if_alloc() fails (the
  *	softc is freed again in that case).
  */
 static int
 bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct bridge_softc *sc, *sc2;
 	struct ifnet *bifp, *ifp;
 	int fb, retry;
 	unsigned long hostid;
 
 	/* M_WAITOK: the softc allocation itself is not checked for NULL. */
 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
 	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		free(sc, M_DEVBUF);
 		return (ENOSPC);
 	}
 
 	BRIDGE_LOCK_INIT(sc);
 	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
 	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
 
 	/* Initialize our routing table. */
 	bridge_rtable_init(sc);
 
 	/* The callout is tied to the bridge mutex. */
 	callout_init_mtx(&sc->sc_brcallout, &sc->sc_mtx, 0);
 
 	LIST_INIT(&sc->sc_iflist);
 	LIST_INIT(&sc->sc_spanlist);
 
 	ifp->if_softc = sc;
 	if_initname(ifp, bridge_name, unit);
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = bridge_ioctl;
 	ifp->if_transmit = bridge_transmit;
 	ifp->if_qflush = bridge_qflush;
 	ifp->if_init = bridge_init;
 	ifp->if_type = IFT_BRIDGE;
 
 	/*
 	 * Generate an ethernet address with a locally administered address.
 	 *
 	 * Since we are using random ethernet addresses for the bridge, it is
 	 * possible that we might have address collisions, so make sure that
 	 * this hardware address isn't already in use on another bridge.
 	 * The first try uses the hostid and falls back to arc4rand().
 	 */
 	/* fb ("fallback") becomes 1 once the hostid-derived address was tried. */
 	fb = 0;
 	getcredhostid(curthread->td_ucred, &hostid);
 	do {
 		if (fb || hostid == 0) {
 			arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1);
 			sc->sc_defaddr[0] &= ~1;/* clear multicast bit */
 			sc->sc_defaddr[0] |= 2;	/* set the LAA bit */
 		} else {
 			/* 02:<hostid, 4 bytes, big-endian>:<low byte of unit> */
 			sc->sc_defaddr[0] = 0x2;
 			sc->sc_defaddr[1] = (hostid >> 24) & 0xff;
 			sc->sc_defaddr[2] = (hostid >> 16) & 0xff;
 			sc->sc_defaddr[3] = (hostid >> 8 ) & 0xff;
 			sc->sc_defaddr[4] =  hostid        & 0xff;
 			sc->sc_defaddr[5] = ifp->if_dunit & 0xff;
 		}
 
 		fb = 1;
 		retry = 0;
 		/* Retry if any existing bridge already uses this address. */
 		BRIDGE_LIST_LOCK();
 		LIST_FOREACH(sc2, &V_bridge_list, sc_list) {
 			bifp = sc2->sc_ifp;
 			if (memcmp(sc->sc_defaddr,
 			    IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
 				retry = 1;
 				break;
 			}
 		}
 		BRIDGE_LIST_UNLOCK();
 	} while (retry == 1);
 
 	bstp_attach(&sc->sc_stp, &bridge_ops);
 	ether_ifattach(ifp, sc->sc_defaddr);
 	/* Now undo some of the damage... */
 	/* (if_baudrate and if_type are re-set after ether_ifattach()) */
 	ifp->if_baudrate = 0;
 	ifp->if_type = IFT_BRIDGE;
 
 	/* Make the new bridge visible on the per-vnet bridge list. */
 	BRIDGE_LIST_LOCK();
 	LIST_INSERT_HEAD(&V_bridge_list, sc, sc_list);
 	BRIDGE_LIST_UNLOCK();
 
 	return (0);
 }
 
 /*
  * bridge_clone_destroy:
  *
  *	Tear down a bridge instance: stop it, drop every member and span
  *	port, drain the callout, unlink it from the bridge list, detach
  *	and free the interface, and finally release the routing table,
  *	the lock and the softc.
  */
 static void
 bridge_clone_destroy(struct ifnet *ifp)
 {
 	struct bridge_softc *sc;
 	struct bridge_iflist *entry;
 
 	sc = ifp->if_softc;
 
 	BRIDGE_LOCK(sc);
 
 	bridge_stop(ifp, 1);
 	ifp->if_flags &= ~IFF_UP;
 
 	/* Remove members one at a time until the list is empty. */
 	for (;;) {
 		entry = LIST_FIRST(&sc->sc_iflist);
 		if (entry == NULL)
 			break;
 		bridge_delete_member(sc, entry, 0);
 	}
 
 	/* Same for the span ports. */
 	for (;;) {
 		entry = LIST_FIRST(&sc->sc_spanlist);
 		if (entry == NULL)
 			break;
 		bridge_delete_span(sc, entry);
 	}
 
 	BRIDGE_UNLOCK(sc);
 
 	/* Drained only after the bridge lock has been dropped. */
 	callout_drain(&sc->sc_brcallout);
 
 	BRIDGE_LIST_LOCK();
 	LIST_REMOVE(sc, sc_list);
 	BRIDGE_LIST_UNLOCK();
 
 	bstp_detach(&sc->sc_stp);
 	ether_ifdetach(ifp);
 	if_free(ifp);
 
 	/* The routing table goes once the interface is gone. */
 	bridge_rtable_fini(sc);
 
 	BRIDGE_LOCK_DESTROY(sc);
 	free(sc, M_DEVBUF);
 }
 
 /*
  * bridge_ioctl:
  *
  *	Handle a control request from the operator.
  *
  *	SIOCGDRVSPEC/SIOCSDRVSPEC requests are dispatched through
  *	bridge_control_table[], indexed by ifd_cmd.  Interface flag and MTU
  *	changes are handled inline; anything else is handed to
  *	ether_ioctl().  Returns 0 or an errno value.
  */
 static int
 bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct bridge_softc *sc = ifp->if_softc;
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct bridge_iflist *bif;
 	struct thread *td = curthread;
 	/* Scratch space large enough for any argument type listed here. */
 	union {
 		struct ifbreq ifbreq;
 		struct ifbifconf ifbifconf;
 		struct ifbareq ifbareq;
 		struct ifbaconf ifbaconf;
 		struct ifbrparam ifbrparam;
 		struct ifbropreq ifbropreq;
 	} args;
 	struct ifdrv *ifd = (struct ifdrv *) data;
 	const struct bridge_control *bc;
 	int error = 0;
 
 	switch (cmd) {
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		/* Accepted, but nothing is done for multicast changes. */
 		break;
 
 	case SIOCGDRVSPEC:
 	case SIOCSDRVSPEC:
 		/* Reject sub-commands beyond the end of the control table. */
 		if (ifd->ifd_cmd >= bridge_control_table_size) {
 			error = EINVAL;
 			break;
 		}
 		bc = &bridge_control_table[ifd->ifd_cmd];
 
 		/*
 		 * A "get" request must name a handler that copies data out,
 		 * and a "set" request must name one that does not.
 		 */
 		if (cmd == SIOCGDRVSPEC &&
 		    (bc->bc_flags & BC_F_COPYOUT) == 0) {
 			error = EINVAL;
 			break;
 		}
 		else if (cmd == SIOCSDRVSPEC &&
 		    (bc->bc_flags & BC_F_COPYOUT) != 0) {
 			error = EINVAL;
 			break;
 		}
 
 		/* Handlers flagged BC_F_SUSER need PRIV_NET_BRIDGE. */
 		if (bc->bc_flags & BC_F_SUSER) {
 			error = priv_check(td, PRIV_NET_BRIDGE);
 			if (error)
 				break;
 		}
 
 		/*
 		 * The caller's length must match the handler's argument size
 		 * exactly and must fit in the local scratch union.
 		 */
 		if (ifd->ifd_len != bc->bc_argsize ||
 		    ifd->ifd_len > sizeof(args)) {
 			error = EINVAL;
 			break;
 		}
 
 		/* Zero first so bytes the handler leaves alone are defined. */
 		bzero(&args, sizeof(args));
 		if (bc->bc_flags & BC_F_COPYIN) {
 			error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
 			if (error)
 				break;
 		}
 
 		/* Handlers are entered with the bridge lock held. */
 		BRIDGE_LOCK(sc);
 		error = (*bc->bc_func)(sc, &args);
 		BRIDGE_UNLOCK(sc);
 		if (error)
 			break;
 
 		if (bc->bc_flags & BC_F_COPYOUT)
 			error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
 
 		break;
 
 	case SIOCSIFFLAGS:
 		if (!(ifp->if_flags & IFF_UP) &&
 		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 			/*
 			 * If interface is marked down and it is running,
 			 * then stop and disable it.
 			 */
 			BRIDGE_LOCK(sc);
 			bridge_stop(ifp, 1);
 			BRIDGE_UNLOCK(sc);
 		} else if ((ifp->if_flags & IFF_UP) &&
 		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 			/*
 			 * If interface is marked up and it is stopped, then
 			 * start it.
 			 */
 			(*ifp->if_init)(sc);
 		}
 		break;
 
 	case SIOCSIFMTU:
 		/* MTUs below 576 are refused outright. */
 		if (ifr->ifr_mtu < 576) {
 			error = EINVAL;
 			break;
 		}
 		/* With no members the bridge may take any MTU. */
 		if (LIST_EMPTY(&sc->sc_iflist)) {
 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
 			break;
 		}
 		/* Otherwise every member must already use the requested MTU. */
 		BRIDGE_LOCK(sc);
 		LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 			if (bif->bif_ifp->if_mtu != ifr->ifr_mtu) {
 				log(LOG_NOTICE, "%s: invalid MTU: %u(%s)"
 				    " != %d\n", sc->sc_ifp->if_xname,
 				    bif->bif_ifp->if_mtu,
 				    bif->bif_ifp->if_xname, ifr->ifr_mtu);
 				error = EINVAL;
 				break;
 			}
 		}
 		if (!error)
 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
 		BRIDGE_UNLOCK(sc);
 		break;
 	default:
 		/*
 		 * The bridge lock is not held at this point.  The original
 		 * note explains why: ether_ioctl() will call bridge_start()
 		 * and would otherwise cause the lock to be recursed.
 		 */
 		error = ether_ioctl(ifp, cmd, data);
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * bridge_mutecaps:
  *
  *	Clear or restore unwanted capabilities on the member interface
  */
 static void
 bridge_mutecaps(struct bridge_softc *sc)
 {
 	struct bridge_iflist *bif;
 	int enabled, mask;
 
 	/* Initial bitmask of capabilities to test */
 	mask = BRIDGE_IFCAPS_MASK;
 
 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 		/* Every member must support it or its disabled */
 		mask &= bif->bif_savedcaps;
 	}
 
 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 		enabled = bif->bif_ifp->if_capenable;
 		enabled &= ~BRIDGE_IFCAPS_STRIP;
 		/* strip off mask bits and enable them again if allowed */
 		enabled &= ~BRIDGE_IFCAPS_MASK;
 		enabled |= mask;
 		bridge_set_ifcap(sc, bif, enabled);
 	}
 
 }
 
 static void
 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
 {
 	struct ifnet *ifp = bif->bif_ifp;
 	struct ifreq ifr;
 	int error;
 
 	bzero(&ifr, sizeof(ifr));
 	ifr.ifr_reqcap = set;
 
 	if (ifp->if_capenable != set) {
 		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
 		if (error)
 			if_printf(sc->sc_ifp,
 			    "error setting interface capabilities on %s\n",
 			    ifp->if_xname);
 	}
 }
 
 /*
  * bridge_lookup_member:
  *
  *	Lookup a bridge member interface.
  */
 static struct bridge_iflist *
 bridge_lookup_member(struct bridge_softc *sc, const char *name)
 {
 	struct bridge_iflist *bif;
 	struct ifnet *ifp;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 		ifp = bif->bif_ifp;
 		if (strcmp(ifp->if_xname, name) == 0)
 			return (bif);
 	}
 
 	return (NULL);
 }
 
 /*
  * bridge_lookup_member_if:
  *
  *	Find the member entry that refers to the given ifnet.
  *	Returns NULL when there is none.  The bridge lock must be held.
  */
 static struct bridge_iflist *
 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
 {
 	struct bridge_iflist *member;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
 	/* The iterator is NULL if the loop runs off the end of the list. */
 	LIST_FOREACH(member, &sc->sc_iflist, bif_next) {
 		if (member->bif_ifp == member_ifp)
 			break;
 	}
 
 	return (member);
 }
 
 /*
  * bridge_delete_member:
  *
  *	Delete the specified member interface.
  *
  *	Must be called with the bridge lock held.  The lock is released
  *	while the member interface is restored and is held again on return.
  *	When `gone' is non-zero the promiscuous-mode and capability
  *	restoration on the member interface is skipped.  `bif' is freed.
  */
 static void
 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
     int gone)
 {
 	struct ifnet *ifs = bif->bif_ifp;
 	struct ifnet *fif = NULL;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
 	if (bif->bif_flags & IFBIF_STP)
 		bstp_disable(&bif->bif_stp);
 
 	/* Detach the interface from the bridge and unlink the entry. */
 	ifs->if_bridge = NULL;
 	BRIDGE_XLOCK(sc);
 	LIST_REMOVE(bif, bif_next);
 	BRIDGE_XDROP(sc);
 
 	/*
 	 * If removing the interface that gave the bridge its mac address, set
 	 * the mac address of the bridge to the address of the next member, or
 	 * to its default address if no members are left.
 	 */
 	if (V_bridge_inherit_mac && sc->sc_ifaddr == ifs) {
 		if (LIST_EMPTY(&sc->sc_iflist)) {
 			bcopy(sc->sc_defaddr,
 			    IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
 			sc->sc_ifaddr = NULL;
 		} else {
 			fif = LIST_FIRST(&sc->sc_iflist)->bif_ifp;
 			bcopy(IF_LLADDR(fif),
 			    IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
 			sc->sc_ifaddr = fif;
 		}
 		EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
 	}
 
 	bridge_linkcheck(sc);
 	bridge_mutecaps(sc);	/* recalculate now this interface is removed */
 	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
 	KASSERT(bif->bif_addrcnt == 0,
 	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
 
 	/* The calls on the member interface below run without the lock. */
 	BRIDGE_UNLOCK(sc);
 	if (!gone) {
 		switch (ifs->if_type) {
 		case IFT_ETHER:
 		case IFT_L2VLAN:
 			/*
-			 * Take the interface out of promiscuous mode.
+			 * Take the interface out of promiscuous mode, but only
+			 * if it was promiscuous in the first place. It might
+			 * not be if we're in the bridge_ioctl_add() error path.
 			 */
-			(void) ifpromisc(ifs, 0);
+			if (ifs->if_flags & IFF_PROMISC)
+				(void) ifpromisc(ifs, 0);
 			break;
 
 		case IFT_GIF:
 			break;
 
 		default:
 #ifdef DIAGNOSTIC
 			panic("bridge_delete_member: impossible");
 #endif
 			break;
 		}
 		/* re-enable any interface capabilities */
 		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
 	}
 	bstp_destroy(&bif->bif_stp);	/* prepare to free */
 	/* Retake the lock so the caller still holds it on return. */
 	BRIDGE_LOCK(sc);
 	free(bif, M_DEVBUF);
 }
 
 /*
  * bridge_delete_span:
  *
  *	Unlink a span port entry from its list and free it.
  *	The bridge lock must be held.
  */
 static void
 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
 {
 
 	BRIDGE_LOCK_ASSERT(sc);
 
 	/* The interface behind a span entry has no if_bridge set. */
 	KASSERT(bif->bif_ifp->if_bridge == NULL,
 	    ("%s: not a span interface", __func__));
 
 	LIST_REMOVE(bif, bif_next);
 
 	free(bif, M_DEVBUF);
 }
 
 /*
  * Add the interface named in the request (a struct ifbreq) as a bridge
  * member.
  *
  * Returns ENOENT if the interface does not exist, EINVAL if it has no
  * ioctl routine, is of an unsupported type or has an MTU different from
  * the bridge's, EBUSY if it is a span port or belongs to another bridge,
  * EEXIST if it is already a member of this bridge, ENOMEM if no entry can
  * be allocated, or the error from ifpromisc().  Entered with the bridge
  * lock held; the lock is dropped and retaken around some calls.
  */
 static int
 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
 {
 	struct ifbreq *req = arg;
 	struct bridge_iflist *bif = NULL;
 	struct ifnet *ifs;
 	int error = 0;
 
 	ifs = ifunit(req->ifbr_ifsname);
 	if (ifs == NULL)
 		return (ENOENT);
 	if (ifs->if_ioctl == NULL)	/* must be supported */
 		return (EINVAL);
 
 	/* If it's in the span list, it can't be a member. */
 	LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
 		if (ifs == bif->bif_ifp)
 			return (EBUSY);
 
 	if (ifs->if_bridge == sc)
 		return (EEXIST);
 
 	if (ifs->if_bridge != NULL)
 		return (EBUSY);
 
 	switch (ifs->if_type) {
 	case IFT_ETHER:
 	case IFT_L2VLAN:
 	case IFT_GIF:
 		/* permitted interface types */
 		break;
 	default:
 		return (EINVAL);
 	}
 
 #ifdef INET6
 	/*
 	 * Two valid inet6 addresses with link-local scope must not be
 	 * on the parent interface and the member interfaces at the
 	 * same time.  This restriction is needed to prevent violation
 	 * of link-local scope zone.  Attempts to add a member
 	 * interface which has inet6 addresses when the parent has
 	 * inet6 triggers removal of all inet6 addresses on the member
 	 * interface.
 	 */
 
 	/* Check if the parent interface has a link-local scope addr. */
 	if (V_allow_llz_overlap == 0 &&
 	    in6ifa_llaonifp(sc->sc_ifp) != NULL) {
 		/*
 		 * If any, remove all inet6 addresses from the member
 		 * interfaces.
 		 */
 		BRIDGE_XLOCK(sc);
 		LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
  			if (in6ifa_llaonifp(bif->bif_ifp)) {
 				/* in6_ifdetach() is called without the lock. */
 				BRIDGE_UNLOCK(sc);
 				in6_ifdetach(bif->bif_ifp);
 				BRIDGE_LOCK(sc);
 				if_printf(sc->sc_ifp,
 				    "IPv6 addresses on %s have been removed "
 				    "before adding it as a member to prevent "
 				    "IPv6 address scope violation.\n",
 				    bif->bif_ifp->if_xname);
 			}
 		}
 		BRIDGE_XDROP(sc);
 		/* Apply the same treatment to the interface being added. */
 		if (in6ifa_llaonifp(ifs)) {
 			BRIDGE_UNLOCK(sc);
 			in6_ifdetach(ifs);
 			BRIDGE_LOCK(sc);
 			if_printf(sc->sc_ifp,
 			    "IPv6 addresses on %s have been removed "
 			    "before adding it as a member to prevent "
 			    "IPv6 address scope violation.\n",
 			    ifs->if_xname);
 		}
 	}
 #endif
 	/* Allow the first Ethernet member to define the MTU */
 	if (LIST_EMPTY(&sc->sc_iflist))
 		sc->sc_ifp->if_mtu = ifs->if_mtu;
 	else if (sc->sc_ifp->if_mtu != ifs->if_mtu) {
 		if_printf(sc->sc_ifp, "invalid MTU: %u(%s) != %u\n",
 		    ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu);
 		return (EINVAL);
 	}
 
 	bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
 	if (bif == NULL)
 		return (ENOMEM);
 
 	/* Remember the current capabilities so they can be restored later. */
 	bif->bif_ifp = ifs;
 	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
 	bif->bif_savedcaps = ifs->if_capenable;
 
 	/*
 	 * Assign the interface's MAC address to the bridge if it's the first
 	 * member and the MAC address of the bridge has not been changed from
 	 * the default randomly generated one.
 	 */
 	if (V_bridge_inherit_mac && LIST_EMPTY(&sc->sc_iflist) &&
 	    !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr, ETHER_ADDR_LEN)) {
 		bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
 		sc->sc_ifaddr = ifs;
 		EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
 	}
 
 	ifs->if_bridge = sc;
 	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
 	/*
 	 * XXX: XLOCK HERE!?!
 	 *
 	 * NOTE: insert_***HEAD*** should be safe for the traversals.
 	 */
 	LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next);
 
 	/* Set interface capabilities to the intersection set of all members */
 	bridge_mutecaps(sc);
 	bridge_linkcheck(sc);
 
 	/* Place the interface into promiscuous mode */
 	switch (ifs->if_type) {
 		case IFT_ETHER:
 		case IFT_L2VLAN:
 			/* ifpromisc() is called without the bridge lock. */
 			BRIDGE_UNLOCK(sc);
 			error = ifpromisc(ifs, 1);
 			BRIDGE_LOCK(sc);
 			break;
 	}
 
 	/* On failure undo the insertion; bridge_delete_member() frees bif. */
-	if (error) {
+	if (error)
 		bridge_delete_member(sc, bif, 0);
-		free(bif, M_DEVBUF);
-	}
 	return (error);
 }
 
 /*
  * Remove the member named in the request (a struct ifbreq) from the
  * bridge.  Returns ENOENT when the name is not a member.
  */
 static int
 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
 {
 	struct ifbreq *req;
 	struct bridge_iflist *member;
 
 	req = arg;
 	member = bridge_lookup_member(sc, req->ifbr_ifsname);
 	if (member != NULL) {
 		bridge_delete_member(sc, member, 0);
 		return (0);
 	}
 
 	return (ENOENT);
 }
 
 /*
  * Fill the request (a struct ifbreq) with the bridge flags, address
  * counters and spanning tree port state of the named member.
  * Returns ENOENT when the name is not a member.
  */
 static int
 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
 {
 	struct ifbreq *req;
 	struct bridge_iflist *member;
 	struct bstp_port *port;
 
 	req = arg;
 	member = bridge_lookup_member(sc, req->ifbr_ifsname);
 	if (member == NULL)
 		return (ENOENT);
 	port = &member->bif_stp;
 
 	/* Values kept in the member entry itself. */
 	req->ifbr_ifsflags = member->bif_flags;
 	req->ifbr_portno = member->bif_ifp->if_index & 0xfff;
 	req->ifbr_addrcnt = member->bif_addrcnt;
 	req->ifbr_addrmax = member->bif_addrmax;
 	req->ifbr_addrexceeded = member->bif_addrexceeded;
 
 	/* Values kept in the spanning tree port. */
 	req->ifbr_state = port->bp_state;
 	req->ifbr_priority = port->bp_priority;
 	req->ifbr_path_cost = port->bp_path_cost;
 	req->ifbr_proto = port->bp_protover;
 	req->ifbr_role = port->bp_role;
 	req->ifbr_stpflags = port->bp_flags;
 
 	/* Copy STP state options as flags */
 	if (port->bp_operedge)
 		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
 	if (port->bp_ptp_link)
 		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
 	if ((port->bp_flags & BSTP_PORT_AUTOEDGE) != 0)
 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
 	if ((port->bp_flags & BSTP_PORT_AUTOPTP) != 0)
 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
 	if ((port->bp_flags & BSTP_PORT_ADMEDGE) != 0)
 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
 	if ((port->bp_flags & BSTP_PORT_ADMCOST) != 0)
 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
 
 	return (0);
 }
 
 static int
 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
 {
 	struct ifbreq *req = arg;
 	struct bridge_iflist *bif;
 	struct bstp_port *bp;
 	int error;
 
 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
 	if (bif == NULL)
 		return (ENOENT);
 	bp = &bif->bif_stp;
 
 	if (req->ifbr_ifsflags & IFBIF_SPAN)
 		/* SPAN is readonly */
 		return (EINVAL);
 
 	if (req->ifbr_ifsflags & IFBIF_STP) {
 		if ((bif->bif_flags & IFBIF_STP) == 0) {
 			error = bstp_enable(&bif->bif_stp);
 			if (error)
 				return (error);
 		}
 	} else {
 		if ((bif->bif_flags & IFBIF_STP) != 0)
 			bstp_disable(&bif->bif_stp);
 	}
 
 	/* Pass on STP flags */
 	bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
 	bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
 	bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
 	bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
 
 	/* Save the bits relating to the bridge */
 	bif->bif_flags = req->ifbr_ifsflags & IFBIFMASK;
 
 	return (0);
 }
 
 /*
  * Store ifbrp_csize from the request (a struct ifbrparam) in sc_brtmax
  * and call bridge_rttrim().  Always returns 0.
  */
 static int
 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *brp;
 
 	brp = arg;
 	sc->sc_brtmax = brp->ifbrp_csize;
 	bridge_rttrim(sc);
 	return (0);
 }
 
 /*
  * Report sc_brtmax in ifbrp_csize of the request (a struct ifbrparam).
  * Always returns 0.
  */
 static int
 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *brp;
 
 	brp = arg;
 	brp->ifbrp_csize = sc->sc_brtmax;
 	return (0);
 }
 
 /*
  * Copy one struct ifbreq per member and per span port out to the buffer
  * described by the request (a struct ifbifconf).
  *
  * When ifbic_len is zero only the buffer size needed is stored back.
  * Otherwise ifbic_len is set to the number of bytes produced and the
  * result of copyout() is returned.  Entered with the bridge lock held;
  * the lock is dropped around the allocation and around copyout().
  */
 static int
 bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
 {
 	struct ifbifconf *bifc = arg;
 	struct bridge_iflist *bif;
 	struct ifbreq breq;
 	char *buf, *outbuf;
 	int count, buflen, len, error = 0;
 
 	/* First pass: count the entries to size the temporary buffer. */
 	count = 0;
 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next)
 		count++;
 	LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
 		count++;
 
 	buflen = sizeof(breq) * count;
 	if (bifc->ifbic_len == 0) {
 		bifc->ifbic_len = buflen;
 		return (0);
 	}
 	/* The M_WAITOK allocation is made without the bridge lock. */
 	BRIDGE_UNLOCK(sc);
 	outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 	BRIDGE_LOCK(sc);
 
 	/*
 	 * Second pass: fill the buffer.  len is capped at buflen, and each
 	 * loop stops once fewer than sizeof(breq) bytes remain, so outbuf is
 	 * not overrun.
 	 */
 	count = 0;
 	buf = outbuf;
 	len = min(bifc->ifbic_len, buflen);
 	bzero(&breq, sizeof(breq));
 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 		if (len < sizeof(breq))
 			break;
 
 		strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname,
 		    sizeof(breq.ifbr_ifsname));
 		/* Fill in the ifbreq structure */
 		/*
 		 * NOTE(review): an error here only ends this loop; the value
 		 * is overwritten by the copyout() result below.
 		 */
 		error = bridge_ioctl_gifflags(sc, &breq);
 		if (error)
 			break;
 		memcpy(buf, &breq, sizeof(breq));
 		count++;
 		buf += sizeof(breq);
 		len -= sizeof(breq);
 	}
 	/* Span ports report only their name, flags and port number. */
 	LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
 		if (len < sizeof(breq))
 			break;
 
 		strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname,
 		    sizeof(breq.ifbr_ifsname));
 		breq.ifbr_ifsflags = bif->bif_flags;
 		breq.ifbr_portno = bif->bif_ifp->if_index & 0xfff;
 		memcpy(buf, &breq, sizeof(breq));
 		count++;
 		buf += sizeof(breq);
 		len -= sizeof(breq);
 	}
 
 	/* copyout() is also done unlocked; the lock is retaken for return. */
 	BRIDGE_UNLOCK(sc);
 	bifc->ifbic_len = sizeof(breq) * count;
 	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);
 	BRIDGE_LOCK(sc);
 	free(outbuf, M_TEMP);
 	return (error);
 }
 
 /*
  * Copy one struct ifbareq per routing table entry out to the buffer
  * described by the request (a struct ifbaconf).
  *
  * Returns 0 immediately when ifbac_len is zero.  Otherwise ifbac_len is
  * set to the number of bytes produced and the result of copyout() is
  * returned.  Entered with the bridge lock held; the lock is dropped
  * around the allocation and around copyout().
  */
 static int
 bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
 {
 	struct ifbaconf *bac = arg;
 	struct bridge_rtnode *brt;
 	struct ifbareq bareq;
 	char *buf, *outbuf;
 	int count, buflen, len, error = 0;
 
 	if (bac->ifbac_len == 0)
 		return (0);
 
 	/* First pass: count the entries to size the temporary buffer. */
 	count = 0;
 	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)
 		count++;
 	buflen = sizeof(bareq) * count;
 
 	/* The M_WAITOK allocation is made without the bridge lock. */
 	BRIDGE_UNLOCK(sc);
 	outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 	BRIDGE_LOCK(sc);
 
 	/*
 	 * Second pass: fill the buffer.  len is capped at buflen and the
 	 * loop stops once fewer than sizeof(bareq) bytes remain.
 	 */
 	count = 0;
 	buf = outbuf;
 	len = min(bac->ifbac_len, buflen);
 	bzero(&bareq, sizeof(bareq));
 	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
 		if (len < sizeof(bareq))
 			goto out;
 		strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
 		    sizeof(bareq.ifba_ifsname));
 		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
 		bareq.ifba_vlan = brt->brt_vlan;
 		/*
 		 * Dynamic entries that have not yet expired report the time
 		 * left; everything else reports 0.
 		 */
 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
 				time_uptime < brt->brt_expire)
 			bareq.ifba_expire = brt->brt_expire - time_uptime;
 		else
 			bareq.ifba_expire = 0;
 		bareq.ifba_flags = brt->brt_flags;
 
 		memcpy(buf, &bareq, sizeof(bareq));
 		count++;
 		buf += sizeof(bareq);
 		len -= sizeof(bareq);
 	}
 out:
 	/* copyout() is also done unlocked; the lock is retaken for return. */
 	BRIDGE_UNLOCK(sc);
 	bac->ifbac_len = sizeof(bareq) * count;
 	error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);
 	BRIDGE_LOCK(sc);
 	free(outbuf, M_TEMP);
 	return (error);
 }
 
 /*
  * Pass the address, VLAN and flags from the request (a struct ifbareq)
  * to bridge_rtupdate() for the named member.  Returns ENOENT when the
  * name is not a member, otherwise the result of bridge_rtupdate().
  */
 static int
 bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
 {
 	struct ifbareq *req;
 	struct bridge_iflist *member;
 
 	req = arg;
 	member = bridge_lookup_member(sc, req->ifba_ifsname);
 	if (member == NULL)
 		return (ENOENT);
 
 	return (bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, member, 1,
 	    req->ifba_flags));
 }
 
 /*
  * Store ifbrp_ctime from the request (a struct ifbrparam) in
  * sc_brttimeout.  Always returns 0.
  */
 static int
 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *brp;
 
 	brp = arg;
 	sc->sc_brttimeout = brp->ifbrp_ctime;
 
 	return (0);
 }
 
 /*
  * Report sc_brttimeout in ifbrp_ctime of the request (a struct
  * ifbrparam).  Always returns 0.
  */
 static int
 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *brp;
 
 	brp = arg;
 	brp->ifbrp_ctime = sc->sc_brttimeout;
 
 	return (0);
 }
 
 static int
 bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
 {
 	struct ifbareq *req = arg;
 
 	return (bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan));
 }
 
 /*
  * Call bridge_rtflush() with the flags carried in ifbr_ifsflags of the
  * request (a struct ifbreq).  Always returns 0.
  */
 static int
 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
 {
 	struct ifbreq *req;
 
 	req = arg;
 	bridge_rtflush(sc, req->ifbr_ifsflags);
 
 	return (0);
 }
 
 /*
  * Report the spanning tree bs_bridge_priority in ifbrp_prio of the
  * request (a struct ifbrparam).  Always returns 0.
  */
 static int
 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *brp;
 
 	brp = arg;
 	brp->ifbrp_prio = sc->sc_stp.bs_bridge_priority;
 
 	return (0);
 }
 
 static int
 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *param = arg;
 
 	return (bstp_set_priority(&sc->sc_stp, param->ifbrp_prio));
 }
 
 /*
  * Report bs_bridge_htime, shifted right by 8 bits, in ifbrp_hellotime
  * of the request (a struct ifbrparam).  Always returns 0.
  */
 static int
 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *brp;
 
 	brp = arg;
 	brp->ifbrp_hellotime = sc->sc_stp.bs_bridge_htime >> 8;
 
 	return (0);
 }
 
 static int
 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *param = arg;
 
 	return (bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime));
 }
 
 /*
  * Report bs_bridge_fdelay, shifted right by 8 bits, in ifbrp_fwddelay
  * of the request (a struct ifbrparam).  Always returns 0.
  */
 static int
 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *brp;
 
 	brp = arg;
 	brp->ifbrp_fwddelay = sc->sc_stp.bs_bridge_fdelay >> 8;
 
 	return (0);
 }
 
 static int
 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *param = arg;
 
 	return (bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay));
 }
 
 /*
  * Report bs_bridge_max_age, shifted right by 8 bits, in ifbrp_maxage
  * of the request (a struct ifbrparam).  Always returns 0.
  */
 static int
 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *brp;
 
 	brp = arg;
 	brp->ifbrp_maxage = sc->sc_stp.bs_bridge_max_age >> 8;
 
 	return (0);
 }
 
 static int
 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *param = arg;
 
 	return (bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage));
 }
 
 /*
  * Pass ifbr_priority from the request (a struct ifbreq) to
  * bstp_set_port_priority() for the named member.  Returns ENOENT when
  * the name is not a member, otherwise that call's result.
  */
 static int
 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
 {
 	struct ifbreq *req;
 	struct bridge_iflist *member;
 
 	req = arg;
 	member = bridge_lookup_member(sc, req->ifbr_ifsname);
 	if (member != NULL)
 		return (bstp_set_port_priority(&member->bif_stp,
 		    req->ifbr_priority));
 
 	return (ENOENT);
 }
 
 /*
  * Pass ifbr_path_cost from the request (a struct ifbreq) to
  * bstp_set_path_cost() for the named member.  Returns ENOENT when the
  * name is not a member, otherwise that call's result.
  */
 static int
 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
 {
 	struct ifbreq *req;
 	struct bridge_iflist *member;
 
 	req = arg;
 	member = bridge_lookup_member(sc, req->ifbr_ifsname);
 	if (member != NULL)
 		return (bstp_set_path_cost(&member->bif_stp,
 		    req->ifbr_path_cost));
 
 	return (ENOENT);
 }
 
 /*
  * Store ifbr_addrmax from the request (a struct ifbreq) in the named
  * member's bif_addrmax.  Returns ENOENT when the name is not a member.
  */
 static int
 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
 {
 	struct ifbreq *req;
 	struct bridge_iflist *member;
 
 	req = arg;
 	member = bridge_lookup_member(sc, req->ifbr_ifsname);
 	if (member != NULL) {
 		member->bif_addrmax = req->ifbr_addrmax;
 		return (0);
 	}
 
 	return (ENOENT);
 }
 
 static int
 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
 {
 	struct ifbreq *req = arg;
 	struct bridge_iflist *bif = NULL;
 	struct ifnet *ifs;
 
 	ifs = ifunit(req->ifbr_ifsname);
 	if (ifs == NULL)
 		return (ENOENT);
 
 	LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
 		if (ifs == bif->bif_ifp)
 			return (EBUSY);
 
 	if (ifs->if_bridge != NULL)
 		return (EBUSY);
 
 	switch (ifs->if_type) {
 		case IFT_ETHER:
 		case IFT_GIF:
 		case IFT_L2VLAN:
 			break;
 		default:
 			return (EINVAL);
 	}
 
 	bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
 	if (bif == NULL)
 		return (ENOMEM);
 
 	bif->bif_ifp = ifs;
 	bif->bif_flags = IFBIF_SPAN;
 
 	LIST_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
 
 	return (0);
 }
 
 static int
 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
 {
 	struct ifbreq *req = arg;
 	struct bridge_iflist *bif;
 	struct ifnet *ifs;
 
 	ifs = ifunit(req->ifbr_ifsname);
 	if (ifs == NULL)
 		return (ENOENT);
 
 	LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
 		if (ifs == bif->bif_ifp)
 			break;
 
 	if (bif == NULL)
 		return (ENOENT);
 
 	bridge_delete_span(sc, bif);
 
 	return (0);
 }
 
 /*
  * Fill the request (a struct ifbropreq) from the bridge's spanning tree
  * state.  The three timer values are reported shifted right by 8 bits;
  * the root port is reported as 0 when there is none.  Always returns 0.
  */
 static int
 bridge_ioctl_gbparam(struct bridge_softc *sc, void *arg)
 {
 	struct ifbropreq *req;
 	struct bstp_state *stp;
 	struct bstp_port *root;
 
 	req = arg;
 	stp = &sc->sc_stp;
 	root = stp->bs_root_port;
 
 	/* Timers. */
 	req->ifbop_maxage = stp->bs_bridge_max_age >> 8;
 	req->ifbop_hellotime = stp->bs_bridge_htime >> 8;
 	req->ifbop_fwddelay = stp->bs_bridge_fdelay >> 8;
 
 	/* Interface index of the root port, if any. */
 	if (root != NULL)
 		req->ifbop_root_port = root->bp_ifp->if_index;
 	else
 		req->ifbop_root_port = 0;
 
 	/* Remaining bridge-wide parameters. */
 	req->ifbop_holdcount = stp->bs_txholdcount;
 	req->ifbop_priority = stp->bs_bridge_priority;
 	req->ifbop_protocol = stp->bs_protover;
 	req->ifbop_bridgeid = stp->bs_bridge_pv.pv_dbridge_id;
 	req->ifbop_root_path_cost = stp->bs_root_pv.pv_cost;
 	req->ifbop_designated_root = stp->bs_root_pv.pv_root_id;
 	req->ifbop_designated_bridge = stp->bs_root_pv.pv_dbridge_id;
 	req->ifbop_last_tc_time.tv_sec = stp->bs_last_tc_time.tv_sec;
 	req->ifbop_last_tc_time.tv_usec = stp->bs_last_tc_time.tv_usec;
 
 	return (0);
 }
 
 /*
  * Report sc_brtexceeded in ifbrp_cexceeded of the request (a struct
  * ifbrparam).  Always returns 0.
  */
 static int
 bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *brp;
 
 	brp = arg;
 	brp->ifbrp_cexceeded = sc->sc_brtexceeded;
 
 	return (0);
 }
 
 /*
  * Copy one struct ifbpstpreq per member that has IFBIF_STP set out to
  * the buffer described by the request (a struct ifbpstpconf).
  *
  * When ifbpstp_len is zero only the buffer size needed is stored back.
  * Otherwise ifbpstp_len is set to the number of bytes produced and the
  * result of copyout() is returned.  Entered with the bridge lock held;
  * the lock is dropped around the allocation and around copyout().
  */
 static int
 bridge_ioctl_gifsstp(struct bridge_softc *sc, void *arg)
 {
 	struct ifbpstpconf *bifstp = arg;
 	struct bridge_iflist *bif;
 	struct bstp_port *bp;
 	struct ifbpstpreq bpreq;
 	char *buf, *outbuf;
 	int count, buflen, len, error = 0;
 
 	/* First pass: count the STP members to size the buffer. */
 	count = 0;
 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 		if ((bif->bif_flags & IFBIF_STP) != 0)
 			count++;
 	}
 
 	buflen = sizeof(bpreq) * count;
 	if (bifstp->ifbpstp_len == 0) {
 		bifstp->ifbpstp_len = buflen;
 		return (0);
 	}
 
 	/* The M_WAITOK allocation is made without the bridge lock. */
 	BRIDGE_UNLOCK(sc);
 	outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 	BRIDGE_LOCK(sc);
 
 	/*
 	 * Second pass: fill the buffer.  len is capped at buflen and the
 	 * loop stops once fewer than sizeof(bpreq) bytes remain.
 	 */
 	count = 0;
 	buf = outbuf;
 	len = min(bifstp->ifbpstp_len, buflen);
 	bzero(&bpreq, sizeof(bpreq));
 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 		if (len < sizeof(bpreq))
 			break;
 
 		/* Members without spanning tree are not reported. */
 		if ((bif->bif_flags & IFBIF_STP) == 0)
 			continue;
 
 		bp = &bif->bif_stp;
 		bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;
 		bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;
 		bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;
 		bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;
 		bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id;
 		bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;
 
 		memcpy(buf, &bpreq, sizeof(bpreq));
 		count++;
 		buf += sizeof(bpreq);
 		len -= sizeof(bpreq);
 	}
 
 	/* copyout() is also done unlocked; the lock is retaken for return. */
 	BRIDGE_UNLOCK(sc);
 	bifstp->ifbpstp_len = sizeof(bpreq) * count;
 	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len);
 	BRIDGE_LOCK(sc);
 	free(outbuf, M_TEMP);
 	return (error);
 }
 
 static int
 bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *param = arg;
 
 	return (bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto));
 }
 
 static int
 bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *param = arg;
 
 	return (bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc));
 }
 
 /*
  * bridge_ifdetach:
  *
  *	Called when an interface is detaching.  If it is a bridge member
  *	it is removed from its bridge; otherwise every bridge is searched
  *	and the interface is dropped from any span list that holds it.
  */
 static void
 bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
 {
 	struct bridge_softc *sc;
 	struct bridge_iflist *bif;
 
 	sc = ifp->if_bridge;
 
 	/*
 	 * Nothing to do for an interface that is only being renamed, nor
 	 * once the cloner is gone: this detach handler can be called after
 	 * vnet_bridge_uninit().
 	 */
 	if ((ifp->if_flags & IFF_RENAMING) != 0 || V_bridge_cloner == NULL)
 		return;
 
 	if (sc == NULL) {
 		/* Not a member; check if the interface is a span port. */
 		BRIDGE_LIST_LOCK();
 		LIST_FOREACH(sc, &V_bridge_list, sc_list) {
 			BRIDGE_LOCK(sc);
 			LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
 				if (bif->bif_ifp != ifp)
 					continue;
 				bridge_delete_span(sc, bif);
 				break;
 			}
 			BRIDGE_UNLOCK(sc);
 		}
 		BRIDGE_LIST_UNLOCK();
 		return;
 	}
 
 	/* The interface is a bridge member: remove it as "gone". */
 	BRIDGE_LOCK(sc);
 	bif = bridge_lookup_member_if(sc, ifp);
 	if (bif != NULL)
 		bridge_delete_member(sc, bif, 1);
 	BRIDGE_UNLOCK(sc);
 }
 
 /*
  * bridge_init:
  *
  *	Bring the bridge interface up: arm the sc_brcallout callout, mark
  *	the interface running and initialize spanning tree.  Does nothing
  *	when the interface is already running.
  */
 static void
 bridge_init(void *xsc)
 {
 	struct bridge_softc *sc;
 	struct ifnet *ifp;
 
 	sc = (struct bridge_softc *)xsc;
 	ifp = sc->sc_ifp;
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 		return;
 
 	BRIDGE_LOCK(sc);
 
 	/* bridge_timer() fires after bridge_rtable_prune_period * hz ticks. */
 	callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
 	    bridge_timer, sc);
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 
 	/* Initialize Spanning Tree */
 	bstp_init(&sc->sc_stp);
 
 	BRIDGE_UNLOCK(sc);
 }
 
 /*
  * bridge_stop:
  *
  *	Stop a running bridge interface: cancel the sc_brcallout callout,
  *	stop spanning tree, flush the dynamic routing entries and clear
  *	the running flag.  The bridge lock must be held.  The `disable'
  *	argument is not used.
  */
 static void
 bridge_stop(struct ifnet *ifp, int disable)
 {
 	struct bridge_softc *sc;
 
 	sc = ifp->if_softc;
 	BRIDGE_LOCK_ASSERT(sc);
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 		callout_stop(&sc->sc_brcallout);
 		bstp_stop(&sc->sc_stp);
 
 		bridge_rtflush(sc, IFBF_FLUSHDYN);
 
 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	}
 }
 
 /*
  * bridge_enqueue:
  *
  *	Enqueue a packet on a bridge member interface.
  *
  *	`m' may head a list of packets linked through m_nextpkt (for
  *	example fragments); each is handed to dst_ifp's if_transmit routine
  *	in turn.  Returns 0, or the error of the first if_transmit call that
  *	fails.  In that case every packet not yet handed over is freed and
  *	counted as an output error on the bridge interface.
  */
 static int
 bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
 {
 	int len, err = 0;
 	short mflags;
 	struct mbuf *m0;
 
 	/* We may be sending a fragment so traverse the mbuf */
 	for (; m; m = m0) {
 		/* Detach this packet from the list before passing it on. */
 		m0 = m->m_nextpkt;
 		m->m_nextpkt = NULL;
 		/* Saved now: the mbuf is no longer ours after if_transmit. */
 		len = m->m_pkthdr.len;
 		mflags = m->m_flags;
 
 		/*
 		 * If underlying interface can not do VLAN tag insertion itself
 		 * then attach a packet tag that holds it.
 		 */
 		if ((m->m_flags & M_VLANTAG) &&
 		    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
 			m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
 			if (m == NULL) {
 				if_printf(dst_ifp,
 				    "unable to prepend VLAN header\n");
 				if_inc_counter(dst_ifp, IFCOUNTER_OERRORS, 1);
 				continue;
 			}
 			m->m_flags &= ~M_VLANTAG;
 		}
 
 		if ((err = dst_ifp->if_transmit(dst_ifp, m))) {
 			int n;
 
 			/*
 			 * Drop everything still queued behind the failed
 			 * packet.  m_freem() only follows m_next, so the
 			 * m_nextpkt list has to be walked by hand; freeing
 			 * m0 alone would leak the packets after it.  n
 			 * starts at 1 to account for the failed packet.
 			 */
 			for (m = m0, n = 1; m != NULL; m = m0, n++) {
 				m0 = m->m_nextpkt;
 				m_freem(m);
 			}
 			if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, n);
 			break;
 		}
 
 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len);
 		if (mflags & M_MCAST)
 			if_inc_counter(sc->sc_ifp, IFCOUNTER_OMCASTS, 1);
 	}
 
 	return (err);
 }
 
 /*
  * bridge_dummynet:
  *
  *	Receive a queued packet from dummynet and pass it on to the output
  *	interface, running it through bridge_pfil() first when a pfil hook
  *	is present.
  *
  *	The mbuf has the Ethernet header already attached.
  */
 static void
 bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
 {
 	struct bridge_softc *sc;
 	int hooked;
 
 	sc = ifp->if_bridge;
 	if (sc == NULL) {
 		/*
 		 * The packet didn't originate from a member interface.  This
 		 * should only ever happen if a member interface is removed
 		 * while packets are queued for it.
 		 */
 		m_freem(m);
 		return;
 	}
 
 	hooked = PFIL_HOOKED(&V_inet_pfil_hook);
 #ifdef INET6
 	if (!hooked)
 		hooked = PFIL_HOOKED(&V_inet6_pfil_hook);
 #endif
 
 	/* Stop here if the filter fails or leaves no packet behind. */
 	if (hooked &&
 	    (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0 || m == NULL))
 		return;
 
 	bridge_enqueue(sc, ifp, m);
 }
 
 /*
  * bridge_output:
  *
  *	Send output from a bridge member interface.  This
  *	performs the bridging function for locally originated
  *	packets.
  *
  *	The mbuf has the Ethernet header already attached.  We must
  *	enqueue or free the mbuf before returning.
  *
  *	Always returns 0.  The `sa' and `rt' arguments are not used.
  */
 static int
 bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
     struct rtentry *rt)
 {
 	struct ether_header *eh;
 	struct ifnet *dst_if;
 	struct bridge_softc *sc;
 	uint16_t vlan;
 
 	/* Make sure the Ethernet header is in the first mbuf. */
 	if (m->m_len < ETHER_HDR_LEN) {
 		m = m_pullup(m, ETHER_HDR_LEN);
 		if (m == NULL)
 			return (0);
 	}
 
 	eh = mtod(m, struct ether_header *);
 	sc = ifp->if_bridge;
 	vlan = VLANTAGOF(m);
 
 	BRIDGE_LOCK(sc);
 
 	/*
 	 * If bridge is down, but the original output interface is up,
 	 * go ahead and send out that interface.  Otherwise, the packet
 	 * is dropped below.
 	 */
 	if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		dst_if = ifp;
 		goto sendunicast;
 	}
 
 	/*
 	 * If the packet is a multicast, or we don't know a better way to
 	 * get there, send to all interfaces.
 	 */
 	if (ETHER_IS_MULTICAST(eh->ether_dhost))
 		dst_if = NULL;
 	else
 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
 	if (dst_if == NULL) {
 		struct bridge_iflist *bif;
 		struct mbuf *mc;
 		int error = 0, used = 0;
 
 		bridge_span(sc, m);
 
 		/*
 		 * NOTE(review): BRIDGE_LOCK2REF presumably trades the held
 		 * lock for a reference released by BRIDGE_UNREF below --
 		 * confirm against the macro definition.
 		 */
 		BRIDGE_LOCK2REF(sc, error);
 		if (error) {
 			m_freem(m);
 			return (0);
 		}
 
 		LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 			dst_if = bif->bif_ifp;
 
 			/* GIF members and members not running are skipped. */
 			if (dst_if->if_type == IFT_GIF)
 				continue;
 			if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
 				continue;
 
 			/*
 			 * If this is not the original output interface,
 			 * and the interface is participating in spanning
 			 * tree, make sure the port is in a state that
 			 * allows forwarding.
 			 */
 			if (dst_if != ifp && (bif->bif_flags & IFBIF_STP) &&
 			    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
 				continue;
 
 			/*
 			 * The last list entry gets the original mbuf; every
 			 * other recipient gets a copy.
 			 */
 			if (LIST_NEXT(bif, bif_next) == NULL) {
 				used = 1;
 				mc = m;
 			} else {
 				mc = m_copypacket(m, M_NOWAIT);
 				if (mc == NULL) {
 					if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 					continue;
 				}
 			}
 
 			bridge_enqueue(sc, dst_if, mc);
 		}
 		/* Free the original if no member ended up consuming it. */
 		if (used == 0)
 			m_freem(m);
 		BRIDGE_UNREF(sc);
 		return (0);
 	}
 
 sendunicast:
 	/*
 	 * XXX Spanning tree consideration here?
 	 */
 
 	bridge_span(sc, m);
 	/* Drop the packet if the destination interface is not running. */
 	if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		m_freem(m);
 		BRIDGE_UNLOCK(sc);
 		return (0);
 	}
 
 	/* The lock is released before the packet is enqueued. */
 	BRIDGE_UNLOCK(sc);
 	bridge_enqueue(sc, dst_if, m);
 	return (0);
 }
 
 /*
  * bridge_transmit:
  *
  *	Do output on a bridge.  A packet that is neither broadcast nor
  *	multicast and whose destination is found by bridge_rtlookup() is
  *	enqueued on that interface; anything else goes to
  *	bridge_broadcast().
  *
  */
 static int
 bridge_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	struct bridge_softc *sc;
 	struct ether_header *eh;
 	struct ifnet *dst_if;
 
 	sc = ifp->if_softc;
 	dst_if = NULL;
 
 	ETHER_BPF_MTAP(ifp, m);
 
 	eh = mtod(m, struct ether_header *);
 
 	BRIDGE_LOCK(sc);
 	if ((m->m_flags & (M_BCAST|M_MCAST)) == 0)
 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1);
 
 	if (dst_if == NULL) {
 		/* The bridge lock is not released on this path here. */
 		bridge_broadcast(sc, ifp, m, 0);
 		return (0);
 	}
 
 	BRIDGE_UNLOCK(sc);
 	return (bridge_enqueue(sc, dst_if, m));
 }
 
 /*
  * The ifp->if_qflush entry point for if_bridge(4) is a no-op: the
  * function body is intentionally empty and its argument is unused.
  */
 static void
 bridge_qflush(struct ifnet *ifp __unused)
 {
 }
 
 /*
  * bridge_forward:
  *
  *	The forwarding function of the bridge.
  *
  *	sc	bridge the frame is travelling through
  *	sbif	member the frame arrived on; its flags and spanning tree
  *		state decide whether the source is learned and whether
  *		the frame may be forwarded at all
  *	m	the frame; it is always consumed (queued on a member,
  *		handed to bridge_broadcast(), taken by the packet filter
  *		or freed)
  *
  *	NOTE: Releases the lock on return.  Every exit path either calls
  *	BRIDGE_UNLOCK() itself or returns through bridge_broadcast().
  */
 static void
 bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
     struct mbuf *m)
 {
 	struct bridge_iflist *dbif;
 	struct ifnet *src_if, *dst_if, *ifp;
 	struct ether_header *eh;
 	uint16_t vlan;
 	uint8_t *dst;
 	int error;
 
 	src_if = m->m_pkthdr.rcvif;
 	ifp = sc->sc_ifp;
 
 	/* Input statistics are charged to the bridge interface itself. */
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	vlan = VLANTAGOF(m);
 
 	/* A discarding port neither learns nor forwards. */
 	if ((sbif->bif_flags & IFBIF_STP) &&
 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
 		goto drop;
 
 	eh = mtod(m, struct ether_header *);
 	dst = eh->ether_dhost;
 
 	/* If the interface is learning, record the address. */
 	if (sbif->bif_flags & IFBIF_LEARNING) {
 		error = bridge_rtupdate(sc, eh->ether_shost, vlan,
 		    sbif, 0, IFBAF_DYNAMIC);
 		/*
 		 * If the interface has address limits then deny any source
 		 * that is not in the cache.
 		 */
 		if (error && sbif->bif_addrmax)
 			goto drop;
 	}
 
 	/* A port in the STP learning state learns (above) but does not forward. */
 	if ((sbif->bif_flags & IFBIF_STP) != 0 &&
 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING)
 		goto drop;
 
 	/*
 	 * At this point, the port either doesn't participate
 	 * in spanning tree or it is in the forwarding state.
 	 */
 
 	/*
 	 * If the packet is unicast, destined for someone on
 	 * "this" side of the bridge, drop it.
 	 */
 	if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
 		/* dst_if stays NULL here when the destination is unknown. */
 		dst_if = bridge_rtlookup(sc, dst, vlan);
 		if (src_if == dst_if)
 			goto drop;
 	} else {
 		/*
 		 * Check if it's a reserved multicast address, any address
 		 * listed in 802.1D section 7.12.6 may not be forwarded by the
 		 * bridge.
 		 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
 		 */
 		if (dst[0] == 0x01 && dst[1] == 0x80 &&
 		    dst[2] == 0xc2 && dst[3] == 0x00 &&
 		    dst[4] == 0x00 && dst[5] <= 0x0f)
 			goto drop;
 
 		/* ...forward it to all interfaces. */
 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 		dst_if = NULL;
 	}
 
 	/*
 	 * If we have a destination interface which is a member of our bridge,
 	 * OR this is a unicast packet, push it through the bpf(4) machinery.
 	 * For broadcast or multicast packets, don't bother because it will
 	 * be reinjected into ether_input. We do this before we pass the packets
 	 * through the pfil(9) framework, as it is possible that pfil(9) will
 	 * drop the packet, or possibly modify it, making it difficult to debug
 	 * firewall issues on the bridge.
 	 */
 	if (dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0)
 		ETHER_BPF_MTAP(ifp, m);
 
 	/* run the packet filter */
 	if (PFIL_HOOKED(&V_inet_pfil_hook)
 #ifdef INET6
 	    || PFIL_HOOKED(&V_inet6_pfil_hook)
 #endif
 	    ) {
 		/*
 		 * The lock is dropped around the inbound filter.  When the
 		 * filter rejects or consumes the frame we return with the
 		 * lock already released; otherwise it is re-acquired.
 		 */
 		BRIDGE_UNLOCK(sc);
 		if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
 			return;
 		if (m == NULL)
 			return;
 		BRIDGE_LOCK(sc);
 	}
 
 	if (dst_if == NULL) {
 		/* No single destination; bridge_broadcast() drops the lock. */
 		bridge_broadcast(sc, src_if, m, 1);
 		return;
 	}
 
 	/*
 	 * At this point, we're dealing with a unicast frame
 	 * going to a different interface.
 	 */
 	if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		goto drop;
 
 	dbif = bridge_lookup_member_if(sc, dst_if);
 	if (dbif == NULL)
 		/* Not a member of the bridge (anymore?) */
 		goto drop;
 
 	/* Private segments can not talk to each other */
 	if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)
 		goto drop;
 
 	if ((dbif->bif_flags & IFBIF_STP) &&
 	    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
 		goto drop;
 
 	/* All table lookups are done; the rest runs without the lock. */
 	BRIDGE_UNLOCK(sc);
 
 	if (PFIL_HOOKED(&V_inet_pfil_hook)
 #ifdef INET6
 	    || PFIL_HOOKED(&V_inet6_pfil_hook)
 #endif
 	    ) {
 		/* Outbound filter on the destination member. */
 		if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
 			return;
 		if (m == NULL)
 			return;
 	}
 
 	bridge_enqueue(sc, dst_if, m);
 	return;
 
 drop:
 	/* Only reached with the lock held. */
 	BRIDGE_UNLOCK(sc);
 	m_freem(m);
 }
 
 /*
  * bridge_input:
  *
  *	Receive input from a member interface.  Queue the packet for
  *	bridging if it is not for us.
  *
  *	ifp	member interface the frame arrived on
  *	m	the received frame
  *
  *	Returns the mbuf when the caller should continue processing it
  *	locally, or NULL when the frame has been consumed here (forwarded,
  *	dropped, or handed to the spanning tree code).
  */
 static struct mbuf *
 bridge_input(struct ifnet *ifp, struct mbuf *m)
 {
 	struct bridge_softc *sc = ifp->if_bridge;
 	struct bridge_iflist *bif, *bif2;
 	struct ifnet *bifp;
 	struct ether_header *eh;
 	struct mbuf *mc, *mc2;
 	uint16_t vlan;
 	int error;
 
 	/* A bridge that is not running leaves the frame to the caller. */
 	if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return (m);
 
 	bifp = sc->sc_ifp;
 	vlan = VLANTAGOF(m);
 
 	/*
 	 * Implement support for bridge monitoring. If this flag has been
 	 * set on this interface, discard the packet once we push it through
 	 * the bpf(4) machinery, but before we do, increment the byte and
 	 * packet counters associated with this interface.
 	 */
 	if ((bifp->if_flags & IFF_MONITOR) != 0) {
 		m->m_pkthdr.rcvif  = bifp;
 		ETHER_BPF_MTAP(bifp, m);
 		if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1);
 		if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 		m_freem(m);
 		return (NULL);
 	}
 	BRIDGE_LOCK(sc);
 	bif = bridge_lookup_member_if(sc, ifp);
 	if (bif == NULL) {
 		/* Not one of our members; hand the frame back untouched. */
 		BRIDGE_UNLOCK(sc);
 		return (m);
 	}
 
 	eh = mtod(m, struct ether_header *);
 
 	/* Copy the frame to any span ports before deciding its fate. */
 	bridge_span(sc, m);
 
 	if (m->m_flags & (M_BCAST|M_MCAST)) {
 		/* Tap off 802.1D packets; they do not get forwarded. */
 		if (memcmp(eh->ether_dhost, bstp_etheraddr,
 		    ETHER_ADDR_LEN) == 0) {
 			bstp_input(&bif->bif_stp, ifp, m); /* consumes mbuf */
 			BRIDGE_UNLOCK(sc);
 			return (NULL);
 		}
 
 		/* A discarding port does not forward; process locally only. */
 		if ((bif->bif_flags & IFBIF_STP) &&
 		    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
 			BRIDGE_UNLOCK(sc);
 			return (m);
 		}
 
 		/*
 		 * Make a deep copy of the packet and enqueue the copy
 		 * for bridge processing; return the original packet for
 		 * local processing.
 		 */
 		mc = m_dup(m, M_NOWAIT);
 		if (mc == NULL) {
 			BRIDGE_UNLOCK(sc);
 			return (m);
 		}
 
 		/*
 		 * Perform the bridge forwarding function with the copy.
 		 * bridge_forward() releases the bridge lock, so everything
 		 * below runs unlocked.
 		 */
 		bridge_forward(sc, bif, mc);
 
 		/*
 		 * Reinject the mbuf as arriving on the bridge so we have a
 		 * chance at claiming multicast packets. We can not loop back
 		 * here from ether_input as a bridge is never a member of a
 		 * bridge.
 		 */
 		KASSERT(bifp->if_bridge == NULL,
 		    ("loop created in bridge_input"));
 		mc2 = m_dup(m, M_NOWAIT);
 		if (mc2 != NULL) {
 			/* Keep the layer3 header aligned */
 			int i = min(mc2->m_pkthdr.len, max_protohdr);
 			mc2 = m_copyup(mc2, i, ETHER_ALIGN);
 		}
 		/* Checked again: m_copyup() may have returned NULL. */
 		if (mc2 != NULL) {
 			mc2->m_pkthdr.rcvif = bifp;
 			(*bifp->if_input)(bifp, mc2);
 		}
 
 		/* Return the original packet for local processing. */
 		return (m);
 	}
 
 	/* Unicast from here on; same discarding-port rule as above. */
 	if ((bif->bif_flags & IFBIF_STP) &&
 	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
 		BRIDGE_UNLOCK(sc);
 		return (m);
 	}
 
 	/*
 	 * Helper macros for GRAB_OUR_PACKETS below.  The OR_* macros expand
 	 * to an additional "|| ..." clause, or to nothing when the relevant
 	 * protocol support is compiled out.
 	 */
 #if (defined(INET) || defined(INET6))
 #   define OR_CARP_CHECK_WE_ARE_DST(iface) \
 	|| ((iface)->if_carp \
 	    && (*carp_forus_p)((iface), eh->ether_dhost))
 #   define OR_CARP_CHECK_WE_ARE_SRC(iface) \
 	|| ((iface)->if_carp \
 	    && (*carp_forus_p)((iface), eh->ether_shost))
 #else
 #   define OR_CARP_CHECK_WE_ARE_DST(iface)
 #   define OR_CARP_CHECK_WE_ARE_SRC(iface)
 #endif
 
 #ifdef INET6
 #   define OR_PFIL_HOOKED_INET6 \
 	|| PFIL_HOOKED(&V_inet6_pfil_hook)
 #else
 #   define OR_PFIL_HOOKED_INET6
 #endif
 
 	/*
 	 * GRAB_OUR_PACKETS(iface) tests the frame against one interface and
 	 * returns from bridge_input() (with the lock released) when:
 	 *  - the destination MAC is iface's address: the frame is claimed,
 	 *    rcvif is rewritten to iface and the mbuf is returned for local
 	 *    delivery (after optional filtering and source learning); or
 	 *  - the source MAC is iface's address: the frame is one we sent
 	 *    ourselves and is freed.
 	 * IFT_GIF interfaces are skipped with "continue", so the macro must
 	 * be expanded inside a loop; the do { } while (0) wrappers below
 	 * provide one for the single-interface uses.
 	 */
 #define GRAB_OUR_PACKETS(iface) \
 	if ((iface)->if_type == IFT_GIF) \
 		continue; \
 	/* It is destined for us. */ \
 	if (memcmp(IF_LLADDR((iface)), eh->ether_dhost,  ETHER_ADDR_LEN) == 0 \
 	    OR_CARP_CHECK_WE_ARE_DST((iface))				\
 	    ) {								\
 		if ((iface)->if_type == IFT_BRIDGE) {			\
 			ETHER_BPF_MTAP(iface, m);			\
 			if_inc_counter(iface, IFCOUNTER_IPACKETS, 1);				\
 			if_inc_counter(iface, IFCOUNTER_IBYTES, m->m_pkthdr.len);		\
 			/* Filter on the physical interface. */		\
 			if (V_pfil_local_phys &&			\
 			    (PFIL_HOOKED(&V_inet_pfil_hook)		\
 			     OR_PFIL_HOOKED_INET6)) {			\
 				if (bridge_pfil(&m, NULL, ifp,		\
 				    PFIL_IN) != 0 || m == NULL) {	\
 					BRIDGE_UNLOCK(sc);		\
 					return (NULL);			\
 				}					\
 				eh = mtod(m, struct ether_header *);	\
 			}						\
 		}							\
 		if (bif->bif_flags & IFBIF_LEARNING) {			\
 			error = bridge_rtupdate(sc, eh->ether_shost,	\
 			    vlan, bif, 0, IFBAF_DYNAMIC);		\
 			if (error && bif->bif_addrmax) {		\
 				BRIDGE_UNLOCK(sc);			\
 				m_freem(m);				\
 				return (NULL);				\
 			}						\
 		}							\
 		m->m_pkthdr.rcvif = iface;				\
 		BRIDGE_UNLOCK(sc);					\
 		return (m);						\
 	}								\
 									\
 	/* We just received a packet that we sent out. */		\
 	if (memcmp(IF_LLADDR((iface)), eh->ether_shost, ETHER_ADDR_LEN) == 0 \
 	    OR_CARP_CHECK_WE_ARE_SRC((iface))			\
 	    ) {								\
 		BRIDGE_UNLOCK(sc);					\
 		m_freem(m);						\
 		return (NULL);						\
 	}
 
 	/*
 	 * Unicast.  Make sure it's not for the bridge.
 	 */
 	do { GRAB_OUR_PACKETS(bifp) } while (0);
 
 	/*
 	 * Give a chance for ifp at first priority. This will help when	the
 	 * packet comes through the interface like VLAN's with the same MACs
 	 * on several interfaces from the same bridge. This also will save
 	 * some CPU cycles in case the destination interface and the input
 	 * interface (eq ifp) are the same.
 	 */
 	do { GRAB_OUR_PACKETS(ifp) } while (0);
 
 	/* Now check all the bridge members. */
 	LIST_FOREACH(bif2, &sc->sc_iflist, bif_next) {
 		GRAB_OUR_PACKETS(bif2->bif_ifp)
 	}
 
 #undef OR_CARP_CHECK_WE_ARE_DST
 #undef OR_CARP_CHECK_WE_ARE_SRC
 #undef OR_PFIL_HOOKED_INET6
 #undef GRAB_OUR_PACKETS
 
 	/*
 	 * Perform the bridge forwarding function.  bridge_forward()
 	 * consumes the mbuf and releases the lock.
 	 */
 	bridge_forward(sc, bif, m);
 
 	return (NULL);
 }
 
 /*
  * bridge_broadcast:
  *
  *	Send a frame to all interfaces that are members of
  *	the bridge, except for the one on which the packet
  *	arrived.
  *
  *	sc	the bridge
  *	src_if	interface the frame came from; never used as a target
  *	m	the frame; always consumed
  *	runfilt	non-zero to run the pfil(9) hooks (once on the bridge,
  *		then once per outgoing member)
  *
  *	NOTE: Releases the lock on return.
  */
 static void
 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
     struct mbuf *m, int runfilt)
 {
 	struct bridge_iflist *dbif, *sbif;
 	struct mbuf *mc;
 	struct ifnet *dst_if;
 	int error = 0, used = 0, i;
 
 	/* May be NULL, e.g. when src_if is not a member of the bridge. */
 	sbif = bridge_lookup_member_if(sc, src_if);
 
 	/*
 	 * NOTE(review): BRIDGE_LOCK2REF is defined outside this chunk;
 	 * presumably it trades the held lock for a reference that is
 	 * dropped by BRIDGE_UNREF below -- confirm against its definition.
 	 */
 	BRIDGE_LOCK2REF(sc, error);
 	if (error) {
 		m_freem(m);
 		return;
 	}
 
 	/* Filter on the bridge interface before broadcasting */
 	if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook)
 #ifdef INET6
 	    || PFIL_HOOKED(&V_inet6_pfil_hook)
 #endif
 	    )) {
 		if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0)
 			goto out;
 		if (m == NULL)
 			goto out;
 	}
 
 	LIST_FOREACH(dbif, &sc->sc_iflist, bif_next) {
 		dst_if = dbif->bif_ifp;
 		if (dst_if == src_if)
 			continue;
 
 		/* Private segments can not talk to each other */
 		if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE))
 			continue;
 
 		if ((dbif->bif_flags & IFBIF_STP) &&
 		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
 			continue;
 
 		/* Unicast frames only go to members with IFBIF_DISCOVER set. */
 		if ((dbif->bif_flags & IFBIF_DISCOVER) == 0 &&
 		    (m->m_flags & (M_BCAST|M_MCAST)) == 0)
 			continue;
 
 		if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			continue;
 
 		/*
 		 * The last member in the list gets the original mbuf; every
 		 * other member gets a deep copy.  "used" records that the
 		 * original has been handed off and must not be freed below.
 		 */
 		if (LIST_NEXT(dbif, bif_next) == NULL) {
 			mc = m;
 			used = 1;
 		} else {
 			mc = m_dup(m, M_NOWAIT);
 			if (mc == NULL) {
 				if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 				continue;
 			}
 		}
 
 		/*
 		 * Filter on the output interface. Pass a NULL bridge interface
 		 * pointer so we do not redundantly filter on the bridge for
 		 * each interface we broadcast on.
 		 */
 		if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook)
 #ifdef INET6
 		    || PFIL_HOOKED(&V_inet6_pfil_hook)
 #endif
 		    )) {
 			/* Only copies are realigned, never the original. */
 			if (used == 0) {
 				/* Keep the layer3 header aligned */
 				i = min(mc->m_pkthdr.len, max_protohdr);
 				mc = m_copyup(mc, i, ETHER_ALIGN);
 				if (mc == NULL) {
 					if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 					continue;
 				}
 			}
 			if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
 				continue;
 			if (mc == NULL)
 				continue;
 		}
 
 		bridge_enqueue(sc, dst_if, mc);
 	}
 	/*
 	 * The original was never handed off (for instance the last member
 	 * was skipped by one of the checks above), so release it here.
 	 */
 	if (used == 0)
 		m_freem(m);
 
 out:
 	BRIDGE_UNREF(sc);
 }
 
 /*
  * bridge_span:
  *
  *	Queue a copy of the packet on every running interface of the
  *	bridge's span list.  The caller's mbuf is left untouched; a failed
  *	copy is counted as an output error on the bridge interface.
  */
 static void
 bridge_span(struct bridge_softc *sc, struct mbuf *m)
 {
 	struct bridge_iflist *span;
 	struct ifnet *span_if;
 	struct mbuf *copy;
 
 	/* Iterating an empty span list is simply a no-op. */
 	LIST_FOREACH(span, &sc->sc_spanlist, bif_next) {
 		span_if = span->bif_ifp;
 
 		if ((span_if->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 			copy = m_copypacket(m, M_NOWAIT);
 			if (copy != NULL)
 				bridge_enqueue(sc, span_if, copy);
 			else
 				if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 		}
 	}
 }
 
 /*
  * bridge_rtupdate:
  *
  *	Add a forwarding entry for (dst, vlan) that points at member bif,
  *	or refresh the entry if one already exists.
  *
  *	Returns 0 on success, EINVAL for a multicast or all-zero address,
  *	ENOSPC when the bridge-wide or per-member address limit is hit,
  *	ENOMEM when no node can be allocated, or the error reported by
  *	bridge_rtnode_insert().
  */
 static int
 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
     struct bridge_iflist *bif, int setflags, uint8_t flags)
 {
 	struct bridge_rtnode *brt;
 	int error;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
 	/* Multicast (and therefore broadcast) addresses are never learned. */
 	if (ETHER_IS_MULTICAST(dst))
 		return (EINVAL);
 	/* Neither is the all-zeroes address. */
 	if ((dst[0] | dst[1] | dst[2] | dst[3] | dst[4] | dst[5]) == 0)
 		return (EINVAL);
 
 	/* 802.1p frames map to vlan 1 */
 	if (vlan == 0)
 		vlan = 1;
 
 	brt = bridge_rtnode_lookup(sc, dst, vlan);
 	if (brt == NULL) {
 		/* Unknown address: create an entry, subject to the limits. */
 		if (sc->sc_brtcnt >= sc->sc_brtmax) {
 			sc->sc_brtexceeded++;
 			return (ENOSPC);
 		}
 		/* The per-member limit only applies when it is non-zero. */
 		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
 			bif->bif_addrexceeded++;
 			return (ENOSPC);
 		}
 
 		brt = uma_zalloc(bridge_rtnode_zone, M_NOWAIT | M_ZERO);
 		if (brt == NULL)
 			return (ENOMEM);
 
 		/* Entries learned on a sticky member start out sticky. */
 		brt->brt_flags = (bif->bif_flags & IFBIF_STICKY) ?
 		    IFBAF_STICKY : IFBAF_DYNAMIC;
 		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
 		brt->brt_vlan = vlan;
 
 		error = bridge_rtnode_insert(sc, brt);
 		if (error != 0) {
 			uma_zfree(bridge_rtnode_zone, brt);
 			return (error);
 		}
 		brt->brt_dst = bif;
 		bif->bif_addrcnt++;
 	}
 
 	/* A dynamic entry follows the address to the member it shows up on. */
 	if (brt->brt_dst != bif &&
 	    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
 		brt->brt_dst->bif_addrcnt--;
 		bif->bif_addrcnt++;
 		brt->brt_dst = bif;
 	}
 
 	/* Restart the aging clock when the caller asks for a dynamic entry. */
 	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
 		brt->brt_expire = time_uptime + sc->sc_brttimeout;
 	if (setflags)
 		brt->brt_flags = flags;
 
 	return (0);
 }
 
 /*
  * bridge_rtlookup:
  *
  *	Return the interface recorded for (addr, vlan) in the forwarding
  *	table, or NULL when there is no matching entry.
  */
 static struct ifnet *
 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
 {
 	struct bridge_rtnode *node;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
 	node = bridge_rtnode_lookup(sc, addr, vlan);
 	if (node != NULL)
 		return (node->brt_ifp);
 
 	return (NULL);
 }
 
 /*
  * bridge_rttrim:
  *
  *	Trim the routing table so that the number of entries is less
  *	than or equal to the configured maximum.  Expired entries go
  *	first; if that is not enough, dynamic entries are destroyed in
  *	list order until the table fits.
  */
 static void
 bridge_rttrim(struct bridge_softc *sc)
 {
 	struct bridge_rtnode *node, *next;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
 	/* Nothing to do while the table is within its limit. */
 	if (sc->sc_brtcnt <= sc->sc_brtmax)
 		return;
 
 	/* Force an aging cycle; this might trim enough addresses. */
 	bridge_rtage(sc);
 
 	LIST_FOREACH_SAFE(node, &sc->sc_rtlist, brt_list, next) {
 		/* Stop as soon as the table fits again. */
 		if (sc->sc_brtcnt <= sc->sc_brtmax)
 			break;
 		if ((node->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
 			bridge_rtnode_destroy(sc, node);
 	}
 }
 
 /*
  * bridge_timer:
  *
  *	Callout handler that ages the forwarding table and re-arms itself
  *	for as long as the bridge interface is marked running.
  */
 static void
 bridge_timer(void *arg)
 {
 	struct bridge_softc *sc;
 
 	sc = arg;
 	BRIDGE_LOCK_ASSERT(sc);
 
 	bridge_rtage(sc);
 
 	/* Once the interface is no longer running the timer is left to lapse. */
 	if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
 
 	callout_reset(&sc->sc_brcallout,
 	    bridge_rtable_prune_period * hz, bridge_timer, sc);
 }
 
 /*
  * bridge_rtage:
  *
  *	Perform an aging cycle: destroy every dynamic entry whose expiry
  *	time has been reached.
  */
 static void
 bridge_rtage(struct bridge_softc *sc)
 {
 	struct bridge_rtnode *node, *next;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
 	LIST_FOREACH_SAFE(node, &sc->sc_rtlist, brt_list, next) {
 		/* Only dynamic entries age out. */
 		if ((node->brt_flags & IFBAF_TYPEMASK) != IFBAF_DYNAMIC)
 			continue;
 		if (time_uptime < node->brt_expire)
 			continue;
 		bridge_rtnode_destroy(sc, node);
 	}
 }
 
 /*
  * bridge_rtflush:
  *
  *	Remove all dynamic addresses from the bridge; when "full" is
  *	non-zero every entry is removed regardless of its type.
  */
 static void
 bridge_rtflush(struct bridge_softc *sc, int full)
 {
 	struct bridge_rtnode *node, *next;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
 	LIST_FOREACH_SAFE(node, &sc->sc_rtlist, brt_list, next) {
 		/* A partial flush spares everything that is not dynamic. */
 		if (!full &&
 		    (node->brt_flags & IFBAF_TYPEMASK) != IFBAF_DYNAMIC)
 			continue;
 		bridge_rtnode_destroy(sc, node);
 	}
 }
 
 /*
  * bridge_rtdaddr:
  *
  *	Remove an address from the table.  Returns 0 if at least one
  *	entry was removed and ENOENT otherwise.
  */
 static int
 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
 {
 	struct bridge_rtnode *node;
 	int removed = 0;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
 	/*
 	 * A zero vlan matches the address on every vlan, so keep looking
 	 * the address up until no entry is left.
 	 */
 	for (;;) {
 		node = bridge_rtnode_lookup(sc, addr, vlan);
 		if (node == NULL)
 			break;
 		bridge_rtnode_destroy(sc, node);
 		removed = 1;
 	}
 
 	if (removed == 0)
 		return (ENOENT);
 	return (0);
 }
 
 /*
  * bridge_rtdelete:
  *
  *	Delete routes to a specific member interface.  Only dynamic
  *	entries are removed unless "full" is non-zero.
  */
 static void
 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
 {
 	struct bridge_rtnode *node, *next;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
 	LIST_FOREACH_SAFE(node, &sc->sc_rtlist, brt_list, next) {
 		/* Entries that point at other interfaces are left alone. */
 		if (node->brt_ifp != ifp)
 			continue;
 		if (!full &&
 		    (node->brt_flags & IFBAF_TYPEMASK) != IFBAF_DYNAMIC)
 			continue;
 		bridge_rtnode_destroy(sc, node);
 	}
 }
 
 /*
  * bridge_rtable_init:
  *
  *	Initialize the route table for this bridge: allocate the hash
  *	bucket array, start with every bucket and the global entry list
  *	empty, and pick a random hash key.
  */
 static void
 bridge_rtable_init(struct bridge_softc *sc)
 {
 	int bucket;
 
 	sc->sc_rthash = malloc(BRIDGE_RTHASH_SIZE * sizeof(sc->sc_rthash[0]),
 	    M_DEVBUF, M_WAITOK);
 
 	bucket = 0;
 	while (bucket < BRIDGE_RTHASH_SIZE) {
 		LIST_INIT(&sc->sc_rthash[bucket]);
 		bucket++;
 	}
 
 	LIST_INIT(&sc->sc_rtlist);
 	/* The key is the initial value fed to the hash in bridge_rthash(). */
 	sc->sc_rthash_key = arc4random();
 }
 
 /*
  * bridge_rtable_fini:
  *
  *	Deconstruct the route table for this bridge.  Only the hash bucket
  *	array is released here; the table itself is asserted to be empty
  *	already (sc_brtcnt == 0).
  */
 static void
 bridge_rtable_fini(struct bridge_softc *sc)
 {
 
 	KASSERT(sc->sc_brtcnt == 0,
 	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
 	free(sc->sc_rthash, M_DEVBUF);
 }
 
 /*
  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
  *
  * mix() scrambles its three arguments in place; it is only defined for the
  * benefit of bridge_rthash() and is #undef'ed again right after it.
  */
 #define	mix(a, b, c)							\
 do {									\
 	a -= b; a -= c; a ^= (c >> 13);					\
 	b -= c; b -= a; b ^= (a << 8);					\
 	c -= a; c -= b; c ^= (b >> 13);					\
 	a -= b; a -= c; a ^= (c >> 12);					\
 	b -= c; b -= a; b ^= (a << 16);					\
 	c -= a; c -= b; c ^= (b >> 5);					\
 	a -= b; a -= c; a ^= (c >> 3);					\
 	b -= c; b -= a; b ^= (a << 10);					\
 	c -= a; c -= b; c ^= (b >> 15);					\
 } while (/*CONSTCOND*/0)
 
 /*
  * bridge_rthash:
  *
  *	Map a 6-byte Ethernet address to a hash bucket index.  The
  *	per-bridge key sc_rthash_key seeds the hash and the result is
  *	masked with BRIDGE_RTHASH_MASK.
  */
 static __inline uint32_t
 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
 {
 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
 
 	/*
 	 * Widen each byte to uint32_t before shifting.  A bare uint8_t is
 	 * promoted to (signed) int, and shifting a value >= 0x80 left by
 	 * 24 bits would overflow it, which is undefined behaviour.
 	 */
 	b += (uint32_t)addr[5] << 8;
 	b += (uint32_t)addr[4];
 	a += (uint32_t)addr[3] << 24;
 	a += (uint32_t)addr[2] << 16;
 	a += (uint32_t)addr[1] << 8;
 	a += (uint32_t)addr[0];
 
 	mix(a, b, c);
 
 	return (c & BRIDGE_RTHASH_MASK);
 }
 
 #undef mix
 
 /*
  * Compare two Ethernet addresses byte by byte.  Returns 0 when they are
  * equal, otherwise the difference a[i] - b[i] at the first byte where
  * they differ.
  */
 static int
 bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
 {
 	int diff, idx;
 
 	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
 		diff = (int)a[idx] - (int)b[idx];
 		if (diff != 0)
 			return (diff);
 	}
 
 	return (0);
 }
 
 /*
  * bridge_rtnode_lookup:
  *
  *	Find the route node for the given address in its hash bucket.  A
  *	node matches when the address is equal and either the vlan ids
  *	agree or the requested vlan is zero, in which case the first node
  *	with that address is returned.  Returns NULL when nothing matches.
  */
 static struct bridge_rtnode *
 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
 {
 	struct bridge_rtnode *node;
 	uint32_t bucket;
 	int dir;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
 	bucket = bridge_rthash(sc, addr);
 	LIST_FOREACH(node, &sc->sc_rthash[bucket], brt_hash) {
 		dir = bridge_rtnode_addr_cmp(addr, node->brt_addr);
 		/*
 		 * The search stops at the first node whose address compares
 		 * lower than the one we are looking for.
 		 */
 		if (dir > 0)
 			break;
 		if (dir == 0 && (vlan == 0 || node->brt_vlan == vlan))
 			return (node);
 	}
 
 	return (NULL);
 }
 
 /*
  * bridge_rtnode_insert:
  *
  *	Insert the specified bridge node into the route table.  We
  *	assume the entry is not already in the table.
  */
 static int
 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
 {
 	struct bridge_rtnode *lbrt;
 	uint32_t hash;
 	int dir;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
 	hash = bridge_rthash(sc, brt->brt_addr);
 
 	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
 	if (lbrt == NULL) {
 		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
 		goto out;
 	}
 
 	do {
 		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
 		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan)
 			return (EEXIST);
 		if (dir > 0) {
 			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
 			goto out;
 		}
 		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
 			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
 			goto out;
 		}
 		lbrt = LIST_NEXT(lbrt, brt_hash);
 	} while (lbrt != NULL);
 
 #ifdef DIAGNOSTIC
 	panic("bridge_rtnode_insert: impossible");
 #endif
 
 out:
 	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
 	sc->sc_brtcnt++;
 
 	return (0);
 }
 
 /*
  * bridge_rtnode_destroy:
  *
  *	Unlink a route node from the table, adjust the bridge-wide and
  *	per-member entry counters, and free the node.
  */
 static void
 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
 {
 	BRIDGE_LOCK_ASSERT(sc);
 
 	/* Counters first; they need the node while it is still valid. */
 	brt->brt_dst->bif_addrcnt--;
 	sc->sc_brtcnt--;
 
 	/* Take the node off both the global list and its hash bucket. */
 	LIST_REMOVE(brt, brt_list);
 	LIST_REMOVE(brt, brt_hash);
 
 	uma_zfree(bridge_rtnode_zone, brt);
 }
 
 /*
  * bridge_rtable_expire:
  *
  *	Set the expiry time for all routes on an interface.  An age of
  *	zero deletes the interface's routes via bridge_rtdelete() instead;
  *	any other age caps the expiry of the interface's dynamic routes at
  *	"age" seconds from now.  Takes and releases the bridge lock.
  */
 static void
 bridge_rtable_expire(struct ifnet *ifp, int age)
 {
 	struct bridge_softc *sc = ifp->if_bridge;
 	struct bridge_rtnode *node;
 
 	BRIDGE_LOCK(sc);
 
 	if (age == 0) {
 		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
 		BRIDGE_UNLOCK(sc);
 		return;
 	}
 
 	LIST_FOREACH(node, &sc->sc_rtlist, brt_list) {
 		if (node->brt_ifp != ifp)
 			continue;
 		if ((node->brt_flags & IFBAF_TYPEMASK) != IFBAF_DYNAMIC)
 			continue;
 		/* Only ever shorten a lifetime, never extend it. */
 		if (node->brt_expire > time_uptime + age)
 			node->brt_expire = time_uptime + age;
 	}
 
 	BRIDGE_UNLOCK(sc);
 }
 
 /*
  * bridge_state_change:
  *
  *	Callback from the bridgestp code when a port changes states.
  *	When V_log_stp is set, the new state is logged together with the
  *	names of the bridge and of the member interface.
  *
  *	ifp	member interface whose port changed state
  *	state	new state, used as an index into the name table below;
  *		a value outside the table is logged as "unknown"
  */
 static void
 bridge_state_change(struct ifnet *ifp, int state)
 {
 	struct bridge_softc *sc = ifp->if_bridge;
 	static const char *stpstates[] = {
 		"disabled",
 		"listening",
 		"learning",
 		"forwarding",
 		"blocking",
 		"discarding"
 	};
 	const char *name;
 
 	/* Never index the table with a value it has no entry for. */
 	if (state >= 0 &&
 	    (size_t)state < sizeof(stpstates) / sizeof(stpstates[0]))
 		name = stpstates[state];
 	else
 		name = "unknown";
 
 	CURVNET_SET(ifp->if_vnet);
 	if (V_log_stp)
 		log(LOG_NOTICE, "%s: state changed to %s on %s\n",
 		    sc->sc_ifp->if_xname, name, ifp->if_xname);
 	CURVNET_RESTORE();
 }
 
 /*
  * Send bridge packets through pfil if they are one of the types pfil can deal
  * with, or if they are ARP or REVARP.  (pfil will pass ARP and REVARP without
  * question.) If *bifp or *ifp are NULL then packet filtering is skipped for
  * that interface.
  *
  *	mp	in/out packet pointer; may be replaced, and is NULL on return
  *		when the packet was freed here or consumed by a filter
  *	bifp	the bridge interface, or NULL to skip filtering on it
  *	ifp	the member interface, or NULL to skip filtering on it
  *	dir	PFIL_IN or PFIL_OUT
  *
  * Returns 0 when the packet may continue (callers still have to check *mp
  * for NULL) and non-zero when it must not be forwarded.
  */
 static int
 bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
 {
 	int snap, error, i, hlen;
 	struct ether_header *eh1, eh2;
 	struct ip *ip;
 	struct llc llc1;
 	u_int16_t ether_type;
 
 	snap = 0;
 	error = -1;	/* Default error if not error == 0 */
 
 #if 0
 	/* we may return with the IP fields swapped, ensure its not shared */
 	KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__));
 #endif
 
 	if (V_pfil_bridge == 0 && V_pfil_member == 0 && V_pfil_ipfw == 0)
 		return (0); /* filtering is disabled */
 
 	/* Make the leading headers contiguous before looking at them. */
 	i = min((*mp)->m_pkthdr.len, max_protohdr);
 	if ((*mp)->m_len < i) {
 	    *mp = m_pullup(*mp, i);
 	    if (*mp == NULL) {
 		printf("%s: m_pullup failed\n", __func__);
 		return (-1);
 	    }
 	}
 
 	eh1 = mtod(*mp, struct ether_header *);
 	ether_type = ntohs(eh1->ether_type);
 
 	/*
 	 * Check for SNAP/LLC.
 	 */
 	if (ether_type < ETHERMTU) {
 		struct llc *llc2 = (struct llc *)(eh1 + 1);
 
 		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
 		    llc2->llc_dsap == LLC_SNAP_LSAP &&
 		    llc2->llc_ssap == LLC_SNAP_LSAP &&
 		    llc2->llc_control == LLC_UI) {
 			/*
 			 * NOTE(review): this reads a field out of the packet,
 			 * so ntohs() (as used for eh1 above) would be the
 			 * conventional spelling; the two perform the same
 			 * byte swap.
 			 */
 			ether_type = htons(llc2->llc_un.type_snap.ether_type);
 			snap = 1;
 		}
 	}
 
 	/*
 	 * If we're trying to filter bridge traffic, don't look at anything
 	 * other than IP and ARP traffic.  If the filter doesn't understand
 	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
 	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
 	 * but of course we don't have an AppleTalk filter to begin with.
 	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
 	 * ARP traffic.)
 	 */
 	switch (ether_type) {
 		case ETHERTYPE_ARP:
 		case ETHERTYPE_REVARP:
 			if (V_pfil_ipfw_arp == 0)
 				return (0); /* Automatically pass */
 			break;
 
 		case ETHERTYPE_IP:
 #ifdef INET6
 		case ETHERTYPE_IPV6:
 #endif /* INET6 */
 			break;
 		default:
 			/*
 			 * Check to see if the user wants to pass non-ip
 			 * packets, these will not be checked by pfil(9) and
 			 * passed unconditionally so the default is to drop.
 			 */
 			if (V_pfil_onlyip)
 				goto bad;
 	}
 
 	/* Run the packet through pfil before stripping link headers */
 	if (PFIL_HOOKED(&V_link_pfil_hook) && V_pfil_ipfw != 0 &&
 			dir == PFIL_OUT && ifp != NULL) {
 
 		error = pfil_run_hooks(&V_link_pfil_hook, mp, ifp, dir, NULL);
 
 		if (*mp == NULL || error != 0) /* packet consumed by filter */
 			return (error);
 	}
 
 	/* Strip off the Ethernet header and keep a copy. */
 	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
 	m_adj(*mp, ETHER_HDR_LEN);
 
 	/* Strip off snap header, if present */
 	if (snap) {
 		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
 		m_adj(*mp, sizeof(struct llc));
 	}
 
 	/*
 	 * Check the IP header for alignment and errors
 	 */
 	if (dir == PFIL_IN) {
 		switch (ether_type) {
 			case ETHERTYPE_IP:
 				error = bridge_ip_checkbasic(mp);
 				break;
 #ifdef INET6
 			case ETHERTYPE_IPV6:
 				error = bridge_ip6_checkbasic(mp);
 				break;
 #endif /* INET6 */
 			default:
 				error = 0;
 		}
 		if (error)
 			goto bad;
 	}
 
 	error = 0;
 
 	/*
 	 * Run the packet through pfil
 	 */
 	switch (ether_type) {
 	case ETHERTYPE_IP:
 		/*
 		 * Run pfil on the member interface and the bridge, both can
 		 * be skipped by clearing pfil_member or pfil_bridge.
 		 *
 		 * Keep the order:
 		 *   in_if -> bridge_if -> out_if
 		 */
 		if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL)
 			error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp,
 					dir, NULL);
 
 		if (*mp == NULL || error != 0) /* filter may consume */
 			break;
 
 		if (V_pfil_member && ifp != NULL)
 			error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp,
 					dir, NULL);
 
 		if (*mp == NULL || error != 0) /* filter may consume */
 			break;
 
 		if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL)
 			error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp,
 					dir, NULL);
 
 		if (*mp == NULL || error != 0) /* filter may consume */
 			break;
 
 		/* check if we need to fragment the packet */
 		if (V_pfil_member && ifp != NULL && dir == PFIL_OUT) {
 			i = (*mp)->m_pkthdr.len;
 			if (i > ifp->if_mtu) {
 				/*
 				 * bridge_fragment() re-adds the link headers
 				 * itself, so we return straight away.
 				 * NOTE(review): *mp is passed by value, so
 				 * any change bridge_fragment() makes to the
 				 * head of the chain is not reflected in *mp.
 				 */
 				error = bridge_fragment(ifp, *mp, &eh2, snap,
 					    &llc1);
 				return (error);
 			}
 		}
 
 		/* Recalculate the ip checksum. */
 		ip = mtod(*mp, struct ip *);
 		hlen = ip->ip_hl << 2;
 		if (hlen < sizeof(struct ip))
 			goto bad;
 		if (hlen > (*mp)->m_len) {
 			if ((*mp = m_pullup(*mp, hlen)) == 0)
 				goto bad;
 			ip = mtod(*mp, struct ip *);
 			if (ip == NULL)
 				goto bad;
 		}
 		ip->ip_sum = 0;
 		if (hlen == sizeof(struct ip))
 			ip->ip_sum = in_cksum_hdr(ip);
 		else
 			ip->ip_sum = in_cksum(*mp, hlen);
 
 		break;
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		/* Same hook ordering as for IPv4 above. */
 		if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL)
 			error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp,
 					dir, NULL);
 
 		if (*mp == NULL || error != 0) /* filter may consume */
 			break;
 
 		if (V_pfil_member && ifp != NULL)
 			error = pfil_run_hooks(&V_inet6_pfil_hook, mp, ifp,
 					dir, NULL);
 
 		if (*mp == NULL || error != 0) /* filter may consume */
 			break;
 
 		if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL)
 			error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp,
 					dir, NULL);
 		break;
 #endif
 	default:
 		error = 0;
 		break;
 	}
 
 	/* A consumed packet is reported with whatever the filter returned. */
 	if (*mp == NULL)
 		return (error);
 	if (error != 0)
 		goto bad;
 
 	/* From here on a failure means a header could not be put back. */
 	error = -1;
 
 	/*
 	 * Finally, put everything back the way it was and return
 	 */
 	if (snap) {
 		M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT);
 		if (*mp == NULL)
 			return (error);
 		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
 	}
 
 	M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT);
 	if (*mp == NULL)
 		return (error);
 	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
 
 	return (0);
 
 bad:
 	/* Drop the packet and make sure the caller cannot reuse it. */
 	m_freem(*mp);
 	*mp = NULL;
 	return (error);
 }
 
 /*
  * Perform basic checks on header size since
  * pfil assumes ip_input has already processed
  * it for it.  Cut-and-pasted from ip_input.c.
  * Given how simple the IPv6 version is,
  * does the IPv4 version really need to be
  * this complicated?
  *
  * XXX Should we update ipstat here, or not?
  * XXX Right now we update ipstat but not
  * XXX csum_counter.
  */
 static int
 bridge_ip_checkbasic(struct mbuf **mp)
 {
 	struct mbuf *m = *mp;
 	struct ip *ip;
 	int len, hlen;
 	u_short sum;
 
 	if (*mp == NULL)
 		return (-1);
 
 	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
 		if ((m = m_copyup(m, sizeof(struct ip),
 			(max_linkhdr + 3) & ~3)) == NULL) {
 			/* XXXJRT new stat, please */
 			KMOD_IPSTAT_INC(ips_toosmall);
 			goto bad;
 		}
 	} else if (__predict_false(m->m_len < sizeof (struct ip))) {
 		if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
 			KMOD_IPSTAT_INC(ips_toosmall);
 			goto bad;
 		}
 	}
 	ip = mtod(m, struct ip *);
 	if (ip == NULL) goto bad;
 
 	if (ip->ip_v != IPVERSION) {
 		KMOD_IPSTAT_INC(ips_badvers);
 		goto bad;
 	}
 	hlen = ip->ip_hl << 2;
 	if (hlen < sizeof(struct ip)) { /* minimum header length */
 		KMOD_IPSTAT_INC(ips_badhlen);
 		goto bad;
 	}
 	if (hlen > m->m_len) {
 		if ((m = m_pullup(m, hlen)) == 0) {
 			KMOD_IPSTAT_INC(ips_badhlen);
 			goto bad;
 		}
 		ip = mtod(m, struct ip *);
 		if (ip == NULL) goto bad;
 	}
 
 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
 	} else {
 		if (hlen == sizeof(struct ip)) {
 			sum = in_cksum_hdr(ip);
 		} else {
 			sum = in_cksum(m, hlen);
 		}
 	}
 	if (sum) {
 		KMOD_IPSTAT_INC(ips_badsum);
 		goto bad;
 	}
 
 	/* Retrieve the packet length. */
 	len = ntohs(ip->ip_len);
 
 	/*
 	 * Check for additional length bogosity
 	 */
 	if (len < hlen) {
 		KMOD_IPSTAT_INC(ips_badlen);
 		goto bad;
 	}
 
 	/*
 	 * Check that the amount of data in the buffers
 	 * is as at least much as the IP header would have us expect.
 	 * Drop packet if shorter than we expect.
 	 */
 	if (m->m_pkthdr.len < len) {
 		KMOD_IPSTAT_INC(ips_tooshort);
 		goto bad;
 	}
 
 	/* Checks out, proceed */
 	*mp = m;
 	return (0);
 
 bad:
 	*mp = m;
 	return (-1);
 }
 
 #ifdef INET6
 /*
  * IPv6 counterpart of bridge_ip_checkbasic(), derived from ip6_input.c.
  * Makes the base IPv6 header contiguous and aligned in the first mbuf
  * and verifies the version field.  *mp is updated to the possibly
  * replaced mbuf (NULL if a copy or pull-up failed).  Returns 0 when the
  * header checks out and -1 otherwise.
  * XXX Should we update ip6stat, or not?
  */
 static int
 bridge_ip6_checkbasic(struct mbuf **mp)
 {
 	struct mbuf *m = *mp;
 	struct ifnet *inifp;
 	struct ip6_hdr *ip6;
 	int rv = -1;
 
 	/*
 	 * An unaligned header is copied into a fresh mbuf with room for
 	 * link headers in case the packet is forwarded; an aligned one
 	 * only has to be complete in the first mbuf.  The receive
 	 * interface is saved first because m is NULL once either call
 	 * fails.
 	 */
 	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
 		inifp = m->m_pkthdr.rcvif;
 		m = m_copyup(m, sizeof(struct ip6_hdr),
 		    (max_linkhdr + 3) & ~3);
 		if (m == NULL) {
 			/* XXXJRT new stat, please */
 			IP6STAT_INC(ip6s_toosmall);
 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
 			goto done;
 		}
 	} else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
 		inifp = m->m_pkthdr.rcvif;
 		m = m_pullup(m, sizeof(struct ip6_hdr));
 		if (m == NULL) {
 			IP6STAT_INC(ip6s_toosmall);
 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
 			goto done;
 		}
 	}
 
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION) {
 		/* Checks out, proceed */
 		rv = 0;
 	} else {
 		IP6STAT_INC(ip6s_badvers);
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
 	}
 
 done:
 	*mp = m;
 	return (rv);
 }
 #endif /* INET6 */
 
 /*
  * bridge_fragment:
  *
  *	Fragment an IPv4 packet to the MTU of ifp and put the saved link
  *	layer headers back in front of every fragment.
  *
  *	ifp	outgoing interface; supplies the MTU and if_hwassist
  *	m	packet, starting at the IP header
  *	eh	Ethernet header to prepend to each fragment
  *	snap	non-zero if an LLC/SNAP header has to be re-added as well
  *	llc	the LLC/SNAP header to re-add (only read when snap is set)
  *
  *	Returns 0 on success, with the fragments linked through
  *	m_nextpkt.  On failure a non-zero error is returned and every
  *	packet of the chain has been freed.
  *
  *	NOTE(review): m is passed by value, so when m_pullup() or
  *	M_PREPEND() has to replace the first mbuf the caller's pointer
  *	becomes stale.  Fixing that needs a "struct mbuf **" parameter,
  *	i.e. a change to this function's interface and its caller.
  */
 static int
 bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
     int snap, struct llc *llc)
 {
 	struct mbuf *m0, *mnext, **linkp;
 	struct ip *ip;
 	int error = -1;
 
 	if (m->m_len < sizeof(struct ip) &&
 	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 		goto out;
 	ip = mtod(m, struct ip *);
 
 	m->m_pkthdr.csum_flags |= CSUM_IP;
 	error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist);
 	if (error)
 		goto out;
 
 	/*
 	 * Walk the chain and re-add the link layer headers.  linkp points
 	 * at the pointer that references the current packet so the packet
 	 * can be relinked if M_PREPEND() puts a new mbuf in front of it.
 	 */
 	linkp = &m;
 	while ((m0 = *linkp) != NULL) {
 		/* Detach the packet; m0 may be replaced or freed below. */
 		mnext = m0->m_nextpkt;
 		m0->m_nextpkt = NULL;
 
 		if (snap) {
 			M_PREPEND(m0, sizeof(struct llc), M_NOWAIT);
 			if (m0 != NULL)
 				bcopy(llc, mtod(m0, caddr_t),
 				    sizeof(struct llc));
 		}
 		if (m0 != NULL) {
 			M_PREPEND(m0, ETHER_HDR_LEN, M_NOWAIT);
 			if (m0 != NULL)
 				bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
 		}
 		if (m0 == NULL) {
 			/*
 			 * M_PREPEND() leaves m0 NULL on failure (per mbuf(9)
 			 * it has then freed that packet).  Unlink it and let
 			 * the error path release everything else.
 			 */
 			*linkp = mnext;
 			error = ENOBUFS;
 			goto out;
 		}
 
 		/* Put the (possibly new) head back into the chain. */
 		m0->m_nextpkt = mnext;
 		*linkp = m0;
 		linkp = &m0->m_nextpkt;
 	}
 
 	KMOD_IPSTAT_INC(ips_fragmented);
 
 	return (0);
 
 out:
 	/* Free every packet still on the chain, not just the first one. */
 	while (m != NULL) {
 		mnext = m->m_nextpkt;
 		m_freem(m);
 		m = mnext;
 	}
 	return (error);
 }
 
 /*
  * Link state change handler for a member interface.  If ifp is (still) a
  * member, the bridge's own link state is re-evaluated under the bridge
  * lock and the spanning tree code is then told about the change after
  * the lock has been released.
  */
 static void
 bridge_linkstate(struct ifnet *ifp)
 {
 	struct bridge_softc *sc = ifp->if_bridge;
 	struct bridge_iflist *member;
 
 	BRIDGE_LOCK(sc);
 	member = bridge_lookup_member_if(sc, ifp);
 	if (member != NULL)
 		bridge_linkcheck(sc);
 	BRIDGE_UNLOCK(sc);
 
 	if (member != NULL)
 		bstp_linkstate(&member->bif_stp);
 }
 
 static void
 bridge_linkcheck(struct bridge_softc *sc)
 {
 	struct bridge_iflist *bif;
 	int new_link, hasls;
 
 	BRIDGE_LOCK_ASSERT(sc);
 	new_link = LINK_STATE_DOWN;
 	hasls = 0;
 	/* Our link is considered up if at least one of our ports is active */
 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 		if (bif->bif_ifp->if_capabilities & IFCAP_LINKSTATE)
 			hasls++;
 		if (bif->bif_ifp->if_link_state == LINK_STATE_UP) {
 			new_link = LINK_STATE_UP;
 			break;
 		}
 	}
 	if (!LIST_EMPTY(&sc->sc_iflist) && !hasls) {
 		/* If no interfaces support link-state then we default to up */
 		new_link = LINK_STATE_UP;
 	}
 	if_link_state_change(sc->sc_ifp, new_link);
 }