diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index 505a62b01b4e..41eaa6a56086 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -1,4460 +1,4456 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011, Bryan Venteicher
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO network devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/msan.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/random.h>
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/taskqueue.h>
#include <sys/smp.h>
#include <machine/smp.h>

#include <vm/uma.h>

#include <net/debugnet.h>
#include <net/ethernet.h>
#include <net/pfil.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_media.h>
#include <net/if_vlan_var.h>

#include <net/bpf.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/network/virtio_net.h>
#include <dev/virtio/network/if_vtnetvar.h>
#include "virtio_if.h"

#include "opt_inet.h"
#include "opt_inet6.h"

#if defined(INET) || defined(INET6)
#include <machine/in_cksum.h>
#endif

static int vtnet_modevent(module_t, int, void *);

static int vtnet_probe(device_t);
static int vtnet_attach(device_t);
static int vtnet_detach(device_t);
static int vtnet_suspend(device_t);
static int vtnet_resume(device_t);
static int vtnet_shutdown(device_t);
static int vtnet_attach_completed(device_t);
static int vtnet_config_change(device_t);

static int vtnet_negotiate_features(struct vtnet_softc *);
static int vtnet_setup_features(struct vtnet_softc *);
static int vtnet_init_rxq(struct vtnet_softc *, int);
static int vtnet_init_txq(struct vtnet_softc *, int);
static int vtnet_alloc_rxtx_queues(struct vtnet_softc *);
static void vtnet_free_rxtx_queues(struct vtnet_softc *);
static int vtnet_alloc_rx_filters(struct vtnet_softc *);
static void vtnet_free_rx_filters(struct vtnet_softc *);
static int vtnet_alloc_virtqueues(struct vtnet_softc *);
static int vtnet_alloc_interface(struct vtnet_softc *);
static int vtnet_setup_interface(struct vtnet_softc *);
static int vtnet_ioctl_mtu(struct vtnet_softc *, u_int);
static int vtnet_ioctl_ifflags(struct vtnet_softc *);
static int vtnet_ioctl_multi(struct vtnet_softc *);
static int vtnet_ioctl_ifcap(struct vtnet_softc *, struct ifreq *);
static int vtnet_ioctl(if_t, u_long, caddr_t);
static uint64_t vtnet_get_counter(if_t, ift_counter);

static int vtnet_rxq_populate(struct vtnet_rxq *);
static void
vtnet_rxq_free_mbufs(struct vtnet_rxq *);
static struct mbuf *
    vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
static int vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
    struct mbuf *, int);
static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
static int vtnet_rxq_new_buf(struct vtnet_rxq *);
static int vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
    uint16_t, int, struct virtio_net_hdr *);
static int vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
    uint16_t, int, struct virtio_net_hdr *);
static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
    struct virtio_net_hdr *);
static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
    struct virtio_net_hdr *);
static int vtnet_rxq_eof(struct vtnet_rxq *);
static void vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries);
static void vtnet_rx_vq_intr(void *);
static void vtnet_rxq_tq_intr(void *, int);

static int vtnet_txq_intr_threshold(struct vtnet_txq *);
static int vtnet_txq_below_threshold(struct vtnet_txq *);
static int vtnet_txq_notify(struct vtnet_txq *);
static void vtnet_txq_free_mbufs(struct vtnet_txq *);
static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *, int *,
    int *, int *);
static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
    int, struct virtio_net_hdr *);
static struct mbuf *
    vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
    struct virtio_net_hdr *);
static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
    struct vtnet_tx_header *);
static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
#ifdef VTNET_LEGACY_TX
static void vtnet_start_locked(struct vtnet_txq *, if_t);
static void vtnet_start(if_t);
#else
static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
static int vtnet_txq_mq_start(if_t, struct mbuf *);
static void vtnet_txq_tq_deferred(void *, int);
#endif
static void vtnet_txq_start(struct vtnet_txq *);
static void vtnet_txq_tq_intr(void *, int);
static int vtnet_txq_eof(struct vtnet_txq *);
static void vtnet_tx_vq_intr(void *);
static void vtnet_tx_start_all(struct vtnet_softc *);

#ifndef VTNET_LEGACY_TX
static void vtnet_qflush(if_t);
#endif

static int vtnet_watchdog(struct vtnet_txq *);
static void vtnet_accum_stats(struct vtnet_softc *,
    struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
static void vtnet_tick(void *);

static void vtnet_start_taskqueues(struct vtnet_softc *);
static void vtnet_free_taskqueues(struct vtnet_softc *);
static void vtnet_drain_taskqueues(struct vtnet_softc *);

static void vtnet_drain_rxtx_queues(struct vtnet_softc *);
static void vtnet_stop_rendezvous(struct vtnet_softc *);
static void vtnet_stop(struct vtnet_softc *);
static int vtnet_virtio_reinit(struct vtnet_softc *);
static void vtnet_init_rx_filters(struct vtnet_softc *);
static int vtnet_init_rx_queues(struct vtnet_softc *);
static int vtnet_init_tx_queues(struct vtnet_softc *);
static int vtnet_init_rxtx_queues(struct vtnet_softc *);
static void vtnet_set_active_vq_pairs(struct vtnet_softc *);
static void vtnet_update_rx_offloads(struct vtnet_softc *);
static int vtnet_reinit(struct vtnet_softc *);
static void vtnet_init_locked(struct vtnet_softc *, int);
static void vtnet_init(void *);

static void
vtnet_free_ctrl_vq(struct vtnet_softc *);
static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
    struct sglist *, int, int);
static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
static int vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t);
static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, bool);
static int vtnet_set_promisc(struct vtnet_softc *, bool);
static int vtnet_set_allmulti(struct vtnet_softc *, bool);
static void vtnet_rx_filter(struct vtnet_softc *);
static void vtnet_rx_filter_mac(struct vtnet_softc *);
static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
static void vtnet_rx_filter_vlan(struct vtnet_softc *);
static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
static void vtnet_register_vlan(void *, if_t, uint16_t);
static void vtnet_unregister_vlan(void *, if_t, uint16_t);

static void vtnet_update_speed_duplex(struct vtnet_softc *);
static int vtnet_is_link_up(struct vtnet_softc *);
static void vtnet_update_link_status(struct vtnet_softc *);
static int vtnet_ifmedia_upd(if_t);
static void vtnet_ifmedia_sts(if_t, struct ifmediareq *);
static void vtnet_get_macaddr(struct vtnet_softc *);
static void vtnet_set_macaddr(struct vtnet_softc *);
static void vtnet_attached_set_macaddr(struct vtnet_softc *);
static void vtnet_vlan_tag_remove(struct mbuf *);
static void vtnet_set_rx_process_limit(struct vtnet_softc *);

static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
    struct sysctl_oid_list *, struct vtnet_rxq *);
static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
    struct sysctl_oid_list *, struct vtnet_txq *);
static void vtnet_setup_queue_sysctl(struct vtnet_softc *);
static void vtnet_load_tunables(struct vtnet_softc *);
static void vtnet_setup_sysctl(struct vtnet_softc *);

static int vtnet_rxq_enable_intr(struct vtnet_rxq *);
static void vtnet_rxq_disable_intr(struct vtnet_rxq *);
static int vtnet_txq_enable_intr(struct vtnet_txq *);
static void vtnet_txq_disable_intr(struct vtnet_txq *);
static void vtnet_enable_rx_interrupts(struct vtnet_softc *);
static void vtnet_enable_tx_interrupts(struct vtnet_softc *);
static void vtnet_enable_interrupts(struct vtnet_softc *);
static void vtnet_disable_rx_interrupts(struct vtnet_softc *);
static void vtnet_disable_tx_interrupts(struct vtnet_softc *);
static void vtnet_disable_interrupts(struct vtnet_softc *);

static int vtnet_tunable_int(struct vtnet_softc *, const char *, int);

DEBUGNET_DEFINE(vtnet);

#define vtnet_htog16(_sc, _val)  virtio_htog16(vtnet_modern(_sc), _val)
#define vtnet_htog32(_sc, _val)  virtio_htog32(vtnet_modern(_sc), _val)
#define vtnet_htog64(_sc, _val)  virtio_htog64(vtnet_modern(_sc), _val)
#define vtnet_gtoh16(_sc, _val)  virtio_gtoh16(vtnet_modern(_sc), _val)
#define vtnet_gtoh32(_sc, _val)  virtio_gtoh32(vtnet_modern(_sc), _val)
#define vtnet_gtoh64(_sc, _val)  virtio_gtoh64(vtnet_modern(_sc), _val)

/* Tunables.
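 * All of these are read-only tunables (CTLFLAG_RDTUN) evaluated during
 * attach. Most can also be overridden per device through
 * vtnet_tunable_int(), e.g. in loader.conf:
 *
 *   hw.vtnet.0.mq_max_pairs=4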
 */
static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "VirtIO Net driver parameters");

static int vtnet_csum_disable = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
    &vtnet_csum_disable, 0, "Disables receive and send checksum offload");

static int vtnet_fixup_needs_csum = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN,
    &vtnet_fixup_needs_csum, 0,
    "Calculate valid checksum for NEEDS_CSUM packets");

static int vtnet_tso_disable = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN,
    &vtnet_tso_disable, 0, "Disables TSO");

static int vtnet_lro_disable = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN,
    &vtnet_lro_disable, 0, "Disables hardware LRO");

static int vtnet_mq_disable = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN,
    &vtnet_mq_disable, 0, "Disables multiqueue support");

static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
    &vtnet_mq_max_pairs, 0, "Maximum number of multiqueue pairs");

static int vtnet_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &vtnet_tso_maxlen, 0, "TSO burst limit");

static int vtnet_rx_process_limit = 1024;
SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &vtnet_rx_process_limit, 0,
    "Number of RX segments processed in one pass");

static int vtnet_lro_entry_count = 128;
SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &vtnet_lro_entry_count, 0, "Software LRO entry count");

/* Enable sorted LRO, and the depth of the mbuf queue. */
static int vtnet_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue");

static uma_zone_t vtnet_tx_header_zone;

static struct virtio_feature_desc vtnet_feature_desc[] = {
    { VIRTIO_NET_F_CSUM,                 "TxChecksum"      },
    { VIRTIO_NET_F_GUEST_CSUM,           "RxChecksum"      },
    { VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,  "CtrlRxOffloads"  },
    { VIRTIO_NET_F_MAC,                  "MAC"             },
    { VIRTIO_NET_F_GSO,                  "TxGSO"           },
    { VIRTIO_NET_F_GUEST_TSO4,           "RxLROv4"         },
    { VIRTIO_NET_F_GUEST_TSO6,           "RxLROv6"         },
    { VIRTIO_NET_F_GUEST_ECN,            "RxLROECN"        },
    { VIRTIO_NET_F_GUEST_UFO,            "RxUFO"           },
    { VIRTIO_NET_F_HOST_TSO4,            "TxTSOv4"         },
    { VIRTIO_NET_F_HOST_TSO6,            "TxTSOv6"         },
    { VIRTIO_NET_F_HOST_ECN,             "TxTSOECN"        },
    { VIRTIO_NET_F_HOST_UFO,             "TxUFO"           },
    { VIRTIO_NET_F_MRG_RXBUF,            "MrgRxBuf"        },
    { VIRTIO_NET_F_STATUS,               "Status"          },
    { VIRTIO_NET_F_CTRL_VQ,              "CtrlVq"          },
    { VIRTIO_NET_F_CTRL_RX,              "CtrlRxMode"      },
    { VIRTIO_NET_F_CTRL_VLAN,            "CtrlVLANFilter"  },
    { VIRTIO_NET_F_CTRL_RX_EXTRA,        "CtrlRxModeExtra" },
    { VIRTIO_NET_F_GUEST_ANNOUNCE,       "GuestAnnounce"   },
    { VIRTIO_NET_F_MQ,                   "Multiqueue"      },
    { VIRTIO_NET_F_CTRL_MAC_ADDR,        "CtrlMacAddr"     },
    { VIRTIO_NET_F_SPEED_DUPLEX,         "SpeedDuplex"     },

    { 0, NULL }
};

static device_method_t vtnet_methods[] = {
    /* Device methods. */
    DEVMETHOD(device_probe,     vtnet_probe),
    DEVMETHOD(device_attach,    vtnet_attach),
    DEVMETHOD(device_detach,    vtnet_detach),
    DEVMETHOD(device_suspend,   vtnet_suspend),
    DEVMETHOD(device_resume,    vtnet_resume),
    DEVMETHOD(device_shutdown,  vtnet_shutdown),

    /* VirtIO methods.
*/ DEVMETHOD(virtio_attach_completed, vtnet_attach_completed), DEVMETHOD(virtio_config_change, vtnet_config_change), DEVMETHOD_END }; #ifdef DEV_NETMAP #include #endif static driver_t vtnet_driver = { .name = "vtnet", .methods = vtnet_methods, .size = sizeof(struct vtnet_softc) }; VIRTIO_DRIVER_MODULE(vtnet, vtnet_driver, vtnet_modevent, NULL); MODULE_VERSION(vtnet, 1); MODULE_DEPEND(vtnet, virtio, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(vtnet, netmap, 1, 1, 1); #endif VIRTIO_SIMPLE_PNPINFO(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter"); static int vtnet_modevent(module_t mod __unused, int type, void *unused __unused) { int error = 0; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded++ == 0) { vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr", sizeof(struct vtnet_tx_header), NULL, NULL, NULL, NULL, 0, 0); #ifdef DEBUGNET /* * We need to allocate from this zone in the transmit path, so ensure * that we have at least one item per header available. * XXX add a separate zone like we do for mbufs? otherwise we may alloc * buckets */ uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2); uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2); #endif } break; case MOD_QUIESCE: if (uma_zone_get_cur(vtnet_tx_header_zone) > 0) error = EBUSY; break; case MOD_UNLOAD: if (--loaded == 0) { uma_zdestroy(vtnet_tx_header_zone); vtnet_tx_header_zone = NULL; } break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } static int vtnet_probe(device_t dev) { return (VIRTIO_SIMPLE_PROBE(dev, vtnet)); } static int vtnet_attach(device_t dev) { struct vtnet_softc *sc; int error; sc = device_get_softc(dev); sc->vtnet_dev = dev; virtio_set_feature_desc(dev, vtnet_feature_desc); VTNET_CORE_LOCK_INIT(sc); callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0); vtnet_load_tunables(sc); error = vtnet_alloc_interface(sc); if (error) { device_printf(dev, "cannot allocate interface\n"); goto fail; } vtnet_setup_sysctl(sc); error = vtnet_setup_features(sc); if (error) { device_printf(dev, "cannot setup features\n"); goto fail; } error = vtnet_alloc_rx_filters(sc); if (error) { device_printf(dev, "cannot allocate Rx filters\n"); goto fail; } error = vtnet_alloc_rxtx_queues(sc); if (error) { device_printf(dev, "cannot allocate queues\n"); goto fail; } error = vtnet_alloc_virtqueues(sc); if (error) { device_printf(dev, "cannot allocate virtqueues\n"); goto fail; } error = vtnet_setup_interface(sc); if (error) { device_printf(dev, "cannot setup interface\n"); goto fail; } error = virtio_setup_intr(dev, INTR_TYPE_NET); if (error) { device_printf(dev, "cannot setup interrupts\n"); ether_ifdetach(sc->vtnet_ifp); goto fail; } #ifdef DEV_NETMAP vtnet_netmap_attach(sc); #endif vtnet_start_taskqueues(sc); fail: if (error) vtnet_detach(dev); return (error); } static int vtnet_detach(device_t dev) { struct vtnet_softc *sc; if_t ifp; sc = device_get_softc(dev); ifp = sc->vtnet_ifp; if (device_is_attached(dev)) { VTNET_CORE_LOCK(sc); vtnet_stop(sc); VTNET_CORE_UNLOCK(sc); callout_drain(&sc->vtnet_tick_ch); vtnet_drain_taskqueues(sc); ether_ifdetach(ifp); } #ifdef DEV_NETMAP netmap_detach(ifp); #endif if (sc->vtnet_pfil != NULL) { pfil_head_unregister(sc->vtnet_pfil); sc->vtnet_pfil = NULL; } vtnet_free_taskqueues(sc); if (sc->vtnet_vlan_attach != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach); sc->vtnet_vlan_attach = NULL; } if (sc->vtnet_vlan_detach != NULL) { EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach); 
sc->vtnet_vlan_detach = NULL; } ifmedia_removeall(&sc->vtnet_media); if (ifp != NULL) { if_free(ifp); sc->vtnet_ifp = NULL; } vtnet_free_rxtx_queues(sc); vtnet_free_rx_filters(sc); if (sc->vtnet_ctrl_vq != NULL) vtnet_free_ctrl_vq(sc); VTNET_CORE_LOCK_DESTROY(sc); return (0); } static int vtnet_suspend(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); VTNET_CORE_LOCK(sc); vtnet_stop(sc); sc->vtnet_flags |= VTNET_FLAG_SUSPENDED; VTNET_CORE_UNLOCK(sc); return (0); } static int vtnet_resume(device_t dev) { struct vtnet_softc *sc; if_t ifp; sc = device_get_softc(dev); ifp = sc->vtnet_ifp; VTNET_CORE_LOCK(sc); if (if_getflags(ifp) & IFF_UP) vtnet_init_locked(sc, 0); sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED; VTNET_CORE_UNLOCK(sc); return (0); } static int vtnet_shutdown(device_t dev) { /* * Suspend already does all of what we need to * do here; we just never expect to be resumed. */ return (vtnet_suspend(dev)); } static int vtnet_attach_completed(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); VTNET_CORE_LOCK(sc); vtnet_attached_set_macaddr(sc); VTNET_CORE_UNLOCK(sc); return (0); } static int vtnet_config_change(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); VTNET_CORE_LOCK(sc); vtnet_update_link_status(sc); if (sc->vtnet_link_active != 0) vtnet_tx_start_all(sc); VTNET_CORE_UNLOCK(sc); return (0); } static int vtnet_negotiate_features(struct vtnet_softc *sc) { device_t dev; uint64_t features, negotiated_features; int no_csum; dev = sc->vtnet_dev; features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES : VTNET_LEGACY_FEATURES; /* * TSO and LRO are only available when their corresponding checksum * offload feature is also negotiated. */ no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable); if (no_csum) features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM); if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable)) features &= ~VTNET_TSO_FEATURES; if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable)) features &= ~VTNET_LRO_FEATURES; #ifndef VTNET_LEGACY_TX if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable)) features &= ~VIRTIO_NET_F_MQ; #else features &= ~VIRTIO_NET_F_MQ; #endif negotiated_features = virtio_negotiate_features(dev, features); if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) { uint16_t mtu; mtu = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, mtu)); if (mtu < VTNET_MIN_MTU /* || mtu > VTNET_MAX_MTU */) { device_printf(dev, "Invalid MTU value: %d. " "MTU feature disabled.\n", mtu); features &= ~VIRTIO_NET_F_MTU; negotiated_features = virtio_negotiate_features(dev, features); } } if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) { uint16_t npairs; npairs = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); if (npairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || npairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) { device_printf(dev, "Invalid max_virtqueue_pairs value: " "%d. Multiqueue feature disabled.\n", npairs); features &= ~VIRTIO_NET_F_MQ; negotiated_features = virtio_negotiate_features(dev, features); } } if (virtio_with_feature(dev, VTNET_LRO_FEATURES) && virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) { /* * LRO without mergeable buffers requires special care. This * is not ideal because every receive buffer must be large * enough to hold the maximum TCP packet, the Ethernet header, * and the header. This requires up to 34 descriptors with * MCLBYTES clusters. 
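 * (Roughly: a 65535-byte IP datagram plus the Ethernet header spans 33
 * MCLBYTES (2KB) clusters, and one more descriptor carries the header,
 * hence 34.)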
If we do not have indirect descriptors, * LRO is disabled since the virtqueue will not contain very * many receive buffers. */ if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) { device_printf(dev, "Host LRO disabled since both mergeable buffers " "and indirect descriptors were not negotiated\n"); features &= ~VTNET_LRO_FEATURES; negotiated_features = virtio_negotiate_features(dev, features); } else sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG; } sc->vtnet_features = negotiated_features; sc->vtnet_negotiated_features = negotiated_features; return (virtio_finalize_features(dev)); } static int vtnet_setup_features(struct vtnet_softc *sc) { device_t dev; int error; dev = sc->vtnet_dev; error = vtnet_negotiate_features(sc); if (error) return (error); if (virtio_with_feature(dev, VIRTIO_F_VERSION_1)) sc->vtnet_flags |= VTNET_FLAG_MODERN; if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) sc->vtnet_flags |= VTNET_FLAG_INDIRECT; if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX)) sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX; if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) { /* This feature should always be negotiated. */ sc->vtnet_flags |= VTNET_FLAG_MAC; } if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) { sc->vtnet_max_mtu = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, mtu)); } else sc->vtnet_max_mtu = VTNET_MAX_MTU; if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) { sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS; sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); } else if (vtnet_modern(sc)) { /* This is identical to the mergeable header. */ sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1); } else sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr); if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE; else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG; else sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE; /* * Favor "hardware" LRO if negotiated, but support software LRO as * a fallback; there is usually little benefit (or worse) with both. */ if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 && virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0) sc->vtnet_flags |= VTNET_FLAG_SW_LRO; if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX; else sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN; sc->vtnet_req_vq_pairs = 1; sc->vtnet_max_vq_pairs = 1; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) sc->vtnet_flags |= VTNET_FLAG_CTRL_RX; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN)) sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR)) sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC; if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) { sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); } } if (sc->vtnet_max_vq_pairs > 1) { int req; /* * Limit the maximum number of requested queue pairs to the * number of CPUs and the configured maximum. 
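 * A negative mq_max_pairs tunable selects a single pair and zero
 * selects one pair per CPU; the result is clamped to both the device
 * maximum and mp_ncpus.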
*/ req = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); if (req < 0) req = 1; if (req == 0) req = mp_ncpus; if (req > sc->vtnet_max_vq_pairs) req = sc->vtnet_max_vq_pairs; if (req > mp_ncpus) req = mp_ncpus; if (req > 1) { sc->vtnet_req_vq_pairs = req; sc->vtnet_flags |= VTNET_FLAG_MQ; } } return (0); } static int vtnet_init_rxq(struct vtnet_softc *sc, int id) { struct vtnet_rxq *rxq; rxq = &sc->vtnet_rxqs[id]; snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d", device_get_nameunit(sc->vtnet_dev), id); mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF); rxq->vtnrx_sc = sc; rxq->vtnrx_id = id; rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT); if (rxq->vtnrx_sg == NULL) return (ENOMEM); #if defined(INET) || defined(INET6) if (vtnet_software_lro(sc)) { if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp, sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0) return (ENOMEM); } #endif NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq); rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT, taskqueue_thread_enqueue, &rxq->vtnrx_tq); return (rxq->vtnrx_tq == NULL ? ENOMEM : 0); } static int vtnet_init_txq(struct vtnet_softc *sc, int id) { struct vtnet_txq *txq; txq = &sc->vtnet_txqs[id]; snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d", device_get_nameunit(sc->vtnet_dev), id); mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF); txq->vtntx_sc = sc; txq->vtntx_id = id; txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT); if (txq->vtntx_sg == NULL) return (ENOMEM); #ifndef VTNET_LEGACY_TX txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF, M_NOWAIT, &txq->vtntx_mtx); if (txq->vtntx_br == NULL) return (ENOMEM); TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq); #endif TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq); txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT, taskqueue_thread_enqueue, &txq->vtntx_tq); if (txq->vtntx_tq == NULL) return (ENOMEM); return (0); } static int vtnet_alloc_rxtx_queues(struct vtnet_softc *sc) { int i, npairs, error; npairs = sc->vtnet_max_vq_pairs; sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF, M_NOWAIT | M_ZERO); sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL) return (ENOMEM); for (i = 0; i < npairs; i++) { error = vtnet_init_rxq(sc, i); if (error) return (error); error = vtnet_init_txq(sc, i); if (error) return (error); } vtnet_set_rx_process_limit(sc); vtnet_setup_queue_sysctl(sc); return (0); } static void vtnet_destroy_rxq(struct vtnet_rxq *rxq) { rxq->vtnrx_sc = NULL; rxq->vtnrx_id = -1; #if defined(INET) || defined(INET6) tcp_lro_free(&rxq->vtnrx_lro); #endif if (rxq->vtnrx_sg != NULL) { sglist_free(rxq->vtnrx_sg); rxq->vtnrx_sg = NULL; } if (mtx_initialized(&rxq->vtnrx_mtx) != 0) mtx_destroy(&rxq->vtnrx_mtx); } static void vtnet_destroy_txq(struct vtnet_txq *txq) { txq->vtntx_sc = NULL; txq->vtntx_id = -1; if (txq->vtntx_sg != NULL) { sglist_free(txq->vtntx_sg); txq->vtntx_sg = NULL; } #ifndef VTNET_LEGACY_TX if (txq->vtntx_br != NULL) { buf_ring_free(txq->vtntx_br, M_DEVBUF); txq->vtntx_br = NULL; } #endif if (mtx_initialized(&txq->vtntx_mtx) != 0) mtx_destroy(&txq->vtntx_mtx); } static void vtnet_free_rxtx_queues(struct vtnet_softc *sc) { int i; if (sc->vtnet_rxqs != NULL) { for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_destroy_rxq(&sc->vtnet_rxqs[i]); free(sc->vtnet_rxqs, M_DEVBUF); sc->vtnet_rxqs = 
NULL; } if (sc->vtnet_txqs != NULL) { for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_destroy_txq(&sc->vtnet_txqs[i]); free(sc->vtnet_txqs, M_DEVBUF); sc->vtnet_txqs = NULL; } } static int vtnet_alloc_rx_filters(struct vtnet_softc *sc) { if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter), M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_mac_filter == NULL) return (ENOMEM); } if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) * VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_vlan_filter == NULL) return (ENOMEM); } return (0); } static void vtnet_free_rx_filters(struct vtnet_softc *sc) { if (sc->vtnet_mac_filter != NULL) { free(sc->vtnet_mac_filter, M_DEVBUF); sc->vtnet_mac_filter = NULL; } if (sc->vtnet_vlan_filter != NULL) { free(sc->vtnet_vlan_filter, M_DEVBUF); sc->vtnet_vlan_filter = NULL; } } static int vtnet_alloc_virtqueues(struct vtnet_softc *sc) { device_t dev; struct vq_alloc_info *info; struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i, idx, flags, nvqs, error; dev = sc->vtnet_dev; flags = 0; nvqs = sc->vtnet_max_vq_pairs * 2; if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) nvqs++; info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT); if (info == NULL) return (ENOMEM); for (i = 0, idx = 0; i < sc->vtnet_req_vq_pairs; i++, idx += 2) { rxq = &sc->vtnet_rxqs[i]; VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs, vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq, "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id); txq = &sc->vtnet_txqs[i]; VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs, vtnet_tx_vq_intr, txq, &txq->vtntx_vq, "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id); } /* These queues will not be used so allocate the minimum resources. */ for (/**/; i < sc->vtnet_max_vq_pairs; i++, idx += 2) { rxq = &sc->vtnet_rxqs[i]; VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, rxq, &rxq->vtnrx_vq, "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id); txq = &sc->vtnet_txqs[i]; VQ_ALLOC_INFO_INIT(&info[idx+1], 0, NULL, txq, &txq->vtntx_vq, "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id); } if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL, &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev)); } /* * TODO: Enable interrupt binding if this is multiqueue. This will * only matter when per-virtqueue MSIX is available. 
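 * For now the VTNET_FLAG_MQ case below is only a placeholder that adds
 * no allocation flags.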
*/ if (sc->vtnet_flags & VTNET_FLAG_MQ) flags |= 0; error = virtio_alloc_virtqueues(dev, flags, nvqs, info); free(info, M_TEMP); return (error); } static int vtnet_alloc_interface(struct vtnet_softc *sc) { device_t dev; if_t ifp; dev = sc->vtnet_dev; ifp = if_alloc(IFT_ETHER); if (ifp == NULL) return (ENOMEM); sc->vtnet_ifp = ifp; if_setsoftc(ifp, sc); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); return (0); } static int vtnet_setup_interface(struct vtnet_softc *sc) { device_t dev; struct pfil_head_args pa; if_t ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | IFF_KNOWSEPOCH); if_setbaudrate(ifp, IF_Gbps(10)); if_setinitfn(ifp, vtnet_init); if_setioctlfn(ifp, vtnet_ioctl); if_setgetcounterfn(ifp, vtnet_get_counter); #ifndef VTNET_LEGACY_TX if_settransmitfn(ifp, vtnet_txq_mq_start); if_setqflushfn(ifp, vtnet_qflush); #else struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq; if_setstartfn(ifp, vtnet_start); if_setsendqlen(ifp, virtqueue_size(vq) - 1); if_setsendqready(ifp); #endif vtnet_get_macaddr(sc); if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0); ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts); ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO); if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) { int gso; if_setcapabilitiesbit(ifp, IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6, 0); gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO); if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0); if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0); if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; if (if_getcapabilities(ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) { int tso_maxlen; if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0); tso_maxlen = vtnet_tunable_int(sc, "tso_maxlen", vtnet_tso_maxlen); if_sethwtsomax(ifp, tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); if_sethwtsomaxsegcount(ifp, sc->vtnet_tx_nsegs - 1); if_sethwtsomaxsegsize(ifp, PAGE_SIZE); } } if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) { if_setcapabilitiesbit(ifp, IFCAP_RXCSUM, 0); #ifdef notyet /* BMV: Rx checksums not distinguished between IPv4 and IPv6. */ if_setcapabilitiesbit(ifp, IFCAP_RXCSUM_IPV6, 0); #endif if (vtnet_tunable_int(sc, "fixup_needs_csum", vtnet_fixup_needs_csum) != 0) sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM; /* Support either "hardware" or software LRO. */ if_setcapabilitiesbit(ifp, IFCAP_LRO, 0); } if (if_getcapabilities(ifp) & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) { /* * VirtIO does not support VLAN tagging, but we can fake * it by inserting and removing the 802.1Q header during * transmit and receive. We are then able to do checksum * offloading of VLAN frames. */ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0); } if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO) if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0); if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0); /* * Capabilities after here are not enabled by default. 
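 * Everything set above is switched on by the if_setcapenable() call
 * below; IFCAP_VLAN_HWFILTER is advertised afterwards so it starts out
 * disabled.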
*/ if_setcapenable(ifp, if_getcapabilities(ifp)); if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0); sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); } ether_ifattach(ifp, sc->vtnet_hwaddr); /* Tell the upper layer(s) we support long frames. */ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); DEBUGNET_SET(ifp, vtnet); pa.pa_version = PFIL_VERSION; pa.pa_flags = PFIL_IN; pa.pa_type = PFIL_TYPE_ETHERNET; pa.pa_headname = if_name(ifp); sc->vtnet_pfil = pfil_head_register(&pa); return (0); } static int vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu) { int framesz; if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) return (MJUMPAGESIZE); else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) return (MCLBYTES); /* * Try to scale the receive mbuf cluster size from the MTU. We * could also use the VQ size to influence the selected size, * but that would only matter for very small queues. */ if (vtnet_modern(sc)) { MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1)); framesz = sizeof(struct virtio_net_hdr_v1); } else framesz = sizeof(struct vtnet_rx_header); framesz += sizeof(struct ether_vlan_header) + mtu; if (framesz <= MCLBYTES) return (MCLBYTES); else if (framesz <= MJUMPAGESIZE) return (MJUMPAGESIZE); else if (framesz <= MJUM9BYTES) return (MJUM9BYTES); /* Sane default; avoid 16KB clusters. */ return (MCLBYTES); } static int vtnet_ioctl_mtu(struct vtnet_softc *sc, u_int mtu) { if_t ifp; int clustersz; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if (if_getmtu(ifp) == mtu) return (0); else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu) return (EINVAL); if_setmtu(ifp, mtu); clustersz = vtnet_rx_cluster_size(sc, mtu); if (clustersz != sc->vtnet_rx_clustersz && if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); vtnet_init_locked(sc, 0); } return (0); } static int vtnet_ioctl_ifflags(struct vtnet_softc *sc) { if_t ifp; int drv_running; ifp = sc->vtnet_ifp; drv_running = (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0; VTNET_CORE_LOCK_ASSERT(sc); if ((if_getflags(ifp) & IFF_UP) == 0) { if (drv_running) vtnet_stop(sc); goto out; } if (!drv_running) { vtnet_init_locked(sc, 0); goto out; } if ((if_getflags(ifp) ^ sc->vtnet_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) vtnet_rx_filter(sc); else { if ((if_getflags(ifp) ^ sc->vtnet_if_flags) & IFF_ALLMULTI) return (ENOTSUP); if_setflagbits(ifp, IFF_PROMISC, 0); } } out: sc->vtnet_if_flags = if_getflags(ifp); return (0); } static int vtnet_ioctl_multi(struct vtnet_softc *sc) { if_t ifp; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX && if_getdrvflags(ifp) & IFF_DRV_RUNNING) vtnet_rx_filter_mac(sc); return (0); } static int vtnet_ioctl_ifcap(struct vtnet_softc *sc, struct ifreq *ifr) { if_t ifp; int mask, reinit, update; ifp = sc->vtnet_ifp; mask = (ifr->ifr_reqcap & if_getcapabilities(ifp)) ^ if_getcapenable(ifp); reinit = update = 0; VTNET_CORE_LOCK_ASSERT(sc); if (mask & IFCAP_TXCSUM) if_togglecapenable(ifp, IFCAP_TXCSUM); if (mask & IFCAP_TXCSUM_IPV6) if_togglecapenable(ifp, IFCAP_TXCSUM_IPV6); if (mask & IFCAP_TSO4) if_togglecapenable(ifp, IFCAP_TSO4); if (mask & IFCAP_TSO6) if_togglecapenable(ifp, IFCAP_TSO6); if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) { /* * These Rx features require 
the negotiated features to * be updated. Avoid a full reinit if possible. */ if (sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) update = 1; else reinit = 1; /* BMV: Avoid needless renegotiation for just software LRO. */ if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) == IFCAP_LRO && vtnet_software_lro(sc)) reinit = update = 0; if (mask & IFCAP_RXCSUM) if_togglecapenable(ifp, IFCAP_RXCSUM); if (mask & IFCAP_RXCSUM_IPV6) if_togglecapenable(ifp, IFCAP_RXCSUM_IPV6); if (mask & IFCAP_LRO) if_togglecapenable(ifp, IFCAP_LRO); /* * VirtIO does not distinguish between IPv4 and IPv6 checksums * so treat them as a pair. Guest TSO (LRO) requires receive * checksums. */ if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { if_setcapenablebit(ifp, IFCAP_RXCSUM, 0); #ifdef notyet if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0); #endif } else if_setcapenablebit(ifp, 0, (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)); } if (mask & IFCAP_VLAN_HWFILTER) { /* These Rx features require renegotiation. */ reinit = 1; if (mask & IFCAP_VLAN_HWFILTER) if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER); } if (mask & IFCAP_VLAN_HWTSO) if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); if (mask & IFCAP_VLAN_HWTAGGING) if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if (reinit) { if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); vtnet_init_locked(sc, 0); } else if (update) vtnet_update_rx_offloads(sc); } return (0); } static int vtnet_ioctl(if_t ifp, u_long cmd, caddr_t data) { struct vtnet_softc *sc; struct ifreq *ifr; int error; sc = if_getsoftc(ifp); ifr = (struct ifreq *) data; error = 0; switch (cmd) { case SIOCSIFMTU: VTNET_CORE_LOCK(sc); error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu); VTNET_CORE_UNLOCK(sc); break; case SIOCSIFFLAGS: VTNET_CORE_LOCK(sc); error = vtnet_ioctl_ifflags(sc); VTNET_CORE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: VTNET_CORE_LOCK(sc); error = vtnet_ioctl_multi(sc); VTNET_CORE_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd); break; case SIOCSIFCAP: VTNET_CORE_LOCK(sc); error = vtnet_ioctl_ifcap(sc, ifr); VTNET_CORE_UNLOCK(sc); VLAN_CAPABILITIES(ifp); break; default: error = ether_ioctl(ifp, cmd, data); break; } VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc); return (error); } static int vtnet_rxq_populate(struct vtnet_rxq *rxq) { struct virtqueue *vq; int nbufs, error; #ifdef DEV_NETMAP error = vtnet_netmap_rxq_populate(rxq); if (error >= 0) return (error); #endif /* DEV_NETMAP */ vq = rxq->vtnrx_vq; error = ENOSPC; for (nbufs = 0; !virtqueue_full(vq); nbufs++) { error = vtnet_rxq_new_buf(rxq); if (error) break; } if (nbufs > 0) { virtqueue_notify(vq); /* * EMSGSIZE signifies the virtqueue did not have enough * entries available to hold the last mbuf. This is not * an error. 
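         * (virtqueue_enqueue() is expected to return EMSGSIZE, rather
         * than ENOSPC, when the ring still has free descriptors but not
         * enough for the whole chain.)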
         */
        if (error == EMSGSIZE)
            error = 0;
    }

    return (error);
}

static void
vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
{
    struct virtqueue *vq;
    struct mbuf *m;
    int last;
#ifdef DEV_NETMAP
    struct netmap_kring *kring = netmap_kring_on(NA(rxq->vtnrx_sc->vtnet_ifp),
        rxq->vtnrx_id, NR_RX);
#else  /* !DEV_NETMAP */
    void *kring = NULL;
#endif /* !DEV_NETMAP */

    vq = rxq->vtnrx_vq;
    last = 0;

    while ((m = virtqueue_drain(vq, &last)) != NULL) {
        if (kring == NULL)
            m_freem(m);
    }

    KASSERT(virtqueue_empty(vq),
        ("%s: mbufs remaining in rx queue %p", __func__, rxq));
}

static struct mbuf *
vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
{
    struct mbuf *m_head, *m_tail, *m;
    int i, size;

    m_head = NULL;
    size = sc->vtnet_rx_clustersz;

    KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
        ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs));

    for (i = 0; i < nbufs; i++) {
        m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size);
        if (m == NULL) {
            sc->vtnet_stats.mbuf_alloc_failed++;
            m_freem(m_head);
            return (NULL);
        }

        m->m_len = size;
        if (m_head != NULL) {
            m_tail->m_next = m;
            m_tail = m;
        } else
            m_head = m_tail = m;
    }

    if (m_tailp != NULL)
        *m_tailp = m_tail;

    return (m_head);
}

/*
 * Slow path for when LRO without mergeable buffers is negotiated.
 */
static int
vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
    int len0)
{
    struct vtnet_softc *sc;
    struct mbuf *m, *m_prev, *m_new, *m_tail;
    int len, clustersz, nreplace, error;

    sc = rxq->vtnrx_sc;
    clustersz = sc->vtnet_rx_clustersz;
    m_prev = NULL;
    m_tail = NULL;
    nreplace = 0;

    m = m0;
    len = len0;

    /*
     * Since these mbuf chains are so large, avoid allocating a complete
     * replacement when the received frame did not consume the entire
     * chain. Unused mbufs are moved to the tail of the replacement mbuf.
     */
    while (len > 0) {
        if (m == NULL) {
            sc->vtnet_stats.rx_frame_too_large++;
            return (EMSGSIZE);
        }

        /*
         * Every mbuf should have the expected cluster size since that
         * is also used to allocate the replacements.
         */
        KASSERT(m->m_len == clustersz,
            ("%s: mbuf size %d not expected cluster size %d", __func__,
            m->m_len, clustersz));

        m->m_len = MIN(m->m_len, len);
        len -= m->m_len;

        m_prev = m;
        m = m->m_next;
        nreplace++;
    }

    KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
        ("%s: invalid replacement mbuf count %d max %d", __func__,
        nreplace, sc->vtnet_rx_nmbufs));

    m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
    if (m_new == NULL) {
        m_prev->m_len = clustersz;
        return (ENOBUFS);
    }

    /*
     * Move any unused mbufs from the received mbuf chain onto the
     * end of the replacement chain.
     */
    if (m_prev->m_next != NULL) {
        m_tail->m_next = m_prev->m_next;
        m_prev->m_next = NULL;
    }

    error = vtnet_rxq_enqueue_buf(rxq, m_new);
    if (error) {
        /*
         * The replacement is supposed to be a copy of the one
         * dequeued, so this is a very unexpected error.
         *
         * Restore the m0 chain to the original state if it was
         * modified so we can then discard it.
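         * (On failure the caller, vtnet_rxq_eof(), requeues the
         * restored chain via vtnet_rxq_discard_buf().)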
*/ if (m_tail->m_next != NULL) { m_prev->m_next = m_tail->m_next; m_tail->m_next = NULL; } m_prev->m_len = clustersz; sc->vtnet_stats.rx_enq_replacement_failed++; m_freem(m_new); } return (error); } static int vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len) { struct vtnet_softc *sc; struct mbuf *m_new; int error; sc = rxq->vtnrx_sc; if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len)); MPASS(m->m_next == NULL); if (m->m_len < len) return (EMSGSIZE); m_new = vtnet_rx_alloc_buf(sc, 1, NULL); if (m_new == NULL) return (ENOBUFS); error = vtnet_rxq_enqueue_buf(rxq, m_new); if (error) { sc->vtnet_stats.rx_enq_replacement_failed++; m_freem(m_new); } else m->m_len = len; return (error); } static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m) { struct vtnet_softc *sc; struct sglist *sg; int header_inlined, error; sc = rxq->vtnrx_sc; sg = rxq->vtnrx_sg; KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, ("%s: mbuf chain without LRO_NOMRG", __func__)); VTNET_RXQ_LOCK_ASSERT(rxq); sglist_reset(sg); header_inlined = vtnet_modern(sc) || (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */ if (header_inlined) error = sglist_append_mbuf(sg, m); else { struct vtnet_rx_header *rxhdr = mtod(m, struct vtnet_rx_header *); MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr)); /* Append the header and remaining mbuf data. */ error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size); if (error) return (error); error = sglist_append(sg, &rxhdr[1], m->m_len - sizeof(struct vtnet_rx_header)); if (error) return (error); if (m->m_next != NULL) error = sglist_append_mbuf(sg, m->m_next); } if (error) return (error); return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg)); } static int vtnet_rxq_new_buf(struct vtnet_rxq *rxq) { struct vtnet_softc *sc; struct mbuf *m; int error; sc = rxq->vtnrx_sc; m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL); if (m == NULL) return (ENOBUFS); error = vtnet_rxq_enqueue_buf(rxq, m); if (error) m_freem(m); return (error); } static int vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype, int hoff, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; int error; sc = rxq->vtnrx_sc; /* * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does * not have an analogous CSUM flag. The checksum has been validated, * but is incomplete (TCP/UDP pseudo header). * * The packet is likely from another VM on the same host that itself * performed checksum offloading so Tx/Rx is basically a memcpy and * the checksum has little value. * * Default to receiving the packet as-is for performance reasons, but * this can cause issues if the packet is to be forwarded because it * does not contain a valid checksum. This patch may be helpful: * https://reviews.freebsd.org/D6611. In the meantime, have the driver * compute the checksum if requested. * * BMV: Need to add an CSUM_PARTIAL flag? */ if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) { error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr); return (error); } /* * Compute the checksum in the driver so the packet will contain a * valid checksum. The checksum is at csum_offset from csum_start. */ switch (etype) { #if defined(INET) || defined(INET6) case ETHERTYPE_IP: case ETHERTYPE_IPV6: { int csum_off, csum_end; uint16_t csum; csum_off = hdr->csum_start + hdr->csum_offset; csum_end = csum_off + sizeof(uint16_t); /* Assume checksum will be in the first mbuf. 
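 * If it is not contiguous, return an error that the caller counts as a
 * failed checksum (vrxs_csum_failed).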
*/ if (m->m_len < csum_end || m->m_pkthdr.len < csum_end) return (1); /* * Like in_delayed_cksum()/in6_delayed_cksum(), compute the * checksum and write it at the specified offset. We could * try to verify the packet: csum_start should probably * correspond to the start of the TCP/UDP header. * * BMV: Need to properly handle UDP with zero checksum. Is * the IPv4 header checksum implicitly validated? */ csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start); *(uint16_t *)(mtodo(m, csum_off)) = csum; m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; } #endif default: sc->vtnet_stats.rx_csum_bad_ethtype++; return (1); } return (0); } static int vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype, int hoff, struct virtio_net_hdr *hdr __unused) { #if 0 struct vtnet_softc *sc; #endif int protocol; #if 0 sc = rxq->vtnrx_sc; #endif switch (etype) { #if defined(INET) case ETHERTYPE_IP: if (__predict_false(m->m_len < hoff + sizeof(struct ip))) protocol = IPPROTO_DONE; else { struct ip *ip = (struct ip *)(m->m_data + hoff); protocol = ip->ip_p; } break; #endif #if defined(INET6) case ETHERTYPE_IPV6: if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr)) || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0) protocol = IPPROTO_DONE; break; #endif default: protocol = IPPROTO_DONE; break; } switch (protocol) { case IPPROTO_TCP: case IPPROTO_UDP: m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; default: /* * FreeBSD does not support checksum offloading of this * protocol. Let the stack re-verify the checksum later * if the protocol is supported. */ #if 0 if_printf(sc->vtnet_ifp, "%s: checksum offload of unsupported protocol " "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n", __func__, etype, protocol, hdr->csum_start, hdr->csum_offset); #endif break; } return (0); } static int vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { const struct ether_header *eh; int hoff; uint16_t etype; eh = mtod(m, const struct ether_header *); etype = ntohs(eh->ether_type); if (etype == ETHERTYPE_VLAN) { /* TODO BMV: Handle QinQ. */ const struct ether_vlan_header *evh = mtod(m, const struct ether_vlan_header *); etype = ntohs(evh->evl_proto); hoff = sizeof(struct ether_vlan_header); } else hoff = sizeof(struct ether_header); if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr)); else /* VIRTIO_NET_HDR_F_DATA_VALID */ return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr)); } static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs) { struct mbuf *m; while (--nbufs > 0) { m = virtqueue_dequeue(rxq->vtnrx_vq, NULL); if (m == NULL) break; vtnet_rxq_discard_buf(rxq, m); } } static void vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m) { int error __diagused; /* * Requeue the discarded mbuf. This should always be successful * since it was just dequeued. 
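 * (The chain needs exactly the descriptors that dequeuing it just
 * freed, so the enqueue cannot run out of ring space.)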
*/ error = vtnet_rxq_enqueue_buf(rxq, m); KASSERT(error == 0, ("%s: cannot requeue discarded mbuf %d", __func__, error)); } static int vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs) { struct vtnet_softc *sc; struct virtqueue *vq; struct mbuf *m_tail; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; m_tail = m_head; while (--nbufs > 0) { struct mbuf *m; uint32_t len; m = virtqueue_dequeue(vq, &len); if (m == NULL) { rxq->vtnrx_stats.vrxs_ierrors++; goto fail; } if (vtnet_rxq_new_buf(rxq) != 0) { rxq->vtnrx_stats.vrxs_iqdrops++; vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) vtnet_rxq_discard_merged_bufs(rxq, nbufs); goto fail; } if (m->m_len < len) len = m->m_len; m->m_len = len; m->m_flags &= ~M_PKTHDR; m_head->m_pkthdr.len += len; m_tail->m_next = m; m_tail = m; } return (0); fail: sc->vtnet_stats.rx_mergeable_failed++; m_freem(m_head); return (1); } #if defined(INET) || defined(INET6) static int vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m) { struct lro_ctrl *lro; lro = &rxq->vtnrx_lro; if (lro->lro_mbuf_max != 0) { tcp_lro_queue_mbuf(lro, m); return (0); } return (tcp_lro_rx(lro, m, 0)); } #endif static void vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; if_t ifp; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { struct ether_header *eh = mtod(m, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { vtnet_vlan_tag_remove(m); /* * With the 802.1Q header removed, update the * checksum starting location accordingly. */ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) hdr->csum_start -= ETHER_VLAN_ENCAP_LEN; } } m->m_pkthdr.flowid = rxq->vtnrx_id; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) { if (vtnet_rxq_csum(rxq, m, hdr) == 0) rxq->vtnrx_stats.vrxs_csum++; else rxq->vtnrx_stats.vrxs_csum_failed++; } if (hdr->gso_size != 0) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_TCPV6: m->m_pkthdr.lro_nsegs = howmany(m->m_pkthdr.len, hdr->gso_size); rxq->vtnrx_stats.vrxs_host_lro++; break; } } rxq->vtnrx_stats.vrxs_ipackets++; rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len; #if defined(INET) || defined(INET6) if (vtnet_software_lro(sc) && if_getcapenable(ifp) & IFCAP_LRO) { if (vtnet_lro_rx(rxq, m) == 0) return; } #endif if_input(ifp, m); } static int vtnet_rxq_eof(struct vtnet_rxq *rxq) { struct virtio_net_hdr lhdr, *hdr; struct vtnet_softc *sc; if_t ifp; struct virtqueue *vq; int deq, count; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; ifp = sc->vtnet_ifp; deq = 0; count = sc->vtnet_rx_process_limit; VTNET_RXQ_LOCK_ASSERT(rxq); while (count-- > 0) { struct mbuf *m; uint32_t len, nbufs, adjsz; m = virtqueue_dequeue(vq, &len); if (m == NULL) break; deq++; if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) { rxq->vtnrx_stats.vrxs_ierrors++; vtnet_rxq_discard_buf(rxq, m); continue; } if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) { struct virtio_net_hdr_mrg_rxbuf *mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *); kmsan_mark(mhdr, sizeof(*mhdr), KMSAN_STATE_INITED); nbufs = vtnet_htog16(sc, mhdr->num_buffers); adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); } else if (vtnet_modern(sc)) { nbufs = 1; /* num_buffers is always 1 */ adjsz = sizeof(struct virtio_net_hdr_v1); } else { nbufs = 1; adjsz = sizeof(struct vtnet_rx_header); /* * Account for our gap between the header and start of * data to keep the segments separated. 
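             * VTNET_RX_HEADER_PAD is the pad in the legacy struct
             * vtnet_rx_header that 4-byte aligns the IP header of the
             * frame that follows; count it here before m_adj() strips
             * the full structure below.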
             */
            len += VTNET_RX_HEADER_PAD;
        }

        if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
            rxq->vtnrx_stats.vrxs_iqdrops++;
            vtnet_rxq_discard_buf(rxq, m);
            if (nbufs > 1)
                vtnet_rxq_discard_merged_bufs(rxq, nbufs);
            continue;
        }

        m->m_pkthdr.len = len;
        m->m_pkthdr.rcvif = ifp;
        m->m_pkthdr.csum_flags = 0;

        if (nbufs > 1) {
            /* Dequeue the rest of chain. */
            if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
                continue;
        }

        kmsan_mark_mbuf(m, KMSAN_STATE_INITED);

        /*
         * Save an endian swapped version of the header prior to it
         * being stripped. The header is always at the start of the
         * mbuf data. num_buffers was already saved (and not needed)
         * so use the standard header.
         */
        hdr = mtod(m, struct virtio_net_hdr *);
        lhdr.flags = hdr->flags;
        lhdr.gso_type = hdr->gso_type;
        lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len);
        lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size);
        lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start);
        lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset);
        m_adj(m, adjsz);

        if (PFIL_HOOKED_IN(sc->vtnet_pfil)) {
            pfil_return_t pfil;

-           pfil = pfil_run_hooks(sc->vtnet_pfil, &m, ifp, PFIL_IN,
-               NULL);
+           pfil = pfil_mbuf_in(sc->vtnet_pfil, &m, ifp, NULL);
            switch (pfil) {
-           case PFIL_REALLOCED:
-               m = pfil_mem2mbuf(m->m_data);
-               break;
            case PFIL_DROPPED:
            case PFIL_CONSUMED:
                continue;
            default:
                KASSERT(pfil == PFIL_PASS,
                    ("Filter returned %d!", pfil));
            }
        }

        vtnet_rxq_input(rxq, m, &lhdr);
    }

    if (deq > 0) {
#if defined(INET) || defined(INET6)
        if (vtnet_software_lro(sc))
            tcp_lro_flush_all(&rxq->vtnrx_lro);
#endif
        virtqueue_notify(vq);
    }

    return (count > 0 ? 0 : EAGAIN);
}

static void
vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries)
{
    struct vtnet_softc *sc;
    if_t ifp;
    u_int more;
#ifdef DEV_NETMAP
    int nmirq;
#endif /* DEV_NETMAP */

    sc = rxq->vtnrx_sc;
    ifp = sc->vtnet_ifp;

    if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
        /*
         * Ignore this interrupt. Either this is a spurious interrupt
         * or multiqueue without per-VQ MSIX so every queue needs to
         * be polled (a brain dead configuration we could try harder
         * to avoid).
         */
        vtnet_rxq_disable_intr(rxq);
        return;
    }

    VTNET_RXQ_LOCK(rxq);

#ifdef DEV_NETMAP
    /*
     * We call netmap_rx_irq() under lock to prevent concurrent calls.
     * This is not necessary to serialize the access to the RX vq, but
     * rather to avoid races that may happen if this interface is
     * attached to a VALE switch, which would cause received packets
     * to stall in the RX queue (nm_kr_tryget() could find the kring
     * busy when called from netmap_bwrap_intr_notify()).
     */
    nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more);
    if (nmirq != NM_IRQ_PASS) {
        VTNET_RXQ_UNLOCK(rxq);
        if (nmirq == NM_IRQ_RESCHED) {
            taskqueue_enqueue(rxq->vtnrx_tq,
                &rxq->vtnrx_intrtask);
        }
        return;
    }
#endif /* DEV_NETMAP */

again:
    if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
        VTNET_RXQ_UNLOCK(rxq);
        return;
    }

    more = vtnet_rxq_eof(rxq);
    if (more || vtnet_rxq_enable_intr(rxq) != 0) {
        if (!more)
            vtnet_rxq_disable_intr(rxq);
        /*
         * This is an occasional condition or race (when !more),
         * so retry a few times before scheduling the taskqueue.
*/ if (tries-- > 0) goto again; rxq->vtnrx_stats.vrxs_rescheduled++; VTNET_RXQ_UNLOCK(rxq); taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } else VTNET_RXQ_UNLOCK(rxq); } static void vtnet_rx_vq_intr(void *xrxq) { struct vtnet_rxq *rxq; rxq = xrxq; vtnet_rx_vq_process(rxq, VTNET_INTR_DISABLE_RETRIES); } static void vtnet_rxq_tq_intr(void *xrxq, int pending __unused) { struct vtnet_rxq *rxq; rxq = xrxq; vtnet_rx_vq_process(rxq, 0); } static int vtnet_txq_intr_threshold(struct vtnet_txq *txq) { struct vtnet_softc *sc; int threshold; sc = txq->vtntx_sc; /* * The Tx interrupt is disabled until the queue free count falls * below our threshold. Completed frames are drained from the Tx * virtqueue before transmitting new frames and in the watchdog * callout, so the frequency of Tx interrupts is greatly reduced, * at the cost of not freeing mbufs as quickly as they otherwise * would be. */ threshold = virtqueue_size(txq->vtntx_vq) / 4; /* * Without indirect descriptors, leave enough room for the most * segments we handle. */ if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 && threshold < sc->vtnet_tx_nsegs) threshold = sc->vtnet_tx_nsegs; return (threshold); } static int vtnet_txq_below_threshold(struct vtnet_txq *txq) { struct virtqueue *vq; vq = txq->vtntx_vq; return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold); } static int vtnet_txq_notify(struct vtnet_txq *txq) { struct virtqueue *vq; vq = txq->vtntx_vq; txq->vtntx_watchdog = VTNET_TX_TIMEOUT; virtqueue_notify(vq); if (vtnet_txq_enable_intr(txq) == 0) return (0); /* * Drain frames that were completed since last checked. If this * causes the queue to go above the threshold, the caller should * continue transmitting. */ if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) { virtqueue_disable_intr(vq); return (1); } return (0); } static void vtnet_txq_free_mbufs(struct vtnet_txq *txq) { struct virtqueue *vq; struct vtnet_tx_header *txhdr; int last; #ifdef DEV_NETMAP struct netmap_kring *kring = netmap_kring_on(NA(txq->vtntx_sc->vtnet_ifp), txq->vtntx_id, NR_TX); #else /* !DEV_NETMAP */ void *kring = NULL; #endif /* !DEV_NETMAP */ vq = txq->vtntx_vq; last = 0; while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { if (kring == NULL) { m_freem(txhdr->vth_mbuf); uma_zfree(vtnet_tx_header_zone, txhdr); } } KASSERT(virtqueue_empty(vq), ("%s: mbufs remaining in tx queue %p", __func__, txq)); } /* * BMV: This can go away once we finally have offsets in the mbuf header. */ static int vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype, int *proto, int *start) { struct vtnet_softc *sc; struct ether_vlan_header *evh; #if defined(INET) || defined(INET6) int offset; #endif sc = txq->vtntx_sc; evh = mtod(m, struct ether_vlan_header *); if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { /* BMV: We should handle nested VLAN tags too. 
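         * Only a single tag is parsed here: a nested tag leaves *etype
         * as ETHERTYPE_VLAN, which falls into the unknown-ethertype
         * case below.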
*/ *etype = ntohs(evh->evl_proto); #if defined(INET) || defined(INET6) offset = sizeof(struct ether_vlan_header); #endif } else { *etype = ntohs(evh->evl_encap_proto); #if defined(INET) || defined(INET6) offset = sizeof(struct ether_header); #endif } switch (*etype) { #if defined(INET) case ETHERTYPE_IP: { struct ip *ip, iphdr; if (__predict_false(m->m_len < offset + sizeof(struct ip))) { m_copydata(m, offset, sizeof(struct ip), (caddr_t) &iphdr); ip = &iphdr; } else ip = (struct ip *)(m->m_data + offset); *proto = ip->ip_p; *start = offset + (ip->ip_hl << 2); break; } #endif #if defined(INET6) case ETHERTYPE_IPV6: *proto = -1; *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); /* Assert the network stack sent us a valid packet. */ KASSERT(*start > offset, ("%s: mbuf %p start %d offset %d proto %d", __func__, m, *start, offset, *proto)); break; #endif default: sc->vtnet_stats.tx_csum_unknown_ethtype++; return (EINVAL); } return (0); } static int vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type, int offset, struct virtio_net_hdr *hdr) { static struct timeval lastecn; static int curecn; struct vtnet_softc *sc; struct tcphdr *tcp, tcphdr; sc = txq->vtntx_sc; if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); tcp = &tcphdr; } else tcp = (struct tcphdr *)(m->m_data + offset); hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2)); hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz); hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : VIRTIO_NET_HDR_GSO_TCPV6; if (__predict_false(tcp->th_flags & TH_CWR)) { /* * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In * FreeBSD, ECN support is not on a per-interface basis, * but globally via the net.inet.tcp.ecn.enable sysctl * knob. The default is off. */ if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { if (ppsratecheck(&lastecn, &curecn, 1)) if_printf(sc->vtnet_ifp, "TSO with ECN not negotiated with host\n"); return (ENOTSUP); } hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } txq->vtntx_stats.vtxs_tso++; return (0); } static struct mbuf * vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; int flags, etype, csum_start, proto, error; sc = txq->vtntx_sc; flags = m->m_pkthdr.csum_flags; error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start); if (error) goto drop; if (flags & (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6)) { /* Sanity check the parsed mbuf matches the offload flags. */ if (__predict_false((flags & VTNET_CSUM_OFFLOAD && etype != ETHERTYPE_IP) || (flags & VTNET_CSUM_OFFLOAD_IPV6 && etype != ETHERTYPE_IPV6))) { sc->vtnet_stats.tx_csum_proto_mismatch++; goto drop; } hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; hdr->csum_start = vtnet_gtoh16(sc, csum_start); hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data); txq->vtntx_stats.vtxs_csum++; } if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) { /* * Sanity check the parsed mbuf IP protocol is TCP, and * VirtIO TSO requires the checksum offloading above.
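* (Per the VirtIO spec, a GSO packet must also carry a partial * checksum, i.e. VIRTIO_NET_HDR_F_NEEDS_CSUM with valid csum_start * and csum_offset; the host computes the final TCP checksum of each * segment it produces. For a plain IPv4/TCP frame with no options * this works out to csum_start 34 (14 byte Ethernet + 20 byte IP), * csum_offset 16 and hdr_len 54, with gso_size equal to the TCP MSS, * e.g. 1460 for a 1500 byte MTU.)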
*/ if (__predict_false(proto != IPPROTO_TCP)) { sc->vtnet_stats.tx_tso_not_tcp++; goto drop; } else if (__predict_false((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) == 0)) { sc->vtnet_stats.tx_tso_without_csum++; goto drop; } error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr); if (error) goto drop; } return (m); drop: m_freem(m); return (NULL); } static int vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, struct vtnet_tx_header *txhdr) { struct vtnet_softc *sc; struct virtqueue *vq; struct sglist *sg; struct mbuf *m; int error; sc = txq->vtntx_sc; vq = txq->vtntx_vq; sg = txq->vtntx_sg; m = *m_head; sglist_reset(sg); error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); if (error != 0 || sg->sg_nseg != 1) { KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d", __func__, error, sg->sg_nseg)); goto fail; } error = sglist_append_mbuf(sg, m); if (error) { m = m_defrag(m, M_NOWAIT); if (m == NULL) goto fail; *m_head = m; sc->vtnet_stats.tx_defragged++; error = sglist_append_mbuf(sg, m); if (error) goto fail; } txhdr->vth_mbuf = m; error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0); return (error); fail: sc->vtnet_stats.tx_defrag_failed++; m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } static int vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags) { struct vtnet_tx_header *txhdr; struct virtio_net_hdr *hdr; struct mbuf *m; int error; m = *m_head; M_ASSERTPKTHDR(m); txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO); if (txhdr == NULL) { m_freem(m); *m_head = NULL; return (ENOMEM); } /* * Always use the non-mergeable header, regardless of whether mergeable * headers were negotiated, because for transmit num_buffers is always * zero. The vtnet_hdr_size is used to enqueue the right header size segment.
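* (vth_uhdr is a union large enough for the mergeable variant; since * vtnet_txq_enqueue_buf() appends only vtnet_hdr_size bytes of it to * the sglist, the num_buffers field is transmitted only when * mergeable buffers were negotiated, and it is then the zero written * by M_ZERO above, which the host ignores on transmit.)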
*/ hdr = &txhdr->vth_uhdr.hdr; if (m->m_flags & M_VLANTAG) { m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); if ((*m_head = m) == NULL) { error = ENOBUFS; goto fail; } m->m_flags &= ~M_VLANTAG; } if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { m = vtnet_txq_offload(txq, m, hdr); if ((*m_head = m) == NULL) { error = ENOBUFS; goto fail; } } error = vtnet_txq_enqueue_buf(txq, m_head, txhdr); fail: if (error) uma_zfree(vtnet_tx_header_zone, txhdr); return (error); } #ifdef VTNET_LEGACY_TX static void vtnet_start_locked(struct vtnet_txq *txq, if_t ifp) { struct vtnet_softc *sc; struct virtqueue *vq; struct mbuf *m0; int tries, enq; sc = txq->vtntx_sc; vq = txq->vtntx_vq; tries = 0; VTNET_TXQ_LOCK_ASSERT(txq); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || sc->vtnet_link_active == 0) return; vtnet_txq_eof(txq); again: enq = 0; while (!if_sendq_empty(ifp)) { if (virtqueue_full(vq)) break; m0 = if_dequeue(ifp); if (m0 == NULL) break; if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) { if (m0 != NULL) if_sendq_prepend(ifp, m0); break; } enq++; ETHER_BPF_MTAP(ifp, m0); } if (enq > 0 && vtnet_txq_notify(txq) != 0) { if (tries++ < VTNET_NOTIFY_RETRIES) goto again; txq->vtntx_stats.vtxs_rescheduled++; taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); } } static void vtnet_start(if_t ifp) { struct vtnet_softc *sc; struct vtnet_txq *txq; sc = if_getsoftc(ifp); txq = &sc->vtnet_txqs[0]; VTNET_TXQ_LOCK(txq); vtnet_start_locked(txq, ifp); VTNET_TXQ_UNLOCK(txq); } #else /* !VTNET_LEGACY_TX */ static int vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) { struct vtnet_softc *sc; struct virtqueue *vq; struct buf_ring *br; if_t ifp; int enq, tries, error; sc = txq->vtntx_sc; vq = txq->vtntx_vq; br = txq->vtntx_br; ifp = sc->vtnet_ifp; tries = 0; error = 0; VTNET_TXQ_LOCK_ASSERT(txq); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || sc->vtnet_link_active == 0) { if (m != NULL) error = drbr_enqueue(ifp, br, m); return (error); } if (m != NULL) { error = drbr_enqueue(ifp, br, m); if (error) return (error); } vtnet_txq_eof(txq); again: enq = 0; while ((m = drbr_peek(ifp, br)) != NULL) { if (virtqueue_full(vq)) { drbr_putback(ifp, br, m); break; } if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) { if (m != NULL) drbr_putback(ifp, br, m); else drbr_advance(ifp, br); break; } drbr_advance(ifp, br); enq++; ETHER_BPF_MTAP(ifp, m); } if (enq > 0 && vtnet_txq_notify(txq) != 0) { if (tries++ < VTNET_NOTIFY_RETRIES) goto again; txq->vtntx_stats.vtxs_rescheduled++; taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); } return (0); } static int vtnet_txq_mq_start(if_t ifp, struct mbuf *m) { struct vtnet_softc *sc; struct vtnet_txq *txq; int i, npairs, error; sc = if_getsoftc(ifp); npairs = sc->vtnet_act_vq_pairs; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % npairs; else i = curcpu % npairs; txq = &sc->vtnet_txqs[i]; if (VTNET_TXQ_TRYLOCK(txq) != 0) { error = vtnet_txq_mq_start_locked(txq, m); VTNET_TXQ_UNLOCK(txq); } else { error = drbr_enqueue(ifp, txq->vtntx_br, m); taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask); } return (error); } static void vtnet_txq_tq_deferred(void *xtxq, int pending __unused) { struct vtnet_softc *sc; struct vtnet_txq *txq; txq = xtxq; sc = txq->vtntx_sc; VTNET_TXQ_LOCK(txq); if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br)) vtnet_txq_mq_start_locked(txq, NULL); VTNET_TXQ_UNLOCK(txq); } #endif /* VTNET_LEGACY_TX */ static void vtnet_txq_start(struct vtnet_txq *txq) { struct vtnet_softc *sc; if_t ifp; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; 
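/* Push out whatever is still queued for this ring, via whichever * transmit path this kernel was built with. */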
#ifdef VTNET_LEGACY_TX if (!if_sendq_empty(ifp)) vtnet_start_locked(txq, ifp); #else if (!drbr_empty(ifp, txq->vtntx_br)) vtnet_txq_mq_start_locked(txq, NULL); #endif } static void vtnet_txq_tq_intr(void *xtxq, int pending __unused) { struct vtnet_softc *sc; struct vtnet_txq *txq; if_t ifp; txq = xtxq; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; VTNET_TXQ_LOCK(txq); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { VTNET_TXQ_UNLOCK(txq); return; } vtnet_txq_eof(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } static int vtnet_txq_eof(struct vtnet_txq *txq) { struct virtqueue *vq; struct vtnet_tx_header *txhdr; struct mbuf *m; int deq; vq = txq->vtntx_vq; deq = 0; VTNET_TXQ_LOCK_ASSERT(txq); while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { m = txhdr->vth_mbuf; deq++; txq->vtntx_stats.vtxs_opackets++; txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; if (m->m_flags & M_MCAST) txq->vtntx_stats.vtxs_omcasts++; m_freem(m); uma_zfree(vtnet_tx_header_zone, txhdr); } if (virtqueue_empty(vq)) txq->vtntx_watchdog = 0; return (deq); } static void vtnet_tx_vq_intr(void *xtxq) { struct vtnet_softc *sc; struct vtnet_txq *txq; if_t ifp; txq = xtxq; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { /* * Ignore this interrupt. Either this is a spurious interrupt * or multiqueue without per-VQ MSIX so every queue needs to * be polled (a brain dead configuration we could try harder * to avoid). */ vtnet_txq_disable_intr(txq); return; } #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS) return; #endif /* DEV_NETMAP */ VTNET_TXQ_LOCK(txq); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { VTNET_TXQ_UNLOCK(txq); return; } vtnet_txq_eof(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } static void vtnet_tx_start_all(struct vtnet_softc *sc) { struct vtnet_txq *txq; int i; VTNET_CORE_LOCK_ASSERT(sc); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } } #ifndef VTNET_LEGACY_TX static void vtnet_qflush(if_t ifp) { struct vtnet_softc *sc; struct vtnet_txq *txq; struct mbuf *m; int i; sc = if_getsoftc(ifp); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL) m_freem(m); VTNET_TXQ_UNLOCK(txq); } if_qflush(ifp); } #endif static int vtnet_watchdog(struct vtnet_txq *txq) { if_t ifp; ifp = txq->vtntx_sc->vtnet_ifp; VTNET_TXQ_LOCK(txq); if (txq->vtntx_watchdog == 1) { /* * Only drain completed frames if the watchdog is about to * expire. If any frames were drained, there may be enough * free descriptors now available to transmit queued frames. * In that case, the timer will immediately be decremented * below, but the timeout is generous enough that this should * not be a problem.
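* (The counter is reset to VTNET_TX_TIMEOUT by vtnet_txq_notify() * whenever frames are submitted, cleared by vtnet_txq_eof() once the * virtqueue is empty, and decremented here once per second from * vtnet_tick(), so a timeout only fires after VTNET_TX_TIMEOUT * consecutive quiet ticks.)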
*/ if (vtnet_txq_eof(txq) != 0) vtnet_txq_start(txq); } if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) { VTNET_TXQ_UNLOCK(txq); return (0); } VTNET_TXQ_UNLOCK(txq); if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id); return (1); } static void vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc, struct vtnet_txq_stats *txacc) { bzero(rxacc, sizeof(struct vtnet_rxq_stats)); bzero(txacc, sizeof(struct vtnet_txq_stats)); for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) { struct vtnet_rxq_stats *rxst; struct vtnet_txq_stats *txst; rxst = &sc->vtnet_rxqs[i].vtnrx_stats; rxacc->vrxs_ipackets += rxst->vrxs_ipackets; rxacc->vrxs_ibytes += rxst->vrxs_ibytes; rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops; rxacc->vrxs_ierrors += rxst->vrxs_ierrors; rxacc->vrxs_csum += rxst->vrxs_csum; rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed; rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled; txst = &sc->vtnet_txqs[i].vtntx_stats; txacc->vtxs_opackets += txst->vtxs_opackets; txacc->vtxs_obytes += txst->vtxs_obytes; txacc->vtxs_omcasts += txst->vtxs_omcasts; txacc->vtxs_csum += txst->vtxs_csum; txacc->vtxs_tso += txst->vtxs_tso; txacc->vtxs_rescheduled += txst->vtxs_rescheduled; } } static uint64_t vtnet_get_counter(if_t ifp, ift_counter cnt) { struct vtnet_softc *sc; struct vtnet_rxq_stats rxaccum; struct vtnet_txq_stats txaccum; sc = if_getsoftc(ifp); vtnet_accum_stats(sc, &rxaccum, &txaccum); switch (cnt) { case IFCOUNTER_IPACKETS: return (rxaccum.vrxs_ipackets); case IFCOUNTER_IQDROPS: return (rxaccum.vrxs_iqdrops); case IFCOUNTER_IERRORS: return (rxaccum.vrxs_ierrors); case IFCOUNTER_OPACKETS: return (txaccum.vtxs_opackets); #ifndef VTNET_LEGACY_TX case IFCOUNTER_OBYTES: return (txaccum.vtxs_obytes); case IFCOUNTER_OMCASTS: return (txaccum.vtxs_omcasts); #endif default: return (if_get_counter_default(ifp, cnt)); } } static void vtnet_tick(void *xsc) { struct vtnet_softc *sc; if_t ifp; int i, timedout; sc = xsc; ifp = sc->vtnet_ifp; timedout = 0; VTNET_CORE_LOCK_ASSERT(sc); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]); if (timedout != 0) { if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); vtnet_init_locked(sc, 0); } else callout_schedule(&sc->vtnet_tick_ch, hz); } static void vtnet_start_taskqueues(struct vtnet_softc *sc) { device_t dev; struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i, error; dev = sc->vtnet_dev; /* * Errors here are very difficult to recover from - we cannot * easily fail because, if this is during boot, we will hang * when freeing any successfully started taskqueues because * the scheduler isn't up yet. * * Most drivers just ignore the return value - it only fails * with ENOMEM so an error is not likely.
*/ for (i = 0; i < sc->vtnet_req_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET, "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id); if (error) { device_printf(dev, "failed to start rx taskq %d\n", rxq->vtnrx_id); } txq = &sc->vtnet_txqs[i]; error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET, "%s txq %d", device_get_nameunit(dev), txq->vtntx_id); if (error) { device_printf(dev, "failed to start tx taskq %d\n", txq->vtntx_id); } } } static void vtnet_free_taskqueues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; if (rxq->vtnrx_tq != NULL) { taskqueue_free(rxq->vtnrx_tq); rxq->vtnrx_tq = NULL; } txq = &sc->vtnet_txqs[i]; if (txq->vtntx_tq != NULL) { taskqueue_free(txq->vtntx_tq); txq->vtntx_tq = NULL; } } } static void vtnet_drain_taskqueues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; if (rxq->vtnrx_tq != NULL) taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); txq = &sc->vtnet_txqs[i]; if (txq->vtntx_tq != NULL) { taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask); #ifndef VTNET_LEGACY_TX taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask); #endif } } } static void vtnet_drain_rxtx_queues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; vtnet_rxq_free_mbufs(rxq); txq = &sc->vtnet_txqs[i]; vtnet_txq_free_mbufs(txq); } } static void vtnet_stop_rendezvous(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; VTNET_CORE_LOCK_ASSERT(sc); /* * Lock and unlock the per-queue mutex so we know the stop * state is visible. Doing only the active queues should be * sufficient, but it does not cost much extra to do all the * queues. */ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; VTNET_RXQ_LOCK(rxq); VTNET_RXQ_UNLOCK(rxq); txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); VTNET_TXQ_UNLOCK(txq); } } static void vtnet_stop(struct vtnet_softc *sc) { device_t dev; if_t ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); sc->vtnet_link_active = 0; callout_stop(&sc->vtnet_tick_ch); /* Only advisory. */ vtnet_disable_interrupts(sc); #ifdef DEV_NETMAP /* Stop any pending txsync/rxsync and disable them. */ netmap_disable_all_rings(ifp); #endif /* DEV_NETMAP */ /* * Stop the host adapter. This resets it to the pre-initialized * state. It will not generate any interrupts until after it is * reinitialized. */ virtio_stop(dev); vtnet_stop_rendezvous(sc); vtnet_drain_rxtx_queues(sc); sc->vtnet_act_vq_pairs = 1; } static int vtnet_virtio_reinit(struct vtnet_softc *sc) { device_t dev; if_t ifp; uint64_t features; int error; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; features = sc->vtnet_negotiated_features; /* * Re-negotiate with the host, removing any disabled receive * features. Transmit features are disabled only on our side * via if_capenable and if_hwassist.
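* (Receive offloads need the host's cooperation, since the host must * stop delivering frames that depend on them, e.g. LRO aggregates. * Transmit offloads are purely local, so clearing if_hwassist is * sufficient and nothing needs to be renegotiated for them.)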
*/ if ((if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0) features &= ~(VIRTIO_NET_F_GUEST_CSUM | VTNET_LRO_FEATURES); if ((if_getcapenable(ifp) & IFCAP_LRO) == 0) features &= ~VTNET_LRO_FEATURES; if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) == 0) features &= ~VIRTIO_NET_F_CTRL_VLAN; error = virtio_reinit(dev, features); if (error) { device_printf(dev, "virtio reinit error %d\n", error); return (error); } sc->vtnet_features = features; virtio_reinit_complete(dev); return (0); } static void vtnet_init_rx_filters(struct vtnet_softc *sc) { if_t ifp; ifp = sc->vtnet_ifp; if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { vtnet_rx_filter(sc); vtnet_rx_filter_mac(sc); } if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) vtnet_rx_filter_vlan(sc); } static int vtnet_init_rx_queues(struct vtnet_softc *sc) { device_t dev; if_t ifp; struct vtnet_rxq *rxq; int i, clustersz, error; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; clustersz = vtnet_rx_cluster_size(sc, if_getmtu(ifp)); sc->vtnet_rx_clustersz = clustersz; if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) { sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) + VTNET_MAX_RX_SIZE, clustersz); KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, ("%s: too many rx mbufs %d for %d segments", __func__, sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); } else sc->vtnet_rx_nmbufs = 1; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; /* Hold the lock to satisfy asserts. */ VTNET_RXQ_LOCK(rxq); error = vtnet_rxq_populate(rxq); VTNET_RXQ_UNLOCK(rxq); if (error) { device_printf(dev, "cannot populate Rx queue %d\n", i); return (error); } } return (0); } static int vtnet_init_tx_queues(struct vtnet_softc *sc) { struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; txq->vtntx_watchdog = 0; txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq); #ifdef DEV_NETMAP netmap_reset(NA(sc->vtnet_ifp), NR_TX, i, 0); #endif /* DEV_NETMAP */ } return (0); } static int vtnet_init_rxtx_queues(struct vtnet_softc *sc) { int error; error = vtnet_init_rx_queues(sc); if (error) return (error); error = vtnet_init_tx_queues(sc); if (error) return (error); return (0); } static void vtnet_set_active_vq_pairs(struct vtnet_softc *sc) { device_t dev; int npairs; dev = sc->vtnet_dev; if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) { sc->vtnet_act_vq_pairs = 1; return; } npairs = sc->vtnet_req_vq_pairs; if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { device_printf(dev, "cannot set active queue pairs to %d, " "falling back to 1 queue pair\n", npairs); npairs = 1; } sc->vtnet_act_vq_pairs = npairs; } static void vtnet_update_rx_offloads(struct vtnet_softc *sc) { if_t ifp; uint64_t features; int error; ifp = sc->vtnet_ifp; features = sc->vtnet_features; VTNET_CORE_LOCK_ASSERT(sc); if (if_getcapabilities(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) features |= VIRTIO_NET_F_GUEST_CSUM; else features &= ~VIRTIO_NET_F_GUEST_CSUM; } if (if_getcapabilities(ifp) & IFCAP_LRO && !vtnet_software_lro(sc)) { if (if_getcapenable(ifp) & IFCAP_LRO) features |= VTNET_LRO_FEATURES; else features &= ~VTNET_LRO_FEATURES; } error = vtnet_ctrl_guest_offloads(sc, features & (VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN | VIRTIO_NET_F_GUEST_UFO)); if (error) { device_printf(sc->vtnet_dev, "%s: cannot update Rx features\n", __func__); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 
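/* With IFF_DRV_RUNNING cleared, vtnet_init_locked() performs a full * stop and reinit cycle instead of returning early, dropping the * stale offload state. */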
vtnet_init_locked(sc, 0); } } else sc->vtnet_features = features; } static int vtnet_reinit(struct vtnet_softc *sc) { if_t ifp; int error; ifp = sc->vtnet_ifp; bcopy(if_getlladdr(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); error = vtnet_virtio_reinit(sc); if (error) return (error); vtnet_set_macaddr(sc); vtnet_set_active_vq_pairs(sc); if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) vtnet_init_rx_filters(sc); if_sethwassist(ifp, 0); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD, 0); if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD_IPV6, 0); if (if_getcapenable(ifp) & IFCAP_TSO4) if_sethwassistbits(ifp, CSUM_IP_TSO, 0); if (if_getcapenable(ifp) & IFCAP_TSO6) if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); error = vtnet_init_rxtx_queues(sc); if (error) return (error); return (0); } static void vtnet_init_locked(struct vtnet_softc *sc, int init_mode) { if_t ifp; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) return; vtnet_stop(sc); #ifdef DEV_NETMAP /* Once stopped we can update the netmap flags, if necessary. */ switch (init_mode) { case VTNET_INIT_NETMAP_ENTER: nm_set_native_flags(NA(ifp)); break; case VTNET_INIT_NETMAP_EXIT: nm_clear_native_flags(NA(ifp)); break; } #endif /* DEV_NETMAP */ if (vtnet_reinit(sc) != 0) { vtnet_stop(sc); return; } if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0); vtnet_update_link_status(sc); vtnet_enable_interrupts(sc); callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); #ifdef DEV_NETMAP /* Re-enable txsync/rxsync. */ netmap_enable_all_rings(ifp); #endif /* DEV_NETMAP */ } static void vtnet_init(void *xsc) { struct vtnet_softc *sc; sc = xsc; VTNET_CORE_LOCK(sc); vtnet_init_locked(sc, 0); VTNET_CORE_UNLOCK(sc); } static void vtnet_free_ctrl_vq(struct vtnet_softc *sc) { /* * The control virtqueue is only polled and therefore it should * already be empty. */ KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq), ("%s: ctrl vq %p not empty", __func__, sc->vtnet_ctrl_vq)); } static void vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie, struct sglist *sg, int readable, int writable) { struct virtqueue *vq; vq = sc->vtnet_ctrl_vq; MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ); VTNET_CORE_LOCK_ASSERT(sc); if (!virtqueue_empty(vq)) return; /* * Poll for the response, but the command is likely completed before * returning from the notify. */ if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0) { virtqueue_notify(vq); virtqueue_poll(vq, NULL); } } static int vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr __aligned(2); uint8_t pad1; uint8_t addr[ETHER_ADDR_LEN] __aligned(8); uint8_t pad2; uint8_t ack; } s; int error; error = 0; MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC); s.hdr.class = VIRTIO_NET_CTRL_MAC; s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN); s.ack = VIRTIO_NET_ERR; sglist_init(&sg, nitems(segs), segs); error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); MPASS(error == 0 && sg.sg_nseg == nitems(segs)); if (error == 0) vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); } static int vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr __aligned(2); uint8_t pad1; uint64_t offloads __aligned(8); uint8_t pad2; uint8_t ack; } s; int error; error = 0; MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; s.offloads = vtnet_gtoh64(sc, offloads); s.ack = VIRTIO_NET_ERR; sglist_init(&sg, nitems(segs), segs); error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.offloads, sizeof(uint64_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); MPASS(error == 0 && sg.sg_nseg == nitems(segs)); if (error == 0) vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr __aligned(2); uint8_t pad1; struct virtio_net_ctrl_mq mq __aligned(2); uint8_t pad2; uint8_t ack; } s; int error; error = 0; MPASS(sc->vtnet_flags & VTNET_FLAG_MQ); s.hdr.class = VIRTIO_NET_CTRL_MQ; s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs); s.ack = VIRTIO_NET_ERR; sglist_init(&sg, nitems(segs), segs); error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); MPASS(error == 0 && sg.sg_nseg == nitems(segs)); if (error == 0) vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, bool on) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr __aligned(2); uint8_t pad1; uint8_t onoff; uint8_t pad2; uint8_t ack; } s; int error; error = 0; MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX); s.hdr.class = VIRTIO_NET_CTRL_RX; s.hdr.cmd = cmd; s.onoff = on; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, nitems(segs), segs); error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); MPASS(error == 0 && sg.sg_nseg == nitems(segs)); if (error == 0) vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_set_promisc(struct vtnet_softc *sc, bool on) { return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on)); } static int vtnet_set_allmulti(struct vtnet_softc *sc, bool on) { return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); } static void vtnet_rx_filter(struct vtnet_softc *sc) { device_t dev; if_t ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if (vtnet_set_promisc(sc, if_getflags(ifp) & IFF_PROMISC) != 0) { device_printf(dev, "cannot %s promiscuous mode\n", if_getflags(ifp) & IFF_PROMISC ? "enable" : "disable"); } if (vtnet_set_allmulti(sc, if_getflags(ifp) & IFF_ALLMULTI) != 0) { device_printf(dev, "cannot %s all-multicast mode\n", if_getflags(ifp) & IFF_ALLMULTI ? 
"enable" : "disable"); } } static u_int vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt) { struct vtnet_softc *sc = arg; if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0) return (0); if (ucnt < VTNET_MAX_MAC_ENTRIES) bcopy(LLADDR(sdl), &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN); return (1); } static u_int vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt) { struct vtnet_mac_filter *filter = arg; if (mcnt < VTNET_MAX_MAC_ENTRIES) bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN); return (1); } static void vtnet_rx_filter_mac(struct vtnet_softc *sc) { struct virtio_net_ctrl_hdr hdr __aligned(2); struct vtnet_mac_filter *filter; struct sglist_seg segs[4]; struct sglist sg; if_t ifp; bool promisc, allmulti; u_int ucnt, mcnt; int error; uint8_t ack; ifp = sc->vtnet_ifp; filter = sc->vtnet_mac_filter; error = 0; MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX); VTNET_CORE_LOCK_ASSERT(sc); /* Unicast MAC addresses: */ ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc); promisc = (ucnt > VTNET_MAX_MAC_ENTRIES); if (promisc) { ucnt = 0; if_printf(ifp, "more than %d MAC addresses assigned, " "falling back to promiscuous mode\n", VTNET_MAX_MAC_ENTRIES); } /* Multicast MAC addresses: */ mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter); allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES); if (allmulti) { mcnt = 0; if_printf(ifp, "more than %d multicast MAC addresses " "assigned, falling back to all-multicast mode\n", VTNET_MAX_MAC_ENTRIES); } if (promisc && allmulti) goto out; filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt); filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt); hdr.class = VIRTIO_NET_CTRL_MAC; hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; ack = VIRTIO_NET_ERR; sglist_init(&sg, nitems(segs), segs); error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &filter->vmf_unicast, sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN); error |= sglist_append(&sg, &filter->vmf_multicast, sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN); error |= sglist_append(&sg, &ack, sizeof(uint8_t)); MPASS(error == 0 && sg.sg_nseg == nitems(segs)); if (error == 0) vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); if (ack != VIRTIO_NET_OK) if_printf(ifp, "error setting host MAC filter table\n"); out: if (promisc != 0 && vtnet_set_promisc(sc, true) != 0) if_printf(ifp, "cannot enable promiscuous mode\n"); if (allmulti != 0 && vtnet_set_allmulti(sc, true) != 0) if_printf(ifp, "cannot enable all-multicast mode\n"); } static int vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr __aligned(2); uint8_t pad1; uint16_t tag __aligned(2); uint8_t pad2; uint8_t ack; } s; int error; error = 0; MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER); s.hdr.class = VIRTIO_NET_CTRL_VLAN; s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; s.tag = vtnet_gtoh16(sc, tag); s.ack = VIRTIO_NET_ERR; sglist_init(&sg, nitems(segs), segs); error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); MPASS(error == 0 && sg.sg_nseg == nitems(segs)); if (error == 0) vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); } static void vtnet_rx_filter_vlan(struct vtnet_softc *sc) { int i, bit; uint32_t w; uint16_t tag; MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER); VTNET_CORE_LOCK_ASSERT(sc); /* Enable the filter for each configured VLAN. */ for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { w = sc->vtnet_vlan_filter[i]; while ((bit = ffs(w) - 1) != -1) { w &= ~(1 << bit); tag = sizeof(w) * CHAR_BIT * i + bit; if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) { device_printf(sc->vtnet_dev, "cannot enable VLAN %d filter\n", tag); } } } } static void vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { if_t ifp; int idx, bit; ifp = sc->vtnet_ifp; idx = (tag >> 5) & 0x7F; bit = tag & 0x1F; if (tag == 0 || tag > 4095) return; VTNET_CORE_LOCK(sc); if (add) sc->vtnet_vlan_filter[idx] |= (1 << bit); else sc->vtnet_vlan_filter[idx] &= ~(1 << bit); if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER && if_getdrvflags(ifp) & IFF_DRV_RUNNING && vtnet_exec_vlan_filter(sc, add, tag) != 0) { device_printf(sc->vtnet_dev, "cannot %s VLAN %d %s the host filter table\n", add ? "add" : "remove", tag, add ? "to" : "from"); } VTNET_CORE_UNLOCK(sc); } static void vtnet_register_vlan(void *arg, if_t ifp, uint16_t tag) { if (if_getsoftc(ifp) != arg) return; vtnet_update_vlan_filter(arg, 1, tag); } static void vtnet_unregister_vlan(void *arg, if_t ifp, uint16_t tag) { if (if_getsoftc(ifp) != arg) return; vtnet_update_vlan_filter(arg, 0, tag); } static void vtnet_update_speed_duplex(struct vtnet_softc *sc) { if_t ifp; uint32_t speed; ifp = sc->vtnet_ifp; if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0) return; /* BMV: Ignore duplex. */ speed = virtio_read_dev_config_4(sc->vtnet_dev, offsetof(struct virtio_net_config, speed)); if (speed != UINT32_MAX) if_setbaudrate(ifp, IF_Mbps(speed)); } static int vtnet_is_link_up(struct vtnet_softc *sc) { uint16_t status; if ((sc->vtnet_features & VIRTIO_NET_F_STATUS) == 0) return (1); status = virtio_read_dev_config_2(sc->vtnet_dev, offsetof(struct virtio_net_config, status)); return ((status & VIRTIO_NET_S_LINK_UP) != 0); } static void vtnet_update_link_status(struct vtnet_softc *sc) { if_t ifp; int link; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); link = vtnet_is_link_up(sc); /* Notify if the link status has changed. */ if (link != 0 && sc->vtnet_link_active == 0) { vtnet_update_speed_duplex(sc); sc->vtnet_link_active = 1; if_link_state_change(ifp, LINK_STATE_UP); } else if (link == 0 && sc->vtnet_link_active != 0) { sc->vtnet_link_active = 0; if_link_state_change(ifp, LINK_STATE_DOWN); } } static int vtnet_ifmedia_upd(if_t ifp __unused) { return (EOPNOTSUPP); } static void vtnet_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) { struct vtnet_softc *sc; sc = if_getsoftc(ifp); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; VTNET_CORE_LOCK(sc); if (vtnet_is_link_up(sc) != 0) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } else ifmr->ifm_active |= IFM_NONE; VTNET_CORE_UNLOCK(sc); } static void vtnet_get_macaddr(struct vtnet_softc *sc) { if (sc->vtnet_flags & VTNET_FLAG_MAC) { virtio_read_device_config_array(sc->vtnet_dev, offsetof(struct virtio_net_config, mac), &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN); } else { /* Generate a random locally administered unicast address. 
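* The leading byte 0xB2 (binary 10110010) has the locally * administered bit (0x02) set and the multicast bit (0x01) clear, * so the generated address is a valid unicast address that cannot * collide with any vendor-assigned OUI.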
*/ sc->vtnet_hwaddr[0] = 0xB2; arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); } } static void vtnet_set_macaddr(struct vtnet_softc *sc) { device_t dev; int error; dev = sc->vtnet_dev; if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr); if (error) device_printf(dev, "unable to set MAC address\n"); return; } /* MAC in config is read-only in modern VirtIO. */ if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) { for (int i = 0; i < ETHER_ADDR_LEN; i++) { virtio_write_dev_config_1(dev, offsetof(struct virtio_net_config, mac) + i, sc->vtnet_hwaddr[i]); } } } static void vtnet_attached_set_macaddr(struct vtnet_softc *sc) { /* Assign MAC address if it was generated. */ if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) vtnet_set_macaddr(sc); } static void vtnet_vlan_tag_remove(struct mbuf *m) { struct ether_vlan_header *evh; evh = mtod(m, struct ether_vlan_header *); m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); m->m_flags |= M_VLANTAG; /* Strip the 802.1Q header. */ bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); } static void vtnet_set_rx_process_limit(struct vtnet_softc *sc) { int limit; limit = vtnet_tunable_int(sc, "rx_process_limit", vtnet_rx_process_limit); if (limit < 0) limit = INT_MAX; sc->vtnet_rx_process_limit = limit; } static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_rxq *rxq) { struct sysctl_oid *node; struct sysctl_oid_list *list; struct vtnet_rxq_stats *stats; char namebuf[16]; snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue"); list = SYSCTL_CHILDREN(node); stats = &rxq->vtnrx_stats; SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, &stats->vrxs_ipackets, "Receive packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, &stats->vrxs_ibytes, "Receive bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, &stats->vrxs_iqdrops, "Receive drops"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, &stats->vrxs_ierrors, "Receive errors"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, &stats->vrxs_csum, "Receive checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, &stats->vrxs_csum_failed, "Receive checksum offload failed"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", CTLFLAG_RD, &stats->vrxs_host_lro, "Receive host segmentation offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, &stats->vrxs_rescheduled, "Receive interrupt handler rescheduled"); } static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_txq *txq) { struct sysctl_oid *node; struct sysctl_oid_list *list; struct vtnet_txq_stats *stats; char namebuf[16]; snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue"); list = SYSCTL_CHILDREN(node); stats = &txq->vtntx_stats; SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, &stats->vtxs_opackets, "Transmit packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, &stats->vtxs_obytes, "Transmit bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, &stats->vtxs_omcasts, "Transmit multicasts"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, 
&stats->vtxs_csum, "Transmit checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, &stats->vtxs_tso, "Transmit TCP segmentation offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, &stats->vtxs_rescheduled, "Transmit interrupt handler rescheduled"); } static void vtnet_setup_queue_sysctl(struct vtnet_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; int i; dev = sc->vtnet_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); for (i = 0; i < sc->vtnet_req_vq_pairs; i++) { vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); } } static void vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_softc *sc) { struct vtnet_statistics *stats; struct vtnet_rxq_stats rxaccum; struct vtnet_txq_stats txaccum; vtnet_accum_stats(sc, &rxaccum, &txaccum); stats = &sc->vtnet_stats; stats->rx_csum_offloaded = rxaccum.vrxs_csum; stats->rx_csum_failed = rxaccum.vrxs_csum_failed; stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled; stats->tx_csum_offloaded = txaccum.vtxs_csum; stats->tx_tso_offloaded = txaccum.vtxs_tso; stats->tx_task_rescheduled = txaccum.vtxs_rescheduled; SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed", CTLFLAG_RD, &stats->mbuf_alloc_failed, "Mbuf cluster allocation failures"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large", CTLFLAG_RD, &stats->rx_frame_too_large, "Received frame larger than the mbuf chain"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", CTLFLAG_RD, &stats->rx_enq_replacement_failed, "Enqueuing the replacement receive mbuf failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", CTLFLAG_RD, &stats->rx_mergeable_failed, "Mergeable buffers receive failures"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", CTLFLAG_RD, &stats->rx_csum_bad_ethtype, "Received checksum offloaded buffer with unsupported " "Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", CTLFLAG_RD, &stats->rx_csum_bad_ipproto, "Received checksum offloaded buffer with incorrect IP protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset", CTLFLAG_RD, &stats->rx_csum_bad_offset, "Received checksum offloaded buffer with incorrect offset"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto", CTLFLAG_RD, &stats->rx_csum_bad_proto, "Received checksum offloaded buffer with incorrect protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed", CTLFLAG_RD, &stats->rx_csum_failed, "Received buffer checksum offload failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded", CTLFLAG_RD, &stats->rx_csum_offloaded, "Received buffer checksum offload succeeded"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled", CTLFLAG_RD, &stats->rx_task_rescheduled, "Times the receive interrupt task rescheduled itself"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype", CTLFLAG_RD, &stats->tx_csum_unknown_ethtype, "Aborted transmit of checksum offloaded buffer with unknown " "Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch", CTLFLAG_RD, &stats->tx_csum_proto_mismatch, "Aborted transmit of checksum offloaded buffer because mismatched " "protocols"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", CTLFLAG_RD, &stats->tx_tso_not_tcp, "Aborted transmit of TSO buffer with 
non TCP protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum", CTLFLAG_RD, &stats->tx_tso_without_csum, "Aborted transmit of TSO buffer without TCP checksum offload"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged", CTLFLAG_RD, &stats->tx_defragged, "Transmit mbufs defragged"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed", CTLFLAG_RD, &stats->tx_defrag_failed, "Aborted transmit of buffer because defrag failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded", CTLFLAG_RD, &stats->tx_csum_offloaded, "Offloaded checksum of transmitted buffer"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded", CTLFLAG_RD, &stats->tx_tso_offloaded, "Segmentation offload of transmitted buffer"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled", CTLFLAG_RD, &stats->tx_task_rescheduled, "Times the transmit interrupt task rescheduled itself"); } static void vtnet_setup_sysctl(struct vtnet_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; dev = sc->vtnet_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, "Number of maximum supported virtqueue pairs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "req_vq_pairs", CTLFLAG_RD, &sc->vtnet_req_vq_pairs, 0, "Number of requested virtqueue pairs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, "Number of active virtqueue pairs"); vtnet_setup_stat_sysctl(ctx, child, sc); } static void vtnet_load_tunables(struct vtnet_softc *sc) { sc->vtnet_lro_entry_count = vtnet_tunable_int(sc, "lro_entry_count", vtnet_lro_entry_count); if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES) sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES; sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc, "lro_mbufq_depth", vtnet_lro_mbufq_depth); } static int vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) { return (virtqueue_enable_intr(rxq->vtnrx_vq)); } static void vtnet_rxq_disable_intr(struct vtnet_rxq *rxq) { virtqueue_disable_intr(rxq->vtnrx_vq); } static int vtnet_txq_enable_intr(struct vtnet_txq *txq) { struct virtqueue *vq; vq = txq->vtntx_vq; if (vtnet_txq_below_threshold(txq) != 0) return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG)); /* * The free count is above our threshold. Keep the Tx interrupt * disabled until the queue is fuller. 
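* In that case completed frames are reaped opportunistically instead, * before the next transmit and from the watchdog callout, as * described in vtnet_txq_intr_threshold().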
*/ return (0); } static void vtnet_txq_disable_intr(struct vtnet_txq *txq) { virtqueue_disable_intr(txq->vtntx_vq); } static void vtnet_enable_rx_interrupts(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; if (vtnet_rxq_enable_intr(rxq) != 0) taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } } static void vtnet_enable_tx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_txq_enable_intr(&sc->vtnet_txqs[i]); } static void vtnet_enable_interrupts(struct vtnet_softc *sc) { vtnet_enable_rx_interrupts(sc); vtnet_enable_tx_interrupts(sc); } static void vtnet_disable_rx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); } static void vtnet_disable_tx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); } static void vtnet_disable_interrupts(struct vtnet_softc *sc) { vtnet_disable_rx_interrupts(sc); vtnet_disable_tx_interrupts(sc); } static int vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) { char path[64]; snprintf(path, sizeof(path), "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob); TUNABLE_INT_FETCH(path, &def); return (def); } #ifdef DEBUGNET static void vtnet_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize) { struct vtnet_softc *sc; sc = if_getsoftc(ifp); VTNET_CORE_LOCK(sc); *nrxr = sc->vtnet_req_vq_pairs; *ncl = DEBUGNET_MAX_IN_FLIGHT; *clsize = sc->vtnet_rx_clustersz; VTNET_CORE_UNLOCK(sc); } static void vtnet_debugnet_event(if_t ifp __unused, enum debugnet_ev event) { struct vtnet_softc *sc; static bool sw_lro_enabled = false; /* * Disable software LRO, since it would require entering the network * epoch when calling vtnet_txq_eof() in vtnet_debugnet_poll(). */ sc = if_getsoftc(ifp); switch (event) { case DEBUGNET_START: sw_lro_enabled = (sc->vtnet_flags & VTNET_FLAG_SW_LRO) != 0; if (sw_lro_enabled) sc->vtnet_flags &= ~VTNET_FLAG_SW_LRO; break; case DEBUGNET_END: if (sw_lro_enabled) sc->vtnet_flags |= VTNET_FLAG_SW_LRO; break; } } static int vtnet_debugnet_transmit(if_t ifp, struct mbuf *m) { struct vtnet_softc *sc; struct vtnet_txq *txq; int error; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); txq = &sc->vtnet_txqs[0]; error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE); if (error == 0) (void)vtnet_txq_notify(txq); return (error); } static int vtnet_debugnet_poll(if_t ifp, int count) { struct vtnet_softc *sc; int i; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); (void)vtnet_txq_eof(&sc->vtnet_txqs[0]); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]); return (0); } #endif /* DEBUGNET */ diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 5a4954e84869..9f99434dd4e0 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -1,3798 +1,3804 @@ /* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright 2001 Wasabi Systems, Inc. * All rights reserved. * * Written by Jason R. Thorpe for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp */ /* * Network interface bridge support. * * TODO: * * - Currently only supports Ethernet-like interfaces (Ethernet, * 802.11, VLANs on Ethernet, etc.) Figure out a nice way * to bridge other types of interfaces (maybe consider * heterogeneous bridges). 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include /* for net/if.h */ #include #include /* string functions */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif #if defined(INET) || defined(INET6) #include #endif #include #include #include #include #include #include #include #ifdef INET6 /* * XXX: declare here to avoid to include many inet6 related files.. * should be more generalized? */ extern void nd6_setmtu(struct ifnet *); #endif /* * Size of the route hash table. Must be a power of two. */ #ifndef BRIDGE_RTHASH_SIZE #define BRIDGE_RTHASH_SIZE 1024 #endif #define BRIDGE_RTHASH_MASK (BRIDGE_RTHASH_SIZE - 1) /* * Default maximum number of addresses to cache. */ #ifndef BRIDGE_RTABLE_MAX #define BRIDGE_RTABLE_MAX 2000 #endif /* * Timeout (in seconds) for entries learned dynamically. */ #ifndef BRIDGE_RTABLE_TIMEOUT #define BRIDGE_RTABLE_TIMEOUT (20 * 60) /* same as ARP */ #endif /* * Number of seconds between walks of the route list. */ #ifndef BRIDGE_RTABLE_PRUNE_PERIOD #define BRIDGE_RTABLE_PRUNE_PERIOD (5 * 60) #endif /* * List of capabilities to possibly mask on the member interface. */ #define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM|\ IFCAP_TXCSUM_IPV6) /* * List of capabilities to strip */ #define BRIDGE_IFCAPS_STRIP IFCAP_LRO /* * Bridge locking * * The bridge relies heavily on the epoch(9) system to protect its data * structures. This means we can safely use CK_LISTs while in NET_EPOCH, but we * must ensure there is only one writer at a time. * * That is: for read accesses we only need to be in NET_EPOCH, but for write * accesses we must hold: * * - BRIDGE_RT_LOCK, for any change to bridge_rtnodes * - BRIDGE_LOCK, for any other change * * The BRIDGE_LOCK is a sleepable lock, because it is held across ioctl() * calls to bridge member interfaces and these ioctl()s can sleep. * The BRIDGE_RT_LOCK is a non-sleepable mutex, because it is sometimes * required while we're in NET_EPOCH and then we're not allowed to sleep. */ #define BRIDGE_LOCK_INIT(_sc) do { \ sx_init(&(_sc)->sc_sx, "if_bridge"); \ mtx_init(&(_sc)->sc_rt_mtx, "if_bridge rt", NULL, MTX_DEF); \ } while (0) #define BRIDGE_LOCK_DESTROY(_sc) do { \ sx_destroy(&(_sc)->sc_sx); \ mtx_destroy(&(_sc)->sc_rt_mtx); \ } while (0) #define BRIDGE_LOCK(_sc) sx_xlock(&(_sc)->sc_sx) #define BRIDGE_UNLOCK(_sc) sx_xunlock(&(_sc)->sc_sx) #define BRIDGE_LOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_sx, SX_XLOCKED) #define BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(_sc) \ MPASS(in_epoch(net_epoch_preempt) || sx_xlocked(&(_sc)->sc_sx)) #define BRIDGE_UNLOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_sx, SX_UNLOCKED) #define BRIDGE_RT_LOCK(_sc) mtx_lock(&(_sc)->sc_rt_mtx) #define BRIDGE_RT_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_rt_mtx) #define BRIDGE_RT_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_rt_mtx, MA_OWNED) #define BRIDGE_RT_LOCK_OR_NET_EPOCH_ASSERT(_sc) \ MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(_sc)->sc_rt_mtx)) /* * Bridge interface list entry. 
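* One entry exists per member port, linked into sc_iflist. Readers * walk the CK_LIST under the network epoch; writers hold BRIDGE_LOCK, * and freed entries are reclaimed through bif_epoch_ctx once all * epoch readers have drained.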
*/ struct bridge_iflist { CK_LIST_ENTRY(bridge_iflist) bif_next; struct ifnet *bif_ifp; /* member if */ struct bstp_port bif_stp; /* STP state */ uint32_t bif_flags; /* member if flags */ int bif_savedcaps; /* saved capabilities */ uint32_t bif_addrmax; /* max # of addresses */ uint32_t bif_addrcnt; /* cur. # of addresses */ uint32_t bif_addrexceeded;/* # of address violations */ struct epoch_context bif_epoch_ctx; }; /* * Bridge route node. */ struct bridge_rtnode { CK_LIST_ENTRY(bridge_rtnode) brt_hash; /* hash table linkage */ CK_LIST_ENTRY(bridge_rtnode) brt_list; /* list linkage */ struct bridge_iflist *brt_dst; /* destination if */ unsigned long brt_expire; /* expiration time */ uint8_t brt_flags; /* address flags */ uint8_t brt_addr[ETHER_ADDR_LEN]; uint16_t brt_vlan; /* vlan id */ struct vnet *brt_vnet; struct epoch_context brt_epoch_ctx; }; #define brt_ifp brt_dst->bif_ifp /* * Software state for each bridge. */ struct bridge_softc { struct ifnet *sc_ifp; /* make this an interface */ LIST_ENTRY(bridge_softc) sc_list; struct sx sc_sx; struct mtx sc_rt_mtx; uint32_t sc_brtmax; /* max # of addresses */ uint32_t sc_brtcnt; /* cur. # of addresses */ uint32_t sc_brttimeout; /* rt timeout in seconds */ struct callout sc_brcallout; /* bridge callout */ CK_LIST_HEAD(, bridge_iflist) sc_iflist; /* member interface list */ CK_LIST_HEAD(, bridge_rtnode) *sc_rthash; /* our forwarding table */ CK_LIST_HEAD(, bridge_rtnode) sc_rtlist; /* list version of above */ uint32_t sc_rthash_key; /* key for hash */ CK_LIST_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */ struct bstp_state sc_stp; /* STP state */ uint32_t sc_brtexceeded; /* # of cache drops */ struct ifnet *sc_ifaddr; /* member mac copied from */ struct ether_addr sc_defaddr; /* Default MAC address */ struct epoch_context sc_epoch_ctx; }; VNET_DEFINE_STATIC(struct sx, bridge_list_sx); #define V_bridge_list_sx VNET(bridge_list_sx) static eventhandler_tag bridge_detach_cookie; int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD; VNET_DEFINE_STATIC(uma_zone_t, bridge_rtnode_zone); #define V_bridge_rtnode_zone VNET(bridge_rtnode_zone) static int bridge_clone_create(struct if_clone *, char *, size_t, struct ifc_data *, struct ifnet **); static int bridge_clone_destroy(struct if_clone *, struct ifnet *, uint32_t); static int bridge_ioctl(struct ifnet *, u_long, caddr_t); static void bridge_mutecaps(struct bridge_softc *); static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *, int); static void bridge_ifdetach(void *arg __unused, struct ifnet *); static void bridge_init(void *); static void bridge_dummynet(struct mbuf *, struct ifnet *); static void bridge_stop(struct ifnet *, int); static int bridge_transmit(struct ifnet *, struct mbuf *); #ifdef ALTQ static void bridge_altq_start(if_t); static int bridge_altq_transmit(if_t, struct mbuf *); #endif static void bridge_qflush(struct ifnet *); static struct mbuf *bridge_input(struct ifnet *, struct mbuf *); static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); static int bridge_enqueue(struct bridge_softc *, struct ifnet *, struct mbuf *); static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int); static void bridge_forward(struct bridge_softc *, struct bridge_iflist *, struct mbuf *m); static void bridge_timer(void *); static void bridge_broadcast(struct bridge_softc *, struct ifnet *, struct mbuf *, int); static void bridge_span(struct bridge_softc *, struct mbuf *); static int bridge_rtupdate(struct 
bridge_softc *, const uint8_t *, uint16_t, struct bridge_iflist *, int, uint8_t); static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *, uint16_t); static void bridge_rttrim(struct bridge_softc *); static void bridge_rtage(struct bridge_softc *); static void bridge_rtflush(struct bridge_softc *, int); static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *, uint16_t); static void bridge_rtable_init(struct bridge_softc *); static void bridge_rtable_fini(struct bridge_softc *); static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *); static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *, const uint8_t *, uint16_t); static int bridge_rtnode_insert(struct bridge_softc *, struct bridge_rtnode *); static void bridge_rtnode_destroy(struct bridge_softc *, struct bridge_rtnode *); static void bridge_rtable_expire(struct ifnet *, int); static void bridge_state_change(struct ifnet *, int); static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *, const char *name); static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *, struct ifnet *ifp); static void bridge_delete_member(struct bridge_softc *, struct bridge_iflist *, int); static void bridge_delete_span(struct bridge_softc *, struct bridge_iflist *); static int bridge_ioctl_add(struct bridge_softc *, void *); static int bridge_ioctl_del(struct bridge_softc *, void *); static int bridge_ioctl_gifflags(struct bridge_softc *, void *); static int bridge_ioctl_sifflags(struct bridge_softc *, void *); static int bridge_ioctl_scache(struct bridge_softc *, void *); static int bridge_ioctl_gcache(struct bridge_softc *, void *); static int bridge_ioctl_gifs(struct bridge_softc *, void *); static int bridge_ioctl_rts(struct bridge_softc *, void *); static int bridge_ioctl_saddr(struct bridge_softc *, void *); static int bridge_ioctl_sto(struct bridge_softc *, void *); static int bridge_ioctl_gto(struct bridge_softc *, void *); static int bridge_ioctl_daddr(struct bridge_softc *, void *); static int bridge_ioctl_flush(struct bridge_softc *, void *); static int bridge_ioctl_gpri(struct bridge_softc *, void *); static int bridge_ioctl_spri(struct bridge_softc *, void *); static int bridge_ioctl_ght(struct bridge_softc *, void *); static int bridge_ioctl_sht(struct bridge_softc *, void *); static int bridge_ioctl_gfd(struct bridge_softc *, void *); static int bridge_ioctl_sfd(struct bridge_softc *, void *); static int bridge_ioctl_gma(struct bridge_softc *, void *); static int bridge_ioctl_sma(struct bridge_softc *, void *); static int bridge_ioctl_sifprio(struct bridge_softc *, void *); static int bridge_ioctl_sifcost(struct bridge_softc *, void *); static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *); static int bridge_ioctl_addspan(struct bridge_softc *, void *); static int bridge_ioctl_delspan(struct bridge_softc *, void *); static int bridge_ioctl_gbparam(struct bridge_softc *, void *); static int bridge_ioctl_grte(struct bridge_softc *, void *); static int bridge_ioctl_gifsstp(struct bridge_softc *, void *); static int bridge_ioctl_sproto(struct bridge_softc *, void *); static int bridge_ioctl_stxhc(struct bridge_softc *, void *); static int bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *, int); static int bridge_ip_checkbasic(struct mbuf **mp); #ifdef INET6 static int bridge_ip6_checkbasic(struct mbuf **mp); #endif /* INET6 */ static int bridge_fragment(struct ifnet *, struct mbuf **mp, struct ether_header *, int, struct llc *); static 
void bridge_linkstate(struct ifnet *ifp); static void bridge_linkcheck(struct bridge_softc *sc); /* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */ #define VLANTAGOF(_m) \ (_m->m_flags & M_VLANTAG) ? EVL_VLANOFTAG(_m->m_pkthdr.ether_vtag) : 1 static struct bstp_cb_ops bridge_ops = { .bcb_state = bridge_state_change, .bcb_rtage = bridge_rtable_expire }; SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Bridge"); /* only pass IP[46] packets when pfil is enabled */ VNET_DEFINE_STATIC(int, pfil_onlyip) = 1; #define V_pfil_onlyip VNET(pfil_onlyip) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_onlyip), 0, "Only pass IP packets when pfil is enabled"); /* run pfil hooks on the bridge interface */ VNET_DEFINE_STATIC(int, pfil_bridge) = 0; #define V_pfil_bridge VNET(pfil_bridge) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_bridge), 0, "Packet filter on the bridge interface"); /* layer2 filter with ipfw */ VNET_DEFINE_STATIC(int, pfil_ipfw); #define V_pfil_ipfw VNET(pfil_ipfw) /* layer2 ARP filter with ipfw */ VNET_DEFINE_STATIC(int, pfil_ipfw_arp); #define V_pfil_ipfw_arp VNET(pfil_ipfw_arp) SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_ipfw_arp), 0, "Filter ARP packets through IPFW layer2"); /* run pfil hooks on the member interface */ VNET_DEFINE_STATIC(int, pfil_member) = 0; #define V_pfil_member VNET(pfil_member) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_member), 0, "Packet filter on the member interface"); /* run pfil hooks on the physical interface for locally destined packets */ VNET_DEFINE_STATIC(int, pfil_local_phys); #define V_pfil_local_phys VNET(pfil_local_phys) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_local_phys), 0, "Packet filter on the physical interface for locally destined packets"); /* log STP state changes */ VNET_DEFINE_STATIC(int, log_stp); #define V_log_stp VNET(log_stp) SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(log_stp), 0, "Log STP state changes"); /* share MAC with first bridge member */ VNET_DEFINE_STATIC(int, bridge_inherit_mac); #define V_bridge_inherit_mac VNET(bridge_inherit_mac) SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(bridge_inherit_mac), 0, "Inherit MAC address from the first bridge member"); VNET_DEFINE_STATIC(int, allow_llz_overlap) = 0; #define V_allow_llz_overlap VNET(allow_llz_overlap) SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0, "Allow overlap of link-local scope " "zones of a bridge interface and the member interfaces"); struct bridge_control { int (*bc_func)(struct bridge_softc *, void *); int bc_argsize; int bc_flags; }; #define BC_F_COPYIN 0x01 /* copy arguments in */ #define BC_F_COPYOUT 0x02 /* copy arguments out */ #define BC_F_SUSER 0x04 /* do super-user check */ static const struct bridge_control bridge_control_table[] = { { bridge_ioctl_add, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_del, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_sifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_scache, sizeof(struct 
ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gcache, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_saddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gto, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_daddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_flush, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gpri, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_spri, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_ght, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sht, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gfd, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sfd, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gma, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sma, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifprio, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifcost, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_addspan, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_delspan, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gbparam, sizeof(struct ifbropreq), BC_F_COPYOUT }, { bridge_ioctl_grte, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_gifsstp, sizeof(struct ifbpstpconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_sproto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_stxhc, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifmaxaddr, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, }; static const int bridge_control_table_size = nitems(bridge_control_table); VNET_DEFINE_STATIC(LIST_HEAD(, bridge_softc), bridge_list); #define V_bridge_list VNET(bridge_list) #define BRIDGE_LIST_LOCK_INIT(x) sx_init(&V_bridge_list_sx, \ "if_bridge list") #define BRIDGE_LIST_LOCK_DESTROY(x) sx_destroy(&V_bridge_list_sx) #define BRIDGE_LIST_LOCK(x) sx_xlock(&V_bridge_list_sx) #define BRIDGE_LIST_UNLOCK(x) sx_xunlock(&V_bridge_list_sx) VNET_DEFINE_STATIC(struct if_clone *, bridge_cloner); #define V_bridge_cloner VNET(bridge_cloner) static const char bridge_name[] = "bridge"; static void vnet_bridge_init(const void *unused __unused) { V_bridge_rtnode_zone = uma_zcreate("bridge_rtnode", sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); BRIDGE_LIST_LOCK_INIT(); LIST_INIT(&V_bridge_list); struct if_clone_addreq req = { .create_f = bridge_clone_create, .destroy_f = bridge_clone_destroy, .flags = IFC_F_AUTOUNIT, }; V_bridge_cloner = ifc_attach_cloner(bridge_name, &req); } VNET_SYSINIT(vnet_bridge_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_bridge_init, NULL); static void vnet_bridge_uninit(const void *unused __unused) { ifc_detach_cloner(V_bridge_cloner); V_bridge_cloner = NULL; BRIDGE_LIST_LOCK_DESTROY(); /* Callbacks may use the UMA zone. 
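	 * NET_EPOCH_DRAIN_CALLBACKS() runs all pending epoch callbacks to
	 * completion, so nothing can touch the zone after uma_zdestroy()
	 * tears it down.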
	 */
	NET_EPOCH_DRAIN_CALLBACKS();
	uma_zdestroy(V_bridge_rtnode_zone);
}
VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
    vnet_bridge_uninit, NULL);

static int
bridge_modevent(module_t mod, int type, void *data)
{
	switch (type) {
	case MOD_LOAD:
		bridge_dn_p = bridge_dummynet;
		bridge_detach_cookie = EVENTHANDLER_REGISTER(
		    ifnet_departure_event, bridge_ifdetach, NULL,
		    EVENTHANDLER_PRI_ANY);
		break;
	case MOD_UNLOAD:
		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
		    bridge_detach_cookie);
		bridge_dn_p = NULL;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

static moduledata_t bridge_mod = {
	"if_bridge",
	bridge_modevent,
	0
};

DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_bridge, 1);
MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1);

/*
 * Handler for net.link.bridge.ipfw.
 */
static int
sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS)
{
	int enable = V_pfil_ipfw;
	int error;

	error = sysctl_handle_int(oidp, &enable, 0, req);
	enable &= 1;

	if (enable != V_pfil_ipfw) {
		V_pfil_ipfw = enable;

		/*
		 * Disable pfil so that ipfw doesn't run twice; if the user
		 * really wants both then they can re-enable pfil_bridge and/or
		 * pfil_member. Also allow non-ip packets as ipfw can filter by
		 * layer2 type.
		 */
		if (V_pfil_ipfw) {
			V_pfil_onlyip = 0;
			V_pfil_bridge = 0;
			V_pfil_member = 0;
		}
	}

	return (error);
}
SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET | CTLFLAG_NEEDGIANT,
    &VNET_NAME(pfil_ipfw), 0, &sysctl_pfil_ipfw, "I",
    "Layer2 filter with IPFW");

#ifdef VIMAGE
static void
bridge_reassign(struct ifnet *ifp, struct vnet *newvnet, char *arg)
{
	struct bridge_softc *sc = ifp->if_softc;
	struct bridge_iflist *bif;

	BRIDGE_LOCK(sc);

	while ((bif = CK_LIST_FIRST(&sc->sc_iflist)) != NULL)
		bridge_delete_member(sc, bif, 0);

	while ((bif = CK_LIST_FIRST(&sc->sc_spanlist)) != NULL) {
		bridge_delete_span(sc, bif);
	}

	BRIDGE_UNLOCK(sc);

	ether_reassign(ifp, newvnet, arg);
}
#endif

/*
 * bridge_clone_create:
 *
 *	Create a new bridge instance.
 */
static int
bridge_clone_create(struct if_clone *ifc, char *name, size_t len,
    struct ifc_data *ifd, struct ifnet **ifpp)
{
	struct bridge_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		free(sc, M_DEVBUF);
		return (ENOSPC);
	}

	BRIDGE_LOCK_INIT(sc);
	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;

	/* Initialize our routing table. */
	bridge_rtable_init(sc);

	callout_init_mtx(&sc->sc_brcallout, &sc->sc_rt_mtx, 0);

	CK_LIST_INIT(&sc->sc_iflist);
	CK_LIST_INIT(&sc->sc_spanlist);

	ifp->if_softc = sc;
	if_initname(ifp, bridge_name, ifd->unit);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = bridge_ioctl;
#ifdef ALTQ
	ifp->if_start = bridge_altq_start;
	ifp->if_transmit = bridge_altq_transmit;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	ifp->if_snd.ifq_drv_maxlen = 0;
	IFQ_SET_READY(&ifp->if_snd);
#else
	ifp->if_transmit = bridge_transmit;
#endif
	ifp->if_qflush = bridge_qflush;
	ifp->if_init = bridge_init;
	ifp->if_type = IFT_BRIDGE;

	ether_gen_addr(ifp, &sc->sc_defaddr);

	bstp_attach(&sc->sc_stp, &bridge_ops);
	ether_ifattach(ifp, sc->sc_defaddr.octet);
	/* Now undo some of the damage...
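	 * ether_ifattach() expects a plain Ethernet ifnet, so the interface
	 * type and baudrate it set are switched back to bridge-appropriate
	 * values below.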
*/ ifp->if_baudrate = 0; ifp->if_type = IFT_BRIDGE; #ifdef VIMAGE ifp->if_reassign = bridge_reassign; #endif BRIDGE_LIST_LOCK(); LIST_INSERT_HEAD(&V_bridge_list, sc, sc_list); BRIDGE_LIST_UNLOCK(); *ifpp = ifp; return (0); } static void bridge_clone_destroy_cb(struct epoch_context *ctx) { struct bridge_softc *sc; sc = __containerof(ctx, struct bridge_softc, sc_epoch_ctx); BRIDGE_LOCK_DESTROY(sc); free(sc, M_DEVBUF); } /* * bridge_clone_destroy: * * Destroy a bridge instance. */ static int bridge_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) { struct bridge_softc *sc = ifp->if_softc; struct bridge_iflist *bif; struct epoch_tracker et; BRIDGE_LOCK(sc); bridge_stop(ifp, 1); ifp->if_flags &= ~IFF_UP; while ((bif = CK_LIST_FIRST(&sc->sc_iflist)) != NULL) bridge_delete_member(sc, bif, 0); while ((bif = CK_LIST_FIRST(&sc->sc_spanlist)) != NULL) { bridge_delete_span(sc, bif); } /* Tear down the routing table. */ bridge_rtable_fini(sc); BRIDGE_UNLOCK(sc); NET_EPOCH_ENTER(et); callout_drain(&sc->sc_brcallout); BRIDGE_LIST_LOCK(); LIST_REMOVE(sc, sc_list); BRIDGE_LIST_UNLOCK(); bstp_detach(&sc->sc_stp); #ifdef ALTQ IFQ_PURGE(&ifp->if_snd); #endif NET_EPOCH_EXIT(et); ether_ifdetach(ifp); if_free(ifp); NET_EPOCH_CALL(bridge_clone_destroy_cb, &sc->sc_epoch_ctx); return (0); } /* * bridge_ioctl: * * Handle a control request from the operator. */ static int bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct bridge_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct bridge_iflist *bif; struct thread *td = curthread; union { struct ifbreq ifbreq; struct ifbifconf ifbifconf; struct ifbareq ifbareq; struct ifbaconf ifbaconf; struct ifbrparam ifbrparam; struct ifbropreq ifbropreq; } args; struct ifdrv *ifd = (struct ifdrv *) data; const struct bridge_control *bc; int error = 0, oldmtu; BRIDGE_LOCK(sc); switch (cmd) { case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCGDRVSPEC: case SIOCSDRVSPEC: if (ifd->ifd_cmd >= bridge_control_table_size) { error = EINVAL; break; } bc = &bridge_control_table[ifd->ifd_cmd]; if (cmd == SIOCGDRVSPEC && (bc->bc_flags & BC_F_COPYOUT) == 0) { error = EINVAL; break; } else if (cmd == SIOCSDRVSPEC && (bc->bc_flags & BC_F_COPYOUT) != 0) { error = EINVAL; break; } if (bc->bc_flags & BC_F_SUSER) { error = priv_check(td, PRIV_NET_BRIDGE); if (error) break; } if (ifd->ifd_len != bc->bc_argsize || ifd->ifd_len > sizeof(args)) { error = EINVAL; break; } bzero(&args, sizeof(args)); if (bc->bc_flags & BC_F_COPYIN) { error = copyin(ifd->ifd_data, &args, ifd->ifd_len); if (error) break; } oldmtu = ifp->if_mtu; error = (*bc->bc_func)(sc, &args); if (error) break; /* * Bridge MTU may change during addition of the first port. * If it did, do network layer specific procedure. */ if (ifp->if_mtu != oldmtu) { #ifdef INET6 nd6_setmtu(ifp); #endif rt_updatemtu(ifp); } if (bc->bc_flags & BC_F_COPYOUT) error = copyout(&args, ifd->ifd_data, ifd->ifd_len); break; case SIOCSIFFLAGS: if (!(ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* * If interface is marked down and it is running, * then stop and disable it. */ bridge_stop(ifp, 1); } else if ((ifp->if_flags & IFF_UP) && !(ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* * If interface is marked up and it is stopped, then * start it. 
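			 * bridge_init() acquires BRIDGE_LOCK itself, which is
			 * why the lock is dropped around the if_init call
			 * below.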
			 */
			BRIDGE_UNLOCK(sc);
			(*ifp->if_init)(sc);
			BRIDGE_LOCK(sc);
		}
		break;

	case SIOCSIFMTU:
		oldmtu = sc->sc_ifp->if_mtu;

		if (ifr->ifr_mtu < 576) {
			error = EINVAL;
			break;
		}
		if (CK_LIST_EMPTY(&sc->sc_iflist)) {
			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
			break;
		}
		CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
			error = (*bif->bif_ifp->if_ioctl)(bif->bif_ifp,
			    SIOCSIFMTU, (caddr_t)ifr);
			if (error != 0) {
				log(LOG_NOTICE, "%s: invalid MTU: %u for"
				    " member %s\n", sc->sc_ifp->if_xname,
				    ifr->ifr_mtu, bif->bif_ifp->if_xname);
				error = EINVAL;
				break;
			}
		}
		if (error) {
			/* Restore the previous MTU on all member interfaces. */
			ifr->ifr_mtu = oldmtu;
			CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
				(*bif->bif_ifp->if_ioctl)(bif->bif_ifp,
				    SIOCSIFMTU, (caddr_t)ifr);
			}
		} else {
			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
		}
		break;
	default:
		/*
		 * Drop the lock, as ether_ioctl() will call bridge_start()
		 * and cause the lock to be recursed.
		 */
		BRIDGE_UNLOCK(sc);
		error = ether_ioctl(ifp, cmd, data);
		BRIDGE_LOCK(sc);
		break;
	}
	BRIDGE_UNLOCK(sc);

	return (error);
}

/*
 * bridge_mutecaps:
 *
 *	Clear or restore unwanted capabilities on the member interface.
 */
static void
bridge_mutecaps(struct bridge_softc *sc)
{
	struct bridge_iflist *bif;
	int enabled, mask;

	BRIDGE_LOCK_ASSERT(sc);

	/* Initial bitmask of capabilities to test */
	mask = BRIDGE_IFCAPS_MASK;

	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
		/* Every member must support it or it's disabled */
		mask &= bif->bif_savedcaps;
	}

	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
		enabled = bif->bif_ifp->if_capenable;
		enabled &= ~BRIDGE_IFCAPS_STRIP;
		/* strip off mask bits and enable them again if allowed */
		enabled &= ~BRIDGE_IFCAPS_MASK;
		enabled |= mask;
		bridge_set_ifcap(sc, bif, enabled);
	}
}

static void
bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
{
	struct ifnet *ifp = bif->bif_ifp;
	struct ifreq ifr;
	int error, mask, stuck;

	bzero(&ifr, sizeof(ifr));
	ifr.ifr_reqcap = set;

	if (ifp->if_capenable != set) {
		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
		if (error)
			if_printf(sc->sc_ifp,
			    "error setting capabilities on %s: %d\n",
			    ifp->if_xname, error);
		mask = BRIDGE_IFCAPS_MASK | BRIDGE_IFCAPS_STRIP;
		stuck = ifp->if_capenable & mask & ~set;
		if (stuck != 0)
			if_printf(sc->sc_ifp,
			    "can't disable some capabilities on %s: 0x%x\n",
			    ifp->if_xname, stuck);
	}
}

/*
 * bridge_lookup_member:
 *
 *	Lookup a bridge member interface.
 */
static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc *sc, const char *name)
{
	struct bridge_iflist *bif;
	struct ifnet *ifp;

	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);

	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
		ifp = bif->bif_ifp;
		if (strcmp(ifp->if_xname, name) == 0)
			return (bif);
	}

	return (NULL);
}

/*
 * bridge_lookup_member_if:
 *
 *	Lookup a bridge member interface by ifnet*.
 */
static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
{
	struct bridge_iflist *bif;

	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);

	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
		if (bif->bif_ifp == member_ifp)
			return (bif);
	}

	return (NULL);
}

static void
bridge_delete_member_cb(struct epoch_context *ctx)
{
	struct bridge_iflist *bif;

	bif = __containerof(ctx, struct bridge_iflist, bif_epoch_ctx);

	free(bif, M_DEVBUF);
}

/*
 * bridge_delete_member:
 *
 *	Delete the specified member interface.
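 *
 *	Callers must hold BRIDGE_LOCK.  A hypothetical caller, mirroring
 *	bridge_ioctl_del() ("em0" is only a stand-in member name):
 *
 *		BRIDGE_LOCK(sc);
 *		bif = bridge_lookup_member(sc, "em0");
 *		if (bif != NULL)
 *			bridge_delete_member(sc, bif, 0);
 *		BRIDGE_UNLOCK(sc);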
 */
static void
bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
    int gone)
{
	struct ifnet *ifs = bif->bif_ifp;
	struct ifnet *fif = NULL;
	struct bridge_iflist *bifl;

	BRIDGE_LOCK_ASSERT(sc);

	if (bif->bif_flags & IFBIF_STP)
		bstp_disable(&bif->bif_stp);

	ifs->if_bridge = NULL;
	CK_LIST_REMOVE(bif, bif_next);

	/*
	 * If removing the interface that gave the bridge its mac address, set
	 * the mac address of the bridge to the address of the next member, or
	 * to its default address if no members are left.
	 */
	if (V_bridge_inherit_mac && sc->sc_ifaddr == ifs) {
		if (CK_LIST_EMPTY(&sc->sc_iflist)) {
			bcopy(&sc->sc_defaddr,
			    IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
			sc->sc_ifaddr = NULL;
		} else {
			bifl = CK_LIST_FIRST(&sc->sc_iflist);
			fif = bifl->bif_ifp;
			bcopy(IF_LLADDR(fif),
			    IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
			sc->sc_ifaddr = fif;
		}
		EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
	}

	bridge_linkcheck(sc);
	bridge_mutecaps(sc);	/* recalculate now that this interface is removed */
	BRIDGE_RT_LOCK(sc);
	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
	BRIDGE_RT_UNLOCK(sc);
	KASSERT(bif->bif_addrcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));

	ifs->if_bridge_output = NULL;
	ifs->if_bridge_input = NULL;
	ifs->if_bridge_linkstate = NULL;
	if (!gone) {
		switch (ifs->if_type) {
		case IFT_ETHER:
		case IFT_L2VLAN:
			/*
			 * Take the interface out of promiscuous mode, but only
			 * if it was promiscuous in the first place. It might
			 * not be if we're in the bridge_ioctl_add() error path.
			 */
			if (ifs->if_flags & IFF_PROMISC)
				(void) ifpromisc(ifs, 0);
			break;

		case IFT_GIF:
			break;

		default:
#ifdef DIAGNOSTIC
			panic("bridge_delete_member: impossible");
#endif
			break;
		}
		/* re-enable any interface capabilities */
		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
	}
	bstp_destroy(&bif->bif_stp);	/* prepare to free */

	NET_EPOCH_CALL(bridge_delete_member_cb, &bif->bif_epoch_ctx);
}

/*
 * bridge_delete_span:
 *
 *	Delete the specified span interface.
 */
static void
bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
{
	BRIDGE_LOCK_ASSERT(sc);

	KASSERT(bif->bif_ifp->if_bridge == NULL,
	    ("%s: not a span interface", __func__));

	CK_LIST_REMOVE(bif, bif_next);
	NET_EPOCH_CALL(bridge_delete_member_cb, &bif->bif_epoch_ctx);
}

static int
bridge_ioctl_add(struct bridge_softc *sc, void *arg)
{
	struct ifbreq *req = arg;
	struct bridge_iflist *bif = NULL;
	struct ifnet *ifs;
	int error = 0;

	ifs = ifunit(req->ifbr_ifsname);
	if (ifs == NULL)
		return (ENOENT);
	if (ifs->if_ioctl == NULL)	/* must be supported */
		return (EINVAL);

	/* If it's in the span list, it can't be a member. */
	CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
		if (ifs == bif->bif_ifp)
			return (EBUSY);

	if (ifs->if_bridge == sc)
		return (EEXIST);

	if (ifs->if_bridge != NULL)
		return (EBUSY);

	switch (ifs->if_type) {
	case IFT_ETHER:
	case IFT_L2VLAN:
	case IFT_GIF:
		/* permitted interface types */
		break;
	default:
		return (EINVAL);
	}

#ifdef INET6
	/*
	 * Two valid inet6 addresses with link-local scope must not be
	 * on the parent interface and the member interfaces at the
	 * same time.  This restriction is needed to prevent violation
	 * of the link-local scope zone.  Attempting to add a member
	 * interface that has inet6 addresses when the parent has
	 * inet6 triggers removal of all inet6 addresses on the member
	 * interface.
	 */

	/* Check if the parent interface has a link-local scope addr. */
	if (V_allow_llz_overlap == 0 &&
	    in6ifa_llaonifp(sc->sc_ifp) != NULL) {
		/*
		 * If any, remove all inet6 addresses from the member
		 * interfaces.
*/ CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (in6ifa_llaonifp(bif->bif_ifp)) { in6_ifdetach(bif->bif_ifp); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", bif->bif_ifp->if_xname); } } if (in6ifa_llaonifp(ifs)) { in6_ifdetach(ifs); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", ifs->if_xname); } } #endif /* Allow the first Ethernet member to define the MTU */ if (CK_LIST_EMPTY(&sc->sc_iflist)) sc->sc_ifp->if_mtu = ifs->if_mtu; else if (sc->sc_ifp->if_mtu != ifs->if_mtu) { struct ifreq ifr; snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", ifs->if_xname); ifr.ifr_mtu = sc->sc_ifp->if_mtu; error = (*ifs->if_ioctl)(ifs, SIOCSIFMTU, (caddr_t)&ifr); if (error != 0) { log(LOG_NOTICE, "%s: invalid MTU: %u for" " new member %s\n", sc->sc_ifp->if_xname, ifr.ifr_mtu, ifs->if_xname); return (EINVAL); } } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); if (bif == NULL) return (ENOMEM); bif->bif_ifp = ifs; bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER; bif->bif_savedcaps = ifs->if_capenable; /* * Assign the interface's MAC address to the bridge if it's the first * member and the MAC address of the bridge has not been changed from * the default randomly generated one. */ if (V_bridge_inherit_mac && CK_LIST_EMPTY(&sc->sc_iflist) && !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr.octet, ETHER_ADDR_LEN)) { bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); sc->sc_ifaddr = ifs; EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); } ifs->if_bridge = sc; ifs->if_bridge_output = bridge_output; ifs->if_bridge_input = bridge_input; ifs->if_bridge_linkstate = bridge_linkstate; bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp); /* * XXX: XLOCK HERE!?! * * NOTE: insert_***HEAD*** should be safe for the traversals. 
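	 * (CK_LIST_INSERT_HEAD publishes the fully initialized element
	 * before linking it in, so concurrent epoch readers see either the
	 * old head or the complete new entry, never a partially built one.)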
*/ CK_LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next); /* Set interface capabilities to the intersection set of all members */ bridge_mutecaps(sc); bridge_linkcheck(sc); /* Place the interface into promiscuous mode */ switch (ifs->if_type) { case IFT_ETHER: case IFT_L2VLAN: error = ifpromisc(ifs, 1); break; } if (error) bridge_delete_member(sc, bif, 0); return (error); } static int bridge_ioctl_del(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bridge_delete_member(sc, bif, 0); return (0); } static int bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; struct bstp_port *bp; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bp = &bif->bif_stp; req->ifbr_ifsflags = bif->bif_flags; req->ifbr_state = bp->bp_state; req->ifbr_priority = bp->bp_priority; req->ifbr_path_cost = bp->bp_path_cost; req->ifbr_portno = bif->bif_ifp->if_index & 0xfff; req->ifbr_proto = bp->bp_protover; req->ifbr_role = bp->bp_role; req->ifbr_stpflags = bp->bp_flags; req->ifbr_addrcnt = bif->bif_addrcnt; req->ifbr_addrmax = bif->bif_addrmax; req->ifbr_addrexceeded = bif->bif_addrexceeded; /* Copy STP state options as flags */ if (bp->bp_operedge) req->ifbr_ifsflags |= IFBIF_BSTP_EDGE; if (bp->bp_flags & BSTP_PORT_AUTOEDGE) req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE; if (bp->bp_ptp_link) req->ifbr_ifsflags |= IFBIF_BSTP_PTP; if (bp->bp_flags & BSTP_PORT_AUTOPTP) req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP; if (bp->bp_flags & BSTP_PORT_ADMEDGE) req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE; if (bp->bp_flags & BSTP_PORT_ADMCOST) req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST; return (0); } static int bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg) { struct epoch_tracker et; struct ifbreq *req = arg; struct bridge_iflist *bif; struct bstp_port *bp; int error; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bp = &bif->bif_stp; if (req->ifbr_ifsflags & IFBIF_SPAN) /* SPAN is readonly */ return (EINVAL); NET_EPOCH_ENTER(et); if (req->ifbr_ifsflags & IFBIF_STP) { if ((bif->bif_flags & IFBIF_STP) == 0) { error = bstp_enable(&bif->bif_stp); if (error) { NET_EPOCH_EXIT(et); return (error); } } } else { if ((bif->bif_flags & IFBIF_STP) != 0) bstp_disable(&bif->bif_stp); } /* Pass on STP flags */ bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0); bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0); bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0); bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 
1 : 0); /* Save the bits relating to the bridge */ bif->bif_flags = req->ifbr_ifsflags & IFBIFMASK; NET_EPOCH_EXIT(et); return (0); } static int bridge_ioctl_scache(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; sc->sc_brtmax = param->ifbrp_csize; bridge_rttrim(sc); return (0); } static int bridge_ioctl_gcache(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_csize = sc->sc_brtmax; return (0); } static int bridge_ioctl_gifs(struct bridge_softc *sc, void *arg) { struct ifbifconf *bifc = arg; struct bridge_iflist *bif; struct ifbreq breq; char *buf, *outbuf; int count, buflen, len, error = 0; count = 0; CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) count++; CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) count++; buflen = sizeof(breq) * count; if (bifc->ifbic_len == 0) { bifc->ifbic_len = buflen; return (0); } outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO); if (outbuf == NULL) return (ENOMEM); count = 0; buf = outbuf; len = min(bifc->ifbic_len, buflen); bzero(&breq, sizeof(breq)); CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (len < sizeof(breq)) break; strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname, sizeof(breq.ifbr_ifsname)); /* Fill in the ifbreq structure */ error = bridge_ioctl_gifflags(sc, &breq); if (error) break; memcpy(buf, &breq, sizeof(breq)); count++; buf += sizeof(breq); len -= sizeof(breq); } CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) { if (len < sizeof(breq)) break; strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname, sizeof(breq.ifbr_ifsname)); breq.ifbr_ifsflags = bif->bif_flags; breq.ifbr_portno = bif->bif_ifp->if_index & 0xfff; memcpy(buf, &breq, sizeof(breq)); count++; buf += sizeof(breq); len -= sizeof(breq); } bifc->ifbic_len = sizeof(breq) * count; error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_rts(struct bridge_softc *sc, void *arg) { struct ifbaconf *bac = arg; struct bridge_rtnode *brt; struct ifbareq bareq; char *buf, *outbuf; int count, buflen, len, error = 0; if (bac->ifbac_len == 0) return (0); count = 0; CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) count++; buflen = sizeof(bareq) * count; outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO); if (outbuf == NULL) return (ENOMEM); count = 0; buf = outbuf; len = min(bac->ifbac_len, buflen); bzero(&bareq, sizeof(bareq)); CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { if (len < sizeof(bareq)) goto out; strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname, sizeof(bareq.ifba_ifsname)); memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr)); bareq.ifba_vlan = brt->brt_vlan; if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC && time_uptime < brt->brt_expire) bareq.ifba_expire = brt->brt_expire - time_uptime; else bareq.ifba_expire = 0; bareq.ifba_flags = brt->brt_flags; memcpy(buf, &bareq, sizeof(bareq)); count++; buf += sizeof(bareq); len -= sizeof(bareq); } out: bac->ifbac_len = sizeof(bareq) * count; error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_saddr(struct bridge_softc *sc, void *arg) { struct ifbareq *req = arg; struct bridge_iflist *bif; struct epoch_tracker et; int error; NET_EPOCH_ENTER(et); bif = bridge_lookup_member(sc, req->ifba_ifsname); if (bif == NULL) { NET_EPOCH_EXIT(et); return (ENOENT); } /* bridge_rtupdate() may acquire the lock. 
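	 * (specifically BRIDGE_RT_LOCK, when a new route node has to be
	 * inserted).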
*/ error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1, req->ifba_flags); NET_EPOCH_EXIT(et); return (error); } static int bridge_ioctl_sto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; sc->sc_brttimeout = param->ifbrp_ctime; return (0); } static int bridge_ioctl_gto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_ctime = sc->sc_brttimeout; return (0); } static int bridge_ioctl_daddr(struct bridge_softc *sc, void *arg) { struct ifbareq *req = arg; return (bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan)); } static int bridge_ioctl_flush(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; BRIDGE_RT_LOCK(sc); bridge_rtflush(sc, req->ifbr_ifsflags); BRIDGE_RT_UNLOCK(sc); return (0); } static int bridge_ioctl_gpri(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_prio = bs->bs_bridge_priority; return (0); } static int bridge_ioctl_spri(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_priority(&sc->sc_stp, param->ifbrp_prio)); } static int bridge_ioctl_ght(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_hellotime = bs->bs_bridge_htime >> 8; return (0); } static int bridge_ioctl_sht(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime)); } static int bridge_ioctl_gfd(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8; return (0); } static int bridge_ioctl_sfd(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay)); } static int bridge_ioctl_gma(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_maxage = bs->bs_bridge_max_age >> 8; return (0); } static int bridge_ioctl_sma(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage)); } static int bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); return (bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority)); } static int bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); return (bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost)); } static int bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bif->bif_addrmax = req->ifbr_addrmax; return (0); } static int bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif = NULL; struct ifnet *ifs; ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) return (ENOENT); CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) return (EBUSY); if (ifs->if_bridge != NULL) return (EBUSY); switch (ifs->if_type) { case IFT_ETHER: case IFT_GIF: case IFT_L2VLAN: break; default: return (EINVAL); } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); if (bif == NULL) 
return (ENOMEM); bif->bif_ifp = ifs; bif->bif_flags = IFBIF_SPAN; CK_LIST_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next); return (0); } static int bridge_ioctl_delspan(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; struct ifnet *ifs; ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) return (ENOENT); CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) break; if (bif == NULL) return (ENOENT); bridge_delete_span(sc, bif); return (0); } static int bridge_ioctl_gbparam(struct bridge_softc *sc, void *arg) { struct ifbropreq *req = arg; struct bstp_state *bs = &sc->sc_stp; struct bstp_port *root_port; req->ifbop_maxage = bs->bs_bridge_max_age >> 8; req->ifbop_hellotime = bs->bs_bridge_htime >> 8; req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8; root_port = bs->bs_root_port; if (root_port == NULL) req->ifbop_root_port = 0; else req->ifbop_root_port = root_port->bp_ifp->if_index; req->ifbop_holdcount = bs->bs_txholdcount; req->ifbop_priority = bs->bs_bridge_priority; req->ifbop_protocol = bs->bs_protover; req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost; req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id; req->ifbop_designated_root = bs->bs_root_pv.pv_root_id; req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id; req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec; req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec; return (0); } static int bridge_ioctl_grte(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_cexceeded = sc->sc_brtexceeded; return (0); } static int bridge_ioctl_gifsstp(struct bridge_softc *sc, void *arg) { struct ifbpstpconf *bifstp = arg; struct bridge_iflist *bif; struct bstp_port *bp; struct ifbpstpreq bpreq; char *buf, *outbuf; int count, buflen, len, error = 0; count = 0; CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if ((bif->bif_flags & IFBIF_STP) != 0) count++; } buflen = sizeof(bpreq) * count; if (bifstp->ifbpstp_len == 0) { bifstp->ifbpstp_len = buflen; return (0); } outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO); if (outbuf == NULL) return (ENOMEM); count = 0; buf = outbuf; len = min(bifstp->ifbpstp_len, buflen); bzero(&bpreq, sizeof(bpreq)); CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (len < sizeof(bpreq)) break; if ((bif->bif_flags & IFBIF_STP) == 0) continue; bp = &bif->bif_stp; bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; memcpy(buf, &bpreq, sizeof(bpreq)); count++; buf += sizeof(bpreq); len -= sizeof(bpreq); } bifstp->ifbpstp_len = sizeof(bpreq) * count; error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_sproto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto)); } static int bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc)); } /* * bridge_ifdetach: * * Detach an interface from a bridge. Called when a member * interface is detaching. 
 */
static void
bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct bridge_softc *sc = ifp->if_bridge;
	struct bridge_iflist *bif;

	if (ifp->if_flags & IFF_RENAMING)
		return;
	if (V_bridge_cloner == NULL) {
		/*
		 * This detach handler can be called after
		 * vnet_bridge_uninit().  Just return in that case.
		 */
		return;
	}
	/* Check if the interface is a bridge member */
	if (sc != NULL) {
		BRIDGE_LOCK(sc);
		bif = bridge_lookup_member_if(sc, ifp);
		if (bif != NULL)
			bridge_delete_member(sc, bif, 1);
		BRIDGE_UNLOCK(sc);
		return;
	}

	/* Check if the interface is a span port */
	BRIDGE_LIST_LOCK();
	LIST_FOREACH(sc, &V_bridge_list, sc_list) {
		BRIDGE_LOCK(sc);
		CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
			if (ifp == bif->bif_ifp) {
				bridge_delete_span(sc, bif);
				break;
			}
		BRIDGE_UNLOCK(sc);
	}
	BRIDGE_LIST_UNLOCK();
}

/*
 * bridge_init:
 *
 *	Initialize a bridge interface.
 */
static void
bridge_init(void *xsc)
{
	struct bridge_softc *sc = (struct bridge_softc *)xsc;
	struct ifnet *ifp = sc->sc_ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		return;

	BRIDGE_LOCK(sc);
	callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
	    bridge_timer, sc);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	bstp_init(&sc->sc_stp);		/* Initialize Spanning Tree */

	BRIDGE_UNLOCK(sc);
}

/*
 * bridge_stop:
 *
 *	Stop the bridge interface.
 */
static void
bridge_stop(struct ifnet *ifp, int disable)
{
	struct bridge_softc *sc = ifp->if_softc;

	BRIDGE_LOCK_ASSERT(sc);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	BRIDGE_RT_LOCK(sc);
	callout_stop(&sc->sc_brcallout);

	bstp_stop(&sc->sc_stp);

	bridge_rtflush(sc, IFBF_FLUSHDYN);
	BRIDGE_RT_UNLOCK(sc);

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
}

/*
 * bridge_enqueue:
 *
 *	Enqueue a packet on a bridge member interface.
 *
 */
static int
bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
{
	int len, err = 0;
	short mflags;
	struct mbuf *m0;

	/* We may be sending a fragment so traverse the mbuf */
	for (; m; m = m0) {
		m0 = m->m_nextpkt;
		m->m_nextpkt = NULL;
		len = m->m_pkthdr.len;
		mflags = m->m_flags;

		/*
		 * If the underlying interface cannot do VLAN tag insertion
		 * itself then attach a packet tag that holds it.
		 */
		if ((m->m_flags & M_VLANTAG) &&
		    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
			m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
			if (m == NULL) {
				if_printf(dst_ifp,
				    "unable to prepend VLAN header\n");
				if_inc_counter(dst_ifp, IFCOUNTER_OERRORS, 1);
				continue;
			}
			m->m_flags &= ~M_VLANTAG;
		}

		M_ASSERTPKTHDR(m); /* We shouldn't transmit an mbuf without a pkthdr */
		if ((err = dst_ifp->if_transmit(dst_ifp, m))) {
			int n;

			for (m = m0, n = 1; m != NULL; m = m0, n++) {
				m0 = m->m_nextpkt;
				m_freem(m);
			}
			if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, n);
			break;
		}

		if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
		if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len);
		if (mflags & M_MCAST)
			if_inc_counter(sc->sc_ifp, IFCOUNTER_OMCASTS, 1);
	}

	return (err);
}

/*
 * bridge_dummynet:
 *
 *	Receive a queued packet from dummynet and pass it on to the output
 *	interface.
 *
 *	The mbuf has the Ethernet header already attached.
 */
static void
bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
{
	struct bridge_softc *sc;

	sc = ifp->if_bridge;

	/*
	 * The packet didn't originate from a member interface. This should
	 * only ever happen if a member interface is removed while packets are
	 * queued for it.
	 */
	if (sc == NULL) {
		m_freem(m);
		return;
	}

	if (PFIL_HOOKED_OUT(V_inet_pfil_head)
#ifdef INET6
	    || PFIL_HOOKED_OUT(V_inet6_pfil_head)
#endif
	    ) {
		if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0)
			return;
		if (m == NULL)
			return;
	}

	bridge_enqueue(sc, ifp, m);
}

/*
 * bridge_output:
 *
 *	Send output from a bridge member interface.  This
 *	performs the bridging function for locally originated
 *	packets.
 *
 *	The mbuf has the Ethernet header already attached.  We must
 *	enqueue or free the mbuf before returning.
 */
static int
bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
    struct rtentry *rt)
{
	struct ether_header *eh;
	struct ifnet *bifp, *dst_if;
	struct bridge_softc *sc;
	uint16_t vlan;

	NET_EPOCH_ASSERT();

	if (m->m_len < ETHER_HDR_LEN) {
		m = m_pullup(m, ETHER_HDR_LEN);
		if (m == NULL)
			return (0);
	}

	eh = mtod(m, struct ether_header *);
	sc = ifp->if_bridge;
	vlan = VLANTAGOF(m);

	bifp = sc->sc_ifp;

	/*
	 * If bridge is down, but the original output interface is up,
	 * go ahead and send out that interface.  Otherwise, the packet
	 * is dropped below.
	 */
	if ((bifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		dst_if = ifp;
		goto sendunicast;
	}

	/*
	 * If the packet is multicast, or we don't know a better way to
	 * get there, send to all interfaces.
	 */
	if (ETHER_IS_MULTICAST(eh->ether_dhost))
		dst_if = NULL;
	else
		dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
	/* Tap any traffic not passing back out the originating interface */
	if (dst_if != ifp)
		ETHER_BPF_MTAP(bifp, m);
	if (dst_if == NULL) {
		struct bridge_iflist *bif;
		struct mbuf *mc;
		int used = 0;

		bridge_span(sc, m);

		CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
			dst_if = bif->bif_ifp;

			if (dst_if->if_type == IFT_GIF)
				continue;
			if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
				continue;

			/*
			 * If this is not the original output interface,
			 * and the interface is participating in spanning
			 * tree, make sure the port is in a state that
			 * allows forwarding.
			 */
			if (dst_if != ifp && (bif->bif_flags & IFBIF_STP) &&
			    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
				continue;

			if (CK_LIST_NEXT(bif, bif_next) == NULL) {
				used = 1;
				mc = m;
			} else {
				mc = m_dup(m, M_NOWAIT);
				if (mc == NULL) {
					if_inc_counter(bifp, IFCOUNTER_OERRORS, 1);
					continue;
				}
			}

			bridge_enqueue(sc, dst_if, mc);
		}
		if (used == 0)
			m_freem(m);
		return (0);
	}

sendunicast:
	/*
	 * XXX Spanning tree consideration here?
	 */

	bridge_span(sc, m);
	if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		m_freem(m);
		return (0);
	}

	bridge_enqueue(sc, dst_if, m);
	return (0);
}

/*
 * bridge_transmit:
 *
 *	Do output on a bridge.
 *
 */
static int
bridge_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct bridge_softc *sc;
	struct ether_header *eh;
	struct ifnet *dst_if;
	int error = 0;

	sc = ifp->if_softc;

	ETHER_BPF_MTAP(ifp, m);

	eh = mtod(m, struct ether_header *);

	if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) &&
	    (dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1)) != NULL) {
		error = bridge_enqueue(sc, dst_if, m);
	} else
		bridge_broadcast(sc, ifp, m, 0);

	return (error);
}

#ifdef ALTQ
static void
bridge_altq_start(if_t ifp)
{
	struct ifaltq *ifq = &ifp->if_snd;
	struct mbuf *m;

	IFQ_LOCK(ifq);
	IFQ_DEQUEUE_NOLOCK(ifq, m);
	while (m != NULL) {
		bridge_transmit(ifp, m);
		IFQ_DEQUEUE_NOLOCK(ifq, m);
	}
	IFQ_UNLOCK(ifq);
}

static int
bridge_altq_transmit(if_t ifp, struct mbuf *m)
{
	int err;

	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
		IFQ_ENQUEUE(&ifp->if_snd, m, err);
		if (err == 0)
			bridge_altq_start(ifp);
	} else
		err = bridge_transmit(ifp, m);

	return (err);
}
#endif	/* ALTQ */

/*
 * The ifp->if_qflush entry point for if_bridge(4) is a no-op.
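 * (Aside from the optional ALTQ queue, the bridge keeps no software
 * transmit queue of its own, so there is nothing to flush.)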
 */
static void
bridge_qflush(struct ifnet *ifp __unused)
{
}

/*
 * bridge_forward:
 *
 *	The forwarding function of the bridge.
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
    struct mbuf *m)
{
	struct bridge_iflist *dbif;
	struct ifnet *src_if, *dst_if, *ifp;
	struct ether_header *eh;
	uint16_t vlan;
	uint8_t *dst;
	int error;

	NET_EPOCH_ASSERT();

	src_if = m->m_pkthdr.rcvif;
	ifp = sc->sc_ifp;

	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	vlan = VLANTAGOF(m);

	if ((sbif->bif_flags & IFBIF_STP) &&
	    sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
		goto drop;

	eh = mtod(m, struct ether_header *);
	dst = eh->ether_dhost;

	/* If the interface is learning, record the address. */
	if (sbif->bif_flags & IFBIF_LEARNING) {
		error = bridge_rtupdate(sc, eh->ether_shost, vlan,
		    sbif, 0, IFBAF_DYNAMIC);
		/*
		 * If the interface has address limits then deny any source
		 * that is not in the cache.
		 */
		if (error && sbif->bif_addrmax)
			goto drop;
	}

	if ((sbif->bif_flags & IFBIF_STP) != 0 &&
	    sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING)
		goto drop;

	/*
	 * At this point, the port either doesn't participate
	 * in spanning tree or it is in the forwarding state.
	 */

	/*
	 * If the packet is unicast, destined for someone on
	 * "this" side of the bridge, drop it.
	 */
	if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
		dst_if = bridge_rtlookup(sc, dst, vlan);
		if (src_if == dst_if)
			goto drop;
	} else {
		/*
		 * Check whether it's a reserved multicast address; addresses
		 * listed in 802.1D section 7.12.6 may not be forwarded by the
		 * bridge.
		 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
		 */
		if (dst[0] == 0x01 && dst[1] == 0x80 &&
		    dst[2] == 0xc2 && dst[3] == 0x00 &&
		    dst[4] == 0x00 && dst[5] <= 0x0f)
			goto drop;

		/* ...forward it to all interfaces. */
		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
		dst_if = NULL;
	}

	/*
	 * If we have a destination interface which is a member of our bridge,
	 * OR this is a unicast packet, push it through the bpf(4) machinery.
	 * For broadcast or multicast packets, don't bother because it will
	 * be reinjected into ether_input. We do this before we pass the packets
	 * through the pfil(9) framework, as it is possible that pfil(9) will
	 * drop the packet, or possibly modify it, making it difficult to debug
	 * firewall issues on the bridge.
	 */
	if (dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0)
		ETHER_BPF_MTAP(ifp, m);

	/* run the packet filter */
	if (PFIL_HOOKED_IN(V_inet_pfil_head)
#ifdef INET6
	    || PFIL_HOOKED_IN(V_inet6_pfil_head)
#endif
	    ) {
		if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
			return;
		if (m == NULL)
			return;
	}

	if (dst_if == NULL) {
		bridge_broadcast(sc, src_if, m, 1);
		return;
	}

	/*
	 * At this point, we're dealing with a unicast frame
	 * going to a different interface.
	 */
	if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
		goto drop;

	dbif = bridge_lookup_member_if(sc, dst_if);
	if (dbif == NULL)
		/* Not a member of the bridge (anymore?) */
		goto drop;

	/* Private segments cannot talk to each other */
	if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)
		goto drop;

	if ((dbif->bif_flags & IFBIF_STP) &&
	    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
		goto drop;

	if (PFIL_HOOKED_OUT(V_inet_pfil_head)
#ifdef INET6
	    || PFIL_HOOKED_OUT(V_inet6_pfil_head)
#endif
	    ) {
		if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
			return;
		if (m == NULL)
			return;
	}

	bridge_enqueue(sc, dst_if, m);
	return;

drop:
	m_freem(m);
}

/*
 * bridge_input:
 *
 *	Receive input from a member interface.
Queue the packet for * bridging if it is not for us. */ static struct mbuf * bridge_input(struct ifnet *ifp, struct mbuf *m) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif, *bif2; struct ifnet *bifp; struct ether_header *eh; struct mbuf *mc, *mc2; uint16_t vlan; int error; NET_EPOCH_ASSERT(); if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return (m); bifp = sc->sc_ifp; vlan = VLANTAGOF(m); /* * Implement support for bridge monitoring. If this flag has been * set on this interface, discard the packet once we push it through * the bpf(4) machinery, but before we do, increment the byte and * packet counters associated with this interface. */ if ((bifp->if_flags & IFF_MONITOR) != 0) { m->m_pkthdr.rcvif = bifp; ETHER_BPF_MTAP(bifp, m); if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); m_freem(m); return (NULL); } bif = bridge_lookup_member_if(sc, ifp); if (bif == NULL) { return (m); } eh = mtod(m, struct ether_header *); bridge_span(sc, m); if (m->m_flags & (M_BCAST|M_MCAST)) { /* Tap off 802.1D packets; they do not get forwarded. */ if (memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) { bstp_input(&bif->bif_stp, ifp, m); /* consumes mbuf */ return (NULL); } if ((bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) { return (m); } /* * Make a deep copy of the packet and enqueue the copy * for bridge processing; return the original packet for * local processing. */ mc = m_dup(m, M_NOWAIT); if (mc == NULL) { return (m); } /* Perform the bridge forwarding function with the copy. */ bridge_forward(sc, bif, mc); /* * Reinject the mbuf as arriving on the bridge so we have a * chance at claiming multicast packets. We can not loop back * here from ether_input as a bridge is never a member of a * bridge. */ KASSERT(bifp->if_bridge == NULL, ("loop created in bridge_input")); mc2 = m_dup(m, M_NOWAIT); if (mc2 != NULL) { /* Keep the layer3 header aligned */ int i = min(mc2->m_pkthdr.len, max_protohdr); mc2 = m_copyup(mc2, i, ETHER_ALIGN); } if (mc2 != NULL) { mc2->m_pkthdr.rcvif = bifp; (*bifp->if_input)(bifp, mc2); } /* Return the original packet for local processing. */ return (m); } if ((bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) { return (m); } #if (defined(INET) || defined(INET6)) # define OR_CARP_CHECK_WE_ARE_DST(iface) \ || ((iface)->if_carp \ && (*carp_forus_p)((iface), eh->ether_dhost)) # define OR_CARP_CHECK_WE_ARE_SRC(iface) \ || ((iface)->if_carp \ && (*carp_forus_p)((iface), eh->ether_shost)) #else # define OR_CARP_CHECK_WE_ARE_DST(iface) # define OR_CARP_CHECK_WE_ARE_SRC(iface) #endif #ifdef INET6 # define OR_PFIL_HOOKED_INET6 \ || PFIL_HOOKED_IN(V_inet6_pfil_head) #else # define OR_PFIL_HOOKED_INET6 #endif #define GRAB_OUR_PACKETS(iface) \ if ((iface)->if_type == IFT_GIF) \ continue; \ /* It is destined for us. */ \ if (memcmp(IF_LLADDR((iface)), eh->ether_dhost, ETHER_ADDR_LEN) == 0 \ OR_CARP_CHECK_WE_ARE_DST((iface)) \ ) { \ if (bif->bif_flags & IFBIF_LEARNING) { \ error = bridge_rtupdate(sc, eh->ether_shost, \ vlan, bif, 0, IFBAF_DYNAMIC); \ if (error && bif->bif_addrmax) { \ m_freem(m); \ return (NULL); \ } \ } \ m->m_pkthdr.rcvif = iface; \ if ((iface) == ifp) { \ /* Skip bridge processing... src == dest */ \ return (m); \ } \ /* It's passing over or to the bridge, locally. 
 */								\
		ETHER_BPF_MTAP(bifp, m);			\
		if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1);	\
		if_inc_counter(bifp, IFCOUNTER_IBYTES,		\
		    m->m_pkthdr.len);				\
		/* Filter on the physical interface. */		\
		if (V_pfil_local_phys &&			\
		    (PFIL_HOOKED_IN(V_inet_pfil_head)		\
		     OR_PFIL_HOOKED_INET6)) {			\
			if (bridge_pfil(&m, NULL, ifp,		\
			    PFIL_IN) != 0 || m == NULL) {	\
				return (NULL);			\
			}					\
		}						\
		if ((iface) != bifp)				\
			ETHER_BPF_MTAP(iface, m);		\
		return (m);					\
	}							\
								\
	/* We just received a packet that we sent out. */	\
	if (memcmp(IF_LLADDR((iface)), eh->ether_shost,		\
	    ETHER_ADDR_LEN) == 0				\
	    OR_CARP_CHECK_WE_ARE_SRC((iface))			\
	    ) {							\
		m_freem(m);					\
		return (NULL);					\
	}

	/*
	 * Unicast.  Make sure it's not for the bridge.
	 */
	do { GRAB_OUR_PACKETS(bifp) } while (0);

	/*
	 * Give the interface ifp a chance first.  This helps when the
	 * packet comes through an interface such as a vlan(4) that shares
	 * a MAC with several interfaces on the same bridge.  It also saves
	 * some CPU cycles when the destination interface and the input
	 * interface (i.e. ifp) are the same.
	 */
	do { GRAB_OUR_PACKETS(ifp) } while (0);

	/* Now check all the bridge members. */
	CK_LIST_FOREACH(bif2, &sc->sc_iflist, bif_next) {
		GRAB_OUR_PACKETS(bif2->bif_ifp)
	}

#undef OR_CARP_CHECK_WE_ARE_DST
#undef OR_CARP_CHECK_WE_ARE_SRC
#undef OR_PFIL_HOOKED_INET6
#undef GRAB_OUR_PACKETS

	/* Perform the bridge forwarding function. */
	bridge_forward(sc, bif, m);

	return (NULL);
}

/*
 * bridge_broadcast:
 *
 *	Send a frame to all interfaces that are members of
 *	the bridge, except for the one on which the packet
 *	arrived.
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
    struct mbuf *m, int runfilt)
{
	struct bridge_iflist *dbif, *sbif;
	struct mbuf *mc;
	struct ifnet *dst_if;
	int used = 0, i;

	NET_EPOCH_ASSERT();

	sbif = bridge_lookup_member_if(sc, src_if);

	/* Filter on the bridge interface before broadcasting */
	if (runfilt && (PFIL_HOOKED_OUT(V_inet_pfil_head)
#ifdef INET6
	    || PFIL_HOOKED_OUT(V_inet6_pfil_head)
#endif
	    )) {
		if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0)
			return;
		if (m == NULL)
			return;
	}

	CK_LIST_FOREACH(dbif, &sc->sc_iflist, bif_next) {
		dst_if = dbif->bif_ifp;
		if (dst_if == src_if)
			continue;

		/* Private segments cannot talk to each other */
		if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE))
			continue;

		if ((dbif->bif_flags & IFBIF_STP) &&
		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
			continue;

		if ((dbif->bif_flags & IFBIF_DISCOVER) == 0 &&
		    (m->m_flags & (M_BCAST|M_MCAST)) == 0)
			continue;

		if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
			continue;

		if (CK_LIST_NEXT(dbif, bif_next) == NULL) {
			mc = m;
			used = 1;
		} else {
			mc = m_dup(m, M_NOWAIT);
			if (mc == NULL) {
				if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
				continue;
			}
		}

		/*
		 * Filter on the output interface. Pass a NULL bridge interface
		 * pointer so we do not redundantly filter on the bridge for
		 * each interface we broadcast on.
		 */
		if (runfilt && (PFIL_HOOKED_OUT(V_inet_pfil_head)
#ifdef INET6
		    || PFIL_HOOKED_OUT(V_inet6_pfil_head)
#endif
		    )) {
			if (used == 0) {
				/* Keep the layer3 header aligned */
				i = min(mc->m_pkthdr.len, max_protohdr);
				mc = m_copyup(mc, i, ETHER_ALIGN);
				if (mc == NULL) {
					if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
					continue;
				}
			}
			if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
				continue;
			if (mc == NULL)
				continue;
		}

		bridge_enqueue(sc, dst_if, mc);
	}
	if (used == 0)
		m_freem(m);
}

/*
 * bridge_span:
 *
 *	Duplicate a packet out one or more interfaces that are in span mode;
 *	the original mbuf is unmodified.
 */
static void
bridge_span(struct bridge_softc *sc, struct mbuf *m)
{
	struct bridge_iflist *bif;
	struct ifnet *dst_if;
	struct mbuf *mc;

	NET_EPOCH_ASSERT();

	if (CK_LIST_EMPTY(&sc->sc_spanlist))
		return;

	CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
		dst_if = bif->bif_ifp;

		if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
			continue;

		mc = m_dup(m, M_NOWAIT);
		if (mc == NULL) {
			if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
			continue;
		}

		bridge_enqueue(sc, dst_if, mc);
	}
}

/*
 * bridge_rtupdate:
 *
 *	Add a bridge routing entry.
 */
static int
bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
    struct bridge_iflist *bif, int setflags, uint8_t flags)
{
	struct bridge_rtnode *brt;
	int error;

	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);

	/* Check that the source address is valid and not multicast. */
	if (ETHER_IS_MULTICAST(dst) ||
	    (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
	     dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0)
		return (EINVAL);

	/* 802.1p frames map to vlan 1 */
	if (vlan == 0)
		vlan = 1;

	/*
	 * A route for this destination might already exist.  If so,
	 * update it, otherwise create a new one.
	 */
	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
		BRIDGE_RT_LOCK(sc);

		/* Check again, now that we have the lock. There could have
		 * been a race and we only want to insert this once. */
		if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) != NULL) {
			BRIDGE_RT_UNLOCK(sc);
			return (0);
		}

		if (sc->sc_brtcnt >= sc->sc_brtmax) {
			sc->sc_brtexceeded++;
			BRIDGE_RT_UNLOCK(sc);
			return (ENOSPC);
		}
		/* Check per interface address limits (if enabled) */
		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
			bif->bif_addrexceeded++;
			BRIDGE_RT_UNLOCK(sc);
			return (ENOSPC);
		}

		/*
		 * Allocate a new bridge forwarding node, and
		 * initialize the expiration time and Ethernet
		 * address.
		 */
		brt = uma_zalloc(V_bridge_rtnode_zone, M_NOWAIT | M_ZERO);
		if (brt == NULL) {
			BRIDGE_RT_UNLOCK(sc);
			return (ENOMEM);
		}
		brt->brt_vnet = curvnet;

		if (bif->bif_flags & IFBIF_STICKY)
			brt->brt_flags = IFBAF_STICKY;
		else
			brt->brt_flags = IFBAF_DYNAMIC;

		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
		brt->brt_vlan = vlan;

		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
			uma_zfree(V_bridge_rtnode_zone, brt);
			BRIDGE_RT_UNLOCK(sc);
			return (error);
		}
		brt->brt_dst = bif;
		bif->bif_addrcnt++;

		BRIDGE_RT_UNLOCK(sc);
	}

	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
	    brt->brt_dst != bif) {
		BRIDGE_RT_LOCK(sc);
		brt->brt_dst->bif_addrcnt--;
		brt->brt_dst = bif;
		brt->brt_dst->bif_addrcnt++;
		BRIDGE_RT_UNLOCK(sc);
	}

	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
		brt->brt_expire = time_uptime + sc->sc_brttimeout;
	if (setflags)
		brt->brt_flags = flags;

	return (0);
}

/*
 * bridge_rtlookup:
 *
 *	Lookup the destination interface for an address.
 */
static struct ifnet *
bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
{
	struct bridge_rtnode *brt;

	NET_EPOCH_ASSERT();

	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL)
		return (NULL);

	return (brt->brt_ifp);
}

/*
 * bridge_rttrim:
 *
 *	Trim the routing table so that we have a number
 *	of routing entries less than or equal to the
 *	maximum number.
 */
static void
bridge_rttrim(struct bridge_softc *sc)
{
	struct bridge_rtnode *brt, *nbrt;

	NET_EPOCH_ASSERT();
	BRIDGE_RT_LOCK_ASSERT(sc);

	/* Make sure we actually need to do this. */
	if (sc->sc_brtcnt <= sc->sc_brtmax)
		return;

	/* Force an aging cycle; this might trim enough addresses.
*/ bridge_rtage(sc); if (sc->sc_brtcnt <= sc->sc_brtmax) return; CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { bridge_rtnode_destroy(sc, brt); if (sc->sc_brtcnt <= sc->sc_brtmax) return; } } } /* * bridge_timer: * * Aging timer for the bridge. */ static void bridge_timer(void *arg) { struct bridge_softc *sc = arg; BRIDGE_RT_LOCK_ASSERT(sc); /* Destruction of rtnodes requires a proper vnet context */ CURVNET_SET(sc->sc_ifp->if_vnet); bridge_rtage(sc); if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz, bridge_timer, sc); CURVNET_RESTORE(); } /* * bridge_rtage: * * Perform an aging cycle. */ static void bridge_rtage(struct bridge_softc *sc) { struct bridge_rtnode *brt, *nbrt; BRIDGE_RT_LOCK_ASSERT(sc); CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { if (time_uptime >= brt->brt_expire) bridge_rtnode_destroy(sc, brt); } } } /* * bridge_rtflush: * * Remove all dynamic addresses from the bridge. */ static void bridge_rtflush(struct bridge_softc *sc, int full) { struct bridge_rtnode *brt, *nbrt; BRIDGE_RT_LOCK_ASSERT(sc); CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) bridge_rtnode_destroy(sc, brt); } } /* * bridge_rtdaddr: * * Remove an address from the table. */ static int bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan) { struct bridge_rtnode *brt; int found = 0; BRIDGE_RT_LOCK(sc); /* * If vlan is zero then we want to delete for all vlans so the lookup * may return more than one. */ while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) { bridge_rtnode_destroy(sc, brt); found = 1; } BRIDGE_RT_UNLOCK(sc); return (found ? 0 : ENOENT); } /* * bridge_rtdelete: * * Delete routes to a specific member interface. */ static void bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full) { struct bridge_rtnode *brt, *nbrt; BRIDGE_RT_LOCK_ASSERT(sc); CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if (brt->brt_ifp == ifp && (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) bridge_rtnode_destroy(sc, brt); } } /* * bridge_rtable_init: * * Initialize the route table for this bridge. */ static void bridge_rtable_init(struct bridge_softc *sc) { int i; sc->sc_rthash = malloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE, M_DEVBUF, M_WAITOK); for (i = 0; i < BRIDGE_RTHASH_SIZE; i++) CK_LIST_INIT(&sc->sc_rthash[i]); sc->sc_rthash_key = arc4random(); CK_LIST_INIT(&sc->sc_rtlist); } /* * bridge_rtable_fini: * * Deconstruct the route table for this bridge. */ static void bridge_rtable_fini(struct bridge_softc *sc) { KASSERT(sc->sc_brtcnt == 0, ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt)); free(sc->sc_rthash, M_DEVBUF); } /* * The following hash function is adapted from "Hash Functions" by Bob Jenkins * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
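* * The bridge uses it to pick a forwarding-table bucket from the 48-bit * MAC address, seeded with the per-bridge random sc_rthash_key, e.g.: * * hash = bridge_rthash(sc, addr); * CK_LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) * ...compare brt->brt_addr and brt->brt_vlan...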
*/ #define mix(a, b, c) \ do { \ a -= b; a -= c; a ^= (c >> 13); \ b -= c; b -= a; b ^= (a << 8); \ c -= a; c -= b; c ^= (b >> 13); \ a -= b; a -= c; a ^= (c >> 12); \ b -= c; b -= a; b ^= (a << 16); \ c -= a; c -= b; c ^= (b >> 5); \ a -= b; a -= c; a ^= (c >> 3); \ b -= c; b -= a; b ^= (a << 10); \ c -= a; c -= b; c ^= (b >> 15); \ } while (/*CONSTCOND*/0) static __inline uint32_t bridge_rthash(struct bridge_softc *sc, const uint8_t *addr) { uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key; b += addr[5] << 8; b += addr[4]; a += addr[3] << 24; a += addr[2] << 16; a += addr[1] << 8; a += addr[0]; mix(a, b, c); return (c & BRIDGE_RTHASH_MASK); } #undef mix static int bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b) { int i, d; for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) { d = ((int)a[i]) - ((int)b[i]); } return (d); } /* * bridge_rtnode_lookup: * * Look up a bridge route node for the specified destination. Compare the * vlan id or if zero then just return the first match. */ static struct bridge_rtnode * bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan) { struct bridge_rtnode *brt; uint32_t hash; int dir; BRIDGE_RT_LOCK_OR_NET_EPOCH_ASSERT(sc); hash = bridge_rthash(sc, addr); CK_LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) { dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr); if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) return (brt); if (dir > 0) return (NULL); } return (NULL); } /* * bridge_rtnode_insert: * * Insert the specified bridge node into the route table. We * assume the entry is not already in the table. */ static int bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt) { struct bridge_rtnode *lbrt; uint32_t hash; int dir; BRIDGE_RT_LOCK_ASSERT(sc); hash = bridge_rthash(sc, brt->brt_addr); lbrt = CK_LIST_FIRST(&sc->sc_rthash[hash]); if (lbrt == NULL) { CK_LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash); goto out; } do { dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr); if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) return (EEXIST); if (dir > 0) { CK_LIST_INSERT_BEFORE(lbrt, brt, brt_hash); goto out; } if (CK_LIST_NEXT(lbrt, brt_hash) == NULL) { CK_LIST_INSERT_AFTER(lbrt, brt, brt_hash); goto out; } lbrt = CK_LIST_NEXT(lbrt, brt_hash); } while (lbrt != NULL); #ifdef DIAGNOSTIC panic("bridge_rtnode_insert: impossible"); #endif out: CK_LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list); sc->sc_brtcnt++; return (0); } static void bridge_rtnode_destroy_cb(struct epoch_context *ctx) { struct bridge_rtnode *brt; brt = __containerof(ctx, struct bridge_rtnode, brt_epoch_ctx); CURVNET_SET(brt->brt_vnet); uma_zfree(V_bridge_rtnode_zone, brt); CURVNET_RESTORE(); } /* * bridge_rtnode_destroy: * * Destroy a bridge rtnode. */ static void bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt) { BRIDGE_RT_LOCK_ASSERT(sc); CK_LIST_REMOVE(brt, brt_hash); CK_LIST_REMOVE(brt, brt_list); sc->sc_brtcnt--; brt->brt_dst->bif_addrcnt--; NET_EPOCH_CALL(bridge_rtnode_destroy_cb, &brt->brt_epoch_ctx); } /* * bridge_rtable_expire: * * Set the expiry time for all routes on an interface. 
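* * An age of zero flushes the interface's dynamic entries outright; a * non-zero age only lowers expiry times that lie further out, e.g.: * * bridge_rtable_expire(ifp, 0); flush dynamic entries now * bridge_rtable_expire(ifp, 30); expire them within 30 seconds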
*/ static void bridge_rtable_expire(struct ifnet *ifp, int age) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_rtnode *brt; CURVNET_SET(ifp->if_vnet); BRIDGE_RT_LOCK(sc); /* * If the age is zero then flush, otherwise set all the expiry times to * age for the interface */ if (age == 0) bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN); else { CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { /* Cap the expiry time to 'age' */ if (brt->brt_ifp == ifp && brt->brt_expire > time_uptime + age && (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) brt->brt_expire = time_uptime + age; } } BRIDGE_RT_UNLOCK(sc); CURVNET_RESTORE(); } /* * bridge_state_change: * * Callback from the bridgestp code when a port changes states. */ static void bridge_state_change(struct ifnet *ifp, int state) { struct bridge_softc *sc = ifp->if_bridge; static const char *stpstates[] = { "disabled", "listening", "learning", "forwarding", "blocking", "discarding" }; CURVNET_SET(ifp->if_vnet); if (V_log_stp) log(LOG_NOTICE, "%s: state changed to %s on %s\n", sc->sc_ifp->if_xname, stpstates[state], ifp->if_xname); CURVNET_RESTORE(); } /* * Send bridge packets through pfil if they are one of the types pfil can deal * with, or if they are ARP or REVARP. (pfil will pass ARP and REVARP without * question.) If bifp or ifp is NULL, packet filtering is skipped for * that interface. */ static int bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) { int snap, error, i, hlen; struct ether_header *eh1, eh2; struct ip *ip; struct llc llc1; u_int16_t ether_type; pfil_return_t rv; snap = 0; error = -1; /* Default error unless explicitly set to 0 */ #if 0 /* we may return with the IP fields swapped, ensure it's not shared */ KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__)); #endif if (V_pfil_bridge == 0 && V_pfil_member == 0 && V_pfil_ipfw == 0) return (0); /* filtering is disabled */ i = min((*mp)->m_pkthdr.len, max_protohdr); if ((*mp)->m_len < i) { *mp = m_pullup(*mp, i); if (*mp == NULL) { printf("%s: m_pullup failed\n", __func__); return (-1); } } eh1 = mtod(*mp, struct ether_header *); ether_type = ntohs(eh1->ether_type); /* * Check for SNAP/LLC. */ if (ether_type < ETHERMTU) { struct llc *llc2 = (struct llc *)(eh1 + 1); if ((*mp)->m_len >= ETHER_HDR_LEN + 8 && llc2->llc_dsap == LLC_SNAP_LSAP && llc2->llc_ssap == LLC_SNAP_LSAP && llc2->llc_control == LLC_UI) { ether_type = htons(llc2->llc_un.type_snap.ether_type); snap = 1; } } /* * If we're trying to filter bridge traffic, don't look at anything * other than IP and ARP traffic. If the filter doesn't understand * IPv6, don't allow IPv6 through the bridge either. This is lame * since if we really wanted, say, an AppleTalk filter, we are hosed, * but of course we don't have an AppleTalk filter to begin with. * (Note that since pfil doesn't understand ARP it will pass *ALL* * ARP traffic.) */ switch (ether_type) { case ETHERTYPE_ARP: case ETHERTYPE_REVARP: if (V_pfil_ipfw_arp == 0) return (0); /* Automatically pass */ break; case ETHERTYPE_IP: #ifdef INET6 case ETHERTYPE_IPV6: #endif /* INET6 */ break; default: /* * Check to see if the user wants to pass non-IP * packets; these will not be checked by pfil(9) and * would be passed unconditionally, so the default is to drop.
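* * Note on the conversion in this function: the old * pfil_run_hooks(head, mp, ifp, dir, NULL) calls are replaced by the * direction-specific pfil_mbuf_in(head, mp, ifp, NULL) and * pfil_mbuf_out(head, mp, ifp, NULL). Where the direction is only * known at run time, the call sites below dispatch on 'dir', e.g.: * * rv = (dir == PFIL_OUT) ? * pfil_mbuf_out(V_inet_pfil_head, mp, ifp, NULL) : * pfil_mbuf_in(V_inet_pfil_head, mp, ifp, NULL);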
*/ if (V_pfil_onlyip) goto bad; } /* Run the packet through pfil before stripping link headers */ if (PFIL_HOOKED_OUT(V_link_pfil_head) && V_pfil_ipfw != 0 && dir == PFIL_OUT && ifp != NULL) { - switch (pfil_run_hooks(V_link_pfil_head, mp, ifp, dir, NULL)) { + switch (pfil_mbuf_out(V_link_pfil_head, mp, ifp, NULL)) { case PFIL_DROPPED: return (EACCES); case PFIL_CONSUMED: return (0); } } /* Strip off the Ethernet header and keep a copy. */ m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2); m_adj(*mp, ETHER_HDR_LEN); /* Strip off snap header, if present */ if (snap) { m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1); m_adj(*mp, sizeof(struct llc)); } /* * Check the IP header for alignment and errors */ if (dir == PFIL_IN) { switch (ether_type) { case ETHERTYPE_IP: error = bridge_ip_checkbasic(mp); break; #ifdef INET6 case ETHERTYPE_IPV6: error = bridge_ip6_checkbasic(mp); break; #endif /* INET6 */ default: error = 0; } if (error) goto bad; } error = 0; /* * Run the packet through pfil */ rv = PFIL_PASS; switch (ether_type) { case ETHERTYPE_IP: /* * Run pfil on the member interface and the bridge, both can * be skipped by clearing pfil_member or pfil_bridge. * * Keep the order: * in_if -> bridge_if -> out_if */ if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv = - pfil_run_hooks(V_inet_pfil_head, mp, bifp, dir, NULL)) != + pfil_mbuf_out(V_inet_pfil_head, mp, bifp, NULL)) != PFIL_PASS) break; - if (V_pfil_member && ifp != NULL && (rv = - pfil_run_hooks(V_inet_pfil_head, mp, ifp, dir, NULL)) != - PFIL_PASS) - break; + if (V_pfil_member && ifp != NULL) { + rv = (dir == PFIL_OUT) ? + pfil_mbuf_out(V_inet_pfil_head, mp, ifp, NULL) : + pfil_mbuf_in(V_inet_pfil_head, mp, ifp, NULL); + if (rv != PFIL_PASS) + break; + } if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv = - pfil_run_hooks(V_inet_pfil_head, mp, bifp, dir, NULL)) != + pfil_mbuf_in(V_inet_pfil_head, mp, bifp, NULL)) != PFIL_PASS) break; /* check if we need to fragment the packet */ /* bridge_fragment generates a mbuf chain of packets */ /* that already include eth headers */ if (V_pfil_member && ifp != NULL && dir == PFIL_OUT) { i = (*mp)->m_pkthdr.len; if (i > ifp->if_mtu) { error = bridge_fragment(ifp, mp, &eh2, snap, &llc1); return (error); } } /* Recalculate the ip checksum. */ ip = mtod(*mp, struct ip *); hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) goto bad; if (hlen > (*mp)->m_len) { if ((*mp = m_pullup(*mp, hlen)) == NULL) goto bad; ip = mtod(*mp, struct ip *); if (ip == NULL) goto bad; } ip->ip_sum = 0; if (hlen == sizeof(struct ip)) ip->ip_sum = in_cksum_hdr(ip); else ip->ip_sum = in_cksum(*mp, hlen); break; #ifdef INET6 case ETHERTYPE_IPV6: if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv = - pfil_run_hooks(V_inet6_pfil_head, mp, bifp, dir, NULL)) != + pfil_mbuf_out(V_inet6_pfil_head, mp, bifp, NULL)) != PFIL_PASS) break; - if (V_pfil_member && ifp != NULL && (rv = - pfil_run_hooks(V_inet6_pfil_head, mp, ifp, dir, NULL)) != - PFIL_PASS) - break; + if (V_pfil_member && ifp != NULL) { + rv = (dir == PFIL_OUT) ? 
+ pfil_mbuf_out(V_inet6_pfil_head, mp, ifp, NULL) : + pfil_mbuf_in(V_inet6_pfil_head, mp, ifp, NULL); + if (rv != PFIL_PASS) + break; + } if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv = - pfil_run_hooks(V_inet6_pfil_head, mp, bifp, dir, NULL)) != + pfil_mbuf_in(V_inet6_pfil_head, mp, bifp, NULL)) != PFIL_PASS) break; break; #endif } switch (rv) { case PFIL_CONSUMED: return (0); case PFIL_DROPPED: return (EACCES); default: break; } error = -1; /* * Finally, put everything back the way it was and return */ if (snap) { M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT); if (*mp == NULL) return (error); bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc)); } M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT); if (*mp == NULL) return (error); bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN); return (0); bad: m_freem(*mp); *mp = NULL; return (error); } /* * Perform basic checks on header size since * pfil assumes ip_input has already processed * it. Cut-and-pasted from ip_input.c. * Given how simple the IPv6 version is, * does the IPv4 version really need to be * this complicated? * * XXX Should we update ipstat here, or not? * XXX Right now we update ipstat but not * XXX csum_counter. */ static int bridge_ip_checkbasic(struct mbuf **mp) { struct mbuf *m = *mp; struct ip *ip; int len, hlen; u_short sum; if (*mp == NULL) return (-1); if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) { if ((m = m_copyup(m, sizeof(struct ip), (max_linkhdr + 3) & ~3)) == NULL) { /* XXXJRT new stat, please */ KMOD_IPSTAT_INC(ips_toosmall); goto bad; } } else if (__predict_false(m->m_len < sizeof (struct ip))) { if ((m = m_pullup(m, sizeof (struct ip))) == NULL) { KMOD_IPSTAT_INC(ips_toosmall); goto bad; } } ip = mtod(m, struct ip *); if (ip == NULL) goto bad; if (ip->ip_v != IPVERSION) { KMOD_IPSTAT_INC(ips_badvers); goto bad; } hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) { /* minimum header length */ KMOD_IPSTAT_INC(ips_badhlen); goto bad; } if (hlen > m->m_len) { if ((m = m_pullup(m, hlen)) == NULL) { KMOD_IPSTAT_INC(ips_badhlen); goto bad; } ip = mtod(m, struct ip *); if (ip == NULL) goto bad; } if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); } else { if (hlen == sizeof(struct ip)) { sum = in_cksum_hdr(ip); } else { sum = in_cksum(m, hlen); } } if (sum) { KMOD_IPSTAT_INC(ips_badsum); goto bad; } /* Retrieve the packet length. */ len = ntohs(ip->ip_len); /* * Check for additional length bogosity */ if (len < hlen) { KMOD_IPSTAT_INC(ips_badlen); goto bad; } /* * Check that the amount of data in the buffers * is at least as much as the IP header would have us expect. * Drop packet if shorter than we expect. */ if (m->m_pkthdr.len < len) { KMOD_IPSTAT_INC(ips_tooshort); goto bad; } /* Checks out, proceed */ *mp = m; return (0); bad: *mp = m; return (-1); } #ifdef INET6 /* * Same as above, but for IPv6. * Cut-and-pasted from ip6_input.c. * XXX Should we update ip6stat, or not? */ static int bridge_ip6_checkbasic(struct mbuf **mp) { struct mbuf *m = *mp; struct ip6_hdr *ip6; /* * If the IPv6 header is not aligned, slurp it up into a new * mbuf with space for link headers, in the event we forward * it. Otherwise, if it is aligned, make sure the entire base * IPv6 header is in the first mbuf of the chain.
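* * The (max_linkhdr + 3) & ~3 destination offset used below keeps the * pulled-up header 4-byte aligned while leaving room for a link-layer * header to be prepended later.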
*/ if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) { struct ifnet *inifp = m->m_pkthdr.rcvif; if ((m = m_copyup(m, sizeof(struct ip6_hdr), (max_linkhdr + 3) & ~3)) == NULL) { /* XXXJRT new stat, please */ IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(inifp, ifs6_in_hdrerr); goto bad; } } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) { struct ifnet *inifp = m->m_pkthdr.rcvif; if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(inifp, ifs6_in_hdrerr); goto bad; } } ip6 = mtod(m, struct ip6_hdr *); if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { IP6STAT_INC(ip6s_badvers); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); goto bad; } /* Checks out, proceed */ *mp = m; return (0); bad: *mp = m; return (-1); } #endif /* INET6 */ /* * bridge_fragment: * * Fragment mbuf chain in multiple packets and prepend ethernet header. */ static int bridge_fragment(struct ifnet *ifp, struct mbuf **mp, struct ether_header *eh, int snap, struct llc *llc) { struct mbuf *m = *mp, *nextpkt = NULL, *mprev = NULL, *mcur = NULL; struct ip *ip; int error = -1; if (m->m_len < sizeof(struct ip) && (m = m_pullup(m, sizeof(struct ip))) == NULL) goto dropit; ip = mtod(m, struct ip *); m->m_pkthdr.csum_flags |= CSUM_IP; error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist); if (error) goto dropit; /* * Walk the chain and re-add the Ethernet header for * each mbuf packet. */ for (mcur = m; mcur; mcur = mcur->m_nextpkt) { nextpkt = mcur->m_nextpkt; mcur->m_nextpkt = NULL; if (snap) { M_PREPEND(mcur, sizeof(struct llc), M_NOWAIT); if (mcur == NULL) { error = ENOBUFS; if (mprev != NULL) mprev->m_nextpkt = nextpkt; goto dropit; } bcopy(llc, mtod(mcur, caddr_t),sizeof(struct llc)); } M_PREPEND(mcur, ETHER_HDR_LEN, M_NOWAIT); if (mcur == NULL) { error = ENOBUFS; if (mprev != NULL) mprev->m_nextpkt = nextpkt; goto dropit; } bcopy(eh, mtod(mcur, caddr_t), ETHER_HDR_LEN); /* * The previous two M_PREPEND could have inserted one or two * mbufs in front so we have to update the previous packet's * m_nextpkt. */ mcur->m_nextpkt = nextpkt; if (mprev != NULL) mprev->m_nextpkt = mcur; else { /* The first mbuf in the original chain needs to be * updated. 
*/ *mp = mcur; } mprev = mcur; } KMOD_IPSTAT_INC(ips_fragmented); return (error); dropit: for (mcur = *mp; mcur; mcur = m) { /* dropping the full packet chain */ m = mcur->m_nextpkt; m_freem(mcur); } return (error); } static void bridge_linkstate(struct ifnet *ifp) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif; struct epoch_tracker et; NET_EPOCH_ENTER(et); bif = bridge_lookup_member_if(sc, ifp); if (bif == NULL) { NET_EPOCH_EXIT(et); return; } bridge_linkcheck(sc); bstp_linkstate(&bif->bif_stp); NET_EPOCH_EXIT(et); } static void bridge_linkcheck(struct bridge_softc *sc) { struct bridge_iflist *bif; int new_link, hasls; BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc); new_link = LINK_STATE_DOWN; hasls = 0; /* Our link is considered up if at least one of our ports is active */ CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->bif_ifp->if_capabilities & IFCAP_LINKSTATE) hasls++; if (bif->bif_ifp->if_link_state == LINK_STATE_UP) { new_link = LINK_STATE_UP; break; } } if (!CK_LIST_EMPTY(&sc->sc_iflist) && !hasls) { /* If no interfaces support link-state then we default to up */ new_link = LINK_STATE_UP; } if_link_state_change(sc->sc_ifp, new_link); } diff --git a/sys/net/if_enc.c b/sys/net/if_enc.c index da6ce7a1a815..b5ea1c68692c 100644 --- a/sys/net/if_enc.c +++ b/sys/net/if_enc.c @@ -1,450 +1,454 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 The FreeBSD Project. * Copyright (c) 2015 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
* * $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif #include #include #define ENCMTU (1024+512) /* XXX this define must have the same value as in OpenBSD */ #define M_CONF 0x0400 /* payload was encrypted (ESP-transport) */ #define M_AUTH 0x0800 /* payload was authenticated (AH or ESP auth) */ #define M_AUTH_AH 0x2000 /* header was authenticated (AH) */ struct enchdr { u_int32_t af; u_int32_t spi; u_int32_t flags; }; struct enc_softc { struct ifnet *sc_ifp; }; VNET_DEFINE_STATIC(struct enc_softc *, enc_sc); #define V_enc_sc VNET(enc_sc) VNET_DEFINE_STATIC(struct if_clone *, enc_cloner); #define V_enc_cloner VNET(enc_cloner) static int enc_ioctl(struct ifnet *, u_long, caddr_t); static int enc_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static int enc_clone_create(struct if_clone *, int, caddr_t); static void enc_clone_destroy(struct ifnet *); static int enc_add_hhooks(struct enc_softc *); static void enc_remove_hhooks(struct enc_softc *); static const char encname[] = "enc"; #define IPSEC_ENC_AFTER_PFIL 0x04 /* * Before and after are relative to when we are stripping the * outer IP header. * * The AFTER_PFIL flag is used only for bpf_mask_*. It enables BPF capture * after PFIL hook execution, which can be useful when a PFIL hook * modifies the packet, e.g. for address translation. If the PFIL hook * consumes the mbuf, nothing is captured. */ VNET_DEFINE_STATIC(int, filter_mask_in) = IPSEC_ENC_BEFORE; VNET_DEFINE_STATIC(int, bpf_mask_in) = IPSEC_ENC_BEFORE; VNET_DEFINE_STATIC(int, filter_mask_out) = IPSEC_ENC_BEFORE; VNET_DEFINE_STATIC(int, bpf_mask_out) = IPSEC_ENC_BEFORE | IPSEC_ENC_AFTER; #define V_filter_mask_in VNET(filter_mask_in) #define V_bpf_mask_in VNET(bpf_mask_in) #define V_filter_mask_out VNET(filter_mask_out) #define V_bpf_mask_out VNET(bpf_mask_out) static SYSCTL_NODE(_net, OID_AUTO, enc, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "enc sysctl"); static SYSCTL_NODE(_net_enc, OID_AUTO, in, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "enc input sysctl"); static SYSCTL_NODE(_net_enc, OID_AUTO, out, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "enc output sysctl"); SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(filter_mask_in), 0, "IPsec input firewall filter mask"); SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(bpf_mask_in), 0, "IPsec input bpf mask"); SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(filter_mask_out), 0, "IPsec output firewall filter mask"); SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(bpf_mask_out), 0, "IPsec output bpf mask"); static void enc_clone_destroy(struct ifnet *ifp) { struct enc_softc *sc; sc = ifp->if_softc; KASSERT(sc == V_enc_sc, ("sc != ifp->if_softc")); bpfdetach(ifp); if_detach(ifp); if_free(ifp); free(sc, M_DEVBUF); V_enc_sc = NULL; } static int enc_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct ifnet *ifp; struct enc_softc *sc; sc = malloc(sizeof(struct enc_softc), M_DEVBUF, M_WAITOK | M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ENC); if (ifp == NULL) { free(sc, M_DEVBUF); return (ENOSPC); } if (V_enc_sc != NULL) { if_free(ifp); free(sc,
M_DEVBUF); return (EEXIST); } V_enc_sc = sc; if_initname(ifp, encname, unit); ifp->if_mtu = ENCMTU; ifp->if_ioctl = enc_ioctl; ifp->if_output = enc_output; ifp->if_softc = sc; if_attach(ifp); bpfattach(ifp, DLT_ENC, sizeof(struct enchdr)); return (0); } static int enc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { m_freem(m); return (0); } static int enc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { if (cmd != SIOCSIFFLAGS) return (EINVAL); if (ifp->if_flags & IFF_UP) ifp->if_drv_flags |= IFF_DRV_RUNNING; else ifp->if_drv_flags &= ~IFF_DRV_RUNNING; return (0); } static void enc_bpftap(struct ifnet *ifp, struct mbuf *m, const struct secasvar *sav, int32_t hhook_type, uint8_t enc, uint8_t af) { struct enchdr hdr; if (hhook_type == HHOOK_TYPE_IPSEC_IN && (enc & V_bpf_mask_in) == 0) return; else if (hhook_type == HHOOK_TYPE_IPSEC_OUT && (enc & V_bpf_mask_out) == 0) return; if (bpf_peers_present(ifp->if_bpf) == 0) return; hdr.af = af; hdr.spi = sav->spi; hdr.flags = 0; if (sav->alg_enc != SADB_EALG_NONE) hdr.flags |= M_CONF; if (sav->alg_auth != SADB_AALG_NONE) hdr.flags |= M_AUTH; bpf_mtap2(ifp->if_bpf, &hdr, sizeof(hdr), m); } /* * One helper hook function is used by all hook points. * + from hhook_type we can determine the packet direction: * HHOOK_TYPE_IPSEC_IN or HHOOK_TYPE_IPSEC_OUT; * + from hhook_id we can determine the address family: AF_INET or AF_INET6; * + udata contains a pointer to enc_softc; * + ctx_data contains a pointer to struct ipsec_ctx_data. */ static int enc_hhook(int32_t hhook_type, int32_t hhook_id, void *udata, void *ctx_data, void *hdata, struct osd *hosd) { struct ipsec_ctx_data *ctx; struct enc_softc *sc; struct ifnet *ifp, *rcvif; struct pfil_head *ph; - int pdir; + int pdir, ret; sc = (struct enc_softc *)udata; ifp = sc->sc_ifp; if ((ifp->if_flags & IFF_UP) == 0) return (0); ctx = (struct ipsec_ctx_data *)ctx_data; /* XXX: wrong hook point was used by caller?
*/ if (ctx->af != hhook_id) return (EPFNOSUPPORT); enc_bpftap(ifp, *ctx->mp, ctx->sav, hhook_type, ctx->enc, ctx->af); switch (hhook_type) { case HHOOK_TYPE_IPSEC_IN: if (ctx->enc == IPSEC_ENC_BEFORE) { /* Do accounting only once */ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, (*ctx->mp)->m_pkthdr.len); } if ((ctx->enc & V_filter_mask_in) == 0) return (0); /* skip pfil processing */ pdir = PFIL_IN; break; case HHOOK_TYPE_IPSEC_OUT: if (ctx->enc == IPSEC_ENC_BEFORE) { /* Do accounting only once */ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, (*ctx->mp)->m_pkthdr.len); } if ((ctx->enc & V_filter_mask_out) == 0) return (0); /* skip pfil processing */ pdir = PFIL_OUT; break; default: return (EINVAL); } switch (hhook_id) { #ifdef INET case AF_INET: ph = V_inet_pfil_head; break; #endif #ifdef INET6 case AF_INET6: ph = V_inet6_pfil_head; break; #endif default: ph = NULL; } if (ph == NULL || (pdir == PFIL_OUT && !PFIL_HOOKED_OUT(ph)) || (pdir == PFIL_IN && !PFIL_HOOKED_IN(ph))) return (0); /* Make the packet look like it was received on enc(4) */ rcvif = (*ctx->mp)->m_pkthdr.rcvif; (*ctx->mp)->m_pkthdr.rcvif = ifp; - if (pfil_run_hooks(ph, ctx->mp, ifp, pdir, ctx->inp) != PFIL_PASS) { + if (pdir == PFIL_IN) + ret = pfil_mbuf_in(ph, ctx->mp, ifp, ctx->inp); + else + ret = pfil_mbuf_out(ph, ctx->mp, ifp, ctx->inp); + if (ret != PFIL_PASS) { *ctx->mp = NULL; /* consumed by filter */ return (EACCES); } (*ctx->mp)->m_pkthdr.rcvif = rcvif; enc_bpftap(ifp, *ctx->mp, ctx->sav, hhook_type, IPSEC_ENC_AFTER_PFIL, ctx->af); return (0); } static int enc_add_hhooks(struct enc_softc *sc) { struct hookinfo hki; int error; error = EPFNOSUPPORT; hki.hook_func = enc_hhook; hki.hook_helper = NULL; hki.hook_udata = sc; #ifdef INET hki.hook_id = AF_INET; hki.hook_type = HHOOK_TYPE_IPSEC_IN; error = hhook_add_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET], &hki, HHOOK_WAITOK); if (error != 0) return (error); hki.hook_type = HHOOK_TYPE_IPSEC_OUT; error = hhook_add_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET], &hki, HHOOK_WAITOK); if (error != 0) return (error); #endif #ifdef INET6 hki.hook_id = AF_INET6; hki.hook_type = HHOOK_TYPE_IPSEC_IN; error = hhook_add_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET6], &hki, HHOOK_WAITOK); if (error != 0) return (error); hki.hook_type = HHOOK_TYPE_IPSEC_OUT; error = hhook_add_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET6], &hki, HHOOK_WAITOK); if (error != 0) return (error); #endif return (error); } static void enc_remove_hhooks(struct enc_softc *sc) { struct hookinfo hki; hki.hook_func = enc_hhook; hki.hook_helper = NULL; hki.hook_udata = sc; #ifdef INET hki.hook_id = AF_INET; hki.hook_type = HHOOK_TYPE_IPSEC_IN; hhook_remove_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET], &hki); hki.hook_type = HHOOK_TYPE_IPSEC_OUT; hhook_remove_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET], &hki); #endif #ifdef INET6 hki.hook_id = AF_INET6; hki.hook_type = HHOOK_TYPE_IPSEC_IN; hhook_remove_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET6], &hki); hki.hook_type = HHOOK_TYPE_IPSEC_OUT; hhook_remove_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET6], &hki); #endif } static void vnet_enc_init(const void *unused __unused) { V_enc_sc = NULL; V_enc_cloner = if_clone_simple(encname, enc_clone_create, enc_clone_destroy, 1); } VNET_SYSINIT(vnet_enc_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_enc_init, NULL); static void vnet_enc_init_proto(void *unused __unused) { KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc)); if (enc_add_hhooks(V_enc_sc) != 0) enc_clone_destroy(V_enc_sc->sc_ifp);
} VNET_SYSINIT(vnet_enc_init_proto, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_enc_init_proto, NULL); static void vnet_enc_uninit(const void *unused __unused) { KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc)); if_clone_detach(V_enc_cloner); } VNET_SYSUNINIT(vnet_enc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_enc_uninit, NULL); /* * The hhook consumer needs to go before ip[6]_destroy are called on * SI_ORDER_THIRD. */ static void vnet_enc_uninit_hhook(const void *unused __unused) { KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc)); enc_remove_hhooks(V_enc_sc); } VNET_SYSUNINIT(vnet_enc_uninit_hhook, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, vnet_enc_uninit_hhook, NULL); static int enc_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: case MOD_UNLOAD: break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t enc_mod = { "if_enc", enc_modevent, 0 }; DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_enc, 1); diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 8bc66497e161..34ff4ac22e7f 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -1,1500 +1,1499 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_netgraph.h" #include "opt_mbuf_profiling.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #endif #ifdef INET6 #include #endif #include #include #ifdef CTASSERT CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2); CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN); #endif VNET_DEFINE(pfil_head_t, link_pfil_head); /* Packet filter hooks */ /* netgraph node hooks for ng_ether(4) */ void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m); int (*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_attach_p)(struct ifnet *ifp); void (*ng_ether_detach_p)(struct ifnet *ifp); void (*vlan_input_p)(struct ifnet *, struct mbuf *); /* if_bridge(4) support */ void (*bridge_dn_p)(struct mbuf *, struct ifnet *); /* if_lagg(4) support */ struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *); static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static int ether_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); static int ether_requestencap(struct ifnet *, struct if_encap_req *); #define senderr(e) do { error = (e); goto bad;} while (0) static void update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst) { int csum_flags = 0; if (src->m_pkthdr.csum_flags & CSUM_IP) csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID); if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA) csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); if (src->m_pkthdr.csum_flags & CSUM_SCTP) csum_flags |= CSUM_SCTP_VALID; dst->m_pkthdr.csum_flags |= csum_flags; if (csum_flags & CSUM_DATA_VALID) dst->m_pkthdr.csum_data = 0xffff; } /* * Handle link-layer encapsulation requests. 
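* * A caller supplies an if_encap_req with rtype IFENCAP_LL and a buffer * of at least ETHER_HDR_LEN bytes; on success the prebuilt header is * left in req->buf with its length in req->bufsize. A sketch of a * typical request (field values illustrative only): * * struct if_encap_req req; * * bzero(&req, sizeof(req)); * req.rtype = IFENCAP_LL; * req.buf = hdrbuf; * req.bufsize = sizeof(hdrbuf); * req.family = AF_INET; * req.lladdr = dst_mac; * error = ifp->if_requestencap(ifp, &req);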
*/ static int ether_requestencap(struct ifnet *ifp, struct if_encap_req *req) { struct ether_header *eh; struct arphdr *ah; uint16_t etype; const u_char *lladdr; if (req->rtype != IFENCAP_LL) return (EOPNOTSUPP); if (req->bufsize < ETHER_HDR_LEN) return (ENOMEM); eh = (struct ether_header *)req->buf; lladdr = req->lladdr; req->lladdr_off = 0; switch (req->family) { case AF_INET: etype = htons(ETHERTYPE_IP); break; case AF_INET6: etype = htons(ETHERTYPE_IPV6); break; case AF_ARP: ah = (struct arphdr *)req->hdata; ah->ar_hrd = htons(ARPHRD_ETHER); switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: etype = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: etype = htons(ETHERTYPE_ARP); break; } if (req->flags & IFENCAP_FLAG_BROADCAST) lladdr = ifp->if_broadcastaddr; break; default: return (EAFNOSUPPORT); } memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type)); memcpy(eh->ether_dhost, lladdr, ETHER_ADDR_LEN); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); req->bufsize = sizeof(struct ether_header); return (0); } static int ether_resolve_addr(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro, u_char *phdr, uint32_t *pflags, struct llentry **plle) { uint32_t lleflags = 0; int error = 0; #if defined(INET) || defined(INET6) struct ether_header *eh = (struct ether_header *)phdr; uint16_t etype; #endif if (plle) *plle = NULL; switch (dst->sa_family) { #ifdef INET case AF_INET: if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) error = arpresolve(ifp, 0, m, dst, phdr, &lleflags, plle); else { if (m->m_flags & M_BCAST) memcpy(eh->ether_dhost, ifp->if_broadcastaddr, ETHER_ADDR_LEN); else { const struct in_addr *a; a = &(((const struct sockaddr_in *)dst)->sin_addr); ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost); } etype = htons(ETHERTYPE_IP); memcpy(&eh->ether_type, &etype, sizeof(etype)); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); } break; #endif #ifdef INET6 case AF_INET6: if ((m->m_flags & M_MCAST) == 0) { int af = RO_GET_FAMILY(ro, dst); error = nd6_resolve(ifp, LLE_SF(af, 0), m, dst, phdr, &lleflags, plle); } else { const struct in6_addr *a6; a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr); ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost); etype = htons(ETHERTYPE_IPV6); memcpy(&eh->ether_type, &etype, sizeof(etype)); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); } break; #endif default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); if (m != NULL) m_freem(m); return (EAFNOSUPPORT); } if (error == EHOSTDOWN) { if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0) error = EHOSTUNREACH; } if (error != 0) return (error); *pflags = RT_MAY_LOOP; if (lleflags & LLE_IFADDR) *pflags |= RT_L2_ME; return (0); } /* * Ethernet output routine. * Encapsulate a packet of type family for the local net. * Use trailer local net encapsulation if enough data in first * packet leaves a multiple of 512 bytes of data in remainder. 
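* * The precomputed-header fast path: if the caller's route carries a * prepend buffer (ro_prepend) or a cached, valid llentry, the prebuilt * link-layer header (r_linkdata/r_hdrlen) is used and address * resolution is skipped; otherwise ether_resolve_addr() builds the * header into a stack buffer.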
*/ int ether_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { int error = 0; char linkhdr[ETHER_HDR_LEN], *phdr; struct ether_header *eh; struct pf_mtag *t; bool loop_copy; int hlen; /* link layer header length */ uint32_t pflags; struct llentry *lle = NULL; int addref = 0; phdr = NULL; pflags = 0; if (ro != NULL) { /* XXX BPF uses ro_prepend */ if (ro->ro_prepend != NULL) { phdr = ro->ro_prepend; hlen = ro->ro_plen; } else if (!(m->m_flags & (M_BCAST | M_MCAST))) { if ((ro->ro_flags & RT_LLE_CACHE) != 0) { lle = ro->ro_lle; if (lle != NULL && (lle->la_flags & LLE_VALID) == 0) { LLE_FREE(lle); lle = NULL; /* redundant */ ro->ro_lle = NULL; } if (lle == NULL) { /* if we lookup, keep cache */ addref = 1; } else /* * Notify LLE code that * the entry was used * by datapath. */ llentry_provide_feedback(lle); } if (lle != NULL) { phdr = lle->r_linkdata; hlen = lle->r_hdrlen; pflags = lle->r_flags; } } } #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif M_PROFILE(m); if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); if (phdr == NULL) { /* No prepend data supplied. Try to calculate ourselves. */ phdr = linkhdr; hlen = ETHER_HDR_LEN; error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags, addref ? &lle : NULL); if (addref && lle != NULL) ro->ro_lle = lle; if (error != 0) return (error == EWOULDBLOCK ? 0 : error); } if ((pflags & RT_L2_ME) != 0) { update_mbuf_csumflags(m, m); return (if_simloop(ifp, m, RO_GET_FAMILY(ro, dst), 0)); } loop_copy = (pflags & RT_MAY_LOOP) != 0; /* * Add local net header. If no space in first mbuf, * allocate another. * * Note that we do prepend regardless of RT_HAS_HEADER flag. * This is done because BPF code shifts m_data pointer * to the end of ethernet header prior to calling if_output(). */ M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); if ((pflags & RT_HAS_HEADER) == 0) { eh = mtod(m, struct ether_header *); memcpy(eh, phdr, hlen); } /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) && ((t = pf_find_mtag(m)) == NULL || !t->routed)) { struct mbuf *n; /* * Because if_simloop() modifies the packet, we need a * writable copy through m_dup() instead of a readonly * one as m_copy[m] would give us. The alternative would * be to modify if_simloop() to handle the readonly mbuf, * but performancewise it is mostly equivalent (trading * extra data copying vs. extra locking). * * XXX This is a local workaround. A number of less * often used kernel parts suffer from the same bug. * See PR kern/105943 for a proposed general solution. */ if ((n = m_dup(m, M_NOWAIT)) != NULL) { update_mbuf_csumflags(m, n); (void)if_simloop(ifp, n, RO_GET_FAMILY(ro, dst), hlen); } else if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); } /* * Bridges require special output handling. 
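* BRIDGE_OUTPUT() takes over the mbuf and returns the result in * 'error'; the frame does not come back to this function.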
*/ if (ifp->if_bridge) { BRIDGE_OUTPUT(ifp, m, error); return (error); } #if defined(INET) || defined(INET6) if (ifp->if_carp && (error = (*carp_output_p)(ifp, m, dst))) goto bad; #endif /* Handle ng_ether(4) processing, if any */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_output_p != NULL, ("ng_ether_output_p is NULL")); if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) { bad: if (m != NULL) m_freem(m); return (error); } if (m == NULL) return (0); } /* Continue with link-layer output */ return ether_output_frame(ifp, m); } static bool ether_set_pcp(struct mbuf **mp, struct ifnet *ifp, uint8_t pcp) { struct ether_8021q_tag qtag; struct ether_header *eh; eh = mtod(*mp, struct ether_header *); if (ntohs(eh->ether_type) == ETHERTYPE_VLAN || ntohs(eh->ether_type) == ETHERTYPE_QINQ) return (true); qtag.vid = 0; qtag.pcp = pcp; qtag.proto = ETHERTYPE_VLAN; if (ether_8021q_frame(mp, ifp, ifp, &qtag)) return (true); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (false); } /* * Ethernet link layer output routine to send a raw frame to the device. * * This assumes that the 14 byte Ethernet header is present and contiguous * in the first mbuf (if BRIDGE'ing). */ int ether_output_frame(struct ifnet *ifp, struct mbuf *m) { uint8_t pcp; pcp = ifp->if_pcp; if (pcp != IFNET_PCP_NONE && ifp->if_type != IFT_L2VLAN && !ether_set_pcp(&m, ifp, pcp)) return (0); if (PFIL_HOOKED_OUT(V_link_pfil_head)) - switch (pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_OUT, - NULL)) { + switch (pfil_mbuf_out(V_link_pfil_head, &m, ifp, NULL)) { case PFIL_DROPPED: return (EACCES); case PFIL_CONSUMED: return (0); } #ifdef EXPERIMENTAL #if defined(INET6) && defined(INET) /* draft-ietf-6man-ipv6only-flag */ /* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) { struct ether_header *eh; eh = mtod(m, struct ether_header *); switch (ntohs(eh->ether_type)) { case ETHERTYPE_IP: case ETHERTYPE_ARP: case ETHERTYPE_REVARP: m_freem(m); return (EAFNOSUPPORT); /* NOTREACHED */ break; }; } #endif #endif /* * Queue message on interface, update output statistics if successful, * and start output if interface not yet active. * * If KMSAN is enabled, use it to verify that the data does not contain * any uninitialized bytes. */ kmsan_check_mbuf(m, "ether_output"); return ((ifp->if_transmit)(ifp, m)); } /* * Process a received Ethernet packet; the packet is in the * mbuf chain m with the ethernet header at the front. */ static void ether_input_internal(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; u_short etype; if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } #ifdef DIAGNOSTIC if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n"); m_freem(m); return; } #endif if (m->m_len < ETHER_HDR_LEN) { /* XXX maybe should pullup? */ if_printf(ifp, "discard frame w/o leading ethernet " "header (len %u pkt len %u)\n", m->m_len, m->m_pkthdr.len); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); random_harvest_queue_ether(m, sizeof(*m)); #ifdef EXPERIMENTAL #if defined(INET6) && defined(INET) /* draft-ietf-6man-ipv6only-flag */ /* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. 
*/ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) { switch (etype) { case ETHERTYPE_IP: case ETHERTYPE_ARP: case ETHERTYPE_REVARP: m_freem(m); return; /* NOTREACHED */ break; }; } #endif #endif CURVNET_SET_QUIET(ifp->if_vnet); if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (ETHER_IS_BROADCAST(eh->ether_dhost)) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } #ifdef MAC /* * Tag the mbuf with an appropriate MAC label before any other * consumers can get to it. */ mac_ifnet_create_mbuf(ifp, m); #endif /* * Give bpf a chance at the packet. */ ETHER_BPF_MTAP(ifp, m); /* * If the CRC is still on the packet, trim it off. We do this once * and once only in case we are re-entered. Nothing else on the * Ethernet receive path expects to see the FCS. */ if (m->m_flags & M_HASFCS) { m_adj(m, -ETHER_CRC_LEN); m->m_flags &= ~M_HASFCS; } if (!(ifp->if_capenable & IFCAP_HWSTATS)) if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* Allow monitor mode to claim this frame, after stats are updated. */ if (ifp->if_flags & IFF_MONITOR) { m_freem(m); CURVNET_RESTORE(); return; } /* Handle input from a lagg(4) port */ if (ifp->if_type == IFT_IEEE8023ADLAG) { KASSERT(lagg_input_ethernet_p != NULL, ("%s: if_lagg not loaded!", __func__)); m = (*lagg_input_ethernet_p)(ifp, m); if (m != NULL) ifp = m->m_pkthdr.rcvif; else { CURVNET_RESTORE(); return; } } /* * If the hardware did not process an 802.1Q tag, do this now, * to allow 802.1P priority frames to be passed to the main input * path correctly. */ if ((m->m_flags & M_VLANTAG) == 0 && ((etype == ETHERTYPE_VLAN) || (etype == ETHERTYPE_QINQ))) { struct ether_vlan_header *evl; if (m->m_len < sizeof(*evl) && (m = m_pullup(m, sizeof(*evl))) == NULL) { #ifdef DIAGNOSTIC if_printf(ifp, "cannot pullup VLAN header\n"); #endif if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); CURVNET_RESTORE(); return; } evl = mtod(m, struct ether_vlan_header *); m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); m->m_flags |= M_VLANTAG; bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); eh = mtod(m, struct ether_header *); } M_SETFIB(m, ifp->if_fib); /* Allow ng_ether(4) to claim this frame. */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_input_p != NULL, ("%s: ng_ether_input_p is NULL", __func__)); m->m_flags &= ~M_PROMISC; (*ng_ether_input_p)(ifp, &m); if (m == NULL) { CURVNET_RESTORE(); return; } eh = mtod(m, struct ether_header *); } /* * Allow if_bridge(4) to claim this frame. * The BRIDGE_INPUT() macro will update ifp if the bridge changed it * and the frame should be delivered locally. */ if (ifp->if_bridge != NULL) { m->m_flags &= ~M_PROMISC; BRIDGE_INPUT(ifp, m); if (m == NULL) { CURVNET_RESTORE(); return; } eh = mtod(m, struct ether_header *); } #if defined(INET) || defined(INET6) /* * Clear M_PROMISC on frame so that carp(4) will see it when the * mbuf flows up to Layer 3. * FreeBSD's implementation of carp(4) uses the inprotosw * to dispatch IPPROTO_CARP. carp(4) also allocates its own * Ethernet addresses of the form 00:00:5e:00:01:xx, which * is outside the scope of the M_PROMISC test below. * TODO: Maintain a hash table of ethernet addresses other than * ether_dhost which may be active on this ifp. */ if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) { m->m_flags &= ~M_PROMISC; } else #endif { /* * If the frame received was not for our MAC address, set the * M_PROMISC flag on the mbuf chain. 
The frame may need to * be seen by the rest of the Ethernet input path in case of * re-entry (e.g. bridge, vlan, netgraph) but should not be * seen by upper protocol layers. */ if (!ETHER_IS_MULTICAST(eh->ether_dhost) && bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0) m->m_flags |= M_PROMISC; } ether_demux(ifp, m); CURVNET_RESTORE(); } /* * Ethernet input dispatch; by default, direct dispatch here regardless of * global configuration. However, if RSS is enabled, hook up RSS affinity * so that when deferred or hybrid dispatch is enabled, we can redistribute * load based on RSS. * * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or * not it had already done work distribution via multi-queue. Then we could * direct dispatch in the event load balancing was already complete and * handle the case of interfaces with different capabilities better. * * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions * at multiple layers? * * XXXRW: For now, enable all this only if RSS is compiled in, although it * works fine without RSS. Need to characterise the performance overhead * of the detour through the netisr code in the event the result is always * direct dispatch. */ static void ether_nh_input(struct mbuf *m) { M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: NULL interface pointer", __func__)); ether_input_internal(m->m_pkthdr.rcvif, m); } static struct netisr_handler ether_nh = { .nh_name = "ether", .nh_handler = ether_nh_input, .nh_proto = NETISR_ETHER, #ifdef RSS .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_DIRECT, .nh_m2cpuid = rss_m2cpuid, #else .nh_policy = NETISR_POLICY_SOURCE, .nh_dispatch = NETISR_DISPATCH_DIRECT, #endif }; static void ether_init(__unused void *arg) { netisr_register(ðer_nh); } SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL); static void vnet_ether_init(__unused void *arg) { struct pfil_head_args args; args.pa_version = PFIL_VERSION; args.pa_flags = PFIL_IN | PFIL_OUT; args.pa_type = PFIL_TYPE_ETHERNET; args.pa_headname = PFIL_ETHER_NAME; V_link_pfil_head = pfil_head_register(&args); #ifdef VIMAGE netisr_register_vnet(ðer_nh); #endif } VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_ether_init, NULL); #ifdef VIMAGE static void vnet_ether_pfil_destroy(__unused void *arg) { pfil_head_unregister(V_link_pfil_head); } VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY, vnet_ether_pfil_destroy, NULL); static void vnet_ether_destroy(__unused void *arg) { netisr_unregister_vnet(ðer_nh); } VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_ether_destroy, NULL); #endif static void ether_input(struct ifnet *ifp, struct mbuf *m) { struct epoch_tracker et; struct mbuf *mn; bool needs_epoch; needs_epoch = !(ifp->if_flags & IFF_KNOWSEPOCH); /* * The drivers are allowed to pass in a chain of packets linked with * m_nextpkt. We split them up into separate packets here and pass * them up. This allows the drivers to amortize the receive lock. */ CURVNET_SET_QUIET(ifp->if_vnet); if (__predict_false(needs_epoch)) NET_EPOCH_ENTER(et); while (m) { mn = m->m_nextpkt; m->m_nextpkt = NULL; /* * We will rely on rcvif being set properly in the deferred * context, so assert it is correct here. 
MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p " "rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp)); netisr_dispatch(NETISR_ETHER, m); m = mn; } if (__predict_false(needs_epoch)) NET_EPOCH_EXIT(et); CURVNET_RESTORE(); } /* * Upper layer processing for a received Ethernet packet. */ void ether_demux(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; int i, isr; u_short ether_type; NET_EPOCH_ASSERT(); KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__)); /* Do not grab PROMISC frames in case we are re-entered. */ if (PFIL_HOOKED_IN(V_link_pfil_head) && !(m->m_flags & M_PROMISC)) { - i = pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_IN, NULL); + i = pfil_mbuf_in(V_link_pfil_head, &m, ifp, NULL); if (i != 0 || m == NULL) return; } eh = mtod(m, struct ether_header *); ether_type = ntohs(eh->ether_type); /* * If this frame has a VLAN tag other than 0, call vlan_input() * if its module is loaded. Otherwise, drop. */ if ((m->m_flags & M_VLANTAG) && EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) { if (ifp->if_vlantrunk == NULL) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); m_freem(m); return; } KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!", __func__)); /* Clear before possibly re-entering ether_input(). */ m->m_flags &= ~M_PROMISC; (*vlan_input_p)(ifp, m); return; } /* * Pass promiscuously received frames to the upper layer if the user * requested this by setting IFF_PPROMISC. Otherwise, drop them. */ if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) { m_freem(m); return; } /* * Reset layer specific mbuf flags to avoid confusing upper layers. */ m->m_flags &= ~M_VLANTAG; m_clrprotoflags(m); /* * Dispatch frame to upper layer. */ switch (ether_type) { #ifdef INET case ETHERTYPE_IP: isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } isr = NETISR_ARP; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif default: goto discard; } /* Strip off Ethernet header. */ m_adj(m, ETHER_HDR_LEN); netisr_dispatch(isr, m); return; discard: /* * Packet is to be discarded. If netgraph is present, * hand the packet to it for last chance processing; * otherwise dispose of it. */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_input_orphan_p != NULL, ("ng_ether_input_orphan_p is NULL")); (*ng_ether_input_orphan_p)(ifp, m); return; } m_freem(m); } /* * Convert Ethernet address to printable (loggable) representation. * This routine is for compatibility; it's better to just use * * printf("%6D", <pointer>, ":"); * * since there's no static buffer involved.
*/ char * ether_sprintf(const u_char *ap) { static char etherbuf[18]; snprintf(etherbuf, sizeof (etherbuf), "%6D", ap, ":"); return (etherbuf); } /* * Perform common duties while attaching to interface list */ void ether_ifattach(struct ifnet *ifp, const u_int8_t *lla) { int i; struct ifaddr *ifa; struct sockaddr_dl *sdl; ifp->if_addrlen = ETHER_ADDR_LEN; ifp->if_hdrlen = ETHER_HDR_LEN; ifp->if_mtu = ETHERMTU; if_attach(ifp); ifp->if_output = ether_output; ifp->if_input = ether_input; ifp->if_resolvemulti = ether_resolvemulti; ifp->if_requestencap = ether_requestencap; #ifdef VIMAGE ifp->if_reassign = ether_reassign; #endif if (ifp->if_baudrate == 0) ifp->if_baudrate = IF_Mbps(10); /* just a default */ ifp->if_broadcastaddr = etherbroadcastaddr; ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_ETHER; sdl->sdl_alen = ifp->if_addrlen; bcopy(lla, LLADDR(sdl), ifp->if_addrlen); if (ifp->if_hw_addr != NULL) bcopy(lla, ifp->if_hw_addr, ifp->if_addrlen); bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN); if (ng_ether_attach_p != NULL) (*ng_ether_attach_p)(ifp); /* Announce Ethernet MAC address if non-zero. */ for (i = 0; i < ifp->if_addrlen; i++) if (lla[i] != 0) break; if (i != ifp->if_addrlen) if_printf(ifp, "Ethernet address: %6D\n", lla, ":"); uuid_ether_add(LLADDR(sdl)); /* All necessary bits are set up; announce it now. */ EVENTHANDLER_INVOKE(ether_ifattach_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("ETHERNET", ifp->if_xname, "IFATTACH", NULL); } /* * Perform common duties while detaching an Ethernet interface */ void ether_ifdetach(struct ifnet *ifp) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr); uuid_ether_del(LLADDR(sdl)); if (ifp->if_l2com != NULL) { KASSERT(ng_ether_detach_p != NULL, ("ng_ether_detach_p is NULL")); (*ng_ether_detach_p)(ifp); } bpfdetach(ifp); if_detach(ifp); } #ifdef VIMAGE void ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused) { if (ifp->if_l2com != NULL) { KASSERT(ng_ether_detach_p != NULL, ("ng_ether_detach_p is NULL")); (*ng_ether_detach_p)(ifp); } if (ng_ether_attach_p != NULL) { CURVNET_SET_QUIET(new_vnet); (*ng_ether_attach_p)(ifp); CURVNET_RESTORE(); } } #endif SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Ethernet"); #if 0 /* * This is for reference. We have a table-driven version * of the little-endian crc32 generator, which is faster * than the double-loop.
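* * The table-driven version consumes four bits per step: with the * 16-entry table indexed by (crc & 0xf), each byte costs two * * crc = (crc >> 4) ^ crctab[crc & 0xf]; * * rounds instead of eight single-bit shift-and-XOR iterations.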
*/ uint32_t ether_crc32_le(const uint8_t *buf, size_t len) { size_t i; uint32_t crc, carry; int bit; uint8_t data; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) { carry = (crc ^ data) & 1; crc >>= 1; if (carry) crc = (crc ^ ETHER_CRC_POLY_LE); } } return (crc); } #else uint32_t ether_crc32_le(const uint8_t *buf, size_t len) { static const uint32_t crctab[] = { 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; size_t i; uint32_t crc; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { crc ^= buf[i]; crc = (crc >> 4) ^ crctab[crc & 0xf]; crc = (crc >> 4) ^ crctab[crc & 0xf]; } return (crc); } #endif uint32_t ether_crc32_be(const uint8_t *buf, size_t len) { size_t i; uint32_t crc, carry; int bit; uint8_t data; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) { carry = ((crc & 0x80000000) ? 1 : 0) ^ (data & 0x01); crc <<= 1; if (carry) crc = (crc ^ ETHER_CRC_POLY_BE) | carry; } } return (crc); } int ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0], ETHER_ADDR_LEN); break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > ETHERMTU) { error = EINVAL; } else { ifp->if_mtu = ifr->ifr_mtu; } break; case SIOCSLANPCP: error = priv_check(curthread, PRIV_NET_SETLANPCP); if (error != 0) break; if (ifr->ifr_lan_pcp > 7 && ifr->ifr_lan_pcp != IFNET_PCP_NONE) { error = EINVAL; } else { ifp->if_pcp = ifr->ifr_lan_pcp; /* broadcast event about PCP change */ EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP); } break; case SIOCGLANPCP: ifr->ifr_lan_pcp = ifp->if_pcp; break; default: error = EINVAL; /* XXX netbsd has ENOTTY??? */ break; } return (error); } static int ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if (!ETHER_IS_MULTICAST(e_addr)) return EADDRNOTAVAIL; *llsa = NULL; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ETHER_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.)
*/ ifp->if_flags |= IFF_ALLMULTI; *llsa = NULL; return 0; } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ETHER_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return EAFNOSUPPORT; } } static moduledata_t ether_mod = { .name = "ether", }; void ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen) { struct ether_vlan_header vlan; struct mbuf mv, mb; KASSERT((m->m_flags & M_VLANTAG) != 0, ("%s: vlan information not present", __func__)); KASSERT(m->m_len >= sizeof(struct ether_header), ("%s: mbuf not large enough for header", __func__)); bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header)); vlan.evl_proto = vlan.evl_encap_proto; vlan.evl_encap_proto = htons(ETHERTYPE_VLAN); vlan.evl_tag = htons(m->m_pkthdr.ether_vtag); m->m_len -= sizeof(struct ether_header); m->m_data += sizeof(struct ether_header); /* * If a data link has been supplied by the caller, then we will need to * re-create a stack allocated mbuf chain with the following structure: * * (1) mbuf #1 will contain the supplied data link * (2) mbuf #2 will contain the vlan header * (3) mbuf #3 will contain the original mbuf's packet data * * Otherwise, submit the packet and vlan header via bpf_mtap2(). */ if (data != NULL) { mv.m_next = m; mv.m_data = (caddr_t)&vlan; mv.m_len = sizeof(vlan); mb.m_next = &mv; mb.m_data = data; mb.m_len = dlen; bpf_mtap(bp, &mb); } else bpf_mtap2(bp, &vlan, sizeof(vlan), m); m->m_len += sizeof(struct ether_header); m->m_data -= sizeof(struct ether_header); } struct mbuf * ether_vlanencap_proto(struct mbuf *m, uint16_t tag, uint16_t proto) { struct ether_vlan_header *evl; M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); if (m == NULL) return (NULL); /* M_PREPEND takes care of m_len, m_pkthdr.len for us */ if (m->m_len < sizeof(*evl)) { m = m_pullup(m, sizeof(*evl)); if (m == NULL) return (NULL); } /* * Transform the Ethernet header into an Ethernet header * with 802.1Q encapsulation. */ evl = mtod(m, struct ether_vlan_header *); bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); evl->evl_encap_proto = htons(proto); evl->evl_tag = htons(tag); return (m); } void ether_bpf_mtap_if(struct ifnet *ifp, struct mbuf *m) { if (bpf_peers_present(ifp->if_bpf)) { M_ASSERTVALID(m); if ((m->m_flags & M_VLANTAG) != 0) ether_vlan_mtap(ifp->if_bpf, m, NULL, 0); else bpf_mtap(ifp->if_bpf, m); } } static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "IEEE 802.1Q VLAN"); static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "for consistency"); VNET_DEFINE_STATIC(int, soft_pad); #define V_soft_pad VNET(soft_pad) SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(soft_pad), 0, "pad short frames before tagging"); /* * For now, make preserving PCP via an mbuf tag optional, as it increases * per-packet memory allocations and frees. In the future, it would be * preferable to reuse ether_vtag for this, or similar. 
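 *
 * The tag payload is a single uint8_t holding the PCP value, stored
 * immediately after the m_tag header; hence the *(uint8_t *)(mtag + 1)
 * access in ether_8021q_frame() below.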
*/ VNET_DEFINE(int, vlan_mtag_pcp) = 0; #define V_vlan_mtag_pcp VNET(vlan_mtag_pcp) SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(vlan_mtag_pcp), 0, "Retain VLAN PCP information as packets are passed up the stack"); bool ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p, struct ether_8021q_tag *qtag) { struct m_tag *mtag; int n; uint16_t tag; static const char pad[8]; /* just zeros */ /* * Pad the frame to the minimum size allowed if told to. * This option is in accord with IEEE Std 802.1Q, 2003 Ed., * paragraph C.4.4.3.b. It can help to work around buggy * bridges that violate paragraph C.4.4.3.a from the same * document, i.e., fail to pad short frames after untagging. * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but * untagging it will produce a 62-byte frame, which is a runt * and requires padding. There are VLAN-enabled network * devices that just discard such runts instead or mishandle * them somehow. */ if (V_soft_pad && p->if_type == IFT_ETHER) { for (n = ETHERMIN + ETHER_HDR_LEN - (*mp)->m_pkthdr.len; n > 0; n -= sizeof(pad)) { if (!m_append(*mp, min(n, sizeof(pad)), pad)) break; } if (n > 0) { m_freem(*mp); *mp = NULL; if_printf(ife, "cannot pad short frame"); return (false); } } /* * If PCP is set in mbuf, use it */ if ((*mp)->m_flags & M_VLANTAG) { qtag->pcp = EVL_PRIOFTAG((*mp)->m_pkthdr.ether_vtag); } /* * If underlying interface can do VLAN tag insertion itself, * just pass the packet along. However, we need some way to * tell the interface where the packet came from so that it * knows how to find the VLAN tag to use, so we attach a * packet tag that holds it. */ if (V_vlan_mtag_pcp && (mtag = m_tag_locate(*mp, MTAG_8021Q, MTAG_8021Q_PCP_OUT, NULL)) != NULL) tag = EVL_MAKETAG(qtag->vid, *(uint8_t *)(mtag + 1), 0); else tag = EVL_MAKETAG(qtag->vid, qtag->pcp, 0); if ((p->if_capenable & IFCAP_VLAN_HWTAGGING) && (qtag->proto == ETHERTYPE_VLAN)) { (*mp)->m_pkthdr.ether_vtag = tag; (*mp)->m_flags |= M_VLANTAG; } else { *mp = ether_vlanencap_proto(*mp, tag, qtag->proto); if (*mp == NULL) { if_printf(ife, "unable to prepend 802.1Q header"); return (false); } } return (true); } /* * Allocate an address from the FreeBSD Foundation OUI. This uses a * cryptographic hash function on the containing jail's name, UUID and the * interface name to attempt to provide a unique but stable address. * Pseudo-interfaces which require a MAC address should use this function to * allocate non-locally-administered addresses. */ void ether_gen_addr(struct ifnet *ifp, struct ether_addr *hwaddr) { SHA1_CTX ctx; char *buf; char uuid[HOSTUUIDLEN + 1]; uint64_t addr; int i, sz; char digest[SHA1_RESULTLEN]; char jailname[MAXHOSTNAMELEN]; getcredhostuuid(curthread->td_ucred, uuid, sizeof(uuid)); if (strncmp(uuid, DEFAULT_HOSTUUID, sizeof(uuid)) == 0) { /* Fall back to a random mac address. */ goto rando; } /* If each (vnet) jail would also have a unique hostuuid this would not * be necessary. */ getjailname(curthread->td_ucred, jailname, sizeof(jailname)); sz = asprintf(&buf, M_TEMP, "%s-%s-%s", uuid, if_name(ifp), jailname); if (sz < 0) { /* Fall back to a random mac address. 
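 * (asprintf() returns -1 when the M_TEMP buffer cannot be allocated,
 * leaving nothing to hash; the fallback below produces a random
 * unicast, locally-administered address instead of one under the
 * FreeBSD Foundation OUI.)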
*/ goto rando; } SHA1Init(&ctx); SHA1Update(&ctx, buf, sz); SHA1Final(digest, &ctx); free(buf, M_TEMP); addr = ((digest[0] << 16) | (digest[1] << 8) | digest[2]) & OUI_FREEBSD_GENERATED_MASK; addr = OUI_FREEBSD(addr); for (i = 0; i < ETHER_ADDR_LEN; ++i) { hwaddr->octet[i] = addr >> ((ETHER_ADDR_LEN - i - 1) * 8) & 0xFF; } return; rando: arc4rand(hwaddr, sizeof(*hwaddr), 0); /* Unicast */ hwaddr->octet[0] &= 0xFE; /* Locally administered. */ hwaddr->octet[0] |= 0x02; } DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(ether, 1); diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 739138a6f791..e62935b247da 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -1,1596 +1,1595 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_ipsec.h" #include "opt_kern_tls.h" #include "opt_mbuf_stress_test.h" #include "opt_ratelimit.h" #include "opt_route.h" #include "opt_rss.h" #include "opt_sctp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SCTP) || defined(SCTP_SUPPORT) #include #include #endif #include #include #include #ifdef MBUF_STRESS_TEST static int mbuf_frag_size = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); #endif static void ip_mloopback(struct ifnet *, const struct mbuf *, int); extern int in_mcast_loop; static inline int ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, int flags, struct inpcb *inp, struct sockaddr_in *dst, int *fibnum, int *error) { struct m_tag *fwd_tag = NULL; struct mbuf *m; struct in_addr odst; struct ip *ip; - int pflags = PFIL_OUT; m = *mp; ip = mtod(m, struct ip *); /* Run through list of hooks for output packets. */ odst.s_addr = ip->ip_dst.s_addr; - switch (pfil_run_hooks(V_inet_pfil_head, mp, ifp, pflags, inp)) { + switch (pfil_mbuf_out(V_inet_pfil_head, mp, ifp, inp)) { case PFIL_DROPPED: *error = EACCES; /* FALLTHROUGH */ case PFIL_CONSUMED: return 1; /* Finished */ case PFIL_PASS: *error = 0; } m = *mp; ip = mtod(m, struct ip *); /* See if destination IP address was changed by packet filter. */ if (odst.s_addr != ip->ip_dst.s_addr) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip_input(). */ if (in_localip(ip->ip_dst)) { m->m_flags |= M_FASTFWD_OURS; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; #if defined(SCTP) || defined(SCTP_SUPPORT) if (m->m_pkthdr.csum_flags & CSUM_SCTP) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif *error = netisr_queue(NETISR_IP, m); return 1; /* Finished */ } bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; return -1; /* Reloop */ } /* See if fib was changed by packet filter. */ if ((*fibnum) != M_GETFIB(m)) { m->m_flags |= M_SKIP_FIREWALL; *fibnum = M_GETFIB(m); return -1; /* Reloop for FIB change */ } /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #if defined(SCTP) || defined(SCTP_SUPPORT) if (m->m_pkthdr.csum_flags & CSUM_SCTP) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; *error = netisr_queue(NETISR_IP, m); return 1; /* Finished */ } /* Or forward to some other address? 
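 * A firewall (e.g. ipfw's fwd rule) requests this by setting
 * M_IP_NEXTHOP and attaching a PACKET_TAG_IPFORWARD mbuf tag whose
 * payload, a struct sockaddr_in stored right after the tag header,
 * becomes the new destination below.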
*/ if ((m->m_flags & M_IP_NEXTHOP) && ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) { bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP_NEXTHOP; m_tag_delete(m, fwd_tag); return -1; /* Reloop for CHANGE of dst */ } return 0; } static int ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m, const struct sockaddr *gw, struct route *ro, bool stamp_tag) { #ifdef KERN_TLS struct ktls_session *tls = NULL; #endif struct m_snd_tag *mst; int error; MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); mst = NULL; #ifdef KERN_TLS /* * If this is an unencrypted TLS record, save a reference to * the record. This local reference is used to call * ktls_output_eagain after the mbuf has been freed (thus * dropping the mbuf's reference) in if_output. */ if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) { tls = ktls_hold(m->m_next->m_epg_tls); mst = tls->snd_tag; /* * If a TLS session doesn't have a valid tag, it must * have had an earlier ifp mismatch, so drop this * packet. */ if (mst == NULL) { m_freem(m); error = EAGAIN; goto done; } /* * Always stamp tags that include NIC ktls. */ stamp_tag = true; } #endif #ifdef RATELIMIT if (inp != NULL && mst == NULL) { if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 || (inp->inp_snd_tag != NULL && inp->inp_snd_tag->ifp != ifp)) in_pcboutput_txrtlmt(inp, ifp, m); if (inp->inp_snd_tag != NULL) mst = inp->inp_snd_tag; } #endif if (stamp_tag && mst != NULL) { KASSERT(m->m_pkthdr.rcvif == NULL, ("trying to add a send tag to a forwarded packet")); if (mst->ifp != ifp) { m_freem(m); error = EAGAIN; goto done; } /* stamp send tag on mbuf */ m->m_pkthdr.snd_tag = m_snd_tag_ref(mst); m->m_pkthdr.csum_flags |= CSUM_SND_TAG; } error = (*ifp->if_output)(ifp, m, gw, ro); done: /* Check for route change invalidating send tags. */ #ifdef KERN_TLS if (tls != NULL) { if (error == EAGAIN) error = ktls_output_eagain(inp, tls); ktls_free(tls); } #endif #ifdef RATELIMIT if (error == EAGAIN) in_pcboutput_eagain(inp); #endif return (error); } /* rte<>ro_flags translation */ static inline void rt_update_ro_flags(struct route *ro, const struct nhop_object *nh) { int nh_flags = nh->nh_flags; ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW); ro->ro_flags |= (nh_flags & NHF_REJECT) ? RT_REJECT : 0; ro->ro_flags |= (nh_flags & NHF_BLACKHOLE) ? RT_BLACKHOLE : 0; ro->ro_flags |= (nh_flags & NHF_GATEWAY) ? RT_HAS_GW : 0; } /* * IP output. The packet in mbuf chain m contains a skeletal IP * header (with len, off, ttl, proto, tos, src, dst). * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * If route ro is present and has ro_nh initialized, the route lookup is * skipped and ro->ro_nh is used. If ro is present but ro->ro_nh is NULL, * then the result of the route lookup is stored in ro->ro_nh. * * In the IP forwarding case, the packet will arrive with options already * inserted, so the caller must pass a NULL opt pointer.
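 *
 * A minimal call (sketch): fill in the skeletal header and invoke
 *
 *	error = ip_output(m, NULL, NULL, 0, NULL, NULL);
 *
 * letting ip_output() perform its own route lookup on ip_dst; m is
 * always consumed, so the caller must not touch it afterwards.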
*/ int ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp) { struct ip *ip; struct ifnet *ifp = NULL; /* keep compiler happy */ struct mbuf *m0; int hlen = sizeof (struct ip); int mtu = 0; int error = 0; int vlan_pcp = -1; struct sockaddr_in *dst; const struct sockaddr *gw; struct in_ifaddr *ia = NULL; struct in_addr src; int isbroadcast; uint16_t ip_len, ip_off; struct route iproute; uint32_t fibnum; #if defined(IPSEC) || defined(IPSEC_SUPPORT) int no_route_but_check_spd = 0; #endif M_ASSERTPKTHDR(m); NET_EPOCH_ASSERT(); if (inp != NULL) { INP_LOCK_ASSERT(inp); M_SETFIB(m, inp->inp_inc.inc_fibnum); if ((flags & IP_NODEFAULTFLOWID) == 0) { m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } if ((inp->inp_flags2 & INP_2PCP_SET) != 0) vlan_pcp = (inp->inp_flags2 & INP_2PCP_MASK) >> INP_2PCP_SHIFT; #ifdef NUMA m->m_pkthdr.numa_domain = inp->inp_numa_domain; #endif } if (opt) { int len = 0; m = ip_insertoptions(m, opt, &len); if (len != 0) hlen = len; /* ip->ip_hl is updated above */ } ip = mtod(m, struct ip *); ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { ip->ip_v = IPVERSION; ip->ip_hl = hlen >> 2; ip_fillid(ip); } else { /* Header already set, fetch hlen from there */ hlen = ip->ip_hl << 2; } if ((flags & IP_FORWARDING) == 0) IPSTAT_INC(ips_localout); /* * dst/gw handling: * * gw is readonly but can point either to dst OR rt_gateway, * therefore we need to restore gw if we're redoing the lookup. */ fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); if (ro == NULL) { ro = &iproute; bzero(ro, sizeof (*ro)); } dst = (struct sockaddr_in *)&ro->ro_dst; if (ro->ro_nh == NULL) { dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; } gw = (const struct sockaddr *)dst; again: /* * Validate route against routing table additions; * a better/more specific route might have been added. */ if (inp != NULL && ro->ro_nh != NULL) NH_VALIDATE(ro, &inp->inp_rt_cookie, fibnum); /* * If there is a cached route, * check that it is to the same destination * and is still up. If not, free it and try again. * The address family should also be checked in case of sharing the * cache with IPv6. * Also check whether routing cache needs invalidation. */ if (ro->ro_nh != NULL && ((!NH_IS_VALID(ro->ro_nh)) || dst->sin_family != AF_INET || dst->sin_addr.s_addr != ip->ip_dst.s_addr)) RO_INVALIDATE_CACHE(ro); ia = NULL; /* * If routing to interface only, short circuit routing lookup. * The use of an all-ones broadcast address implies this; an * interface is specified by the broadcast address of an interface, * or the destination address of a ptp interface. */ if (flags & IP_SENDONES) { if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst), M_GETFIB(m)))) == NULL && (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), M_GETFIB(m)))) == NULL) { IPSTAT_INC(ips_noroute); error = ENETUNREACH; goto bad; } ip->ip_dst.s_addr = INADDR_BROADCAST; dst->sin_addr = ip->ip_dst; ifp = ia->ia_ifp; mtu = ifp->if_mtu; ip->ip_ttl = 1; isbroadcast = 1; src = IA_SIN(ia)->sin_addr; } else if (flags & IP_ROUTETOIF) { if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), M_GETFIB(m)))) == NULL && (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0, M_GETFIB(m)))) == NULL) { IPSTAT_INC(ips_noroute); error = ENETUNREACH; goto bad; } ifp = ia->ia_ifp; mtu = ifp->if_mtu; ip->ip_ttl = 1; isbroadcast = ifp->if_flags & IFF_BROADCAST ?
in_ifaddr_broadcast(dst->sin_addr, ia) : 0; src = IA_SIN(ia)->sin_addr; } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && imo != NULL && imo->imo_multicast_ifp != NULL) { /* * Bypass the normal routing lookup for multicast * packets if the interface is specified. */ ifp = imo->imo_multicast_ifp; mtu = ifp->if_mtu; IFP_TO_IA(ifp, ia); isbroadcast = 0; /* fool gcc */ /* Interface may have no addresses. */ if (ia != NULL) src = IA_SIN(ia)->sin_addr; else src.s_addr = INADDR_ANY; } else if (ro != &iproute) { if (ro->ro_nh == NULL) { /* * We want to do any cloning requested by the link * layer, as this is probably required in all cases * for correct operation (as it is for ARP). */ uint32_t flowid; flowid = m->m_pkthdr.flowid; ro->ro_nh = fib4_lookup(fibnum, dst->sin_addr, 0, NHR_REF, flowid); if (ro->ro_nh == NULL || (!NH_IS_VALID(ro->ro_nh))) { #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * There is no route for this packet, but it is * possible that a matching SPD entry exists. */ no_route_but_check_spd = 1; goto sendit; #endif IPSTAT_INC(ips_noroute); error = EHOSTUNREACH; goto bad; } } struct nhop_object *nh = ro->ro_nh; ia = ifatoia(nh->nh_ifa); ifp = nh->nh_ifp; counter_u64_add(nh->nh_pksent, 1); rt_update_ro_flags(ro, nh); if (nh->nh_flags & NHF_GATEWAY) gw = &nh->gw_sa; if (nh->nh_flags & NHF_HOST) isbroadcast = (nh->nh_flags & NHF_BROADCAST); else if ((ifp->if_flags & IFF_BROADCAST) && (gw->sa_family == AF_INET)) isbroadcast = in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia); else isbroadcast = 0; mtu = nh->nh_mtu; src = IA_SIN(ia)->sin_addr; } else { struct nhop_object *nh; nh = fib4_lookup(M_GETFIB(m), dst->sin_addr, 0, NHR_NONE, m->m_pkthdr.flowid); if (nh == NULL) { #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * There is no route for this packet, but it is * possible that a matching SPD entry exists. */ no_route_but_check_spd = 1; goto sendit; #endif IPSTAT_INC(ips_noroute); error = EHOSTUNREACH; goto bad; } ifp = nh->nh_ifp; mtu = nh->nh_mtu; rt_update_ro_flags(ro, nh); if (nh->nh_flags & NHF_GATEWAY) gw = &nh->gw_sa; ia = ifatoia(nh->nh_ifa); src = IA_SIN(ia)->sin_addr; isbroadcast = (((nh->nh_flags & (NHF_HOST | NHF_BROADCAST)) == (NHF_HOST | NHF_BROADCAST)) || ((ifp->if_flags & IFF_BROADCAST) && (gw->sa_family == AF_INET) && in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia))); } /* Catch a possible divide by zero later. */ KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (nh_flags=0x%08x) ifp=%p", __func__, mtu, ro, (ro != NULL && ro->ro_nh != NULL) ? ro->ro_nh->nh_flags : 0, ifp)); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { m->m_flags |= M_MCAST; /* * IP destination address is multicast. Make sure "gw" * still points to the address in "ro". (It may have been * changed to point to a gateway address, above.) */ gw = (const struct sockaddr *)dst; /* * See if the caller provided any multicast options */ if (imo != NULL) { ip->ip_ttl = imo->imo_multicast_ttl; if (imo->imo_multicast_vif != -1) ip->ip_src.s_addr = ip_mcast_src ? ip_mcast_src(imo->imo_multicast_vif) : INADDR_ANY; } else ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; /* * Confirm that the outgoing interface supports multicast. */ if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { if ((ifp->if_flags & IFF_MULTICAST) == 0) { IPSTAT_INC(ips_noroute); error = ENETUNREACH; goto bad; } } /* * If source address not specified yet, use address * of outgoing interface. 
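 * For example, a socket sending to a multicast group without binding
 * a local address reaches this point with ip_src still INADDR_ANY and
 * inherits the address of the multicast interface selected above.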
*/ if (ip->ip_src.s_addr == INADDR_ANY) ip->ip_src = src; if ((imo == NULL && in_mcast_loop) || (imo && imo->imo_multicast_loop)) { /* * Loop back multicast datagram if not expressly * forbidden to do so, even if we are not a member * of the group; ip_input() will filter it later, * thus deferring a hash lookup and mutex acquisition * at the expense of a cheap copy using m_copym(). */ ip_mloopback(ifp, m, hlen); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IP_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip_mloopback(), * above, will be forwarded by the ip_input() routine, * if necessary. */ if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) { /* * If the rsvp daemon is not running, do not * set ip_moptions. This ensures that the packet * is multicast and not just sent down one link * as prescribed by rsvpd. */ if (!V_rsvp_on) imo = NULL; if (ip_mforward && ip_mforward(ip, ifp, m, imo) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a time-to-live of zero may be looped back, * above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip_mloopback() will * loop back a copy. ip_input() will drop the copy if * this host does not belong to the destination group on * the loopback interface. */ if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { m_freem(m); goto done; } goto sendit; } /* * If the source address is not specified yet, use the address * of the outgoing interface. */ if (ip->ip_src.s_addr == INADDR_ANY) ip->ip_src = src; /* * Look for broadcast address and * verify user is allowed to send * such a packet. */ if (isbroadcast) { if ((ifp->if_flags & IFF_BROADCAST) == 0) { error = EADDRNOTAVAIL; goto bad; } if ((flags & IP_ALLOWBROADCAST) == 0) { error = EACCES; goto bad; } /* don't allow broadcast messages to be fragmented */ if (ip_len > mtu) { error = EMSGSIZE; goto bad; } m->m_flags |= M_BCAST; } else { m->m_flags &= ~M_BCAST; } sendit: #if defined(IPSEC) || defined(IPSEC_SUPPORT) if (IPSEC_ENABLED(ipv4)) { if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) { if (error == EINPROGRESS) error = 0; goto done; } } /* * Check if there was a route for this packet; return error if not. */ if (no_route_but_check_spd) { IPSTAT_INC(ips_noroute); error = EHOSTUNREACH; goto bad; } /* Update variables that are affected by ipsec4_output(). */ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; #endif /* IPSEC */ /* Jump over all PFIL processing if hooks are not active. */ if (PFIL_HOOKED_OUT(V_inet_pfil_head)) { switch (ip_output_pfil(&m, ifp, flags, inp, dst, &fibnum, &error)) { case 1: /* Finished */ goto done; case 0: /* Continue normally */ ip = mtod(m, struct ip *); break; case -1: /* Need to try again */ /* Reset everything for a new round */ if (ro != NULL) { RO_NHFREE(ro); ro->ro_prepend = NULL; } gw = (const struct sockaddr *)dst; ip = mtod(m, struct ip *); goto again; } } if (vlan_pcp > -1) EVL_APPLY_PRI(m, vlan_pcp); /* IN_LOOPBACK must not appear on the wire - RFC1122. */ if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) || IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) { if ((ifp->if_flags & IFF_LOOPBACK) == 0) { IPSTAT_INC(ips_badaddr); error = EADDRNOTAVAIL; goto bad; } } /* Ensure the packet data is mapped if the interface requires it.
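 * Unmapped (M_EXTPG) mbufs carry raw pages with no KVA mapping, as
 * produced by sendfile(2) and kernel TLS; a driver that does not
 * advertise IFCAP_MEXTPG needs mb_unmapped_to_ext() to convert the
 * chain back to ordinary mapped clusters first.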
*/ if ((ifp->if_capenable & IFCAP_MEXTPG) == 0) { m = mb_unmapped_to_ext(m); if (m == NULL) { IPSTAT_INC(ips_odropped); error = ENOBUFS; goto bad; } } m->m_pkthdr.csum_flags |= CSUM_IP; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) { in_delayed_cksum(m); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } #if defined(SCTP) || defined(SCTP_SUPPORT) if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) { sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); m->m_pkthdr.csum_flags &= ~CSUM_SCTP; } #endif /* * If small enough for interface, or the interface will take * care of the fragmentation for us, we can just send directly. * Note that if_vxlan could have requested TSO even though the outer * frame is UDP. It is correct to not fragment such datagrams and * instead just pass them on to the driver. */ if (ip_len <= mtu || (m->m_pkthdr.csum_flags & ifp->if_hwassist & (CSUM_TSO | CSUM_INNER_TSO)) != 0) { ip->ip_sum = 0; if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) { ip->ip_sum = in_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~CSUM_IP; } /* * Record statistics for this interface address. * With CSUM_TSO the byte/packet count will be slightly * incorrect because we count the IP+TCP headers only * once instead of for every generated packet. */ if (!(flags & IP_FORWARDING) && ia) { if (m->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_INNER_TSO)) counter_u64_add(ia->ia_ifa.ifa_opackets, m->m_pkthdr.len / m->m_pkthdr.tso_segsz); else counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } #ifdef MBUF_STRESS_TEST if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) m = m_fragment(m, M_NOWAIT, mbuf_frag_size); #endif /* * Reset layer specific mbuf flags * to avoid confusing lower layers. */ m_clrprotoflags(m); IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); error = ip_output_send(inp, ifp, m, gw, ro, (flags & IP_NO_SND_TAG_RL) ? false : true); goto done; } /* Balk when the DF bit is set or the interface doesn't support TSO. */ if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_INNER_TSO))) { error = EMSGSIZE; IPSTAT_INC(ips_cantfrag); goto bad; } /* * Too large for interface; fragment if possible. If successful, * on return, m will point to a list of packets to be sent. */ error = ip_fragment(ip, &m, mtu, ifp->if_hwassist); if (error) goto bad; for (; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address. */ if (ia != NULL) { counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } /* * Reset layer specific mbuf flags * to avoid confusing lower layers. */ m_clrprotoflags(m); IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp, mtod(m, struct ip *), NULL); error = ip_output_send(inp, ifp, m, gw, ro, true); } else m_freem(m); } if (error == 0) IPSTAT_INC(ips_fragmented); done: return (error); bad: m_freem(m); goto done; } /* * Create a chain of fragments which fit the given mtu. m_frag points to the * mbuf to be fragmented; on return it points to the chain with the fragments. * Return 0 if no error. If error, m_frag may contain a partially built * chain of fragments that should be freed by the caller.
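 *
 * Callers walk the resulting m_nextpkt chain the way ip_output()
 * does above (sketch, mirroring the caller in ip_output()):
 *
 *	error = ip_fragment(ip, &m, mtu, ifp->if_hwassist);
 *	for (; m != NULL; m = m0) {
 *		m0 = m->m_nextpkt;
 *		m->m_nextpkt = NULL;
 *		if (error == 0)
 *			error = ip_output_send(inp, ifp, m, gw, ro, true);
 *		else
 *			m_freem(m);
 *	}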
* * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) */ int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, u_long if_hwassist_flags) { int error = 0; int hlen = ip->ip_hl << 2; int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ int off; struct mbuf *m0 = *m_frag; /* the original packet */ int firstlen; struct mbuf **mnext; int nfrags; uint16_t ip_len, ip_off; ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); /* * The packet must not have the "Don't Fragment" flag set, and each * fragment must carry at least 8 bytes of payload. */ if (__predict_false((ip_off & IP_DF) || len < 8)) { IPSTAT_INC(ips_cantfrag); return (EMSGSIZE); } /* * If the interface will not calculate checksums on * fragmented packets, then do it here. */ if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(m0); m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } #if defined(SCTP) || defined(SCTP_SUPPORT) if (m0->m_pkthdr.csum_flags & CSUM_SCTP) { sctp_delayed_cksum(m0, hlen); m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; } #endif if (len > PAGE_SIZE) { /* * Fragment large datagrams such that each segment * contains a multiple of PAGE_SIZE amount of data, * plus headers. This enables a receiver to perform * page-flipping zero-copy optimizations. * * XXX When does this help given that sender and receiver * could have different page sizes, and also mtu could * be less than the receiver's page size ? */ int newlen; off = MIN(mtu, m0->m_pkthdr.len); /* * firstlen (off - hlen) must be aligned on an * 8-byte boundary */ if (off < hlen) goto smart_frag_failure; off = ((off - hlen) & ~7) + hlen; newlen = (~PAGE_MASK) & mtu; if ((newlen + sizeof (struct ip)) > mtu) { /* we failed, go back to the default */ smart_frag_failure: newlen = len; off = hlen + len; } len = newlen; } else { off = hlen + len; } firstlen = off - hlen; mnext = &m0->m_nextpkt; /* pointer to next packet */ /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto chain. * Here, m0 is the original packet, m is the fragment being created. * The fragments are linked off the m_nextpkt of the original * packet, which after processing serves as the first fragment. */ for (nfrags = 1; off < ip_len; off += len, nfrags++) { struct ip *mhip; /* ip header on the fragment */ struct mbuf *m; int mhlen = sizeof (struct ip); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; IPSTAT_INC(ips_odropped); goto done; } /* * Make sure the complete packet header gets copied * from the originating mbuf to the newly created * mbuf. This also ensures that existing firewall * classification(s), VLAN tags and so on get copied * to the resulting fragmented packet(s): */ if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) { m_free(m); error = ENOBUFS; IPSTAT_INC(ips_odropped); goto done; } /* * In the first mbuf, leave room for the link header, then * copy the original IP header including options. The payload * goes into an additional mbuf chain returned by m_copym(). */ m->m_data += max_linkhdr; mhip = mtod(m, struct ip *); *mhip = *ip; if (hlen > sizeof (struct ip)) { mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); mhip->ip_v = IPVERSION; mhip->ip_hl = mhlen >> 2; } m->m_len = mhlen; /* XXX do we need to add ip_off below ?
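 * Presumably yes: if the datagram being split is itself a fragment,
 * adding ip_off keeps the new offsets relative to the start of the
 * original datagram and carries a set IP_MF flag through to every
 * piece.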
*/ mhip->ip_off = ((off - hlen) >> 3) + ip_off; if (off + len >= ip_len) len = ip_len - off; else mhip->ip_off |= IP_MF; mhip->ip_len = htons((u_short)(len + mhlen)); m->m_next = m_copym(m0, off, len, M_NOWAIT); if (m->m_next == NULL) { /* copy failed */ m_free(m); error = ENOBUFS; /* ??? */ IPSTAT_INC(ips_odropped); goto done; } m->m_pkthdr.len = mhlen + len; #ifdef MAC mac_netinet_fragment(m0, m); #endif mhip->ip_off = htons(mhip->ip_off); mhip->ip_sum = 0; if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { mhip->ip_sum = in_cksum(m, mhlen); m->m_pkthdr.csum_flags &= ~CSUM_IP; } *mnext = m; mnext = &m->m_nextpkt; } IPSTAT_ADD(ips_ofragments, nfrags); /* * Update first fragment by trimming what's been copied out * and updating header. */ m_adj(m0, hlen + firstlen - ip_len); m0->m_pkthdr.len = hlen + firstlen; ip->ip_len = htons((u_short)m0->m_pkthdr.len); ip->ip_off = htons(ip_off | IP_MF); ip->ip_sum = 0; if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { ip->ip_sum = in_cksum(m0, hlen); m0->m_pkthdr.csum_flags &= ~CSUM_IP; } done: *m_frag = m0; return error; } void in_delayed_cksum(struct mbuf *m) { struct ip *ip; struct udphdr *uh; uint16_t cklen, csum, offset; ip = mtod(m, struct ip *); offset = ip->ip_hl << 2; if (m->m_pkthdr.csum_flags & CSUM_UDP) { /* if the udp header is not in the first mbuf, copy udplen */ if (offset + sizeof(struct udphdr) > m->m_len) { m_copydata(m, offset + offsetof(struct udphdr, uh_ulen), sizeof(cklen), (caddr_t)&cklen); cklen = ntohs(cklen); } else { uh = (struct udphdr *)mtodo(m, offset); cklen = ntohs(uh->uh_ulen); } csum = in_cksum_skip(m, cklen + offset, offset); if (csum == 0) csum = 0xffff; } else { cklen = ntohs(ip->ip_len); csum = in_cksum_skip(m, cklen, offset); } offset += m->m_pkthdr.csum_data; /* checksum offset */ if (offset + sizeof(csum) > m->m_len) m_copyback(m, offset, sizeof(csum), (caddr_t)&csum); else *(u_short *)mtodo(m, offset) = csum; } /* * IP socket option processing. */ int ip_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp = sotoinpcb(so); int error, optval; #ifdef RSS uint32_t rss_bucket; int retval; #endif error = optval = 0; if (sopt->sopt_level != IPPROTO_IP) { error = EINVAL; if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_dir == SOPT_SET) { switch (sopt->sopt_name) { case SO_REUSEADDR: INP_WLOCK(inp); if ((so->so_options & SO_REUSEADDR) != 0) inp->inp_flags2 |= INP_REUSEADDR; else inp->inp_flags2 &= ~INP_REUSEADDR; INP_WUNLOCK(inp); error = 0; break; case SO_REUSEPORT: INP_WLOCK(inp); if ((so->so_options & SO_REUSEPORT) != 0) inp->inp_flags2 |= INP_REUSEPORT; else inp->inp_flags2 &= ~INP_REUSEPORT; INP_WUNLOCK(inp); error = 0; break; case SO_REUSEPORT_LB: INP_WLOCK(inp); if ((so->so_options & SO_REUSEPORT_LB) != 0) inp->inp_flags2 |= INP_REUSEPORT_LB; else inp->inp_flags2 &= ~INP_REUSEPORT_LB; INP_WUNLOCK(inp); error = 0; break; case SO_SETFIB: INP_WLOCK(inp); inp->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(inp); error = 0; break; case SO_MAX_PACING_RATE: #ifdef RATELIMIT INP_WLOCK(inp); inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; INP_WUNLOCK(inp); error = 0; #else error = EOPNOTSUPP; #endif break; default: break; } } return (error); } switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { case IP_OPTIONS: #ifdef notyet case IP_RETOPTS: #endif { struct mbuf *m; if (sopt->sopt_valsize > MLEN) { error = EMSGSIZE; break; } m = m_get(sopt->sopt_td ?
M_WAITOK : M_NOWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; break; } m->m_len = sopt->sopt_valsize; error = sooptcopyin(sopt, mtod(m, char *), m->m_len, m->m_len); if (error) { m_free(m); break; } INP_WLOCK(inp); error = ip_pcbopts(inp, sopt->sopt_name, m); INP_WUNLOCK(inp); return (error); } case IP_BINDANY: if (sopt->sopt_td != NULL) { error = priv_check(sopt->sopt_td, PRIV_NETINET_BINDANY); if (error) break; } /* FALLTHROUGH */ case IP_BINDMULTI: #ifdef RSS case IP_RSS_LISTEN_BUCKET: #endif case IP_TOS: case IP_TTL: case IP_MINTTL: case IP_RECVOPTS: case IP_RECVRETOPTS: case IP_ORIGDSTADDR: case IP_RECVDSTADDR: case IP_RECVTTL: case IP_RECVIF: case IP_ONESBCAST: case IP_DONTFRAG: case IP_RECVTOS: case IP_RECVFLOWID: #ifdef RSS case IP_RECVRSSBUCKETID: #endif case IP_VLAN_PCP: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (sopt->sopt_name) { case IP_TOS: inp->inp_ip_tos = optval; break; case IP_TTL: inp->inp_ip_ttl = optval; break; case IP_MINTTL: if (optval >= 0 && optval <= MAXTTL) inp->inp_ip_minttl = optval; else error = EINVAL; break; #define OPTSET(bit) do { \ INP_WLOCK(inp); \ if (optval) \ inp->inp_flags |= bit; \ else \ inp->inp_flags &= ~bit; \ INP_WUNLOCK(inp); \ } while (0) #define OPTSET2(bit, val) do { \ INP_WLOCK(inp); \ if (val) \ inp->inp_flags2 |= bit; \ else \ inp->inp_flags2 &= ~bit; \ INP_WUNLOCK(inp); \ } while (0) case IP_RECVOPTS: OPTSET(INP_RECVOPTS); break; case IP_RECVRETOPTS: OPTSET(INP_RECVRETOPTS); break; case IP_RECVDSTADDR: OPTSET(INP_RECVDSTADDR); break; case IP_ORIGDSTADDR: OPTSET2(INP_ORIGDSTADDR, optval); break; case IP_RECVTTL: OPTSET(INP_RECVTTL); break; case IP_RECVIF: OPTSET(INP_RECVIF); break; case IP_ONESBCAST: OPTSET(INP_ONESBCAST); break; case IP_DONTFRAG: OPTSET(INP_DONTFRAG); break; case IP_BINDANY: OPTSET(INP_BINDANY); break; case IP_RECVTOS: OPTSET(INP_RECVTOS); break; case IP_BINDMULTI: OPTSET2(INP_BINDMULTI, optval); break; case IP_RECVFLOWID: OPTSET2(INP_RECVFLOWID, optval); break; #ifdef RSS case IP_RSS_LISTEN_BUCKET: if ((optval >= 0) && (optval < rss_getnumbuckets())) { inp->inp_rss_listen_bucket = optval; OPTSET2(INP_RSS_BUCKET_SET, 1); } else { error = EINVAL; } break; case IP_RECVRSSBUCKETID: OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif case IP_VLAN_PCP: if ((optval >= -1) && (optval <= (INP_2PCP_MASK >> INP_2PCP_SHIFT))) { if (optval == -1) { INP_WLOCK(inp); inp->inp_flags2 &= ~(INP_2PCP_SET | INP_2PCP_MASK); INP_WUNLOCK(inp); } else { INP_WLOCK(inp); inp->inp_flags2 |= INP_2PCP_SET; inp->inp_flags2 &= ~INP_2PCP_MASK; inp->inp_flags2 |= optval << INP_2PCP_SHIFT; INP_WUNLOCK(inp); } } else error = EINVAL; break; } break; #undef OPTSET #undef OPTSET2 /* * Multicast socket options are processed by the in_mcast * module. 
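 * For example, a userland setsockopt(s, IPPROTO_IP, IP_ADD_MEMBERSHIP,
 * &mreq, sizeof(mreq)) ends up in inp_setmoptions() below.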
*/ case IP_MULTICAST_IF: case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: case IP_DROP_MEMBERSHIP: case IP_ADD_SOURCE_MEMBERSHIP: case IP_DROP_SOURCE_MEMBERSHIP: case IP_BLOCK_SOURCE: case IP_UNBLOCK_SOURCE: case IP_MSFILTER: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: error = inp_setmoptions(inp, sopt); break; case IP_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; INP_WLOCK(inp); switch (optval) { case IP_PORTRANGE_DEFAULT: inp->inp_flags &= ~(INP_LOWPORT); inp->inp_flags &= ~(INP_HIGHPORT); break; case IP_PORTRANGE_HIGH: inp->inp_flags &= ~(INP_LOWPORT); inp->inp_flags |= INP_HIGHPORT; break; case IP_PORTRANGE_LOW: inp->inp_flags &= ~(INP_HIGHPORT); inp->inp_flags |= INP_LOWPORT; break; default: error = EINVAL; break; } INP_WUNLOCK(inp); break; #if defined(IPSEC) || defined(IPSEC_SUPPORT) case IP_IPSEC_POLICY: if (IPSEC_ENABLED(ipv4)) { error = IPSEC_PCBCTL(ipv4, inp, sopt); break; } /* FALLTHROUGH */ #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (sopt->sopt_name) { case IP_OPTIONS: case IP_RETOPTS: INP_RLOCK(inp); if (inp->inp_options) { struct mbuf *options; options = m_copym(inp->inp_options, 0, M_COPYALL, M_NOWAIT); INP_RUNLOCK(inp); if (options != NULL) { error = sooptcopyout(sopt, mtod(options, char *), options->m_len); m_freem(options); } else error = ENOMEM; } else { INP_RUNLOCK(inp); sopt->sopt_valsize = 0; } break; case IP_TOS: case IP_TTL: case IP_MINTTL: case IP_RECVOPTS: case IP_RECVRETOPTS: case IP_ORIGDSTADDR: case IP_RECVDSTADDR: case IP_RECVTTL: case IP_RECVIF: case IP_PORTRANGE: case IP_ONESBCAST: case IP_DONTFRAG: case IP_BINDANY: case IP_RECVTOS: case IP_BINDMULTI: case IP_FLOWID: case IP_FLOWTYPE: case IP_RECVFLOWID: #ifdef RSS case IP_RSSBUCKETID: case IP_RECVRSSBUCKETID: #endif case IP_VLAN_PCP: switch (sopt->sopt_name) { case IP_TOS: optval = inp->inp_ip_tos; break; case IP_TTL: optval = inp->inp_ip_ttl; break; case IP_MINTTL: optval = inp->inp_ip_minttl; break; #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) #define OPTBIT2(bit) (inp->inp_flags2 & bit ? 
1 : 0) case IP_RECVOPTS: optval = OPTBIT(INP_RECVOPTS); break; case IP_RECVRETOPTS: optval = OPTBIT(INP_RECVRETOPTS); break; case IP_RECVDSTADDR: optval = OPTBIT(INP_RECVDSTADDR); break; case IP_ORIGDSTADDR: optval = OPTBIT2(INP_ORIGDSTADDR); break; case IP_RECVTTL: optval = OPTBIT(INP_RECVTTL); break; case IP_RECVIF: optval = OPTBIT(INP_RECVIF); break; case IP_PORTRANGE: if (inp->inp_flags & INP_HIGHPORT) optval = IP_PORTRANGE_HIGH; else if (inp->inp_flags & INP_LOWPORT) optval = IP_PORTRANGE_LOW; else optval = 0; break; case IP_ONESBCAST: optval = OPTBIT(INP_ONESBCAST); break; case IP_DONTFRAG: optval = OPTBIT(INP_DONTFRAG); break; case IP_BINDANY: optval = OPTBIT(INP_BINDANY); break; case IP_RECVTOS: optval = OPTBIT(INP_RECVTOS); break; case IP_FLOWID: optval = inp->inp_flowid; break; case IP_FLOWTYPE: optval = inp->inp_flowtype; break; case IP_RECVFLOWID: optval = OPTBIT2(INP_RECVFLOWID); break; #ifdef RSS case IP_RSSBUCKETID: retval = rss_hash2bucket(inp->inp_flowid, inp->inp_flowtype, &rss_bucket); if (retval == 0) optval = rss_bucket; else error = EINVAL; break; case IP_RECVRSSBUCKETID: optval = OPTBIT2(INP_RECVRSSBUCKETID); break; #endif case IP_BINDMULTI: optval = OPTBIT2(INP_BINDMULTI); break; case IP_VLAN_PCP: if (OPTBIT2(INP_2PCP_SET)) { optval = (inp->inp_flags2 & INP_2PCP_MASK) >> INP_2PCP_SHIFT; } else { optval = -1; } break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; /* * Multicast socket options are processed by the in_mcast * module. */ case IP_MULTICAST_IF: case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_MSFILTER: error = inp_getmoptions(inp, sopt); break; #if defined(IPSEC) || defined(IPSEC_SUPPORT) case IP_IPSEC_POLICY: if (IPSEC_ENABLED(ipv4)) { error = IPSEC_PCBCTL(ipv4, inp, sopt); break; } /* FALLTHROUGH */ #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; } return (error); } /* * Routine called from ip_output() to loop back a copy of an IP multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be a loopback interface -- evil, but easier than * replicating that code here. */ static void ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen) { struct ip *ip; struct mbuf *copym; /* * Make a deep copy of the packet because we're going to * modify the packet in order to generate checksums. */ copym = m_dup(m, M_NOWAIT); if (copym != NULL && (!M_WRITABLE(copym) || copym->m_len < hlen)) copym = m_pullup(copym, hlen); if (copym != NULL) { /* If needed, compute the checksum and mark it as valid. */ if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(copym); copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; copym->m_pkthdr.csum_data = 0xffff; } /* * We don't bother to fragment if the IP length is greater * than the interface's MTU. Can this possibly matter? */ ip = mtod(copym, struct ip *); ip->ip_sum = 0; ip->ip_sum = in_cksum(copym, hlen); if_simloop(ifp, copym, AF_INET, 0); } }