Index: head/sys/dev/vmware/vmxnet3/if_vmx.c
===================================================================
--- head/sys/dev/vmware/vmxnet3/if_vmx.c	(revision 357041)
+++ head/sys/dev/vmware/vmxnet3/if_vmx.c	(revision 357042)
@@ -1,2416 +1,2464 @@
 /*-
  * Copyright (c) 2013 Tsubai Masanari
  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
  * Copyright (c) 2018 Patrick Kelsey
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *
  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  *
  * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
  */
 
 /* Driver for VMware vmxnet3 virtual ethernet devices. */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_rss.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/endian.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/smp.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_media.h>
 #include <net/if_vlan_var.h>
 #include <net/iflib.h>
+#ifdef RSS
+#include <net/rss_config.h>
+#endif
 
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/udp.h>
 #include <netinet/tcp.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
 #include <sys/bus.h>
 #include <sys/rman.h>
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 
 #include "ifdi_if.h"
 
 #include "if_vmxreg.h"
 #include "if_vmxvar.h"
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 
 #define VMXNET3_VMWARE_VENDOR_ID	0x15AD
 #define VMXNET3_VMWARE_DEVICE_ID	0x07B0
 
 static pci_vendor_info_t vmxnet3_vendor_info_array[] =
 {
 	PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
 	/* required last entry */
 	PVID_END
 };
 
 static void	*vmxnet3_register(device_t);
 static int	vmxnet3_attach_pre(if_ctx_t);
 static int	vmxnet3_msix_intr_assign(if_ctx_t, int);
 static void	vmxnet3_free_irqs(struct vmxnet3_softc *);
 static int	vmxnet3_attach_post(if_ctx_t);
 static int	vmxnet3_detach(if_ctx_t);
 static int	vmxnet3_shutdown(if_ctx_t);
 static int	vmxnet3_suspend(if_ctx_t);
 static int	vmxnet3_resume(if_ctx_t);
 
 static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
 static void	vmxnet3_free_resources(struct vmxnet3_softc *);
 static int	vmxnet3_check_version(struct vmxnet3_softc *);
 static void	vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
 
 static int	vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
 static void	vmxnet3_init_txq(struct vmxnet3_softc *, int);
 static int	vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
 static void	vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
 static int	vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
 static void	vmxnet3_queues_free(if_ctx_t);
 
 static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
 static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
 static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
 static void	vmxnet3_free_mcast_table(struct vmxnet3_softc *);
 static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
 static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
 static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
 static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
 static void	vmxnet3_free_data(struct vmxnet3_softc *);
 
 static void	vmxnet3_evintr(struct vmxnet3_softc *);
 static int	vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
 static void	vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
 static int	vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
 static int	vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
 static int	vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
 static void	vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
 static void	vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
 static int	vmxnet3_legacy_intr(void *);
 static int	vmxnet3_rxq_intr(void *);
 static int	vmxnet3_event_intr(void *);
 
 static void	vmxnet3_stop(if_ctx_t);
 
 static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
 static void	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
 static void	vmxnet3_reinit_queues(struct vmxnet3_softc *);
 static int	vmxnet3_enable_device(struct vmxnet3_softc *);
 static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
 static void	vmxnet3_init(if_ctx_t);
 static void	vmxnet3_multi_set(if_ctx_t);
 static int	vmxnet3_mtu_set(if_ctx_t, uint32_t);
 static void	vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
 static int	vmxnet3_media_change(if_ctx_t);
 static int	vmxnet3_promisc_set(if_ctx_t, int);
 static uint64_t	vmxnet3_get_counter(if_ctx_t, ift_counter);
 static void	vmxnet3_update_admin_status(if_ctx_t);
 static void	vmxnet3_txq_timer(if_ctx_t, uint16_t);
 
 static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
 		    uint16_t);
 static void	vmxnet3_vlan_register(if_ctx_t, uint16_t);
 static void	vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
 static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
 
 static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
 static int	vmxnet3_link_is_up(struct vmxnet3_softc *);
 static void	vmxnet3_link_status(struct vmxnet3_softc *);
 static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
 static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
 
 static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
 static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
 static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
 static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
 
 static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
 		    uint32_t);
 static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
 static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
 		    uint32_t);
 static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
 static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
 
 static int	vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
 static int	vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
 static void	vmxnet3_link_intr_enable(if_ctx_t);
 static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
 static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
 static void	vmxnet3_intr_enable_all(if_ctx_t);
 static void	vmxnet3_intr_disable_all(if_ctx_t);
 
 typedef enum {
 	VMXNET3_BARRIER_RD,
 	VMXNET3_BARRIER_WR,
 	VMXNET3_BARRIER_RDWR,
 } vmxnet3_barrier_t;
 
 static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
 
 
 static device_method_t vmxnet3_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_register, vmxnet3_register),
 	DEVMETHOD(device_probe, iflib_device_probe),
 	DEVMETHOD(device_attach, iflib_device_attach),
 	DEVMETHOD(device_detach, iflib_device_detach),
 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
 	DEVMETHOD(device_suspend, iflib_device_suspend),
 	DEVMETHOD(device_resume, iflib_device_resume),
 	DEVMETHOD_END
 };
 
 static driver_t vmxnet3_driver = {
 	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
 };
 
 static devclass_t vmxnet3_devclass;
 DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
 IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
 MODULE_VERSION(vmx, 2);
 
 MODULE_DEPEND(vmx, pci, 1, 1, 1);
 MODULE_DEPEND(vmx, ether, 1, 1, 1);
 MODULE_DEPEND(vmx, iflib, 1, 1, 1);
 
 static device_method_t vmxnet3_iflib_methods[] = {
 	DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
 	DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
 	DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
 
 	DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
 	DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
 	DEVMETHOD(ifdi_detach, vmxnet3_detach),
 
 	DEVMETHOD(ifdi_init, vmxnet3_init),
 	DEVMETHOD(ifdi_stop, vmxnet3_stop),
 	DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
 	DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
 	DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
 	DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
 	DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
 	DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
 	DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
 	DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
 
 	DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
 	DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
 	DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
 	DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
 	DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
 	DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
 
 	DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
 	DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
 
 	DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
 	DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
 	DEVMETHOD(ifdi_resume, vmxnet3_resume),
 
 	DEVMETHOD_END
 };
 
 static driver_t vmxnet3_iflib_driver = {
 	"vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
 };
 
 struct if_txrx vmxnet3_txrx = {
 	.ift_txd_encap = vmxnet3_isc_txd_encap,
 	.ift_txd_flush = vmxnet3_isc_txd_flush,
 	.ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
 	.ift_rxd_available = vmxnet3_isc_rxd_available,
 	.ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
 	.ift_rxd_refill = vmxnet3_isc_rxd_refill,
 	.ift_rxd_flush = vmxnet3_isc_rxd_flush,
 	.ift_legacy_intr = vmxnet3_legacy_intr
 };
 
 static struct if_shared_ctx vmxnet3_sctx_init = {
 	.isc_magic = IFLIB_MAGIC,
 	.isc_q_align = 512,
 
 	.isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
 	.isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
 	.isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
 	.isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
 
 	/*
 	 * These values are used to configure the busdma tag used for
 	 * receive descriptors.  Each receive descriptor only points to one
 	 * buffer.
 	 */
 	.isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
 	.isc_rx_nsegments = 1,  /* One mapping per descriptor */
 	.isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
 
 	.isc_admin_intrcnt = 1,
 	.isc_vendor_info = vmxnet3_vendor_info_array,
 	.isc_driver_version = "2",
 	.isc_driver = &vmxnet3_iflib_driver,
 	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
 
 	/*
 	 * Number of receive queues per receive queue set, with associated
 	 * descriptor settings for each.
 	 */
 	.isc_nrxqs = 3,
 	.isc_nfl = 2, /* one free list for each receive command queue */
 	.isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
 	.isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
 	.isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
 
 	/*
 	 * Number of transmit queues per transmit queue set, with associated
 	 * descriptor settings for each.
 	 */
 	.isc_ntxqs = 2,
 	.isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
 	.isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
 	.isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
 };
 
 static void *
 vmxnet3_register(device_t dev)
 {
 	return (&vmxnet3_sctx_init);
 }
 
 static int
 vmxnet3_attach_pre(if_ctx_t ctx)
 {
 	device_t dev;
 	if_softc_ctx_t scctx;
 	struct vmxnet3_softc *sc;
 	uint32_t intr_config;
 	int error;
 
 	dev = iflib_get_dev(ctx);
 	sc = iflib_get_softc(ctx);
 	sc->vmx_dev = dev;
 	sc->vmx_ctx = ctx;
 	sc->vmx_sctx = iflib_get_sctx(ctx);
 	sc->vmx_scctx = iflib_get_softc_ctx(ctx);
 	sc->vmx_ifp = iflib_get_ifp(ctx);
 	sc->vmx_media = iflib_get_media(ctx);
 	scctx = sc->vmx_scctx;
 
 	scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
 	scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
 	/* isc_tx_tso_size_max doesn't include possible vlan header */
 	scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
 	scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
 	scctx->isc_txrx = &vmxnet3_txrx;
 
 	/* If 0, the iflib tunable was not set, so set to the default */
 	if (scctx->isc_nrxqsets == 0)
 		scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
 	scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
 
 	/* If 0, the iflib tunable was not set, so set to the default */
 	if (scctx->isc_ntxqsets == 0)
 		scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
 	scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
 
 	/*
 	 * Enforce that the transmit completion queue descriptor count is
 	 * the same as the transmit command queue descriptor count.
 	 */
 	scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
 	scctx->isc_txqsizes[0] =
 	    sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
 	scctx->isc_txqsizes[1] =
 	    sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
 
 	/*
 	 * Enforce that the receive completion queue descriptor count is the
 	 * sum of the receive command queue descriptor counts, and that the
 	 * second receive command queue descriptor count is the same as the
 	 * first one.
 	 */
 	scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
 	scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
 	scctx->isc_rxqsizes[0] =
 	    sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
 	scctx->isc_rxqsizes[1] =
 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
 	scctx->isc_rxqsizes[2] =
 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
 
 	scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
 
 	/* Map PCI BARs */
 	error = vmxnet3_alloc_resources(sc);
 	if (error)
 		goto fail;
 
 	/* Check device versions */
 	error = vmxnet3_check_version(sc);
 	if (error)
 		goto fail;
 
 	/* 
 	 * The interrupt mode can be set in the hypervisor configuration via
 	 * the parameter ethernet<N>.intrMode.
 	 */
 	intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
 	sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
 
 	/*
 	 * Configure the softc context to attempt to configure the interrupt
 	 * mode now indicated by intr_config.  iflib will follow the usual
 	 * fallback path MSI-X -> MSI -> LEGACY, starting at the configured
 	 * starting mode.
 	 */
 	switch (intr_config & 0x03) {
 	case VMXNET3_IT_AUTO:
 	case VMXNET3_IT_MSIX:
 		scctx->isc_msix_bar = pci_msix_table_bar(dev);
 		break;
 	case VMXNET3_IT_MSI:
 		scctx->isc_msix_bar = -1;
 		scctx->isc_disable_msix = 1;
 		break;
 	case VMXNET3_IT_LEGACY:
 		scctx->isc_msix_bar = 0;
 		break;
 	}
 
 	scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
 	scctx->isc_capabilities = scctx->isc_capenable =
 	    IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
 	    IFCAP_TSO4 | IFCAP_TSO6 |
 	    IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
 	    IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
 	    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
 	    IFCAP_JUMBO_MTU;
 
 	/* These capabilities are not enabled by default. */
 	scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
 
 	vmxnet3_get_lladdr(sc);
 	iflib_set_mac(ctx, sc->vmx_lladdr);
 
 	return (0);
 fail:
 	/*
 	 * We must completely clean up anything allocated above as iflib
 	 * will not invoke any other driver entry points as a result of this
 	 * failure.
 	 */
 	vmxnet3_free_resources(sc);
 
 	return (error);
 }
 
 static int
 vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
 {
 	struct vmxnet3_softc *sc;
 	if_softc_ctx_t scctx;
 	struct vmxnet3_rxqueue *rxq;
 	int error;
 	int i;
 	char irq_name[16];
 
 	sc = iflib_get_softc(ctx);
 	scctx = sc->vmx_scctx;
 	
 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
 
 		rxq = &sc->vmx_rxq[i];
 		error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
 		    IFLIB_INTR_RX, vmxnet3_rxq_intr, rxq, i, irq_name);
 		if (error) {
 			device_printf(iflib_get_dev(ctx),
 			    "Failed to register rxq %d interrupt handler\n", i);
 			return (error);
 		}
 	}
 
 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
 
 		/*
 		 * Don't provide the corresponding rxq irq for reference -
 		 * we want the transmit task to be attached to a task queue
 		 * that is different from the one used by the corresponding
 		 * rxq irq.  That is because the TX doorbell writes are very
 		 * expensive as virtualized MMIO operations, so we want to
 		 * be able to defer them to another core when possible so
 		 * that they don't steal receive processing cycles during
 		 * stack turnarounds like TCP ACK generation.  The other
 		 * piece to this approach is enabling the iflib abdicate
 		 * option (currently via an interface-specific
 		 * tunable/sysctl).
 		 */
 		iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
 		    irq_name);
 	}
 
 	error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
 	    scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
 	    "event");
 	if (error) {
 		device_printf(iflib_get_dev(ctx),
 		    "Failed to register event interrupt handler\n");
 		return (error);
 	}
 
 	return (0);
 }
 
 static void
 vmxnet3_free_irqs(struct vmxnet3_softc *sc)
 {
 	if_softc_ctx_t scctx;
 	struct vmxnet3_rxqueue *rxq;
 	int i;
 
 	scctx = sc->vmx_scctx;
 
 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
 		rxq = &sc->vmx_rxq[i];
 		iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
 	}
 
 	iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
 }
 
 static int
 vmxnet3_attach_post(if_ctx_t ctx)
 {
 	device_t dev;
 	if_softc_ctx_t scctx;
 	struct vmxnet3_softc *sc;
 	int error;
 
 	dev = iflib_get_dev(ctx);
 	scctx = iflib_get_softc_ctx(ctx);
 	sc = iflib_get_softc(ctx);
 
 	if (scctx->isc_nrxqsets > 1)
 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
 
 	error = vmxnet3_alloc_data(sc);
 	if (error)
 		goto fail;
 
 	vmxnet3_set_interrupt_idx(sc);
 	vmxnet3_setup_sysctl(sc);
 
 	ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
 	ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
 
 fail:
 	return (error);
 }
 
 static int
 vmxnet3_detach(if_ctx_t ctx)
 {
 	struct vmxnet3_softc *sc;
 
 	sc = iflib_get_softc(ctx);
 
 	vmxnet3_free_irqs(sc);
 	vmxnet3_free_data(sc);
 	vmxnet3_free_resources(sc);
 
 	return (0);
 }
 
 static int
 vmxnet3_shutdown(if_ctx_t ctx)
 {
 
 	return (0);
 }
 
 static int
 vmxnet3_suspend(if_ctx_t ctx)
 {
 
 	return (0);
 }
 
 static int
 vmxnet3_resume(if_ctx_t ctx)
 {
 
 	return (0);
 }
 
 static int
 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
 {
 	device_t dev;
 	int rid;
 
 	dev = sc->vmx_dev;
 
 	rid = PCIR_BAR(0);
 	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
 	    RF_ACTIVE);
 	if (sc->vmx_res0 == NULL) {
 		device_printf(dev,
 		    "could not map BAR0 memory\n");
 		return (ENXIO);
 	}
 
 	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
 	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
 
 	rid = PCIR_BAR(1);
 	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
 	    RF_ACTIVE);
 	if (sc->vmx_res1 == NULL) {
 		device_printf(dev,
 		    "could not map BAR1 memory\n");
 		return (ENXIO);
 	}
 
 	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
 	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
 
 	return (0);
 }
 
 static void
 vmxnet3_free_resources(struct vmxnet3_softc *sc)
 {
 	device_t dev;
 
 	dev = sc->vmx_dev;
 
 	if (sc->vmx_res0 != NULL) {
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    rman_get_rid(sc->vmx_res0), sc->vmx_res0);
 		sc->vmx_res0 = NULL;
 	}
 
 	if (sc->vmx_res1 != NULL) {
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    rman_get_rid(sc->vmx_res1), sc->vmx_res1);
 		sc->vmx_res1 = NULL;
 	}
 }
 
 static int
 vmxnet3_check_version(struct vmxnet3_softc *sc)
 {
 	device_t dev;
 	uint32_t version;
 
 	dev = sc->vmx_dev;
 
 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
 	if ((version & 0x01) == 0) {
 		device_printf(dev, "unsupported hardware version %#x\n",
 		    version);
 		return (ENOTSUP);
 	}
 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
 
 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
 	if ((version & 0x01) == 0) {
 		device_printf(dev, "unsupported UPT version %#x\n", version);
 		return (ENOTSUP);
 	}
 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
 
 	return (0);
 }
 
 static void
 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
 {
 	if_softc_ctx_t scctx;
 	struct vmxnet3_txqueue *txq;
 	struct vmxnet3_txq_shared *txs;
 	struct vmxnet3_rxqueue *rxq;
 	struct vmxnet3_rxq_shared *rxs;
 	int intr_idx;
 	int i;
 
 	scctx = sc->vmx_scctx;
 
 	/*
 	 * There is always one interrupt per receive queue, assigned
 	 * starting with the first interrupt.  When there is only one
 	 * interrupt available, the event interrupt shares the receive queue
 	 * interrupt, otherwise it uses the interrupt following the last
 	 * receive queue interrupt.  Transmit queues are not assigned
 	 * interrupts, so they are given indexes beyond the indexes that
 	 * correspond to the real interrupts.
 	 */
 
 	/* The event interrupt is always the last vector. */
 	sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
 
 	intr_idx = 0;
 	for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
 		rxq = &sc->vmx_rxq[i];
 		rxs = rxq->vxrxq_rs;
 		rxq->vxrxq_intr_idx = intr_idx;
 		rxs->intr_idx = rxq->vxrxq_intr_idx;
 	}
 
 	/*
 	 * Assign the tx queues interrupt indexes above what we are actually
 	 * using.  These interrupts will never be enabled.
 	 */
 	intr_idx = scctx->isc_vectors;
 	for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
 		txq = &sc->vmx_txq[i];
 		txs = txq->vxtxq_ts;
 		txq->vxtxq_intr_idx = intr_idx;
 		txs->intr_idx = txq->vxtxq_intr_idx;
 	}
 }
 
 static int
 vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
 {
 	if_softc_ctx_t scctx;
 	int size;
 	int error;
 	
 	scctx = sc->vmx_scctx;
 
 	/*
 	 * The txq and rxq shared data areas must be allocated contiguously
 	 * as vmxnet3_driver_shared contains only a single address member
 	 * for the shared queue data area.
 	 */
 	size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
 	    scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
 	if (error) {
 		device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
 		return (error);
 	}
 
 	return (0);
 }
 
 static void
 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
 {
 	struct vmxnet3_txqueue *txq;
 	struct vmxnet3_comp_ring *txc;
 	struct vmxnet3_txring *txr;
 	if_softc_ctx_t scctx;
 	
 	txq = &sc->vmx_txq[q];
 	txc = &txq->vxtxq_comp_ring;
 	txr = &txq->vxtxq_cmd_ring;
 	scctx = sc->vmx_scctx;
 
 	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
 	    device_get_nameunit(sc->vmx_dev), q);
 
 	txq->vxtxq_sc = sc;
 	txq->vxtxq_id = q;
 	txc->vxcr_ndesc = scctx->isc_ntxd[0];
 	txr->vxtxr_ndesc = scctx->isc_ntxd[1];
 }
 
 static int
 vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
     int ntxqs, int ntxqsets)
 {
 	struct vmxnet3_softc *sc;
 	int q;
 	int error;
 	caddr_t kva;
 	
 	sc = iflib_get_softc(ctx);
 
 	/* Allocate the array of transmit queues */
 	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
 	    ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (sc->vmx_txq == NULL)
 		return (ENOMEM);
 
 	/* Initialize driver state for each transmit queue */
 	for (q = 0; q < ntxqsets; q++)
 		vmxnet3_init_txq(sc, q);
 
 	/*
 	 * Allocate queue state that is shared with the device.  This check
 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
 	 * order iflib invokes those routines in.
 	 */
 	if (sc->vmx_qs_dma.idi_size == 0) {
 		error = vmxnet3_queues_shared_alloc(sc);
 		if (error)
 			return (error);
 	}
 
 	kva = sc->vmx_qs_dma.idi_vaddr;
 	for (q = 0; q < ntxqsets; q++) {
 		sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
 		kva += sizeof(struct vmxnet3_txq_shared);
 	}
 
 	/* Record descriptor ring vaddrs and paddrs */
 	for (q = 0; q < ntxqsets; q++) {
 		struct vmxnet3_txqueue *txq;
 		struct vmxnet3_txring *txr;
 		struct vmxnet3_comp_ring *txc;
 
 		txq = &sc->vmx_txq[q];
 		txc = &txq->vxtxq_comp_ring;
 		txr = &txq->vxtxq_cmd_ring;
 
 		/* Completion ring */
 		txc->vxcr_u.txcd =
 		    (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
 		txc->vxcr_paddr = paddrs[q * ntxqs + 0];
 
 		/* Command ring */
 		txr->vxtxr_txd =
 		    (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
 		txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
 	}
 
 	return (0);
 }
 
 static void
 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
 {
 	struct vmxnet3_rxqueue *rxq;
 	struct vmxnet3_comp_ring *rxc;
 	struct vmxnet3_rxring *rxr;
 	if_softc_ctx_t scctx;
 	int i;
 
 	rxq = &sc->vmx_rxq[q];
 	rxc = &rxq->vxrxq_comp_ring;
 	scctx = sc->vmx_scctx;
 
 	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
 	    device_get_nameunit(sc->vmx_dev), q);
 
 	rxq->vxrxq_sc = sc;
 	rxq->vxrxq_id = q;
 
 	/*
 	 * First rxq is the completion queue, so there are nrxqs - 1 command
 	 * rings starting at iflib queue id 1.
 	 */
 	rxc->vxcr_ndesc = scctx->isc_nrxd[0];
 	for (i = 0; i < nrxqs - 1; i++) {
 		rxr = &rxq->vxrxq_cmd_ring[i];
 		rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
 	}
 }
 
 static int
 vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
     int nrxqs, int nrxqsets)
 {
 	struct vmxnet3_softc *sc;
 	if_softc_ctx_t scctx;
 	int q;
 	int i;
 	int error;
 	caddr_t kva;
 	
 	sc = iflib_get_softc(ctx);
 	scctx = sc->vmx_scctx;
 
 	/* Allocate the array of receive queues */
 	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
 	    nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (sc->vmx_rxq == NULL)
 		return (ENOMEM);
 
 	/* Initialize driver state for each receive queue */
 	for (q = 0; q < nrxqsets; q++)
 		vmxnet3_init_rxq(sc, q, nrxqs);
 
 	/*
 	 * Allocate queue state that is shared with the device.  This check
 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
 	 * order iflib invokes those routines in.
 	 */
 	if (sc->vmx_qs_dma.idi_size == 0) {
 		error = vmxnet3_queues_shared_alloc(sc);
 		if (error)
 			return (error);
 	}
 
 	kva = sc->vmx_qs_dma.idi_vaddr +
 	    scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
 	for (q = 0; q < nrxqsets; q++) {
 		sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
 		kva += sizeof(struct vmxnet3_rxq_shared);
 	}
 
 	/* Record descriptor ring vaddrs and paddrs */
 	for (q = 0; q < nrxqsets; q++) {
 		struct vmxnet3_rxqueue *rxq;
 		struct vmxnet3_rxring *rxr;
 		struct vmxnet3_comp_ring *rxc;
 
 		rxq = &sc->vmx_rxq[q];
 		rxc = &rxq->vxrxq_comp_ring;
 
 		/* Completion ring */
 		rxc->vxcr_u.rxcd =
 		    (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
 		rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
 
 		/* Command ring(s) */
 		for (i = 0; i < nrxqs - 1; i++) {
 			rxr = &rxq->vxrxq_cmd_ring[i];
 
 			rxr->vxrxr_rxd =
 			    (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
 			rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
 		}
 	}
 
 	return (0);
 }
 
 static void
 vmxnet3_queues_free(if_ctx_t ctx)
 {
 	struct vmxnet3_softc *sc;
 
 	sc = iflib_get_softc(ctx);
 
 	/* Free queue state area that is shared with the device */
 	if (sc->vmx_qs_dma.idi_size != 0) {
 		iflib_dma_free(&sc->vmx_qs_dma);
 		sc->vmx_qs_dma.idi_size = 0;
 	}
 
 	/* Free array of receive queues */
 	if (sc->vmx_rxq != NULL) {
 		free(sc->vmx_rxq, M_DEVBUF);
 		sc->vmx_rxq = NULL;
 	}
 
 	/* Free array of transmit queues */
 	if (sc->vmx_txq != NULL) {
 		free(sc->vmx_txq, M_DEVBUF);
 		sc->vmx_txq = NULL;
 	}
 }
 
 static int
 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
 {
 	device_t dev;
 	size_t size;
 	int error;
 
 	dev = sc->vmx_dev;
 
 	/* Top level state structure shared with the device */
 	size = sizeof(struct vmxnet3_driver_shared);
 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
 	if (error) {
 		device_printf(dev, "cannot alloc shared memory\n");
 		return (error);
 	}
 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
 
 	/* RSS table state shared with the device */
 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
 		size = sizeof(struct vmxnet3_rss_shared);
 		error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
 		    &sc->vmx_rss_dma, 0);
 		if (error) {
 			device_printf(dev, "cannot alloc rss shared memory\n");
 			return (error);
 		}
 		sc->vmx_rss =
 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
 	}
 
 	return (0);
 }
 
 static void
 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
 {
 
 	/* Free RSS table state shared with the device */
 	if (sc->vmx_rss != NULL) {
 		iflib_dma_free(&sc->vmx_rss_dma);
 		sc->vmx_rss = NULL;
 	}
 
 	/* Free top level state structure shared with the device */
 	if (sc->vmx_ds != NULL) {
 		iflib_dma_free(&sc->vmx_ds_dma);
 		sc->vmx_ds = NULL;
 	}
 }
 
 static int
 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
 {
 	int error;
 
 	/* Multicast table state shared with the device */
 	error = iflib_dma_alloc_align(sc->vmx_ctx,
 	    VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
 	if (error)
 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
 	else
 		sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
 
 	return (error);
 }
 
 static void
 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
 {
 
 	/* Free multicast table state shared with the device */
 	if (sc->vmx_mcast != NULL) {
 		iflib_dma_free(&sc->vmx_mcast_dma);
 		sc->vmx_mcast = NULL;
 	}
 }
 
 static void
 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
 {
 	struct vmxnet3_driver_shared *ds;
 	if_shared_ctx_t sctx;
 	if_softc_ctx_t scctx;
 	struct vmxnet3_txqueue *txq;
 	struct vmxnet3_txq_shared *txs;
 	struct vmxnet3_rxqueue *rxq;
 	struct vmxnet3_rxq_shared *rxs;
 	int i;
 
 	ds = sc->vmx_ds;
 	sctx = sc->vmx_sctx;
 	scctx = sc->vmx_scctx;
 
 	/*
 	 * Initialize fields of the shared data that remain the same across
 	 * reinits. Note the shared data is zeroed when allocated.
 	 */
 
 	ds->magic = VMXNET3_REV1_MAGIC;
 
 	/* DriverInfo */
 	ds->version = VMXNET3_DRIVER_VERSION;
 	ds->guest = VMXNET3_GOS_FREEBSD |
 #ifdef __LP64__
 	    VMXNET3_GOS_64BIT;
 #else
 	    VMXNET3_GOS_32BIT;
 #endif
 	ds->vmxnet3_revision = 1;
 	ds->upt_version = 1;
 
 	/* Misc. conf */
 	ds->driver_data = vtophys(sc);
 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
 	ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
 	ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
 	ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
 
 	/* RSS conf */
 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
 		ds->rss.version = 1;
 		ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
 		ds->rss.len = sc->vmx_rss_dma.idi_size;
 	}
 
 	/* Interrupt control. */
 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
 	/*
 	 * Total number of interrupt indexes we are using in the shared
 	 * config data, even though we don't actually allocate interrupt
 	 * resources for the tx queues.  Some versions of the device will
 	 * fail to initialize successfully if interrupt indexes are used in
 	 * the shared config that exceed the number of interrupts configured
 	 * here.
 	 */
 	ds->nintr = (scctx->isc_vectors == 1) ?
 	    2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
 	ds->evintr = sc->vmx_event_intr_idx;
 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
 
 	for (i = 0; i < ds->nintr; i++)
 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
 
 	/* Receive filter. */
 	ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
 	ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
 
 	/* Tx queues */
 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
 		txq = &sc->vmx_txq[i];
 		txs = txq->vxtxq_ts;
 
 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
 		txs->driver_data = vtophys(txq);
 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
 	}
 
 	/* Rx queues */
 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
 		rxq = &sc->vmx_rxq[i];
 		rxs = rxq->vxrxq_rs;
 
 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
 		rxs->driver_data = vtophys(rxq);
 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
 	}
 }
 
 static void
 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
 {
 	/*
 	 * Use the same key as the Linux driver until FreeBSD can do
 	 * RSS (presumably Toeplitz) in software.
 	 */
 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
 	};
 
 	struct vmxnet3_driver_shared *ds;
 	if_softc_ctx_t scctx;
 	struct vmxnet3_rss_shared *rss;
+#ifdef RSS
+	uint8_t rss_algo;
+#endif
 	int i;
-	
+
 	ds = sc->vmx_ds;
 	scctx = sc->vmx_scctx;
 	rss = sc->vmx_rss;
 
 	rss->hash_type =
 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
-	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
-
-	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
-		rss->ind_table[i] = i % scctx->isc_nrxqsets;
+#ifdef RSS
+	/*
+	 * If the software RSS is configured to anything other than
+	 * Toeplitz, then just do Toeplitz in "hardware" for the sake of
+	 * the packet distribution, but report the hash as opaque to
+	 * disengage from the software RSS.
+	 */
+	rss_algo = rss_gethashalgo();
+	if (rss_algo == RSS_HASH_TOEPLITZ) {
+		rss_getkey(rss->hash_key);
+		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
+			rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
+			    scctx->isc_nrxqsets;
+		}
+		sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
+	} else
+#endif
+	{
+		memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
+		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
+			rss->ind_table[i] = i % scctx->isc_nrxqsets;
+		sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
+	}
 }
 
 static void
 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
 {
 	struct ifnet *ifp;
 	struct vmxnet3_driver_shared *ds;
 	if_softc_ctx_t scctx;
 	
 	ifp = sc->vmx_ifp;
 	ds = sc->vmx_ds;
 	scctx = sc->vmx_scctx;
 	
 	ds->mtu = ifp->if_mtu;
 	ds->ntxqueue = scctx->isc_ntxqsets;
 	ds->nrxqueue = scctx->isc_nrxqsets;
 
 	ds->upt_features = 0;
 	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
 		ds->upt_features |= UPT1_F_CSUM;
 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
 		ds->upt_features |= UPT1_F_VLAN;
 	if (ifp->if_capenable & IFCAP_LRO)
 		ds->upt_features |= UPT1_F_LRO;
 
 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
 		ds->upt_features |= UPT1_F_RSS;
 		vmxnet3_reinit_rss_shared_data(sc);
 	}
 
 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
 	    (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
 }
 
 static int
 vmxnet3_alloc_data(struct vmxnet3_softc *sc)
 {
 	int error;
 
 	error = vmxnet3_alloc_shared_data(sc);
 	if (error)
 		return (error);
 
 	error = vmxnet3_alloc_mcast_table(sc);
 	if (error)
 		return (error);
 
 	vmxnet3_init_shared_data(sc);
 
 	return (0);
 }
 
 static void
 vmxnet3_free_data(struct vmxnet3_softc *sc)
 {
 
 	vmxnet3_free_mcast_table(sc);
 	vmxnet3_free_shared_data(sc);
 }
 
 static void
 vmxnet3_evintr(struct vmxnet3_softc *sc)
 {
 	device_t dev;
 	struct vmxnet3_txq_shared *ts;
 	struct vmxnet3_rxq_shared *rs;
 	uint32_t event;
 
 	dev = sc->vmx_dev;
 
 	/* Clear events. */
 	event = sc->vmx_ds->event;
 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
 
 	if (event & VMXNET3_EVENT_LINK)
 		vmxnet3_link_status(sc);
 
 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
 		ts = sc->vmx_txq[0].vxtxq_ts;
 		if (ts->stopped != 0)
 			device_printf(dev, "Tx queue error %#x\n", ts->error);
 		rs = sc->vmx_rxq[0].vxrxq_rs;
 		if (rs->stopped != 0)
 			device_printf(dev, "Rx queue error %#x\n", rs->error);
 
 		/* XXX - rely on iflib watchdog to reset us? */
 		device_printf(dev, "Rx/Tx queue error event ... "
 		    "waiting for iflib watchdog reset\n");
 	}
 
 	if (event & VMXNET3_EVENT_DIC)
 		device_printf(dev, "device implementation change event\n");
 	if (event & VMXNET3_EVENT_DEBUG)
 		device_printf(dev, "debug event\n");
 }
 
 static int
 vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
 {
 	struct vmxnet3_softc *sc;
 	struct vmxnet3_txqueue *txq;
 	struct vmxnet3_txring *txr;
 	struct vmxnet3_txdesc *txd, *sop;
 	bus_dma_segment_t *segs;
 	int nsegs;
 	int pidx;
 	int hdrlen;
 	int i;
 	int gen;
 
 	sc = vsc;
 	txq = &sc->vmx_txq[pi->ipi_qsidx];
 	txr = &txq->vxtxq_cmd_ring;
 	segs = pi->ipi_segs;
 	nsegs = pi->ipi_nsegs;
 	pidx = pi->ipi_pidx;
 
 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
 	    ("%s: packet with too many segments %d", __func__, nsegs));
 
 	sop = &txr->vxtxr_txd[pidx];
 	gen = txr->vxtxr_gen ^ 1;	/* Still owned by the cpu */
 
 	for (i = 0; i < nsegs; i++) {
 		txd = &txr->vxtxr_txd[pidx];
 
 		txd->addr = segs[i].ds_addr;
 		txd->len = segs[i].ds_len;
 		txd->gen = gen;
 		txd->dtype = 0;
 		txd->offload_mode = VMXNET3_OM_NONE;
 		txd->offload_pos = 0;
 		txd->hlen = 0;
 		txd->eop = 0;
 		txd->compreq = 0;
 		txd->vtag_mode = 0;
 		txd->vtag = 0;
 
 		if (++pidx == txr->vxtxr_ndesc) {
 			pidx = 0;
 			txr->vxtxr_gen ^= 1;
 		}
 		gen = txr->vxtxr_gen;
 	}
 	txd->eop = 1;
 	txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
 	pi->ipi_new_pidx = pidx;
 
 	/*
 	 * VLAN
 	 */
 	if (pi->ipi_mflags & M_VLANTAG) {
 		sop->vtag_mode = 1;
 		sop->vtag = pi->ipi_vtag;
 	}
 
 	/*
 	 * TSO and checksum offloads
 	 */
 	hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
 	if (pi->ipi_csum_flags & CSUM_TSO) {
 		sop->offload_mode = VMXNET3_OM_TSO;
 		sop->hlen = hdrlen + pi->ipi_tcp_hlen;
 		sop->offload_pos = pi->ipi_tso_segsz;
 	} else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
 	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
 		sop->offload_mode = VMXNET3_OM_CSUM;
 		sop->hlen = hdrlen;
 		sop->offload_pos = hdrlen +
 		    ((pi->ipi_ipproto == IPPROTO_TCP) ?
 			offsetof(struct tcphdr, th_sum) :
 			offsetof(struct udphdr, uh_sum));
 	}
 
 	/* Finally, change the ownership. */
 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
 	sop->gen ^= 1;
 
 	return (0);
 }
 
 static void
 vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
 {
 	struct vmxnet3_softc *sc;
 	struct vmxnet3_txqueue *txq;
 
 	sc = vsc;
 	txq = &sc->vmx_txq[txqid];
 
 	/*
 	 * pidx is what we last set ipi_new_pidx to in
 	 * vmxnet3_isc_txd_encap()
 	 */
 
 	/*
 	 * Avoid expensive register updates if the flush request is
 	 * redundant.
 	 */
 	if (txq->vxtxq_last_flush == pidx)
 		return;
 	txq->vxtxq_last_flush = pidx;
 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
 }
 
 static int
 vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
 {
 	struct vmxnet3_softc *sc;
 	struct vmxnet3_txqueue *txq;
 	struct vmxnet3_comp_ring *txc;
 	struct vmxnet3_txcompdesc *txcd;
 	struct vmxnet3_txring *txr;
 	int processed;
 	
 	sc = vsc;
 	txq = &sc->vmx_txq[txqid];
 	txc = &txq->vxtxq_comp_ring;
 	txr = &txq->vxtxq_cmd_ring;
 
 	/*
 	 * If clear is true, we need to report the number of TX command ring
 	 * descriptors that have been processed by the device.  If clear is
 	 * false, we just need to report whether or not at least one TX
 	 * command ring descriptor has been processed by the device.
 	 */
 	processed = 0;
 	for (;;) {
 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
 		if (txcd->gen != txc->vxcr_gen)
 			break;
 		else if (!clear)
 			return (1);
 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
 
 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
 			txc->vxcr_next = 0;
 			txc->vxcr_gen ^= 1;
 		}
 
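 		/*
 		 * Account for command ring wraparound when eop_idx is
 		 * behind vxtxr_next.
 		 */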
 		if (txcd->eop_idx < txr->vxtxr_next)
 			processed += txr->vxtxr_ndesc -
 			    (txr->vxtxr_next - txcd->eop_idx) + 1;
 		else
 			processed += txcd->eop_idx - txr->vxtxr_next + 1;
 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
 	}
 
 	return (processed);
 }
 
 static int
 vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
 {
 	struct vmxnet3_softc *sc;
 	struct vmxnet3_rxqueue *rxq;
 	struct vmxnet3_comp_ring *rxc;
 	struct vmxnet3_rxcompdesc *rxcd;
 	int avail;
 	int completed_gen;
 #ifdef INVARIANTS
 	int expect_sop = 1;
 #endif	
 	sc = vsc;
 	rxq = &sc->vmx_rxq[rxqid];
 	rxc = &rxq->vxrxq_comp_ring;
 
 	avail = 0;
 	completed_gen = rxc->vxcr_gen;
 	for (;;) {
 		rxcd = &rxc->vxcr_u.rxcd[idx];
 		if (rxcd->gen != completed_gen)
 			break;
 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
 
 #ifdef INVARIANTS
 		if (expect_sop)
 			KASSERT(rxcd->sop, ("%s: expected sop", __func__));
 		else
 			KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
 		expect_sop = rxcd->eop;
 #endif
 		if (rxcd->eop && (rxcd->len != 0))
 			avail++;
 		if (avail > budget)
 			break;
 		if (++idx == rxc->vxcr_ndesc) {
 			idx = 0;
 			completed_gen ^= 1;
 		}
 	}
 	
 	return (avail);
 }
 
 static int
 vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
 {
 	struct vmxnet3_softc *sc;
 	if_softc_ctx_t scctx;
 	struct vmxnet3_rxqueue *rxq;
 	struct vmxnet3_comp_ring *rxc;
 	struct vmxnet3_rxcompdesc *rxcd;
 	struct vmxnet3_rxring *rxr;
 	struct vmxnet3_rxdesc *rxd;
 	if_rxd_frag_t frag;
 	int cqidx;
 	uint16_t total_len;
 	uint8_t nfrags;
 	uint8_t flid;
 
 	sc = vsc;
 	scctx = sc->vmx_scctx;
 	rxq = &sc->vmx_rxq[ri->iri_qsidx];
 	rxc = &rxq->vxrxq_comp_ring;
 
 	/*
 	 * Get a single packet starting at the given index in the completion
 	 * queue.  That we have been called indicates that
 	 * vmxnet3_isc_rxd_available() has already verified that either
 	 * there is a complete packet available starting at the given index,
 	 * or there are one or more zero length packets starting at the
 	 * given index followed by a complete packet, so no verification of
 	 * ownership of the descriptors (and no associated read barrier) is
 	 * required here.
 	 */
 	cqidx = ri->iri_cidx;
 	rxcd = &rxc->vxcr_u.rxcd[cqidx];
 	while (rxcd->len == 0) {
 		KASSERT(rxcd->sop && rxcd->eop,
 		    ("%s: zero-length packet without both sop and eop set",
 			__func__));
 		if (++cqidx == rxc->vxcr_ndesc) {
 			cqidx = 0;
 			rxc->vxcr_gen ^= 1;
 		}
 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
 	}
 	KASSERT(rxcd->sop, ("%s: expected sop", __func__));
 
 	/*
-	 * RSS and flow ID
+	 * RSS and flow ID.
+	 * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
+	 * be used only if the software RSS is enabled and it uses the same
+	 * algorithm and the hash key as the "hardware".  If the software RSS
+	 * is not enabled, then it's simply pointless to use those types.
+	 * If it's enabled but with different parameters, then hash values will
+	 * not match.
 	 */
 	ri->iri_flowid = rxcd->rss_hash;
-	switch (rxcd->rss_type) {
-	case VMXNET3_RCD_RSS_TYPE_NONE:
-		ri->iri_flowid = ri->iri_qsidx;
-		ri->iri_rsstype = M_HASHTYPE_NONE;
-		break;
-	case VMXNET3_RCD_RSS_TYPE_IPV4:
-		ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
-		break;
-	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
-		ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
-		break;
-	case VMXNET3_RCD_RSS_TYPE_IPV6:
-		ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
-		break;
-	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
-		ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
-		break;
-	default:
-		ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
-		break;
+#ifdef RSS
+	if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
+		switch (rxcd->rss_type) {
+		case VMXNET3_RCD_RSS_TYPE_NONE:
+			ri->iri_flowid = ri->iri_qsidx;
+			ri->iri_rsstype = M_HASHTYPE_NONE;
+			break;
+		case VMXNET3_RCD_RSS_TYPE_IPV4:
+			ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
+			break;
+		case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
+			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
+			break;
+		case VMXNET3_RCD_RSS_TYPE_IPV6:
+			ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
+			break;
+		case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
+			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
+			break;
+		default:
+			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
+			break;
+		}
+	} else
+#endif
+	{
+		switch (rxcd->rss_type) {
+		case VMXNET3_RCD_RSS_TYPE_NONE:
+			ri->iri_flowid = ri->iri_qsidx;
+			ri->iri_rsstype = M_HASHTYPE_NONE;
+			break;
+		default:
+			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
+			break;
+		}
 	}
 
 	/* VLAN */
 	if (rxcd->vlan) {
 		ri->iri_flags |= M_VLANTAG;
 		ri->iri_vtag = rxcd->vtag;
 	}
 
 	/* Checksum offload */
 	if (!rxcd->no_csum) {
 		uint32_t csum_flags = 0;
 
 		if (rxcd->ipv4) {
 			csum_flags |= CSUM_IP_CHECKED;
 			if (rxcd->ipcsum_ok)
 				csum_flags |= CSUM_IP_VALID;
 		}
 		if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
 			csum_flags |= CSUM_L4_CALC;
 			if (rxcd->csum_ok) {
 				csum_flags |= CSUM_L4_VALID;
 				ri->iri_csum_data = 0xffff;
 			}
 		}
 		ri->iri_csum_flags = csum_flags;
 	}
 
 	/*
 	 * The queue numbering scheme used for rxcd->qid is as follows:
 	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
 	 *  - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
 	 *
 	 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
 	 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
 	 * indicates command ring (and flid) 1.
 	 */
 	nfrags = 0;
 	total_len = 0;
 	do {
 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
 		KASSERT(rxcd->gen == rxc->vxcr_gen,
 		    ("%s: generation mismatch", __func__));
 		flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
 		rxr = &rxq->vxrxq_cmd_ring[flid];
 		rxd = &rxr->vxrxr_rxd[rxcd->rxd_idx];
 
 		frag = &ri->iri_frags[nfrags];
 		frag->irf_flid = flid;
 		frag->irf_idx = rxcd->rxd_idx;
 		frag->irf_len = rxcd->len;
 		total_len += rxcd->len;
 		nfrags++;
 		if (++cqidx == rxc->vxcr_ndesc) {
 			cqidx = 0;
 			rxc->vxcr_gen ^= 1;
 		}
 	} while (!rxcd->eop);
 
 	ri->iri_cidx = cqidx;
 	ri->iri_nfrags = nfrags;
 	ri->iri_len = total_len;
 
 	return (0);
 }
 
 static void
 vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
 {
 	struct vmxnet3_softc *sc;
 	struct vmxnet3_rxqueue *rxq;
 	struct vmxnet3_rxring *rxr;
 	struct vmxnet3_rxdesc *rxd;
 	uint64_t *paddrs;
 	int count;
 	int len;
 	int pidx;
 	int i;
 	uint8_t flid;
 	uint8_t btype;
 
 	count = iru->iru_count;
 	len = iru->iru_buf_size;
 	pidx = iru->iru_pidx;
 	flid = iru->iru_flidx;
 	paddrs = iru->iru_paddrs;
 
 	sc = vsc;
 	rxq = &sc->vmx_rxq[iru->iru_qsidx];
 	rxr = &rxq->vxrxq_cmd_ring[flid];
 	rxd = rxr->vxrxr_rxd;
 
 	/*
 	 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
 	 * command ring 1 is filled with BTYPE_BODY descriptors.
 	 */
 	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
 	for (i = 0; i < count; i++) {
 		rxd[pidx].addr = paddrs[i];
 		rxd[pidx].len = len;
 		rxd[pidx].btype = btype;
 		rxd[pidx].gen = rxr->vxrxr_gen;
 
 		if (++pidx == rxr->vxrxr_ndesc) {
 			pidx = 0;
 			rxr->vxrxr_gen ^= 1;
 		}
 	}
 }
 
 static void
 vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
 {
 	struct vmxnet3_softc *sc;
 	struct vmxnet3_rxqueue *rxq;
 	struct vmxnet3_rxring *rxr;
 	bus_size_t r;
 	
 	sc = vsc;
 	rxq = &sc->vmx_rxq[rxqid];
 	rxr = &rxq->vxrxq_cmd_ring[flid];
 
 	if (flid == 0)
 		r = VMXNET3_BAR0_RXH1(rxqid);
 	else
 		r = VMXNET3_BAR0_RXH2(rxqid);
 
 	/*
 	 * pidx is the index of the last descriptor with a buffer the device
 	 * can use, and the device needs to be told which index is one past
 	 * that.
 	 */
 	if (++pidx == rxr->vxrxr_ndesc)
 		pidx = 0;
 	vmxnet3_write_bar0(sc, r, pidx);
 }
 
 static int
 vmxnet3_legacy_intr(void *xsc)
 {
 	struct vmxnet3_softc *sc;
 	if_softc_ctx_t scctx;
 	if_ctx_t ctx;
 	
 	sc = xsc;
 	scctx = sc->vmx_scctx;
 	ctx = sc->vmx_ctx;
 
 	/*
 	 * When there is only a single interrupt configured, this routine
 	 * runs in fast interrupt context, following which the rxq 0 task
 	 * will be enqueued.
 	 */
 	if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
 			return (FILTER_HANDLED);
 	}
 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
 		vmxnet3_intr_disable_all(ctx);
 
 	if (sc->vmx_ds->event != 0)
 		iflib_admin_intr_deferred(ctx);
 
 	/*
 	 * XXX - When there is both rxq and event activity, do we care
 	 * whether the rxq 0 task or the admin task re-enables the interrupt
 	 * first?
 	 */
 	return (FILTER_SCHEDULE_THREAD);
 }
 
 static int
 vmxnet3_rxq_intr(void *vrxq)
 {
 	struct vmxnet3_softc *sc;
 	struct vmxnet3_rxqueue *rxq;
 
 	rxq = vrxq;
 	sc = rxq->vxrxq_sc;
 
 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
 		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
 
 	return (FILTER_SCHEDULE_THREAD);
 }
 
 static int
 vmxnet3_event_intr(void *vsc)
 {
 	struct vmxnet3_softc *sc;
 
 	sc = vsc;
 
 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
 
 	/*
 	 * The work will be done via vmxnet3_update_admin_status(), and the
 	 * interrupt will be re-enabled by vmxnet3_link_intr_enable().
 	 */
 	return (FILTER_SCHEDULE_THREAD);
 }
 
 static void
 vmxnet3_stop(if_ctx_t ctx)
 {
 	struct vmxnet3_softc *sc;
 
 	sc = iflib_get_softc(ctx);
 
 	sc->vmx_link_active = 0;
 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
 }
 
 static void
 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
 {
 	struct vmxnet3_txring *txr;
 	struct vmxnet3_comp_ring *txc;
 
 	txq->vxtxq_last_flush = -1;
 	
 	txr = &txq->vxtxq_cmd_ring;
 	txr->vxtxr_next = 0;
 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
 	/*
 	 * iflib has zeroed out the descriptor array during the prior attach
 	 * or stop
 	 */
 
 	txc = &txq->vxtxq_comp_ring;
 	txc->vxcr_next = 0;
 	txc->vxcr_gen = VMXNET3_INIT_GEN;
 	/*
 	 * iflib has zeroed out the descriptor array during the prior attach
 	 * or stop
 	 */
 }
 
 static void
 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
 {
 	struct vmxnet3_rxring *rxr;
 	struct vmxnet3_comp_ring *rxc;
 	int i;
 
 	/*
 	 * The descriptors will be populated with buffers during a
 	 * subsequent invocation of vmxnet3_isc_rxd_refill()
 	 */
 	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
 		rxr = &rxq->vxrxq_cmd_ring[i];
 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
 		/*
 		 * iflib has zeroed out the descriptor array during the
 		 * prior attach or stop
 		 */
 	}
 
 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
 		rxr = &rxq->vxrxq_cmd_ring[i];
 		rxr->vxrxr_gen = 0;
 		bzero(rxr->vxrxr_rxd,
 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
 	}
 
 	rxc = &rxq->vxrxq_comp_ring;
 	rxc->vxcr_next = 0;
 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
 	/*
 	 * iflib has zeroed out the descriptor array during the prior attach
 	 * or stop
 	 */
 }
 
 static void
 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
 {
 	if_softc_ctx_t scctx;
 	int q;
 
 	scctx = sc->vmx_scctx;
 
 	for (q = 0; q < scctx->isc_ntxqsets; q++)
 		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
 
 	for (q = 0; q < scctx->isc_nrxqsets; q++)
 		vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
 }
 
 static int
 vmxnet3_enable_device(struct vmxnet3_softc *sc)
 {
 	if_softc_ctx_t scctx;
 	int q;
 
 	scctx = sc->vmx_scctx;
 
 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
 		device_printf(sc->vmx_dev, "device enable command failed!\n");
 		return (1);
 	}
 
 	/* Reset the Rx queue heads. */
 	for (q = 0; q < scctx->isc_nrxqsets; q++) {
 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
 	}
 
 	return (0);
 }
 
 static void
 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
 {
 	struct ifnet *ifp;
 
 	ifp = sc->vmx_ifp;
 
 	vmxnet3_set_rxfilter(sc, if_getflags(ifp));
 
 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
 		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
 		    sizeof(sc->vmx_ds->vlan_filter));
 	else
 		bzero(sc->vmx_ds->vlan_filter,
 		    sizeof(sc->vmx_ds->vlan_filter));
 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
 }
 
 static void
 vmxnet3_init(if_ctx_t ctx)
 {
 	struct vmxnet3_softc *sc;
 	if_softc_ctx_t scctx;
 	
 	sc = iflib_get_softc(ctx);
 	scctx = sc->vmx_scctx;
 
 	scctx->isc_max_frame_size = if_getmtu(iflib_get_ifp(ctx)) +
 	    ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
 
 	/* Use the current MAC address. */
 	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
 	vmxnet3_set_lladdr(sc);
 
 	vmxnet3_reinit_shared_data(sc);
 	vmxnet3_reinit_queues(sc);
 
 	vmxnet3_enable_device(sc);
 
 	vmxnet3_reinit_rxfilters(sc);
 	vmxnet3_link_status(sc);
 }
 
 static void
 vmxnet3_multi_set(if_ctx_t ctx)
 {
 
 	vmxnet3_set_rxfilter(iflib_get_softc(ctx),
 	    if_getflags(iflib_get_ifp(ctx)));
 }
 
 static int
 vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
 {
 
 	if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
 		ETHER_CRC_LEN))
 		return (EINVAL);
 
 	return (0);
 }
 
 static void
 vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq * ifmr)
 {
 	struct vmxnet3_softc *sc;
 
 	sc = iflib_get_softc(ctx);
 
 	ifmr->ifm_status = IFM_AVALID;
 	ifmr->ifm_active = IFM_ETHER;
 
 	if (vmxnet3_link_is_up(sc) != 0) {
 		ifmr->ifm_status |= IFM_ACTIVE;
 		ifmr->ifm_active |= IFM_AUTO;
 	} else
 		ifmr->ifm_active |= IFM_NONE;
 }
 
 static int
 vmxnet3_media_change(if_ctx_t ctx)
 {
 
 	/* Ignore. */
 	return (0);
 }
 
 static int
 vmxnet3_promisc_set(if_ctx_t ctx, int flags)
 {
 
 	vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
 
 	return (0);
 }
 
 static uint64_t
 vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
 {
 	if_t ifp = iflib_get_ifp(ctx);
 
 	if (cnt < IFCOUNTERS)
 		return if_get_counter_default(ifp, cnt);
 
 	return (0);
 }
 
 static void
 vmxnet3_update_admin_status(if_ctx_t ctx)
 {
 	struct vmxnet3_softc *sc;
 
 	sc = iflib_get_softc(ctx);
 	if (sc->vmx_ds->event != 0)
 		vmxnet3_evintr(sc);
 
 	vmxnet3_refresh_host_stats(sc);
 }
 
 static void
 vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
 {
 	/* Host stats refresh is global, so just trigger it on txq 0 */
 	if (qid == 0)
 		vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
 }
 
 static void
 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
 {
 	int idx, bit;
 
 	if (tag == 0 || tag > 4095)
 		return;
 
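 	/* The 4096-entry VLAN filter is a bitvector of 128 32-bit words. */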
 	idx = (tag >> 5) & 0x7F;
 	bit = tag & 0x1F;
 
 	/* Update our private VLAN bitvector. */
 	if (add)
 		sc->vmx_vlan_filter[idx] |= (1 << bit);
 	else
 		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
 }
 
 static void
 vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
 {
 
 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
 }
 
 static void
 vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
 {
 
 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
 }
 
 static u_int
 vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count)
 {
 	struct vmxnet3_softc *sc = arg;
 
 	if (count < VMXNET3_MULTICAST_MAX)
 		bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN],
 		    ETHER_ADDR_LEN);
 
 	return (1);
 }
 
 static void
 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
 {
 	struct ifnet *ifp;
 	struct vmxnet3_driver_shared *ds;
 	u_int mode;
 
 	ifp = sc->vmx_ifp;
 	ds = sc->vmx_ds;
 
 	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
 	if (flags & IFF_PROMISC)
 		mode |= VMXNET3_RXMODE_PROMISC;
 	if (flags & IFF_ALLMULTI)
 		mode |= VMXNET3_RXMODE_ALLMULTI;
 	else {
 		int cnt;
 
 		cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc);
 		if (cnt >= VMXNET3_MULTICAST_MAX) {
 			cnt = 0;
 			mode |= VMXNET3_RXMODE_ALLMULTI;
 		} else if (cnt > 0)
 			mode |= VMXNET3_RXMODE_MCAST;
 		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
 	}
 
 	ds->rxmode = mode;
 
 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
 }
 
 static void
 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
 {
 
 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
 }
 
 static int
 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
 {
 	uint32_t status;
 
 	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
 	return (!!(status & 0x1));
 }
 
 static void
 vmxnet3_link_status(struct vmxnet3_softc *sc)
 {
 	if_ctx_t ctx;
 	uint64_t speed;
 	int link;
 
 	ctx = sc->vmx_ctx;
 	link = vmxnet3_link_is_up(sc);
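 	/* The link speed is always reported as a fixed 10 Gbps. */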
 	speed = IF_Gbps(10);
 
 	if (link != 0 && sc->vmx_link_active == 0) {
 		sc->vmx_link_active = 1;
 		iflib_link_state_change(ctx, LINK_STATE_UP, speed);
 	} else if (link == 0 && sc->vmx_link_active != 0) {
 		sc->vmx_link_active = 0;
 		iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
 	}
 }
 
 static void
 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
 {
 	uint32_t ml, mh;
 
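 	/*
 	 * The MAC address is split across two registers: MACL holds
 	 * bytes 0-3 and MACH holds bytes 4-5.
 	 */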
 	ml  = sc->vmx_lladdr[0];
 	ml |= sc->vmx_lladdr[1] << 8;
 	ml |= sc->vmx_lladdr[2] << 16;
 	ml |= sc->vmx_lladdr[3] << 24;
 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
 
 	mh  = sc->vmx_lladdr[4];
 	mh |= sc->vmx_lladdr[5] << 8;
 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
 }
 
 static void
 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
 {
 	uint32_t ml, mh;
 
 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
 
 	sc->vmx_lladdr[0] = ml;
 	sc->vmx_lladdr[1] = ml >> 8;
 	sc->vmx_lladdr[2] = ml >> 16;
 	sc->vmx_lladdr[3] = ml >> 24;
 	sc->vmx_lladdr[4] = mh;
 	sc->vmx_lladdr[5] = mh >> 8;
 }
 
 static void
 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
 {
 	struct sysctl_oid *node, *txsnode;
 	struct sysctl_oid_list *list, *txslist;
 	struct UPT1_TxStats *txstats;
 	char namebuf[16];
 
 	txstats = &txq->vxtxq_ts->stats;
 
 	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
 	    NULL, "Transmit Queue");
 	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
 
 	/*
 	 * Add statistics reported by the host. These are updated by the
 	 * iflib txq timer on txq 0.
 	 */
 	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
 	    NULL, "Host Statistics");
 	txslist = SYSCTL_CHILDREN(txsnode);
 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
 	    &txstats->TSO_packets, "TSO packets");
 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
 	    &txstats->TSO_bytes, "TSO bytes");
 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
 	    &txstats->ucast_packets, "Unicast packets");
 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
 	    &txstats->ucast_bytes, "Unicast bytes");
 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
 	    &txstats->mcast_packets, "Multicast packets");
 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
 	    &txstats->mcast_bytes, "Multicast bytes");
 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
 	    &txstats->error, "Errors");
 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
 	    &txstats->discard, "Discards");
 }
 
 static void
 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
 {
 	struct sysctl_oid *node, *rxsnode;
 	struct sysctl_oid_list *list, *rxslist;
 	struct UPT1_RxStats *rxstats;
 	char namebuf[16];
 
 	rxstats = &rxq->vxrxq_rs->stats;
 
 	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
 	    NULL, "Receive Queue");
 	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
 
 	/*
 	 * Add statistics reported by the host. These are updated by the
 	 * iflib txq timer on txq 0.
 	 */
 	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
 	    NULL, "Host Statistics");
 	rxslist = SYSCTL_CHILDREN(rxsnode);
 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
 	    &rxstats->LRO_packets, "LRO packets");
 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
 	    &rxstats->LRO_bytes, "LRO bytes");
 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
 	    &rxstats->ucast_packets, "Unicast packets");
 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
 	    &rxstats->ucast_bytes, "Unicast bytes");
 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
 	    &rxstats->mcast_packets, "Multicast packets");
 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
 	    &rxstats->mcast_bytes, "Multicast bytes");
 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
 	    &rxstats->bcast_packets, "Broadcast packets");
 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
 	    &rxstats->bcast_bytes, "Broadcast bytes");
 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
 	    &rxstats->nobuffer, "No buffer");
 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
 	    &rxstats->error, "Errors");
 }
 
 static void
 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
 {
 	if_softc_ctx_t scctx;
 	struct sysctl_oid *node;
 	struct sysctl_oid_list *list;
 	int i;
 
 	scctx = sc->vmx_scctx;
 
 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
 		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
 
 		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
 		    "debug", CTLFLAG_RD, NULL, "");
 		list = SYSCTL_CHILDREN(node);
 
 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
 		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
 		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
 		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
 		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
 		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
 		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
 	}
 
 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
 		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
 
 		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
 		    "debug", CTLFLAG_RD, NULL, "");
 		list = SYSCTL_CHILDREN(node);
 
 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
 		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
 		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
 		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
 		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
 		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
 		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
 	}
 }
 
 static void
 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
 {
 	if_softc_ctx_t scctx;
 	int i;
 
 	scctx = sc->vmx_scctx;
 
 	for (i = 0; i < scctx->isc_ntxqsets; i++)
 		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
 	for (i = 0; i < scctx->isc_nrxqsets; i++)
 		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
 
 	vmxnet3_setup_debug_sysctl(sc, ctx, child);
 }
 
 static void
 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
 {
 	device_t dev;
 	struct sysctl_ctx_list *ctx;
 	struct sysctl_oid *tree;
 	struct sysctl_oid_list *child;
 
 	dev = sc->vmx_dev;
 	ctx = device_get_sysctl_ctx(dev);
 	tree = device_get_sysctl_tree(dev);
 	child = SYSCTL_CHILDREN(tree);
 
 	vmxnet3_setup_queue_sysctl(sc, ctx, child);
 }
 
 static void
 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
 {
 
 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
 }
 
 static uint32_t
 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
 {
 
 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
 }
 
 static void
 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
 {
 
 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
 }
 
 static void
 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
 {
 
 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
 }
 
 static uint32_t
 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
 {
 
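 	/*
 	 * Writing the command register triggers execution of the command;
 	 * the result is then read back from the same register.
 	 */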
 	vmxnet3_write_cmd(sc, cmd);
 	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
 }
 
 static void
 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
 {
 
 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
 }
 
 static void
 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
 {
 
 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
 }
 
 static int
 vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
 {
 	/* Not using interrupts for TX */
 	return (0);
 }
 
 static int
 vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
 {
 	struct vmxnet3_softc *sc;
 
 	sc = iflib_get_softc(ctx);
 	vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
 	return (0);
 }
 
 static void
 vmxnet3_link_intr_enable(if_ctx_t ctx)
 {
 	struct vmxnet3_softc *sc;
 
 	sc = iflib_get_softc(ctx);
 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
 }
 
 static void
 vmxnet3_intr_enable_all(if_ctx_t ctx)
 {
 	struct vmxnet3_softc *sc;
 	if_softc_ctx_t scctx;
 	int i;
 
 	sc = iflib_get_softc(ctx);
 	scctx = sc->vmx_scctx;
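 	/* Clear the global disable flag, then unmask each vector. */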
 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
 	for (i = 0; i < scctx->isc_vectors; i++)
 		vmxnet3_enable_intr(sc, i);
 }
 
 static void
 vmxnet3_intr_disable_all(if_ctx_t ctx)
 {
 	struct vmxnet3_softc *sc;
 	int i;
 
 	sc = iflib_get_softc(ctx);
 	/*
 	 * iflib may invoke this routine before vmxnet3_attach_post() has
 	 * run, which is before the top level shared data area is
 	 * initialized and the device made aware of it.
 	 */
 	if (sc->vmx_ds != NULL)
 		sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
 	for (i = 0; i < VMXNET3_MAX_INTRS; i++)
 		vmxnet3_disable_intr(sc, i);
 }
 
 /*
  * Since this is a purely paravirtualized device, we do not have
  * to worry about DMA coherency. But at times, we must make sure
  * both the compiler and CPU do not reorder memory operations.
  */
 static inline void
 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
 {
 
 	switch (type) {
 	case VMXNET3_BARRIER_RD:
 		rmb();
 		break;
 	case VMXNET3_BARRIER_WR:
 		wmb();
 		break;
 	case VMXNET3_BARRIER_RDWR:
 		mb();
 		break;
 	default:
 		panic("%s: bad barrier type %d", __func__, type);
 	}
 }
Index: head/sys/dev/vmware/vmxnet3/if_vmxvar.h
===================================================================
--- head/sys/dev/vmware/vmxnet3/if_vmxvar.h	(revision 357041)
+++ head/sys/dev/vmware/vmxnet3/if_vmxvar.h	(revision 357042)
@@ -1,193 +1,195 @@
 /*-
  * Copyright (c) 2013 Tsubai Masanari
  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
  * Copyright (c) 2018 Patrick Kelsey
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *
  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  *
  * $FreeBSD$
  */
 
 #ifndef _IF_VMXVAR_H
 #define _IF_VMXVAR_H
 
 struct vmxnet3_softc;
 
 /*
  * The number of Rx/Tx queues this driver prefers.
  */
 #define VMXNET3_DEF_RX_QUEUES	8
 #define VMXNET3_DEF_TX_QUEUES	8
 
 /*
  * The number of Rx rings in each Rx queue.
  */
 #define VMXNET3_RXRINGS_PERQ	2
 
 /*
  * The number of descriptors in each Rx/Tx ring.
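  * The ring sizes must be multiples of 32; the VMXNET3_MASK_*_NDESC values
  * below are used to check this.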
  */
 #define VMXNET3_DEF_TX_NDESC		512
 #define VMXNET3_MAX_TX_NDESC		4096
 #define VMXNET3_MIN_TX_NDESC		32
 #define VMXNET3_MASK_TX_NDESC		0x1F
 #define VMXNET3_DEF_RX_NDESC		256
 #define VMXNET3_MAX_RX_NDESC		2048
 #define VMXNET3_MIN_RX_NDESC		32
 #define VMXNET3_MASK_RX_NDESC		0x1F
 
 #define VMXNET3_MAX_TX_NCOMPDESC	VMXNET3_MAX_TX_NDESC
 #define VMXNET3_MAX_RX_NCOMPDESC \
     (VMXNET3_MAX_RX_NDESC * VMXNET3_RXRINGS_PERQ)
 
 struct vmxnet3_txring {
 	u_int			 vxtxr_next;
 	u_int			 vxtxr_ndesc;
 	int			 vxtxr_gen;
 	struct vmxnet3_txdesc	*vxtxr_txd;
 	bus_addr_t		 vxtxr_paddr;
 };
 
 struct vmxnet3_rxring {
 	struct vmxnet3_rxdesc	*vxrxr_rxd;
 	u_int			 vxrxr_ndesc;
 	int			 vxrxr_gen;
 	bus_addr_t		 vxrxr_paddr;
 };
 
 struct vmxnet3_comp_ring {
 	union {
 		struct vmxnet3_txcompdesc *txcd;
 		struct vmxnet3_rxcompdesc *rxcd;
 	}			 vxcr_u;
 	/*
 	 * vxcr_next is used on the transmit side to track the next index to
 	 * begin cleaning at.  It is not used on the receive side.
 	 */
 	u_int			 vxcr_next;
 	u_int			 vxcr_ndesc;
 	int			 vxcr_gen;
 	bus_addr_t		 vxcr_paddr;
 };
 
 struct vmxnet3_txqueue {
 	struct vmxnet3_softc		*vxtxq_sc;
 	int				 vxtxq_id;
 	int				 vxtxq_last_flush;
 	int				 vxtxq_intr_idx;
 	struct vmxnet3_txring		 vxtxq_cmd_ring;
 	struct vmxnet3_comp_ring	 vxtxq_comp_ring;
 	struct vmxnet3_txq_shared	*vxtxq_ts;
 	struct sysctl_oid_list		*vxtxq_sysctl;
 	char				 vxtxq_name[16];
 } __aligned(CACHE_LINE_SIZE);
 
 struct vmxnet3_rxqueue {
 	struct vmxnet3_softc		*vxrxq_sc;
 	int				 vxrxq_id;
 	int				 vxrxq_intr_idx;
 	struct if_irq			 vxrxq_irq;
 	struct vmxnet3_rxring		 vxrxq_cmd_ring[VMXNET3_RXRINGS_PERQ];
 	struct vmxnet3_comp_ring	 vxrxq_comp_ring;
 	struct vmxnet3_rxq_shared	*vxrxq_rs;
 	struct sysctl_oid_list		*vxrxq_sysctl;
 	char				 vxrxq_name[16];
 } __aligned(CACHE_LINE_SIZE);
 
 struct vmxnet3_softc {
 	device_t			 vmx_dev;
 	if_ctx_t			 vmx_ctx;
 	if_shared_ctx_t			 vmx_sctx;
 	if_softc_ctx_t			 vmx_scctx;
 	struct ifnet			*vmx_ifp;
 	struct vmxnet3_driver_shared	*vmx_ds;
 	uint32_t			 vmx_flags;
 #define VMXNET3_FLAG_RSS	0x0002
+#define VMXNET3_FLAG_SOFT_RSS	0x0004		/* Software RSS is enabled with
+						   a compatible algorithm. */
 
 	struct vmxnet3_rxqueue		*vmx_rxq;
 	struct vmxnet3_txqueue		*vmx_txq;
 
 	struct resource			*vmx_res0;
 	bus_space_tag_t			 vmx_iot0;
 	bus_space_handle_t		 vmx_ioh0;
 	struct resource			*vmx_res1;
 	bus_space_tag_t			 vmx_iot1;
 	bus_space_handle_t		 vmx_ioh1;
 
 	int				 vmx_link_active;
 
 	int				 vmx_intr_mask_mode;
 	int				 vmx_event_intr_idx;
 	struct if_irq			 vmx_event_intr_irq;
 
 	uint8_t				*vmx_mcast;
 	struct vmxnet3_rss_shared	*vmx_rss;
 	struct iflib_dma_info		 vmx_ds_dma;
 	struct iflib_dma_info		 vmx_qs_dma;
 	struct iflib_dma_info		 vmx_mcast_dma;
 	struct iflib_dma_info		 vmx_rss_dma;
 	struct ifmedia			*vmx_media;
 	uint32_t			 vmx_vlan_filter[4096/32];
 	uint8_t				 vmx_lladdr[ETHER_ADDR_LEN];
 };
 
 /*
  * The driver version we report to the hypervisor; we just keep
  * this value constant.
  */
 #define VMXNET3_DRIVER_VERSION 0x00010000
 
 /*
  * Max descriptors per Tx packet. We must limit the size of
  * any TSO packet based on the number of segments.
  */
 #define VMXNET3_TX_MAXSEGS		32  /* 64K @ 2K segment size */
 #define VMXNET3_TX_MAXSIZE		(VMXNET3_TX_MAXSEGS * MCLBYTES)
 #define VMXNET3_TSO_MAXSIZE		(VMXNET3_TX_MAXSIZE - ETHER_VLAN_ENCAP_LEN)
 
 /*
  * Maximum supported Tx segment size. The length field in the
  * Tx descriptor is 14 bits.
  *
  * XXX It's possible a descriptor length field of 0 means 2^14, but this
  * isn't confirmed, so limit to 2^14 - 1 for now.
  */
 #define VMXNET3_TX_MAXSEGSIZE		((1 << 14) - 1)
 
 /*
  * Maximum supported Rx segment size. The length field in the
  * Rx descriptor is 14 bits.
  *
  * The reference drivers skip zero-length descriptors, which seems to be a
  * strong indication that on the receive side, a descriptor length field of
  * zero does not mean 2^14.
  */
 #define VMXNET3_RX_MAXSEGSIZE		((1 << 14) - 1)
 
 /*
  * Predetermined size of the multicast MAC filter table. If the
  * number of multicast addresses exceeds this size, then the
  * ALL_MULTI mode is used instead.
  */
 #define VMXNET3_MULTICAST_MAX		32
 
 /*
  * IP protocols that we can perform Tx checksum offloading of.
  */
 #define VMXNET3_CSUM_OFFLOAD		(CSUM_TCP | CSUM_UDP)
 #define VMXNET3_CSUM_OFFLOAD_IPV6	(CSUM_TCP_IPV6 | CSUM_UDP_IPV6)
 
 #define VMXNET3_CSUM_ALL_OFFLOAD	\
     (VMXNET3_CSUM_OFFLOAD | VMXNET3_CSUM_OFFLOAD_IPV6 | CSUM_TSO)
 
 #endif /* _IF_VMXVAR_H */
Index: head/sys/modules/vmware/vmxnet3/Makefile
===================================================================
--- head/sys/modules/vmware/vmxnet3/Makefile	(revision 357041)
+++ head/sys/modules/vmware/vmxnet3/Makefile	(revision 357042)
@@ -1,33 +1,33 @@
 #
 # $FreeBSD$
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 # 1. Redistributions of source code must retain the above copyright
 #    notice, this list of conditions and the following disclaimer.
 # 2. Redistributions in binary form must reproduce the above copyright
 #    notice, this list of conditions and the following disclaimer in the
 #    documentation and/or other materials provided with the distribution.
 #
 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
 
 .PATH: ${SRCTOP}/sys/dev/vmware/vmxnet3
 
 KMOD=	if_vmx
 SRCS=	if_vmx.c
 SRCS+=	bus_if.h device_if.h pci_if.h ifdi_if.h
-SRCS+=	opt_inet.h opt_inet6.h
+SRCS+=	opt_inet.h opt_inet6.h opt_rss.h
 
 .include <bsd.kmod.mk>