Index: sys/dev/bnxt/if_bnxt.c =================================================================== --- sys/dev/bnxt/if_bnxt.c +++ sys/dev/bnxt/if_bnxt.c @@ -1640,7 +1640,8 @@ } for (i=0; i<scctx->isc_ntxqsets; i++) - iflib_softirq_alloc_generic(ctx, i + 1, IFLIB_INTR_TX, NULL, i, + /* TODO: Benchmark and see if tying to the RX irqs helps */ + iflib_softirq_alloc_generic(ctx, -1, IFLIB_INTR_TX, NULL, i, "tx_cp"); return rc; Index: sys/dev/e1000/e1000_80003es2lan.c =================================================================== --- sys/dev/e1000/e1000_80003es2lan.c +++ sys/dev/e1000/e1000_80003es2lan.c @@ -59,7 +59,6 @@ static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw); static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw); static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw); -static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex); static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw); static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw); @@ -68,7 +67,6 @@ static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, u16 data); static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw); -static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw); static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw); @@ -299,7 +297,7 @@ DEBUGFUNC("e1000_acquire_phy_80003es2lan"); mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; - return e1000_acquire_swfw_sync_80003es2lan(hw, mask); + return e1000_acquire_swfw_sync(hw, mask); } /** @@ -315,7 +313,7 @@ DEBUGFUNC("e1000_release_phy_80003es2lan"); mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; - e1000_release_swfw_sync_80003es2lan(hw, mask); + e1000_release_swfw_sync(hw, mask); } /** @@ -333,7 +331,7 @@ mask = E1000_SWFW_CSR_SM; - return e1000_acquire_swfw_sync_80003es2lan(hw, mask); + return e1000_acquire_swfw_sync(hw, mask); } /** @@ -350,7 +348,7 @@ mask = E1000_SWFW_CSR_SM; - e1000_release_swfw_sync_80003es2lan(hw, mask); + e1000_release_swfw_sync(hw, mask); } /** @@ -365,14 +363,14 @@ DEBUGFUNC("e1000_acquire_nvm_80003es2lan"); - ret_val = e1000_acquire_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); + ret_val = e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM); if (ret_val) return ret_val; ret_val = e1000_acquire_nvm_generic(hw); if (ret_val) - e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); return ret_val; } @@ -388,78 +386,7 @@ DEBUGFUNC("e1000_release_nvm_80003es2lan"); e1000_release_nvm_generic(hw); - e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); -} - -/** - * e1000_acquire_swfw_sync_80003es2lan - Acquire SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Acquire the SW/FW semaphore to access the PHY or NVM. The mask - * will also specify which port we're acquiring the lock for. 
- **/ -static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - u32 swmask = mask; - u32 fwmask = mask << 16; - s32 i = 0; - s32 timeout = 50; - - DEBUGFUNC("e1000_acquire_swfw_sync_80003es2lan"); - - while (i < timeout) { - if (e1000_get_hw_semaphore_generic(hw)) - return -E1000_ERR_SWFW_SYNC; - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - if (!(swfw_sync & (fwmask | swmask))) - break; - - /* Firmware currently using resource (fwmask) - * or other software thread using resource (swmask) - */ - e1000_put_hw_semaphore_generic(hw); - msec_delay_irq(5); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); - return -E1000_ERR_SWFW_SYNC; - } - - swfw_sync |= swmask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_release_swfw_sync_80003es2lan - Release SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Release the SW/FW semaphore used to access the PHY or NVM. The mask - * will also specify which port we're releasing the lock for. - **/ -static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - - DEBUGFUNC("e1000_release_swfw_sync_80003es2lan"); - - while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS) - ; /* Empty */ - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - swfw_sync &= ~mask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); } /** Index: sys/dev/e1000/e1000_82571.c =================================================================== --- sys/dev/e1000/e1000_82571.c +++ sys/dev/e1000/e1000_82571.c @@ -70,11 +70,8 @@ static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw); static s32 e1000_valid_led_default_82571(struct e1000_hw *hw, u16 *data); static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw); -static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw); static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw); static s32 e1000_get_phy_id_82571(struct e1000_hw *hw); -static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw); -static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw); static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw); static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw); static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, @@ -125,8 +122,8 @@ phy->ops.get_cable_length = e1000_get_cable_length_igp_2; phy->ops.read_reg = e1000_read_phy_reg_igp; phy->ops.write_reg = e1000_write_phy_reg_igp; - phy->ops.acquire = e1000_get_hw_semaphore_82571; - phy->ops.release = e1000_put_hw_semaphore_82571; + phy->ops.acquire = e1000_get_hw_semaphore; + phy->ops.release = e1000_put_hw_semaphore; break; case e1000_82573: phy->type = e1000_phy_m88; @@ -138,12 +135,11 @@ phy->ops.get_cable_length = e1000_get_cable_length_m88; phy->ops.read_reg = e1000_read_phy_reg_m88; phy->ops.write_reg = e1000_write_phy_reg_m88; - phy->ops.acquire = e1000_get_hw_semaphore_82571; - phy->ops.release = e1000_put_hw_semaphore_82571; + phy->ops.acquire = e1000_get_hw_semaphore; + phy->ops.release = e1000_put_hw_semaphore; break; case e1000_82574: case e1000_82583: - E1000_MUTEX_INIT(&hw->dev_spec._82571.swflag_mutex); phy->type = e1000_phy_bm; phy->ops.get_cfg_done = e1000_get_cfg_done_generic; @@ -506,99 +502,21 @@ } /** - * e1000_get_hw_semaphore_82571 - 
Acquire hardware semaphore - * @hw: pointer to the HW structure - * - * Acquire the HW semaphore to access the PHY or NVM - **/ -static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw) -{ - u32 swsm; - s32 sw_timeout = hw->nvm.word_size + 1; - s32 fw_timeout = hw->nvm.word_size + 1; - s32 i = 0; - - DEBUGFUNC("e1000_get_hw_semaphore_82571"); - - /* If we have timedout 3 times on trying to acquire - * the inter-port SMBI semaphore, there is old code - * operating on the other port, and it is not - * releasing SMBI. Modify the number of times that - * we try for the semaphore to interwork with this - * older code. - */ - if (hw->dev_spec._82571.smb_counter > 2) - sw_timeout = 1; - - /* Get the SW semaphore */ - while (i < sw_timeout) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - i++; - } - - if (i == sw_timeout) { - DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); - hw->dev_spec._82571.smb_counter++; - } - /* Get the FW semaphore. */ - for (i = 0; i < fw_timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); - - /* Semaphore acquired if bit latched */ - if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) - break; - - usec_delay(50); - } - - if (i == fw_timeout) { - /* Release semaphores */ - e1000_put_hw_semaphore_82571(hw); - DEBUGOUT("Driver can't access the NVM\n"); - return -E1000_ERR_NVM; - } - - return E1000_SUCCESS; -} - -/** - * e1000_put_hw_semaphore_82571 - Release hardware semaphore - * @hw: pointer to the HW structure - * - * Release hardware semaphore used to access the PHY or NVM - **/ -static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw) -{ - u32 swsm; - - DEBUGFUNC("e1000_put_hw_semaphore_generic"); - - swsm = E1000_READ_REG(hw, E1000_SWSM); - - swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); - - E1000_WRITE_REG(hw, E1000_SWSM, swsm); -} - -/** - * e1000_get_hw_semaphore_82573 - Acquire hardware semaphore + * e1000_get_hw_semaphore_82574 - Acquire hardware semaphore * @hw: pointer to the HW structure * * Acquire the HW semaphore during reset. * **/ -static s32 e1000_get_hw_semaphore_82573(struct e1000_hw *hw) +static s32 +e1000_get_hw_semaphore_82574(struct e1000_hw *hw) { u32 extcnf_ctrl; s32 i = 0; - + /* XXX assert that mutex is held */ DEBUGFUNC("e1000_get_hw_semaphore_82573"); + ASSERT_CTX_LOCK_HELD(hw); extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); do { extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; @@ -614,7 +532,7 @@ if (i == MDIO_OWNERSHIP_TIMEOUT) { /* Release semaphores */ - e1000_put_hw_semaphore_82573(hw); + e1000_put_hw_semaphore_82574(hw); DEBUGOUT("Driver can't access the PHY\n"); return -E1000_ERR_PHY; } @@ -623,17 +541,18 @@ } /** - * e1000_put_hw_semaphore_82573 - Release hardware semaphore + * e1000_put_hw_semaphore_82574 - Release hardware semaphore * @hw: pointer to the HW structure * * Release hardware semaphore used during reset. * **/ -static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw) +static void +e1000_put_hw_semaphore_82574(struct e1000_hw *hw) { u32 extcnf_ctrl; - DEBUGFUNC("e1000_put_hw_semaphore_82573"); + DEBUGFUNC("e1000_put_hw_semaphore_82574"); extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); extcnf_ctrl &= ~E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; @@ -641,41 +560,6 @@ } /** - * e1000_get_hw_semaphore_82574 - Acquire hardware semaphore - * @hw: pointer to the HW structure - * - * Acquire the HW semaphore to access the PHY or NVM. 
- * - **/ -static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw) -{ - s32 ret_val; - - DEBUGFUNC("e1000_get_hw_semaphore_82574"); - - E1000_MUTEX_LOCK(&hw->dev_spec._82571.swflag_mutex); - ret_val = e1000_get_hw_semaphore_82573(hw); - if (ret_val) - E1000_MUTEX_UNLOCK(&hw->dev_spec._82571.swflag_mutex); - return ret_val; -} - -/** - * e1000_put_hw_semaphore_82574 - Release hardware semaphore - * @hw: pointer to the HW structure - * - * Release hardware semaphore used to access the PHY or NVM - * - **/ -static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw) -{ - DEBUGFUNC("e1000_put_hw_semaphore_82574"); - - e1000_put_hw_semaphore_82573(hw); - E1000_MUTEX_UNLOCK(&hw->dev_spec._82571.swflag_mutex); -} - -/** * e1000_set_d0_lplu_state_82574 - Set Low Power Linkup D0 state * @hw: pointer to the HW structure * @active: TRUE to enable LPLU, FALSE to disable @@ -746,7 +630,7 @@ DEBUGFUNC("e1000_acquire_nvm_82571"); - ret_val = e1000_get_hw_semaphore_82571(hw); + ret_val = e1000_get_hw_semaphore(hw); if (ret_val) return ret_val; @@ -759,7 +643,7 @@ } if (ret_val) - e1000_put_hw_semaphore_82571(hw); + e1000_put_hw_semaphore(hw); return ret_val; } @@ -775,7 +659,7 @@ DEBUGFUNC("e1000_release_nvm_82571"); e1000_release_nvm_generic(hw); - e1000_put_hw_semaphore_82571(hw); + e1000_put_hw_semaphore(hw); } /** @@ -1092,8 +976,6 @@ */ switch (hw->mac.type) { case e1000_82573: - ret_val = e1000_get_hw_semaphore_82573(hw); - break; case e1000_82574: case e1000_82583: ret_val = e1000_get_hw_semaphore_82574(hw); @@ -1110,10 +992,6 @@ /* Must release MDIO ownership and mutex after MAC reset. */ switch (hw->mac.type) { case e1000_82573: - /* Release mutex only if the hw semaphore is acquired */ - if (!ret_val) - e1000_put_hw_semaphore_82573(hw); - break; case e1000_82574: case e1000_82583: /* Release mutex only if the hw semaphore is acquired */ @@ -1121,6 +999,7 @@ e1000_put_hw_semaphore_82574(hw); break; default: + panic("unknown mac type %x\n", hw->mac.type); break; } Index: sys/dev/e1000/e1000_82575.c =================================================================== --- sys/dev/e1000/e1000_82575.c +++ sys/dev/e1000/e1000_82575.c @@ -79,11 +79,9 @@ static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, u16 data); static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw); -static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask); static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, u16 *speed, u16 *duplex); static s32 e1000_get_phy_id_82575(struct e1000_hw *hw); -static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask); static bool e1000_sgmii_active_82575(struct e1000_hw *hw); static s32 e1000_reset_init_script_82575(struct e1000_hw *hw); static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw); @@ -511,12 +509,8 @@ /* link info */ mac->ops.get_link_up_info = e1000_get_link_up_info_82575; /* acquire SW_FW sync */ - mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_82575; - mac->ops.release_swfw_sync = e1000_release_swfw_sync_82575; - if (mac->type >= e1000_i210) { - mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_i210; - mac->ops.release_swfw_sync = e1000_release_swfw_sync_i210; - } + mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync; + mac->ops.release_swfw_sync = e1000_release_swfw_sync; /* set lan id for port to determine which phy lock to use */ hw->mac.ops.set_lan_id(hw); @@ -988,7 +982,7 @@ DEBUGFUNC("e1000_acquire_nvm_82575"); - ret_val = e1000_acquire_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); + ret_val = 
e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM); if (ret_val) goto out; @@ -1019,7 +1013,7 @@ ret_val = e1000_acquire_nvm_generic(hw); if (ret_val) - e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); out: return ret_val; @@ -1038,83 +1032,7 @@ e1000_release_nvm_generic(hw); - e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); -} - -/** - * e1000_acquire_swfw_sync_82575 - Acquire SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Acquire the SW/FW semaphore to access the PHY or NVM. The mask - * will also specify which port we're acquiring the lock for. - **/ -static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - u32 swmask = mask; - u32 fwmask = mask << 16; - s32 ret_val = E1000_SUCCESS; - s32 i = 0, timeout = 200; - - DEBUGFUNC("e1000_acquire_swfw_sync_82575"); - - while (i < timeout) { - if (e1000_get_hw_semaphore_generic(hw)) { - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - if (!(swfw_sync & (fwmask | swmask))) - break; - - /* - * Firmware currently using resource (fwmask) - * or other software thread using resource (swmask) - */ - e1000_put_hw_semaphore_generic(hw); - msec_delay_irq(5); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync |= swmask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); - -out: - return ret_val; -} - -/** - * e1000_release_swfw_sync_82575 - Release SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Release the SW/FW semaphore used to access the PHY or NVM. The mask - * will also specify which port we're releasing the lock for. 
- **/ -static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - - DEBUGFUNC("e1000_release_swfw_sync_82575"); - - while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS) - ; /* Empty */ - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - swfw_sync &= ~mask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); } /** Index: sys/dev/e1000/e1000_hw.h =================================================================== --- sys/dev/e1000/e1000_hw.h +++ sys/dev/e1000/e1000_hw.h @@ -934,7 +934,6 @@ struct e1000_dev_spec_82571 { bool laa_is_present; u32 smb_counter; - E1000_MUTEX swflag_mutex; }; struct e1000_dev_spec_80003es2lan { @@ -958,8 +957,6 @@ struct e1000_dev_spec_ich8lan { bool kmrn_lock_loss_workaround_enabled; struct e1000_shadow_ram shadow_ram[E1000_SHADOW_RAM_WORDS]; - E1000_MUTEX nvm_mutex; - E1000_MUTEX swflag_mutex; bool nvm_k1_enabled; bool disable_k1_off; bool eee_disable; Index: sys/dev/e1000/e1000_i210.h =================================================================== --- sys/dev/e1000/e1000_i210.h +++ sys/dev/e1000/e1000_i210.h @@ -43,8 +43,6 @@ u16 words, u16 *data); s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); -s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask); -void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask); s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data); s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, Index: sys/dev/e1000/e1000_i210.c =================================================================== --- sys/dev/e1000/e1000_i210.c +++ sys/dev/e1000/e1000_i210.c @@ -37,7 +37,6 @@ static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw); static void e1000_release_nvm_i210(struct e1000_hw *hw); -static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw); static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw); @@ -58,7 +57,7 @@ DEBUGFUNC("e1000_acquire_nvm_i210"); - ret_val = e1000_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); + ret_val = e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM); return ret_val; } @@ -74,152 +73,7 @@ { DEBUGFUNC("e1000_release_nvm_i210"); - e1000_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); -} - -/** - * e1000_acquire_swfw_sync_i210 - Acquire SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Acquire the SW/FW semaphore to access the PHY or NVM. The mask - * will also specify which port we're acquiring the lock for. 
- **/ -s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - u32 swmask = mask; - u32 fwmask = mask << 16; - s32 ret_val = E1000_SUCCESS; - s32 i = 0, timeout = 200; /* FIXME: find real value to use here */ - - DEBUGFUNC("e1000_acquire_swfw_sync_i210"); - - while (i < timeout) { - if (e1000_get_hw_semaphore_i210(hw)) { - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - if (!(swfw_sync & (fwmask | swmask))) - break; - - /* - * Firmware currently using resource (fwmask) - * or other software thread using resource (swmask) - */ - e1000_put_hw_semaphore_generic(hw); - msec_delay_irq(5); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync |= swmask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); - -out: - return ret_val; -} - -/** - * e1000_release_swfw_sync_i210 - Release SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Release the SW/FW semaphore used to access the PHY or NVM. The mask - * will also specify which port we're releasing the lock for. - **/ -void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - - DEBUGFUNC("e1000_release_swfw_sync_i210"); - - while (e1000_get_hw_semaphore_i210(hw) != E1000_SUCCESS) - ; /* Empty */ - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - swfw_sync &= ~mask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); -} - -/** - * e1000_get_hw_semaphore_i210 - Acquire hardware semaphore - * @hw: pointer to the HW structure - * - * Acquire the HW semaphore to access the PHY or NVM - **/ -static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw) -{ - u32 swsm; - s32 timeout = hw->nvm.word_size + 1; - s32 i = 0; - - DEBUGFUNC("e1000_get_hw_semaphore_i210"); - - /* Get the SW semaphore */ - while (i < timeout) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - i++; - } - - if (i == timeout) { - /* In rare circumstances, the SW semaphore may already be held - * unintentionally. Clear the semaphore once before giving up. - */ - if (hw->dev_spec._82575.clear_semaphore_once) { - hw->dev_spec._82575.clear_semaphore_once = FALSE; - e1000_put_hw_semaphore_generic(hw); - for (i = 0; i < timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - } - } - - /* If we do not have the semaphore here, we have to give up. */ - if (i == timeout) { - DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); - return -E1000_ERR_NVM; - } - } - - /* Get the FW semaphore. 
*/ - for (i = 0; i < timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); - - /* Semaphore acquired if bit latched */ - if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) - break; - - usec_delay(50); - } - - if (i == timeout) { - /* Release semaphores */ - e1000_put_hw_semaphore_generic(hw); - DEBUGOUT("Driver can't access the NVM\n"); - return -E1000_ERR_NVM; - } - - return E1000_SUCCESS; + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); } /** Index: sys/dev/e1000/e1000_ich8lan.c =================================================================== --- sys/dev/e1000/e1000_ich8lan.c +++ sys/dev/e1000/e1000_ich8lan.c @@ -694,9 +694,6 @@ dev_spec->shadow_ram[i].value = 0xFFFF; } - E1000_MUTEX_INIT(&dev_spec->nvm_mutex); - E1000_MUTEX_INIT(&dev_spec->swflag_mutex); - /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_ich8lan; nvm->ops.release = e1000_release_nvm_ich8lan; @@ -1847,7 +1844,7 @@ { DEBUGFUNC("e1000_acquire_nvm_ich8lan"); - E1000_MUTEX_LOCK(&hw->dev_spec.ich8lan.nvm_mutex); + ASSERT_CTX_LOCK_HELD(hw); return E1000_SUCCESS; } @@ -1862,9 +1859,7 @@ { DEBUGFUNC("e1000_release_nvm_ich8lan"); - E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.nvm_mutex); - - return; + ASSERT_CTX_LOCK_HELD(hw); } /** @@ -1881,7 +1876,7 @@ DEBUGFUNC("e1000_acquire_swflag_ich8lan"); - E1000_MUTEX_LOCK(&hw->dev_spec.ich8lan.swflag_mutex); + ASSERT_CTX_LOCK_HELD(hw); while (timeout) { extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); @@ -1922,9 +1917,6 @@ } out: - if (ret_val) - E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); - return ret_val; } @@ -1949,10 +1941,6 @@ } else { DEBUGOUT("Semaphore unexpectedly released by sw/fw/hw\n"); } - - E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); - - return; } /** @@ -5022,8 +5010,6 @@ E1000_WRITE_REG(hw, E1000_FEXTNVM3, reg); } - if (!ret_val) - E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); if (ctrl & E1000_CTRL_PHY_RST) { ret_val = hw->phy.ops.get_cfg_done(hw); Index: sys/dev/e1000/e1000_mac.h =================================================================== --- sys/dev/e1000/e1000_mac.h +++ sys/dev/e1000/e1000_mac.h @@ -60,7 +60,6 @@ s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw); void e1000_set_lan_id_single_port(struct e1000_hw *hw); void e1000_set_lan_id_multi_port_pci(struct e1000_hw *hw); -s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw); s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, u16 *duplex); s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw, @@ -85,11 +84,15 @@ void e1000_clear_vfta_generic(struct e1000_hw *hw); void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count); void e1000_pcix_mmrbc_workaround_generic(struct e1000_hw *hw); -void e1000_put_hw_semaphore_generic(struct e1000_hw *hw); s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw); void e1000_reset_adaptive_generic(struct e1000_hw *hw); void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop); void e1000_update_adaptive_generic(struct e1000_hw *hw); void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value); +s32 e1000_get_hw_semaphore(struct e1000_hw *hw); +void e1000_put_hw_semaphore(struct e1000_hw *hw); +s32 e1000_acquire_swfw_sync(struct e1000_hw *hw, u16 mask); +void e1000_release_swfw_sync(struct e1000_hw *hw, u16 mask); + #endif Index: sys/dev/e1000/e1000_mac.c =================================================================== --- sys/dev/e1000/e1000_mac.c +++ 
sys/dev/e1000/e1000_mac.c @@ -1707,76 +1707,6 @@ } /** - * e1000_get_hw_semaphore_generic - Acquire hardware semaphore - * @hw: pointer to the HW structure - * - * Acquire the HW semaphore to access the PHY or NVM - **/ -s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw) -{ - u32 swsm; - s32 timeout = hw->nvm.word_size + 1; - s32 i = 0; - - DEBUGFUNC("e1000_get_hw_semaphore_generic"); - - /* Get the SW semaphore */ - while (i < timeout) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); - return -E1000_ERR_NVM; - } - - /* Get the FW semaphore. */ - for (i = 0; i < timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); - - /* Semaphore acquired if bit latched */ - if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) - break; - - usec_delay(50); - } - - if (i == timeout) { - /* Release semaphores */ - e1000_put_hw_semaphore_generic(hw); - DEBUGOUT("Driver can't access the NVM\n"); - return -E1000_ERR_NVM; - } - - return E1000_SUCCESS; -} - -/** - * e1000_put_hw_semaphore_generic - Release hardware semaphore - * @hw: pointer to the HW structure - * - * Release hardware semaphore used to access the PHY or NVM - **/ -void e1000_put_hw_semaphore_generic(struct e1000_hw *hw) -{ - u32 swsm; - - DEBUGFUNC("e1000_put_hw_semaphore_generic"); - - swsm = E1000_READ_REG(hw, E1000_SWSM); - - swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); - - E1000_WRITE_REG(hw, E1000_SWSM, swsm); -} - -/** * e1000_get_auto_rd_done_generic - Check for auto read completion * @hw: pointer to the HW structure * @@ -2251,3 +2181,186 @@ return E1000_SUCCESS; } + +/** + * e1000_get_hw_semaphore - Acquire hardware semaphore + * @hw: pointer to the HW structure + * + * Acquire the HW semaphore to access the PHY or NVM + **/ +s32 e1000_get_hw_semaphore(struct e1000_hw *hw) +{ + u32 swsm; + s32 timeout = hw->nvm.word_size + 1; + s32 i = 0; + + DEBUGFUNC("e1000_get_hw_semaphore"); +#ifdef notyet + /* _82571 */ + /* If we have timedout 3 times on trying to acquire + * the inter-port SMBI semaphore, there is old code + * operating on the other port, and it is not + * releasing SMBI. Modify the number of times that + * we try for the semaphore to interwork with this + * older code. + */ + if (hw->dev_spec._82571.smb_counter > 2) + sw_timeout = 1; + +#endif + /* Get the SW semaphore */ + while (i < timeout) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + i++; + } + + if (i == timeout) { +#ifdef notyet + /* + * XXX This sounds more like a driver bug whereby we either + * recursed accidentally or missed clearing it previously + */ + /* In rare circumstances, the SW semaphore may already be held + * unintentionally. Clear the semaphore once before giving up. + */ + if (hw->dev_spec._82575.clear_semaphore_once) { + hw->dev_spec._82575.clear_semaphore_once = FALSE; + e1000_put_hw_semaphore_generic(hw); + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + } + } +#endif + + DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); + return -E1000_ERR_NVM; + } + + /* Get the FW semaphore. 
*/ + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); + + /* Semaphore acquired if bit latched */ + if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) + break; + + usec_delay(50); + } + + if (i == timeout) { + /* Release semaphores */ + e1000_put_hw_semaphore(hw); + DEBUGOUT("Driver can't access the NVM\n"); + return -E1000_ERR_NVM; + } + + return E1000_SUCCESS; +} + +/** + * e1000_put_hw_semaphore - Release hardware semaphore + * @hw: pointer to the HW structure + * + * Release hardware semaphore used to access the PHY or NVM + **/ +void e1000_put_hw_semaphore(struct e1000_hw *hw) +{ + u32 swsm; + + DEBUGFUNC("e1000_put_hw_semaphore"); + + swsm = E1000_READ_REG(hw, E1000_SWSM); + + swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); + + E1000_WRITE_REG(hw, E1000_SWSM, swsm); +} + + +/** + * e1000_acquire_swfw_sync - Acquire SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Acquire the SW/FW semaphore to access the PHY or NVM. The mask + * will also specify which port we're acquiring the lock for. + **/ +s32 +e1000_acquire_swfw_sync(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + u32 swmask = mask; + u32 fwmask = mask << 16; + s32 ret_val = E1000_SUCCESS; + s32 i = 0, timeout = 200; + + DEBUGFUNC("e1000_acquire_swfw_sync"); + ASSERT_NO_LOCKS(); + while (i < timeout) { + if (e1000_get_hw_semaphore(hw)) { + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + if (!(swfw_sync & (fwmask | swmask))) + break; + + /* + * Firmware currently using resource (fwmask) + * or other software thread using resource (swmask) + */ + e1000_put_hw_semaphore(hw); + msec_delay_irq(5); + i++; + } + + if (i == timeout) { + DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync |= swmask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore(hw); + +out: + return ret_val; +} + +/** + * e1000_release_swfw_sync - Release SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Release the SW/FW semaphore used to access the PHY or NVM. The mask + * will also specify which port we're releasing the lock for. 
+ **/ +void +e1000_release_swfw_sync(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + + DEBUGFUNC("e1000_release_swfw_sync"); + + while (e1000_get_hw_semaphore(hw) != E1000_SUCCESS) + ; /* Empty */ + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + swfw_sync &= ~mask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore(hw); +} + Index: sys/dev/e1000/e1000_osdep.h =================================================================== --- sys/dev/e1000/e1000_osdep.h +++ sys/dev/e1000/e1000_osdep.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,14 @@ #include #include #include + +#include +#include +#include +#include + + + #include #include #include @@ -58,11 +67,41 @@ #define ASSERT(x) if(!(x)) panic("EM: x") +#define us_scale(x) max(1, (x/(1000000/hz))) +static inline int +ms_scale(int x) { + if (hz == 1000) { + return (x); + } else if (hz > 1000) { + return (x*(hz/1000)); + } else { + return (max(1, x/(1000/hz))); + } +} +extern int cold; + +static inline void +safe_pause_us(int x) { + if (cold) { + DELAY(x); + } else { + pause("e1000_delay", max(1, x/(1000000/hz))); + } +} + +static inline void +safe_pause_ms(int x) { + if (cold) { + DELAY(x*1000); + } else { + pause("e1000_delay", ms_scale(x)); + } +} -#define usec_delay(x) DELAY(x) +#define usec_delay(x) safe_pause_us(x) #define usec_delay_irq(x) usec_delay(x) -#define msec_delay(x) DELAY(1000*(x)) -#define msec_delay_irq(x) DELAY(1000*(x)) +#define msec_delay(x) safe_pause_ms(x) +#define msec_delay_irq(x) msec_delay(x) /* Enable/disable debugging statements in shared code */ #define DBG 0 @@ -81,16 +120,6 @@ #define CMD_MEM_WRT_INVALIDATE 0x0010 /* BIT_4 */ #define PCI_COMMAND_REGISTER PCIR_COMMAND -/* Mutex used in the shared code */ -#define E1000_MUTEX struct mtx -#define E1000_MUTEX_INIT(mutex) mtx_init((mutex), #mutex, \ - MTX_NETWORK_LOCK, \ - MTX_DEF | MTX_DUPOK) -#define E1000_MUTEX_DESTROY(mutex) mtx_destroy(mutex) -#define E1000_MUTEX_LOCK(mutex) mtx_lock(mutex) -#define E1000_MUTEX_TRYLOCK(mutex) mtx_trylock(mutex) -#define E1000_MUTEX_UNLOCK(mutex) mtx_unlock(mutex) - typedef uint64_t u64; typedef uint32_t u32; typedef uint16_t u16; @@ -116,6 +145,12 @@ #endif #endif /*__FreeBSD_version < 800000 */ +#ifdef INVARIANTS +#define ASSERT_CTX_LOCK_HELD(hw) (sx_assert(iflib_ctx_lock_get(((struct e1000_osdep *)hw->back)->ctx), SX_XLOCKED)) +#else +#define ASSERT_CTX_LOCK_HELD(hw) +#endif + #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) @@ -135,6 +170,7 @@ bus_space_tag_t flash_bus_space_tag; bus_space_handle_t flash_bus_space_handle; device_t dev; + if_ctx_t ctx; }; #define E1000_REGISTER(hw, reg) (((hw)->mac.type >= e1000_82543) \ @@ -216,5 +252,22 @@ bus_space_write_2(((struct e1000_osdep *)(hw)->back)->flash_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->flash_bus_space_handle, reg, value) + +#if defined(INVARIANTS) +#include + +#define ASSERT_NO_LOCKS() \ + do { \ + int unknown_locks = curthread->td_locks - mtx_owned(&Giant); \ + if (unknown_locks > 0) { \ + WITNESS_WARN(WARN_GIANTOK|WARN_SLEEPOK|WARN_PANIC, NULL, "unexpected non-sleepable lock"); \ + } \ + MPASS(curthread->td_rw_rlocks == 0); \ + MPASS(curthread->td_lk_slocks == 0); \ + } while (0) +#else +#define ASSERT_NO_LOCKS() +#endif + #endif /* _FREEBSD_OS_H_ */ Index: sys/dev/e1000/em_txrx.c =================================================================== --- sys/dev/e1000/em_txrx.c +++ sys/dev/e1000/em_txrx.c @@ -66,6 +66,7 @@ static int 
em_determine_rsstype(u32 pkt_info); extern int em_intr(void *arg); + struct if_txrx em_txrx = { em_isc_txd_encap, em_isc_txd_flush, @@ -74,7 +75,7 @@ em_isc_rxd_pkt_get, em_isc_rxd_refill, em_isc_rxd_flush, - em_intr + em_intr, }; struct if_txrx lem_txrx = { @@ -85,7 +86,7 @@ lem_isc_rxd_pkt_get, lem_isc_rxd_refill, em_isc_rxd_flush, - em_intr + em_intr, }; extern if_shared_ctx_t em_sctx; @@ -523,8 +524,8 @@ for (i = 0, next_pidx = pidx; i < count; i++) { rxd = &rxr->rx_base[next_pidx]; rxd->read.buffer_addr = htole64(paddrs[i]); - /* DD bits must be cleared */ - rxd->wb.upper.status_error = 0; + /* Zero out rx desc status */ + rxd->wb.upper.status_error &= htole32(~0xFF); if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; @@ -551,14 +552,9 @@ struct e1000_rx_desc *rxd; u32 staterr = 0; int cnt, i; + budget = min(budget, scctx->isc_nrxd[0]); - if (budget == 1) { - rxd = (struct e1000_rx_desc *)&rxr->rx_base[idx]; - staterr = rxd->status; - return (staterr & E1000_RXD_STAT_DD); - } - - for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { + for (cnt = 0, i = idx; cnt <= budget;) { rxd = (struct e1000_rx_desc *)&rxr->rx_base[i]; staterr = rxd->status; @@ -571,6 +567,7 @@ if (staterr & E1000_RXD_STAT_EOP) cnt++; } + MPASS(cnt <= scctx->isc_nrxd[0]); return (cnt); } @@ -584,14 +581,9 @@ union e1000_rx_desc_extended *rxd; u32 staterr = 0; int cnt, i; + budget = min(budget, scctx->isc_nrxd[0]); - if (budget == 1) { - rxd = &rxr->rx_base[idx]; - staterr = le32toh(rxd->wb.upper.status_error); - return (staterr & E1000_RXD_STAT_DD); - } - - for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { + for (cnt = 0, i = idx; cnt <= budget;) { rxd = &rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); @@ -606,6 +598,7 @@ cnt++; } + MPASS(cnt <= scctx->isc_nrxd[0]); return (cnt); } @@ -694,7 +687,8 @@ pkt_info = le32toh(rxd->wb.lower.mrq); /* Error Checking then decrement count */ - MPASS ((staterr & E1000_RXD_STAT_DD) != 0); + KASSERT(staterr & E1000_RXD_STAT_DD, + ("cidx=%d i=%d iri_len=%d", cidx, i, ri->iri_len)); len = le16toh(rxd->wb.upper.length); ri->iri_len += len; Index: sys/dev/e1000/if_em.h =================================================================== --- sys/dev/e1000/if_em.h +++ sys/dev/e1000/if_em.h @@ -434,6 +434,7 @@ u32 eims; /* This queue's EIMS bit */ u32 me; struct tx_ring txr; + struct if_irq que_irq; }; struct em_rx_queue { @@ -443,7 +444,7 @@ u32 eims; struct rx_ring rxr; u64 irqs; - struct if_irq que_irq; + struct if_irq que_irq; }; /* Our adapter structure */ Index: sys/dev/e1000/if_em.c =================================================================== --- sys/dev/e1000/if_em.c +++ sys/dev/e1000/if_em.c @@ -483,7 +483,7 @@ .isc_vendor_info = em_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, - .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP, + .isc_flags = IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, @@ -511,7 +511,7 @@ .isc_vendor_info = igb_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, - .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP, + .isc_flags = IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, @@ -723,7 +723,7 @@ return (ENXIO); } - adapter->ctx = ctx; + adapter->ctx = adapter->osdep.ctx = ctx; adapter->dev = adapter->osdep.dev = dev; scctx = adapter->shared = iflib_get_softc_ctx(ctx); adapter->media = iflib_get_media(ctx); 
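[Note on the em_isc_rxd_available()/lem_isc_rxd_available() hunks above: the single-descriptor fast path is dropped and the scan is clamped to the caller's budget. Below is a minimal, self-contained C sketch of that counting pattern; the struct, constants, and function name are illustrative only and are not part of the patch, and an extra one-ring guard is added so the sketch cannot spin if EOP is never seen.]

#include <stdint.h>

struct toy_rxd {
	uint32_t status;		/* status written back by the NIC */
};

#define TOY_STAT_DD	0x01		/* descriptor done */
#define TOY_STAT_EOP	0x02		/* end of packet */

/*
 * Count complete packets ready for harvest, starting at 'idx' in a ring of
 * 'nrxd' descriptors.  Stop at the first descriptor the hardware has not
 * written back, once the count passes 'budget', or after scanning one full
 * ring (the last guard is extra here, for safety in the sketch).
 */
static int
toy_rxd_available(const struct toy_rxd *ring, int nrxd, int idx, int budget)
{
	uint32_t status;
	int cnt, i, scanned;

	if (budget > nrxd)
		budget = nrxd;
	for (cnt = 0, scanned = 0, i = idx;
	    cnt <= budget && scanned < nrxd; scanned++) {
		status = ring[i].status;
		if ((status & TOY_STAT_DD) == 0)
			break;			/* not yet completed */
		if (++i == nrxd)
			i = 0;			/* wrap the ring index */
		if (status & TOY_STAT_EOP)
			cnt++;			/* count packets, not fragments */
	}
	return (cnt);
}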
@@ -1405,7 +1405,9 @@ { struct adapter *adapter = arg; u32 reg_icr; + int is_igb; + is_igb = (adapter->hw.mac.type >= igb_mac_min); ++adapter->link_irq; MPASS(adapter->hw.back != NULL); reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); @@ -1413,26 +1415,29 @@ if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { - em_handle_link(adapter->ctx); + if (is_igb) { + if (reg_icr & E1000_ICR_LSC) + em_handle_link(adapter->ctx); + E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); } else { + if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { + em_handle_link(adapter->ctx); + } E1000_WRITE_REG(&adapter->hw, E1000_IMS, - EM_MSIX_LINK | E1000_IMS_LSC); - if (adapter->hw.mac.type >= igb_mac_min) - E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); - } + EM_MSIX_LINK | E1000_IMS_LSC); - /* - * Because we must read the ICR for this interrupt - * it may clear other causes using autoclear, for - * this reason we simply create a soft interrupt - * for all these vectors. - */ - if (reg_icr && adapter->hw.mac.type < igb_mac_min) { - E1000_WRITE_REG(&adapter->hw, - E1000_ICS, adapter->ims); + /* + * Because we must read the ICR for this interrupt + * it may clear other causes using autoclear, for + * this reason we simply create a soft interrupt + * for all these vectors. + */ + if (reg_icr) { + E1000_WRITE_REG(&adapter->hw, + E1000_ICS, adapter->ims); + } } - return (FILTER_HANDLED); } @@ -1670,13 +1675,6 @@ return; iflib_admin_intr_deferred(ctx); - /* Reset LAA into RAR[0] on 82571 */ - if ((adapter->hw.mac.type == e1000_82571) && - e1000_get_laa_state_82571(&adapter->hw)) - e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); - - if (adapter->hw.mac.type < em_mac_min) - lem_smartspeed(adapter); /* Mask to use in the irq trigger */ if (adapter->intr_type == IFLIB_INTR_MSIX) { @@ -1787,6 +1785,14 @@ } em_update_stats_counters(adapter); + /* Reset LAA into RAR[0] on 82571 */ + if ((adapter->hw.mac.type == e1000_82571) && + e1000_get_laa_state_82571(&adapter->hw)) + e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); + + if (adapter->hw.mac.type < em_mac_min) + lem_smartspeed(adapter); + E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); } @@ -1902,6 +1908,87 @@ return (0); } +static int +igb_intr_assign(if_ctx_t ctx, int msix) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct em_rx_queue *rx_que = adapter->rx_queues; + struct em_tx_queue *tx_que = adapter->tx_queues; + int error, rid, i, vector = 0, rx_vectors; + char buf[16]; + + /* First set up ring resources */ + for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) { + rid = vector + 1; + snprintf(buf, sizeof(buf), "rxq%d", i); + error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, + em_msix_que, rx_que, rx_que->me, buf); + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d\n", i, error); + adapter->rx_num_queues = i; + goto fail; + } + + rx_que->msix = vector; + + /* + * Set the bit to enable interrupt + * in E1000_IMS -- bits 20 and 21 + * are for RX0 and RX1, note this has + * NOTHING to do with the MSIX vector + */ + if (adapter->hw.mac.type == e1000_82574) { + rx_que->eims = 1 << (20 + i); + adapter->ims |= rx_que->eims; + adapter->ivars |= (8 | rx_que->msix) << (i * 4); + } else if (adapter->hw.mac.type == e1000_82575) + rx_que->eims = E1000_EICR_TX_QUEUE0 << vector; + else + rx_que->eims = 1 << vector; + } + 
rx_vectors = vector; + + vector = 0; + for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) { + snprintf(buf, sizeof(buf), "txq%d", i); + tx_que = &adapter->tx_queues[i]; + tx_que->msix = adapter->rx_queues[i % adapter->rx_num_queues].msix; + rid = rman_get_start(adapter->rx_queues[i % adapter->rx_num_queues].que_irq.ii_res); + iflib_softirq_alloc_generic(ctx, rid, IFLIB_INTR_TX, tx_que, tx_que->me, buf); + + if (adapter->hw.mac.type == e1000_82574) { + tx_que->eims = 1 << (22 + i); + adapter->ims |= tx_que->eims; + adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4)); + } else if (adapter->hw.mac.type == e1000_82575) { + tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->tx_num_queues); + } else { + tx_que->eims = 1 << (i % adapter->tx_num_queues); + } + } + + /* Link interrupt */ + rid = rx_vectors + 1; + error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq"); + + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); + goto fail; + } + adapter->linkvec = rx_vectors; + if (adapter->hw.mac.type < igb_mac_min) { + adapter->ivars |= (8 | rx_vectors) << 16; + adapter->ivars |= 0x80000000; + } + return (0); +fail: + iflib_irq_free(ctx, &adapter->irq); + rx_que = adapter->rx_queues; + for (int i = 0; i < adapter->rx_num_queues; i++, rx_que++) + iflib_irq_free(ctx, &rx_que->que_irq); + return (error); +} + /********************************************************************* * * Setup the MSIX Interrupt handlers @@ -1913,14 +2000,18 @@ struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rx_que = adapter->rx_queues; struct em_tx_queue *tx_que = adapter->tx_queues; - int error, rid, i, vector = 0, rx_vectors; + int error, rid, i, vector = 0; char buf[16]; + if (adapter->hw.mac.type >= igb_mac_min) { + return igb_intr_assign(ctx, msix); + } + /* First set up ring resources */ for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); - error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, em_msix_que, rx_que, rx_que->me, buf); + error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RX, em_msix_que, rx_que, rx_que->me, buf); if (error) { device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); adapter->rx_num_queues = i + 1; @@ -1944,16 +2035,19 @@ else rx_que->eims = 1 << vector; } - rx_vectors = vector; - vector = 0; for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "txq%d", i); tx_que = &adapter->tx_queues[i]; - iflib_softirq_alloc_generic(ctx, rid, IFLIB_INTR_TX, tx_que, tx_que->me, buf); - tx_que->msix = (vector % adapter->tx_num_queues); + error = iflib_irq_alloc_generic(ctx, &tx_que->que_irq, rid, IFLIB_INTR_TX, em_msix_que, tx_que, tx_que->me, buf); + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); + adapter->tx_num_queues = i + 1; + goto fail; + } + tx_que->msix = vector; /* * Set the bit to enable interrupt @@ -1966,23 +2060,24 @@ adapter->ims |= tx_que->eims; adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4)); } else if (adapter->hw.mac.type == e1000_82575) { - tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->tx_num_queues); + tx_que->eims = E1000_EICR_TX_QUEUE0 << vector; } else { - tx_que->eims = 1 << (i % adapter->tx_num_queues); + tx_que->eims = 1 << vector; } } /* Link interrupt */ - rid = rx_vectors + 1; + rid = 
vector + 1; error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); goto fail; } - adapter->linkvec = rx_vectors; + + adapter->linkvec = vector; if (adapter->hw.mac.type < igb_mac_min) { - adapter->ivars |= (8 | rx_vectors) << 16; + adapter->ivars |= (8 | vector) << 16; adapter->ivars |= 0x80000000; } return (0); @@ -2139,15 +2234,24 @@ em_free_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); - struct em_rx_queue *que = adapter->rx_queues; + struct em_rx_queue *rxque = adapter->rx_queues; + struct em_tx_queue *txque = adapter->tx_queues; device_t dev = iflib_get_dev(ctx); + int is_igb; + is_igb = (adapter->hw.mac.type >= igb_mac_min); /* Release all msix queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); - for (int i = 0; i < adapter->rx_num_queues; i++, que++) { - iflib_irq_free(ctx, &que->que_irq); + for (int i = 0; i < adapter->rx_num_queues; i++, rxque++) { + iflib_irq_free(ctx, &rxque->que_irq); + } + + if (!is_igb) { + for (int i = 0; i < adapter->tx_num_queues; i++, txque++) { + iflib_irq_free(ctx, &txque->que_irq); + } } /* First release all the interrupt resources */ Index: sys/kern/subr_gtaskqueue.c =================================================================== --- sys/kern/subr_gtaskqueue.c +++ sys/kern/subr_gtaskqueue.c @@ -48,17 +48,26 @@ #include #include -static MALLOC_DEFINE(M_GTASKQUEUE, "taskqueue", "Task Queues"); +static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues"); static void gtaskqueue_thread_enqueue(void *); static void gtaskqueue_thread_loop(void *arg); - -TASKQGROUP_DEFINE(softirq, mp_ncpus, 1); +static int _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri); +TASKQGROUP_DEFINE(softirq, mp_ncpus, 1, false, PI_SOFT); struct gtaskqueue_busy { struct gtask *tb_running; TAILQ_ENTRY(gtaskqueue_busy) tb_link; }; +struct gt_intr_thread { + int git_flags; /* (j) IT_* flags. */ + int git_need; /* Needs service. */ +}; + +/* Interrupt thread flags kept in it_flags */ +#define IT_DEAD 0x000001 /* Thread is waiting to exit. */ +#define IT_WAIT 0x000002 /* Thread is waiting for completion. 
*/ + static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1; struct gtaskqueue { @@ -69,6 +78,7 @@ TAILQ_HEAD(, gtaskqueue_busy) tq_active; struct mtx tq_mutex; struct thread **tq_threads; + struct gt_intr_thread *tq_gt_intrs; int tq_tcount; int tq_spin; int tq_flags; @@ -80,6 +90,7 @@ #define TQ_FLAGS_ACTIVE (1 << 0) #define TQ_FLAGS_BLOCKED (1 << 1) #define TQ_FLAGS_UNLOCKED_ENQUEUE (1 << 2) +#define TQ_FLAGS_INTR (1 << 3) #define DT_CALLOUT_ARMED (1 << 0) @@ -180,6 +191,32 @@ free(queue, M_GTASKQUEUE); } +static void +schedule_ithread(struct gtaskqueue *queue) +{ + struct proc *p; + struct thread *td; + struct gt_intr_thread *git; + + MPASS(queue->tq_tcount == 1); + td = queue->tq_threads[0]; + git = &queue->tq_gt_intrs[0]; + p = td->td_proc; + + atomic_store_rel_int(&git->git_need, 1); + thread_lock(td); + if (TD_AWAITING_INTR(td)) { + CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, + td->td_name); + TD_CLR_IWAIT(td); + sched_add(td, SRQ_INTR); + } else { + CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", + __func__, p->p_pid, td->td_name, git->git_need, td->td_state); + } + thread_unlock(td); +} + int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask) { @@ -197,8 +234,13 @@ STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link); gtask->ta_flags |= TASK_ENQUEUED; TQ_UNLOCK(queue); - if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) - queue->tq_enqueue(queue->tq_context); + if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) { + if (queue->tq_flags & TQ_FLAGS_INTR) { + schedule_ithread(queue); + } else { + queue->tq_enqueue(queue->tq_context); + } + } return (0); } @@ -403,7 +445,7 @@ static int _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, - cpuset_t *mask, const char *name, va_list ap) + cpuset_t *mask, bool intr, const char *name, va_list ap) { char ktname[MAXCOMLEN + 1]; struct thread *td; @@ -422,6 +464,12 @@ printf("%s: no memory for %s threads\n", __func__, ktname); return (ENOMEM); } + tq->tq_gt_intrs = malloc(sizeof(struct gt_intr_thread) * count, M_GTASKQUEUE, + M_NOWAIT | M_ZERO); + if (tq->tq_gt_intrs == NULL) { + printf("%s: no memory for %s intr info\n", __func__, ktname); + return (ENOMEM); + } for (i = 0; i < count; i++) { if (count == 1) @@ -439,6 +487,9 @@ } else tq->tq_tcount++; } + if (intr) + tq->tq_flags |= TQ_FLAGS_INTR; + for (i = 0; i < count; i++) { if (tq->tq_threads[i] == NULL) continue; @@ -458,7 +509,14 @@ } thread_lock(td); sched_prio(td, pri); - sched_add(td, SRQ_BORING); + if (intr) { + /* we need to schedule the thread from the interrupt handler for this to work */ + TD_SET_IWAIT(td); + sched_class(td, PRI_ITHD); + td->td_pflags |= TDP_ITHREAD; + } else { + sched_add(td, SRQ_BORING); + } thread_unlock(td); } @@ -467,13 +525,13 @@ static int gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, - const char *name, ...) + bool intr, const char *name, ...) 
{ va_list ap; int error; va_start(ap, name); - error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap); + error = _gtaskqueue_start_threads(tqp, count, pri, NULL, intr, name, ap); va_end(ap); return (error); } @@ -491,16 +549,58 @@ } static void -gtaskqueue_thread_loop(void *arg) +intr_thread_loop(struct gtaskqueue *tq) { - struct gtaskqueue **tqp, *tq; + struct gt_intr_thread *git; + struct thread *td; - tqp = arg; - tq = *tqp; - gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT); - TQ_LOCK(tq); + git = &tq->tq_gt_intrs[0]; + td = tq->tq_threads[0]; + MPASS(tq->tq_tcount == 1); + + while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) { + THREAD_NO_SLEEPING(); + while (atomic_cmpset_acq_int(&git->git_need, 1, 0) != 0) { + gtaskqueue_run_locked(tq); + } + THREAD_SLEEPING_OK(); + + /* + * Because taskqueue_run() can drop tq_mutex, we need to + * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the + * meantime, which means we missed a wakeup. + */ + if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0) + break; + + TQ_UNLOCK(tq); + WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread"); + mtx_assert(&Giant, MA_NOTOWNED); + thread_lock(td); + if (atomic_load_acq_int(&git->git_need) == 0 && + (git->git_flags & (IT_DEAD | IT_WAIT)) == 0) { + TD_SET_IWAIT(td); + mi_switch(SW_VOL | SWT_IWAIT, NULL); + } +#if 0 + /* XXX is this something we want? */ + if (git->git_flags & IT_WAIT) { + wake = 1; + git->git_flags &= ~IT_WAIT; + } +#endif + thread_unlock(td); + TQ_LOCK(tq); + } + THREAD_NO_SLEEPING(); + gtaskqueue_run_locked(tq); + THREAD_SLEEPING_OK(); +} + +static void +timeshare_thread_loop(struct gtaskqueue *tq) +{ while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) { - /* XXX ? */ gtaskqueue_run_locked(tq); /* * Because taskqueue_run() can drop tq_mutex, we need to @@ -512,6 +612,23 @@ TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0); } gtaskqueue_run_locked(tq); +} + +static void +gtaskqueue_thread_loop(void *arg) +{ + struct gtaskqueue **tqp, *tq; + + tqp = arg; + tq = *tqp; + gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT); + TQ_LOCK(tq); + if (curthread->td_pflags & TDP_ITHREAD) { + intr_thread_loop(tq); + } else { + timeshare_thread_loop(tq); + } + /* * This thread is on its way out, so just drop the lock temporarily * in order to call the shutdown callback. 
This allows the callback @@ -558,11 +675,17 @@ struct taskqgroup { struct taskqgroup_cpu tqg_queue[MAXCPU]; struct mtx tqg_lock; + void (*adjust_func)(void*); char * tqg_name; int tqg_adjusting; int tqg_stride; int tqg_cnt; + int tqg_pri; + int tqg_flags; + bool tqg_intr; }; +#define TQG_NEED_ADJUST 0x1 +#define TQG_ADJUSTED 0x2 struct taskq_bind_task { struct gtask bt_task; @@ -570,16 +693,16 @@ }; static void -taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu) +taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu, bool intr, int pri) { struct taskqgroup_cpu *qcpu; qcpu = &qgroup->tqg_queue[idx]; LIST_INIT(&qcpu->tgc_tasks); - qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK, + qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &qcpu->tgc_taskq); - gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT, - "%s_%d", qgroup->tqg_name, idx); + gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, pri, + intr, "%s_%d", qgroup->tqg_name, idx); qcpu->tgc_cpu = cpu; } @@ -663,12 +786,20 @@ void *uniq, int irq, char *name) { cpuset_t mask; - int qid; + int qid, error; gtask->gt_uniq = uniq; gtask->gt_name = name; gtask->gt_irq = irq; gtask->gt_cpu = -1; + + mtx_lock(&qgroup->tqg_lock); + qgroup->tqg_flags |= TQG_NEED_ADJUST; + mtx_unlock(&qgroup->tqg_lock); + + if (tqg_smp_started && !(qgroup->tqg_flags & TQG_ADJUSTED)) + qgroup->adjust_func(NULL); + mtx_lock(&qgroup->tqg_lock); qid = taskqgroup_find(qgroup, uniq); qgroup->tqg_queue[qid].tgc_cnt++; @@ -679,7 +810,9 @@ CPU_ZERO(&mask); CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask); mtx_unlock(&qgroup->tqg_lock); - intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); + error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); + if (error) + printf("taskqgroup_attach: setaffinity failed: %d\n", error); } else mtx_unlock(&qgroup->tqg_lock); } @@ -688,7 +821,7 @@ taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) { cpuset_t mask; - int qid, cpu; + int qid, cpu, error; mtx_lock(&qgroup->tqg_lock); qid = taskqgroup_find(qgroup, gtask->gt_uniq); @@ -698,9 +831,10 @@ CPU_ZERO(&mask); CPU_SET(cpu, &mask); - intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask); - + error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_INTRHANDLER, &mask); mtx_lock(&qgroup->tqg_lock); + if (error) + printf("taskqgroup_attach_deferred: setaffinity failed: %d\n", error); } qgroup->tqg_queue[qid].tgc_cnt++; @@ -711,27 +845,79 @@ mtx_unlock(&qgroup->tqg_lock); } +static int +taskqgroup_adjust_deferred(struct taskqgroup *qgroup, int cpu) +{ + int i, error = 0, cpu_max = -1; + + mtx_lock(&qgroup->tqg_lock); + for (i = 0; i < qgroup->tqg_cnt; i++) + if (qgroup->tqg_queue[i].tgc_cpu > cpu_max) + cpu_max = qgroup->tqg_queue[i].tgc_cpu; + if (cpu_max >= cpu) { + mtx_unlock(&qgroup->tqg_lock); + return (0); + } + MPASS(cpu <= mp_maxid); + error = _taskqgroup_adjust(qgroup, cpu + 1, qgroup->tqg_stride, + qgroup->tqg_intr, qgroup->tqg_pri); + if (error) { + printf("%s: _taskqgroup_adjust(%p, %d, %d, %d, %d) => %d\n\n", + __func__, qgroup, cpu + 1, qgroup->tqg_stride, qgroup->tqg_intr, + qgroup->tqg_pri, error); + goto out; + } + for (i = 0; i < qgroup->tqg_cnt; i++) + if (qgroup->tqg_queue[i].tgc_cpu > cpu_max) + cpu_max = qgroup->tqg_queue[i].tgc_cpu; + MPASS(cpu_max >= cpu); +out: + mtx_unlock(&qgroup->tqg_lock); + return (error); +} + int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, void *uniq, int cpu, int irq, char *name) { cpuset_t mask; - int i, qid; + int i, error, 
qid; qid = -1; gtask->gt_uniq = uniq; gtask->gt_name = name; gtask->gt_irq = irq; gtask->gt_cpu = cpu; + MPASS(cpu >= 0); + + mtx_lock(&qgroup->tqg_lock); + qgroup->tqg_flags |= TQG_NEED_ADJUST; + mtx_unlock(&qgroup->tqg_lock); + + if (tqg_smp_started && !(qgroup->tqg_flags & TQG_ADJUSTED)) { + uintptr_t cpuid = cpu + 1; + qgroup->adjust_func((void *)cpuid); + } + if ((error = taskqgroup_adjust_deferred(qgroup, cpu))) + return (error); + mtx_lock(&qgroup->tqg_lock); if (tqg_smp_started) { - for (i = 0; i < qgroup->tqg_cnt; i++) + for (i = 0; i < qgroup->tqg_cnt; i++) { if (qgroup->tqg_queue[i].tgc_cpu == cpu) { qid = i; break; } +#ifdef INVARIANTS + else + printf("qgroup->tqg_queue[%d].tgc_cpu=0x%x tgc_cnt=0x%x\n", + i, qgroup->tqg_queue[i].tgc_cpu, qgroup->tqg_queue[i].tgc_cnt); + +#endif + } if (qid == -1) { mtx_unlock(&qgroup->tqg_lock); + printf("%s: qid not found for cpu=%d\n", __func__, cpu); return (EINVAL); } } else @@ -744,8 +930,11 @@ CPU_ZERO(&mask); CPU_SET(cpu, &mask); - if (irq != -1 && tqg_smp_started) - intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); + if (irq != -1 && tqg_smp_started) { + error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); + if (error) + printf("taskqgroup_attach_cpu: setaffinity failed: %d\n", error); + } return (0); } @@ -753,13 +942,18 @@ taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) { cpuset_t mask; - int i, qid, irq, cpu; + int i, qid, irq, cpu, error; qid = -1; irq = gtask->gt_irq; cpu = gtask->gt_cpu; MPASS(tqg_smp_started); + + if ((error = taskqgroup_adjust_deferred(qgroup, cpu))) + return (error); mtx_lock(&qgroup->tqg_lock); + /* adjust as needed */ + MPASS(cpu <= mp_maxid); for (i = 0; i < qgroup->tqg_cnt; i++) if (qgroup->tqg_queue[i].tgc_cpu == cpu) { qid = i; break; } if (qid == -1) { mtx_unlock(&qgroup->tqg_lock); + printf("%s: qid not found for cpu=%d\n", __func__, cpu); return (EINVAL); } qgroup->tqg_queue[qid].tgc_cnt++; @@ -778,8 +973,11 @@ CPU_ZERO(&mask); CPU_SET(cpu, &mask); - if (irq != -1) - intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); + if (irq != -1) { + error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); + if (error) + printf("taskqgroup_attach_cpu: setaffinity failed: %d\n", error); + } return (0); } @@ -818,8 +1016,25 @@ printf("taskqgroup_binder: setaffinity failed: %d\n", error); free(gtask, M_DEVBUF); + } +static void +taskqgroup_ithread_binder(void *ctx) +{ + struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx; + cpuset_t mask; + int error; + + CPU_ZERO(&mask); + CPU_SET(gtask->bt_cpuid, &mask); + error = cpuset_setthread(curthread->td_tid, &mask); + if (error) + printf("taskqgroup_binder: setaffinity failed: %d\n", + error); + free(gtask, M_DEVBUF); + +} static void taskqgroup_bind(struct taskqgroup *qgroup) { @@ -835,7 +1050,10 @@ for (i = 0; i < qgroup->tqg_cnt; i++) { gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK); - GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask); + if (qgroup->tqg_intr) + GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_ithread_binder, gtask); + else + GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask); gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu; grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq, &gtask->bt_task); @@ -843,7 +1061,7 @@ } static int -_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) +_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) { LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL); struct grouptask *gtask; @@ -858,14 
+1076,22 @@ return (EINVAL); } if (qgroup->tqg_adjusting) { - printf("taskqgroup_adjust failed: adjusting\n"); + printf("%s: failed: adjusting\n", __func__); return (EBUSY); } + /* No work to be done */ + if (qgroup->tqg_cnt == cnt) + return (0); qgroup->tqg_adjusting = 1; old_cnt = qgroup->tqg_cnt; old_cpu = 0; - if (old_cnt < cnt) - old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu; + if (old_cnt < cnt) { + int old_max_idx = max(0, old_cnt-1); + old_cpu = qgroup->tqg_queue[old_max_idx].tgc_cpu; + if (old_cnt > 0) + for (k = 0; k < stride; k++) + old_cpu = CPU_NEXT(old_cpu); + } mtx_unlock(&qgroup->tqg_lock); /* * Set up queue for tasks added before boot. @@ -881,7 +1107,7 @@ */ cpu = old_cpu; for (i = old_cnt; i < cnt; i++) { - taskqgroup_cpu_create(qgroup, i, cpu); + taskqgroup_cpu_create(qgroup, i, cpu, ithread, pri); for (k = 0; k < stride; k++) cpu = CPU_NEXT(cpu); @@ -889,6 +1115,8 @@ mtx_lock(&qgroup->tqg_lock); qgroup->tqg_cnt = cnt; qgroup->tqg_stride = stride; + qgroup->tqg_intr = ithread; + qgroup->tqg_pri = pri; /* * Adjust drivers to use new taskqs. @@ -934,12 +1162,34 @@ } int -taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) +taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) { int error; mtx_lock(&qgroup->tqg_lock); - error = _taskqgroup_adjust(qgroup, cnt, stride); + error = _taskqgroup_adjust(qgroup, cnt, stride, ithread, pri); + mtx_unlock(&qgroup->tqg_lock); + + return (error); +} + +void +taskqgroup_set_adjust(struct taskqgroup *qgroup, void (*adjust_func)(void*)) +{ + qgroup-> adjust_func = adjust_func; +} + +int +taskqgroup_adjust_once(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) +{ + int error = 0; + + mtx_lock(&qgroup->tqg_lock); + if ((qgroup->tqg_flags & (TQG_ADJUSTED|TQG_NEED_ADJUST)) == TQG_NEED_ADJUST) { + qgroup->tqg_flags |= TQG_ADJUSTED; + error = _taskqgroup_adjust(qgroup, cnt, stride, ithread, pri); + MPASS(error == 0); + } mtx_unlock(&qgroup->tqg_lock); return (error); @@ -954,7 +1204,9 @@ mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF); qgroup->tqg_name = name; LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks); - + MPASS(qgroup->tqg_queue[0].tgc_cnt == 0); + MPASS(qgroup->tqg_queue[0].tgc_cpu == 0); + MPASS(qgroup->tqg_queue[0].tgc_taskq == 0); return (qgroup); } Index: sys/net/iflib.h =================================================================== --- sys/net/iflib.h +++ sys/net/iflib.h @@ -119,6 +119,7 @@ qidx_t ipi_pidx; /* start pidx for encap */ qidx_t ipi_new_pidx; /* next available pidx post-encap */ /* offload handling */ + caddr_t ipi_hdr_data; /* raw header */ uint8_t ipi_ehdrlen; /* ether header length */ uint8_t ipi_ip_hlen; /* ip header length */ uint8_t ipi_tcp_hlen; /* tcp header length */ @@ -183,6 +184,7 @@ void (*ift_rxd_refill) (void * , if_rxd_update_t iru); void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx); int (*ift_legacy_intr) (void *); + int (*ift_txd_errata) (void *, struct mbuf **mp); } *if_txrx_t; typedef struct if_softc_ctx { @@ -294,9 +296,9 @@ */ #define IFLIB_HAS_TXCQ 0x08 /* - * Interface does checksum in place + * */ -#define IFLIB_NEED_SCRATCH 0x10 +#define IFLIB_UNUSED___0 0x10 /* * Interface doesn't expect in_pseudo for th_sum */ @@ -305,6 +307,10 @@ * Interface doesn't align IP header */ #define IFLIB_DO_RX_FIXUP 0x40 +/* + * Driver needs csum zeroed for offloading + */ +#define IFLIB_NEED_ZERO_CSUM 0x80 @@ -381,7 +387,7 @@ void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count); -struct mtx 
*iflib_ctx_lock_get(if_ctx_t); +struct sx *iflib_ctx_lock_get(if_ctx_t); struct mtx *iflib_qset_lock_get(if_ctx_t, uint16_t); void iflib_led_create(if_ctx_t ctx); Index: sys/net/iflib.c =================================================================== --- sys/net/iflib.c +++ sys/net/iflib.c @@ -51,7 +51,6 @@ #include #include - #include #include #include @@ -157,7 +156,7 @@ if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; - struct mtx ifc_mtx; + struct sx ifc_sx; uint16_t ifc_nhwtxqs; uint16_t ifc_nhwrxqs; @@ -185,6 +184,8 @@ uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; + uint16_t ifc_cpuid_highest; + uint16_t ifc_sysctl_rx_budget; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; @@ -203,8 +204,66 @@ eventhandler_tag ifc_vlan_detach_event; uint8_t ifc_mac[ETHER_ADDR_LEN]; char ifc_mtx_name[16]; + LIST_ENTRY(iflib_ctx) ifc_next; }; +static LIST_HEAD(ctx_head, iflib_ctx) ctx_list; +static struct mtx ctx_list_lock; + +TASKQGROUP_DEFINE(if_io, mp_ncpus, 1, true, PI_NET); +TASKQGROUP_DEFINE(if_config, 1, 1, false, PI_SOFT); + +static void +iflib_ctx_apply(void (*fn)(if_ctx_t ctx, void *arg), void *arg) +{ + if_ctx_t ctx; + + mtx_lock(&ctx_list_lock); + LIST_FOREACH(ctx, &ctx_list, ifc_next) { + (fn)(ctx, arg); + } + mtx_unlock(&ctx_list_lock); +} + +static void +_iflib_cpuid_highest(if_ctx_t ctx, void *arg) { + int *cpuid = arg; + + if (*cpuid < ctx->ifc_cpuid_highest) + *cpuid = ctx->ifc_cpuid_highest; +} + +static int +iflib_cpuid_highest(void) +{ + int cpuid = 0; + + iflib_ctx_apply(_iflib_cpuid_highest, &cpuid); + return (cpuid); +} + +static void +iflib_ctx_insert(if_ctx_t ctx) +{ + mtx_lock(&ctx_list_lock); + LIST_INSERT_HEAD(&ctx_list, ctx, ifc_next); + mtx_unlock(&ctx_list_lock); +} + +static void +iflib_ctx_remove(if_ctx_t ctx) +{ + int max_cpuid_prev, max_cpuid_new; + + max_cpuid_prev = iflib_cpuid_highest(); + mtx_lock(&ctx_list_lock); + LIST_REMOVE(ctx, ifc_next); + mtx_unlock(&ctx_list_lock); + max_cpuid_new = max(1, iflib_cpuid_highest()); + if (max_cpuid_new < max_cpuid_prev) { + taskqgroup_adjust(qgroup_if_io, max_cpuid_new, 1, true, PI_NET); + } +} void * iflib_get_softc(if_ctx_t ctx) @@ -263,9 +322,11 @@ #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) #define RX_SW_DESC_MAP_CREATED (1 << 0) -#define TX_SW_DESC_MAP_CREATED (1 << 1) -#define RX_SW_DESC_INUSE (1 << 3) -#define TX_SW_DESC_MAPPED (1 << 4) +#define RX_SW_DESC_INUSE (1 << 1) +#define RX_NETMAP_INUSE (1 << 2) + +#define TX_SW_DESC_MAP_CREATED (1 << 0) +#define TX_SW_DESC_MAPPED (1 << 1) #define M_TOOBIG M_PROTO1 @@ -357,6 +418,7 @@ uint8_t ift_qstatus; uint8_t ift_closed; uint8_t ift_update_freq; + uint8_t ift_stall_count; struct iflib_filter_info ift_filter_info; bus_dma_tag_t ift_desc_tag; bus_dma_tag_t ift_tso_desc_tag; @@ -448,9 +510,11 @@ struct grouptask ifr_task; struct iflib_filter_info ifr_filter_info; iflib_dma_info_t ifr_ifdi; - + struct if_rxd_info ifr_ri; + struct if_rxd_update ifr_iru; /* dynamically allocate if any drivers need a value substantially larger than this */ struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); + #ifdef IFLIB_DIAGNOSTICS uint64_t ifr_cpu_exec_count[256]; #endif @@ -465,11 +529,11 @@ /* multiple of word size */ #ifdef __LP64__ -#define PKT_INFO_SIZE 6 +#define PKT_INFO_SIZE 7 #define RXD_INFO_SIZE 5 #define PKT_TYPE uint64_t #else -#define PKT_INFO_SIZE 11 +#define PKT_INFO_SIZE 12 #define RXD_INFO_SIZE 8 #define PKT_TYPE uint32_t #endif @@ -495,9 +559,10 @@ pi_pad = 
(if_pkt_info_pad_t)pi; pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; + pi_pad->pkt_val[6] = 0; #ifndef __LP64__ - pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; - pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; + pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; pi_pad->pkt_val[9] = 0; + pi_pad->pkt_val[10] = 0; pi_pad->pkt_val[11] = 0; #endif } @@ -525,14 +590,24 @@ #define MAX_SINGLE_PACKET_FRACTION 12 #define IF_BAD_DMA (bus_addr_t)-1 -#define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) +static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, + "iflib driver parameters"); + +static int iflib_timer_int; +SYSCTL_INT(_net_iflib, OID_AUTO, timer_int, CTLFLAG_RW, &iflib_timer_int, + 0, "interval at which to run per-queue timers (in ticks)"); + +static int force_busdma = 0; +SYSCTL_INT(_net_iflib, OID_AUTO, force_busdma, CTLFLAG_RDTUN, &force_busdma, + 1, "force busdma"); -#define CTX_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF) +#define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) -#define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_mtx) -#define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_mtx) -#define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx) +#define CTX_LOCK_INIT(_sc, _name) sx_init(&(_sc)->ifc_sx, _name) +#define CTX_LOCK(ctx) sx_xlock(&(ctx)->ifc_sx) +#define CTX_UNLOCK(ctx) sx_xunlock(&(ctx)->ifc_sx) +#define CTX_LOCK_DESTROY(ctx) sx_destroy(&(ctx)->ifc_sx) #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) @@ -553,9 +628,6 @@ MODULE_DEPEND(iflib, pci, 1, 1, 1); MODULE_DEPEND(iflib, ether, 1, 1, 1); -TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); -TASKQGROUP_DEFINE(if_config_tqg, 1, 1); - #ifndef IFLIB_DEBUG_COUNTERS #ifdef INVARIANTS #define IFLIB_DEBUG_COUNTERS 1 @@ -564,9 +636,6 @@ #endif /* !INVARIANTS */ #endif -static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, - "iflib driver parameters"); - /* * XXX need to ensure that this can't accidentally cause the head to be moved backwards */ @@ -689,7 +758,14 @@ static void iflib_debug_reset(void) {} #endif +typedef void async_gtask_fn_t(if_ctx_t ctx, void *arg); +struct async_task_arg { + async_gtask_fn_t *ata_fn; + if_ctx_t ata_ctx; + void *ata_arg; + struct grouptask *ata_gtask; +}; #define IFLIB_DEBUG 0 @@ -711,6 +787,12 @@ static void _iflib_pre_assert(if_softc_ctx_t scctx); static void iflib_stop(if_ctx_t ctx); static void iflib_if_init_locked(if_ctx_t ctx); +static int async_if_ioctl(if_ctx_t ctx, u_long command, caddr_t data); +static int iflib_config_async_gtask_dispatch(if_ctx_t ctx, async_gtask_fn_t *fn, char *name, void *arg); +static void iflib_admin_reset_deferred(if_ctx_t ctx); + + + #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m); #endif @@ -784,6 +866,94 @@ return (status); } +static void +iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) +{ + iflib_fl_t fl; + + fl = &rxq->ifr_fl[flid]; + iru->iru_paddrs = fl->ifl_bus_addrs; + iru->iru_vaddrs = &fl->ifl_vm_addrs[0]; + iru->iru_idxs = fl->ifl_rxd_idxs; + iru->iru_qsidx = rxq->ifr_id; + iru->iru_buf_size = fl->ifl_buf_size; + iru->iru_flidx = fl->ifl_id; +} + +static int +netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init) +{ + struct netmap_adapter *na = kring->na; + u_int const lim = kring->nkr_num_slots - 1; + u_int head = 
kring->rhead; + struct netmap_ring *ring = kring->ring; + bus_dmamap_t *map; + if_rxd_update_t iru; + if_ctx_t ctx = rxq->ifr_ctx; + iflib_fl_t fl = &rxq->ifr_fl[0]; + uint32_t refill_pidx, nic_i; + + iru = &rxq->ifr_iru; + iru_init(iru, rxq, 0 /* flid */); + map = fl->ifl_sds.ifsd_map; + refill_pidx = netmap_idx_k2n(kring, nm_i); + if (init && (nm_i == head)) + head = nm_prev(head, lim); + for (int tmp_pidx = 0; nm_i != head; tmp_pidx++) { + struct netmap_slot *slot = &ring->slot[nm_i]; + void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]); + uint32_t nic_i_dma = refill_pidx; + nic_i = netmap_idx_k2n(kring, nm_i); + + MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH); + + if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ + return netmap_ring_reinit(kring); + + fl->ifl_vm_addrs[tmp_pidx] = addr; + if (__predict_false(init) && map) { + netmap_load_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); + } else if (map && (slot->flags & NS_BUF_CHANGED)) { + /* buffer has changed, reload map */ + netmap_reload_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); + } + slot->flags &= ~NS_BUF_CHANGED; + + nm_i = nm_next(nm_i, lim); + fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim); + if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1) + continue; + + iru->iru_pidx = refill_pidx; + iru->iru_count = tmp_pidx+1; + ctx->isc_rxd_refill(ctx->ifc_softc, iru); + + tmp_pidx = 0; + refill_pidx = nic_i; + if (map == NULL) + continue; + + for (int n = 0; n < iru->iru_count; n++) { + bus_dmamap_sync(fl->ifl_ifdi->idi_tag, map[nic_i_dma], + BUS_DMASYNC_PREREAD); + /* XXX - change this to not use the netmap func*/ + nic_i_dma = nm_next(nic_i_dma, lim); + } + } + kring->nr_hwcur = head; + + if (map) + bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + /* + * IMPORTANT: we must leave one free slot in the ring, + * so move nic_i back by one unit + */ + nic_i = nm_prev(nic_i, lim); + ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); + return (0); +} + /* * Reconcile kernel and user view of the transmit ring. * @@ -941,18 +1111,20 @@ struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; uint32_t nm_i; /* index into the netmap ring */ - uint32_t nic_i, nic_i_start; /* index into the NIC ring */ + uint32_t nic_i; /* index into the NIC ring */ u_int i, n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - struct if_rxd_info ri; - struct if_rxd_update iru; + struct if_rxd_info *ri; + struct if_rxd_update *iru; struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; iflib_fl_t fl = rxq->ifr_fl; + ri = &rxq->ifr_ri; + iru = &rxq->ifr_iru; if (head > lim) return netmap_ring_reinit(kring); @@ -988,14 +1160,14 @@ nm_i = netmap_idx_n2k(kring, nic_i); avail = iflib_rxd_avail(ctx, rxq, nic_i, USHRT_MAX); for (n = 0; avail > 0; n++, avail--) { - rxd_info_zero(&ri); - ri.iri_frags = rxq->ifr_frags; - ri.iri_qsidx = kring->ring_id; - ri.iri_ifp = ctx->ifc_ifp; - ri.iri_cidx = nic_i; + rxd_info_zero(ri); + ri->iri_frags = rxq->ifr_frags; + ri->iri_qsidx = kring->ring_id; + ri->iri_ifp = ctx->ifc_ifp; + ri->iri_cidx = nic_i; - error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); - ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen; + error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, ri); + ring->slot[nm_i].len = error ? 
0 : ri->iri_len - crclen; ring->slot[nm_i].flags = slot_flags; if (fl->ifl_sds.ifsd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, @@ -1028,63 +1200,7 @@ if (nm_i == head) return (0); - iru.iru_paddrs = fl->ifl_bus_addrs; - iru.iru_vaddrs = &fl->ifl_vm_addrs[0]; - iru.iru_idxs = fl->ifl_rxd_idxs; - iru.iru_qsidx = rxq->ifr_id; - iru.iru_buf_size = fl->ifl_buf_size; - iru.iru_flidx = fl->ifl_id; - nic_i_start = nic_i = netmap_idx_k2n(kring, nm_i); - for (i = 0; nm_i != head; i++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[i]); - - if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ - goto ring_reset; - - fl->ifl_vm_addrs[i] = addr; - if (fl->ifl_sds.ifsd_map && (slot->flags & NS_BUF_CHANGED)) { - /* buffer has changed, reload map */ - netmap_reload_map(na, fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], addr); - } - slot->flags &= ~NS_BUF_CHANGED; - - nm_i = nm_next(nm_i, lim); - fl->ifl_rxd_idxs[i] = nic_i = nm_next(nic_i, lim); - if (nm_i != head && i < IFLIB_MAX_RX_REFRESH) - continue; - - iru.iru_pidx = nic_i_start; - iru.iru_count = i; - i = 0; - ctx->isc_rxd_refill(ctx->ifc_softc, &iru); - if (fl->ifl_sds.ifsd_map == NULL) { - nic_i_start = nic_i; - continue; - } - nic_i = nic_i_start; - for (n = 0; n < iru.iru_count; n++) { - bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], - BUS_DMASYNC_PREREAD); - nic_i = nm_next(nic_i, lim); - } - nic_i_start = nic_i; - } - kring->nr_hwcur = head; - - if (fl->ifl_sds.ifsd_map) - bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * IMPORTANT: we must leave one free slot in the ring, - * so move nic_i back by one unit - */ - nic_i = nm_prev(nic_i, lim); - ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); - return 0; - -ring_reset: - return netmap_ring_reinit(kring); + return (netmap_fl_refill(rxq, kring, nm_i, false)); } static void @@ -1093,13 +1209,12 @@ struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; - CTX_LOCK(ctx); + /* XXX - do we need synchronization here?*/ if (onoff) { IFDI_INTR_ENABLE(ctx); } else { IFDI_INTR_DISABLE(ctx); } - CTX_UNLOCK(ctx); } @@ -1156,55 +1271,15 @@ iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); + struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; struct netmap_slot *slot; - struct if_rxd_update iru; - iflib_fl_t fl; - bus_dmamap_t *map; - int nrxd; - uint32_t i, j, pidx_start; + uint32_t nm_i; slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); if (slot == NULL) return; - fl = &rxq->ifr_fl[0]; - map = fl->ifl_sds.ifsd_map; - nrxd = ctx->ifc_softc_ctx.isc_nrxd[0]; - iru.iru_paddrs = fl->ifl_bus_addrs; - iru.iru_vaddrs = &fl->ifl_vm_addrs[0]; - iru.iru_idxs = fl->ifl_rxd_idxs; - iru.iru_qsidx = rxq->ifr_id; - iru.iru_buf_size = rxq->ifr_fl[0].ifl_buf_size; - iru.iru_flidx = 0; - - for (pidx_start = i = j = 0; i < nrxd; i++, j++) { - int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i); - void *addr; - - fl->ifl_rxd_idxs[j] = i; - addr = fl->ifl_vm_addrs[j] = PNMB(na, slot + sj, &fl->ifl_bus_addrs[j]); - if (map) { - netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, *map, addr); - map++; - } - - if (j < IFLIB_MAX_RX_REFRESH && i < nrxd - 1) - continue; - - iru.iru_pidx = pidx_start; - pidx_start = i; - iru.iru_count = j; - j = 0; - MPASS(pidx_start + j <= nrxd); - /* Update descriptors and the cached value */ - ctx->isc_rxd_refill(ctx->ifc_softc, &iru); - } - /* preserve queue */ - if 
(ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; - int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); - ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, t); - } else - ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, nrxd-1); + nm_i = netmap_idx_n2k(kring, 0); + netmap_fl_refill(rxq, kring, nm_i, true); } #define iflib_netmap_detach(ifp) netmap_detach(ifp) @@ -1226,8 +1301,17 @@ { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } +static __inline void +prefetch2(void *x) +{ + __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); +#if (CACHE_LINE_SIZE < 128) + __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); +#endif +} #else #define prefetch(x) +#define prefetch2(x) #endif static void @@ -1343,6 +1427,25 @@ iflib_dma_free(*dmaiter); } +static void +txq_validate(iflib_txq_t txq) { +#ifdef INVARIANTS + uint32_t cidx = txq->ift_cidx; + struct mbuf **ifsd_m = txq->ift_sds.ifsd_m; + if (txq->ift_pidx > cidx) { + int i; + for (i = txq->ift_pidx; i < txq->ift_size; i++) + MPASS(ifsd_m[i] == NULL); + for (i = 0; i < cidx; i++) + MPASS(ifsd_m[i] == NULL); + } else if (txq->ift_pidx < cidx) { + int i; + for (i = txq->ift_pidx; i < cidx; i++) + MPASS(ifsd_m[i] == NULL); + } +#endif +} + #ifdef EARLY_AP_STARTUP static const int iflib_started = 1; #else @@ -1371,6 +1474,7 @@ { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; + if (!iflib_started) return (FILTER_HANDLED); @@ -1383,6 +1487,35 @@ } static int +iflib_fast_intr_rx(void *arg) +{ + iflib_filter_info_t info = arg; + struct grouptask *gtask = info->ifi_task; + iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; + if_ctx_t ctx; + int cidx; + + if (!iflib_started) + return (FILTER_HANDLED); + + DBG_COUNTER_INC(fast_intrs); + if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) + return (FILTER_HANDLED); + + ctx = rxq->ifr_ctx; + if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) + cidx = rxq->ifr_cq_cidx; + else + cidx = rxq->ifr_fl[0].ifl_cidx; + if (iflib_rxd_avail(ctx, rxq, cidx, 1)) + GROUPTASK_ENQUEUE(gtask); + else + IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); + return (FILTER_HANDLED); +} + + +static int iflib_fast_intr_rxtx(void *arg) { iflib_filter_info_t info = arg; @@ -1398,11 +1531,10 @@ if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); + ctx = rxq->ifr_ctx; for (i = 0; i < rxq->ifr_ntxqirq; i++) { qidx_t txqid = rxq->ifr_txqid[i]; - ctx = rxq->ifr_ctx; - if (!ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) { IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; @@ -1974,20 +2106,33 @@ if (*sd_cl != NULL) uma_zfree(fl->ifl_zone, *sd_cl); *sd_flags = 0; + } else if (*sd_flags & RX_NETMAP_INUSE) { + if (fl->ifl_sds.ifsd_map != NULL) { + bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i]; + bus_dmamap_unload(fl->ifl_desc_tag, sd_map); + bus_dmamap_destroy(fl->ifl_desc_tag, sd_map); + } + *sd_flags = 0; + MPASS(*sd_cl == NULL); + MPASS(*sd_m == NULL); } else { MPASS(*sd_cl == NULL); MPASS(*sd_m == NULL); } + #if MEMORY_LOGGING - fl->ifl_m_dequeued++; - fl->ifl_cl_dequeued++; + if (*sd_m != NULL) + fl->ifl_m_dequeued++; + if (*sd_cl != NULL) + fl->ifl_cl_dequeued++; #endif *sd_cl = NULL; *sd_m = NULL; } #ifdef INVARIANTS for (i = 0; i < fl->ifl_size; i++) { - MPASS(fl->ifl_sds.ifsd_flags[i] == 0); + KASSERT(fl->ifl_sds.ifsd_flags[i] == 0, 
("fl->ifl_sds.ifsd_flags[%d]=0x%x, expected 0", + i, fl->ifl_sds.ifsd_flags[i])); MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); MPASS(fl->ifl_sds.ifsd_m[i] == NULL); } @@ -2011,7 +2156,7 @@ if_ctx_t ctx = rxq->ifr_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; - bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size); + bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size-1); /* ** Free current RX buffer structs and their mbufs */ @@ -2090,6 +2235,19 @@ } } +/* CONFIG context only */ +static void +iflib_handle_hang(if_ctx_t ctx, void *arg __unused) +{ + + CTX_LOCK(ctx); + if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); + IFDI_WATCHDOG_RESET(ctx); + ctx->ifc_watchdog_events++; + iflib_if_init_locked(ctx); + CTX_UNLOCK(ctx); +} + /* * MI independent logic * @@ -2097,46 +2255,49 @@ static void iflib_timer(void *arg) { - iflib_txq_t txq = arg; + iflib_txq_t txq_i, txq = arg; if_ctx_t ctx = txq->ift_ctx; - if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; + /* handle any laggards */ + if (txq->ift_db_pending) + GROUPTASK_ENQUEUE(&txq->ift_task); + IFDI_TIMER(ctx, txq->ift_id); + + if (ifmp_ring_is_stalled(txq->ift_br) && + txq->ift_cleaned_prev == txq->ift_cleaned) + txq->ift_stall_count++; + txq->ift_cleaned_prev = txq->ift_cleaned; + if (txq->ift_stall_count > 2) { + txq->ift_qstatus = IFLIB_QUEUE_HUNG; + device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", + txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); + } + if (txq->ift_id != 0) { + if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) + callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, + txq, txq->ift_timer.c_cpu); + return; + } /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. 
*/ - IFDI_TIMER(ctx, txq->ift_id); - if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && - ((txq->ift_cleaned_prev == txq->ift_cleaned) || - (sctx->isc_pause_frames == 0))) - goto hung; + txq_i = ctx->ifc_txqs; + for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq_i++) { + if (txq_i->ift_qstatus == IFLIB_QUEUE_HUNG) { + iflib_config_async_gtask_dispatch(ctx, iflib_handle_hang, "hang handler", txq); + /* init will reset the callout */ + return; + } + } - if (ifmp_ring_is_stalled(txq->ift_br)) - txq->ift_qstatus = IFLIB_QUEUE_HUNG; - txq->ift_cleaned_prev = txq->ift_cleaned; - /* handle any laggards */ - if (txq->ift_db_pending) - GROUPTASK_ENQUEUE(&txq->ift_task); - sctx->isc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) - callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); - return; -hung: - CTX_LOCK(ctx); - if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); - device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", - txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); - - IFDI_WATCHDOG_RESET(ctx); - ctx->ifc_watchdog_events++; - - ctx->ifc_flags |= IFC_DO_RESET; - iflib_admin_intr_deferred(ctx); - CTX_UNLOCK(ctx); + callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, + txq, txq->ift_timer.c_cpu); } static void @@ -2148,8 +2309,10 @@ iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; - int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; + int i, j, tx_ip_csum_flags, tx_ip6_csum_flags, running, reset; + running = !!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); + reset = !!(ctx->ifc_flags & IFC_DO_RESET); if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); @@ -2173,19 +2336,20 @@ CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } - for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { - MPASS(rxq->ifr_id == i); - iflib_netmap_rxq_init(ctx, rxq); - } #ifdef INVARIANTS i = if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); + if (!running && reset) + return; for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { /* XXX this should really be done on a per-queue basis */ - if (if_getcapenable(ifp) & IFCAP_NETMAP) + if (if_getcapenable(ifp) & IFCAP_NETMAP) { + MPASS(rxq->ifr_id == i); + iflib_netmap_rxq_init(ctx, rxq); continue; + } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n"); @@ -2198,10 +2362,11 @@ IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) - callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, - txq->ift_timer.c_cpu); + callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, + txq, txq->ift_timer.c_cpu); } +/* CONFIG context only */ static int iflib_media_change(if_t ifp) { @@ -2215,17 +2380,19 @@ return (err); } +/* CONFIG context only */ static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); + iflib_admin_intr_deferred(ctx); CTX_LOCK(ctx); - IFDI_UPDATE_ADMIN_STATUS(ctx); IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } +/* CONFIG context only */ static void iflib_stop(if_ctx_t ctx) { @@ -2240,9 +2407,7 @@ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); - DELAY(1000); IFDI_STOP(ctx); - DELAY(1000); iflib_debug_reset(); /* Wait for current tx queue users to exit to disarm watchdog timer. 
*/ @@ -2255,11 +2420,13 @@ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } - txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; - txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; + /* XXX please rewrite to simply bzero this range */ + txq->ift_processed = txq->ift_cleaned = txq->ift_cleaned_prev = 0; + txq->ift_stall_count = txq->ift_cidx_processed = 0; + txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = 0; txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; - txq->ift_pullups = 0; + txq->ift_no_desc_avail = txq->ift_pullups = 0; ifmp_ring_reset_stats(txq->ift_br); for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); @@ -2402,6 +2569,9 @@ } cl = *sd->ifsd_cl; *sd->ifsd_cl = NULL; +#if MEMORY_LOGGING + sd->ifsd_fl->ifl_cl_dequeued++; +#endif /* Can these two be made one ? */ m_init(m, M_NOWAIT, MT_DATA, flags); @@ -2471,20 +2641,12 @@ * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ - struct mbuf *m, *mh, *mt; + struct mbuf *m, *mh, *mt, *mf; ifp = ctx->ifc_ifp; -#ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - u_int work = 0; - if (netmap_rx_irq(ifp, rxq->ifr_id, &work)) - return (FALSE); - } -#endif - mh = mt = NULL; MPASS(budget > 0); - rx_pkts = rx_bytes = 0; + rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else @@ -2547,11 +2709,14 @@ } /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) - __iflib_fl_refill_lt(ctx, fl, budget + 8); + __iflib_fl_refill_lt(ctx, fl, 2*budget + 8); lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); + mt = mf = NULL; while (mh != NULL) { m = mh; + if (mf == NULL) + mf = m; mh = mh->m_nextpkt; m->m_nextpkt = NULL; #ifndef __NO_STRICT_ALIGNMENT @@ -2561,15 +2726,25 @@ rx_bytes += m->m_pkthdr.len; rx_pkts++; #if defined(INET6) || defined(INET) - if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) + if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) { + if (mf == m) + mf = NULL; continue; + } #endif + if (mt != NULL) + mt->m_nextpkt = m; + mt = m; + } + if (mf != NULL) { + ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); - ifp->if_input(ifp, m); } - if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); - if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); + if (rx_pkts) { + if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); + if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); + } /* * Flush any outstanding LRO work @@ -2577,14 +2752,9 @@ #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); #endif - if (avail) - return true; - return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); + return (avail || iflib_rxd_avail(ctx, rxq, *cidxp, 1)); err: - CTX_LOCK(ctx); - ctx->ifc_flags |= IFC_DO_RESET; - iflib_admin_intr_deferred(ctx); - CTX_UNLOCK(ctx); + iflib_admin_reset_deferred(ctx); return (false); } @@ -2671,20 +2841,19 @@ static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { - if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; + if_ctx_t ctx = txq->ift_ctx; +#ifdef INET + if_shared_ctx_t sctx = ctx->ifc_sctx; +#endif + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; struct ether_vlan_header *eh; struct mbuf *m, *n; + int err; + if (scctx->isc_txrx->ift_txd_errata && + (err = 
scctx->isc_txrx->ift_txd_errata(ctx->ifc_softc, mp))) + return (err); n = m = *mp; - if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && - M_WRITABLE(m) == 0) { - if ((m = m_dup(m, M_NOWAIT)) == NULL) { - return (ENOMEM); - } else { - m_freem(*mp); - n = *mp = m; - } - } /* * Determine where frame payload starts. @@ -2705,6 +2874,10 @@ pi->ipi_ehdrlen = ETHER_HDR_LEN; } + if (if_getmtu(txq->ift_ctx->ifc_ifp) >= pi->ipi_len) { + pi->ipi_csum_flags &= ~(CSUM_IP_TSO|CSUM_IP6_TSO); + } + switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: @@ -2749,21 +2922,21 @@ pi->ipi_ipproto = ip->ip_p; pi->ipi_flags |= IPI_TX_IPV4; - if (pi->ipi_csum_flags & CSUM_IP) + if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) ip->ip_sum = 0; - if (pi->ipi_ipproto == IPPROTO_TCP) { - if (__predict_false(th == NULL)) { - txq->ift_pullups++; - if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) - return (ENOMEM); - th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); - } - pi->ipi_tcp_hflags = th->th_flags; - pi->ipi_tcp_hlen = th->th_off << 2; - pi->ipi_tcp_seq = th->th_seq; - } if (IS_TSO4(pi)) { + if (pi->ipi_ipproto == IPPROTO_TCP) { + if (__predict_false(th == NULL)) { + txq->ift_pullups++; + if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) + return (ENOMEM); + th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); + } + pi->ipi_tcp_hflags = th->th_flags; + pi->ipi_tcp_hlen = th->th_off << 2; + pi->ipi_tcp_seq = th->th_seq; + } if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); th->th_sum = in_pseudo(ip->ip_src.s_addr, @@ -2794,15 +2967,15 @@ pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_flags |= IPI_TX_IPV6; - if (pi->ipi_ipproto == IPPROTO_TCP) { - if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { - if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) - return (ENOMEM); - } - pi->ipi_tcp_hflags = th->th_flags; - pi->ipi_tcp_hlen = th->th_off << 2; - } if (IS_TSO6(pi)) { + if (pi->ipi_ipproto == IPPROTO_TCP) { + if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { + if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) + return (ENOMEM); + } + pi->ipi_tcp_hflags = th->th_flags; + pi->ipi_tcp_hlen = th->th_off << 2; + } if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); @@ -2911,9 +3084,9 @@ ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; pidx = txq->ift_pidx; - if (map != NULL) { + MPASS(ifsd_m[pidx] == NULL); + if (force_busdma || map != NULL) { uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags; - err = bus_dmamap_load_mbuf_sg(tag, map, *m0, segs, nsegs, BUS_DMA_NOWAIT); if (err) @@ -3066,7 +3239,8 @@ next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); prefetch(&txq->ift_sds.ifsd_flags[next]); } - } else if (txq->ift_sds.ifsd_map != NULL) + } + if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[pidx]; if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { @@ -3079,18 +3253,19 @@ m_head = *m_headp; pkt_info_zero(&pi); - pi.ipi_len = m_head->m_pkthdr.len; pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); - pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; - pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? 
m_head->m_pkthdr.ether_vtag : 0; pi.ipi_pidx = pidx; pi.ipi_qsidx = txq->ift_id; + pi.ipi_len = m_head->m_pkthdr.len; + pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; + pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0; /* deliberate bitwise OR to make one condition */ if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) return (err); m_head = *m_headp; + pi.ipi_hdr_data = mtod(m_head, caddr_t); } retry: @@ -3267,6 +3442,7 @@ gen = 0; } } + txq_validate(txq); txq->ift_cidx = cidx; txq->ift_gen = gen; } @@ -3316,10 +3492,10 @@ prefetch(items[(cidx + offset) & (size-1)]); if (remaining > 1) { - prefetch(&items[next]); - prefetch(items[(cidx + offset + 1) & (size-1)]); - prefetch(items[(cidx + offset + 2) & (size-1)]); - prefetch(items[(cidx + offset + 3) & (size-1)]); + prefetch2(&items[next]); + prefetch2(items[(cidx + offset + 1) & (size-1)]); + prefetch2(items[(cidx + offset + 2) & (size-1)]); + prefetch2(items[(cidx + offset + 3) & (size-1)]); } return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); } @@ -3500,7 +3676,7 @@ #endif if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; - if ((ifp->if_capenable & IFCAP_NETMAP)) { + if (if_getcapenable(ifp) & IFCAP_NETMAP) { if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)) netmap_tx_irq(ifp, txq->ift_id); IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); @@ -3508,8 +3684,7 @@ } if (txq->ift_db_pending) ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE); - else - ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); + ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { @@ -3525,6 +3700,7 @@ if_ctx_t ctx = rxq->ifr_ctx; bool more; int rc; + uint16_t budget; #ifdef IFLIB_DIAGNOSTICS rxq->ifr_cpu_exec_count[curcpu]++; @@ -3532,7 +3708,19 @@ DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; - if ((more = iflib_rxeof(rxq, 16 /* XXX */)) == false) { + more = true; +#ifdef DEV_NETMAP + if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) { + u_int work = 0; + if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work)) { + more = false; + } + } +#endif + budget = ctx->ifc_sysctl_rx_budget; + if (budget == 0) + budget = 16; /* XXX */ + if (more == false || (more = iflib_rxeof(rxq, budget)) == false) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { @@ -3547,43 +3735,44 @@ GROUPTASK_ENQUEUE(&rxq->ifr_task); } +/* CONFIG context only */ static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; - int i; - - if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) { - if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { - return; - } - } + int i, running; CTX_LOCK(ctx); + running = !!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); + for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } - IFDI_UPDATE_ADMIN_STATUS(ctx); - for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) - callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); - IFDI_LINK_INTR_ENABLE(ctx); + if (running) { + for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) + callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, + txq, txq->ift_timer.c_cpu); + 
IFDI_LINK_INTR_ENABLE(ctx); + } if (ctx->ifc_flags & IFC_DO_RESET) { - ctx->ifc_flags &= ~IFC_DO_RESET; iflib_if_init_locked(ctx); + ctx->ifc_flags &= ~IFC_DO_RESET; } + IFDI_UPDATE_ADMIN_STATUS(ctx); CTX_UNLOCK(ctx); - if (LINK_ACTIVE(ctx) == 0) + if (LINK_ACTIVE(ctx) == 0 || !running) return; for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } +/* CONFIG context only */ static void _task_fn_iov(void *context) { @@ -3698,21 +3887,20 @@ DBG_COUNTER_INC(tx_seen); err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE); + GROUPTASK_ENQUEUE(&txq->ift_task); if (err) { - GROUPTASK_ENQUEUE(&txq->ift_task); /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); m_freem(m); - } else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) { - GROUPTASK_ENQUEUE(&txq->ift_task); } return (err); } +/* CONFIG context only */ static void iflib_if_qflush(if_t ifp) { @@ -3796,29 +3984,12 @@ CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: - CTX_LOCK(ctx); - if (if_getflags(ifp) & IFF_UP) { - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & - (IFF_PROMISC | IFF_ALLMULTI)) { - err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); - } - } else - reinit = 1; - } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - iflib_stop(ctx); - } - ctx->ifc_if_flags = if_getflags(ifp); - CTX_UNLOCK(ctx); + err = async_if_ioctl(ctx, command, data); break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - CTX_LOCK(ctx); - IFDI_INTR_DISABLE(ctx); - IFDI_MULTI_SET(ctx); - IFDI_INTR_ENABLE(ctx); - CTX_UNLOCK(ctx); + err = async_if_ioctl(ctx, command, data); } break; case SIOCSIFMEDIA: @@ -3912,6 +4083,7 @@ * **********************************************************************/ +/* CONFIG context only */ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { @@ -3931,6 +4103,7 @@ CTX_UNLOCK(ctx); } +/* CONFIG context only */ static void iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) { @@ -3950,6 +4123,7 @@ CTX_UNLOCK(ctx); } +/* CONFIG context only */ static void iflib_led_func(void *arg, int onoff) { @@ -4094,8 +4268,10 @@ scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } } - - if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { + CTX_LOCK(ctx); + err = IFDI_ATTACH_PRE(ctx); + CTX_UNLOCK(ctx); + if (err) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); return (err); } @@ -4123,6 +4299,8 @@ /* set unconditionally for !x86 */ ctx->ifc_flags |= IFC_DMAR; #endif + if (force_busdma) + ctx->ifc_flags |= IFC_DMAR; msix_bar = scctx->isc_msix_bar; main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; @@ -4135,6 +4313,7 @@ if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? 
*/ device_printf(dev, "# rx descriptors must be a power of 2\n"); + err = EINVAL; goto fail; } @@ -4173,7 +4352,7 @@ GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ - taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin"); + taskqgroup_attach(qgroup_if_config, &ctx->ifc_admin_task, ctx, -1, "admin"); /* ** Now setup MSI or MSI/X, should ** return us the number of supported @@ -4232,7 +4411,10 @@ } } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac); - if ((err = IFDI_ATTACH_POST(ctx)) != 0) { + CTX_LOCK(ctx); + err = IFDI_ATTACH_POST(ctx); + CTX_UNLOCK(ctx); + if (err) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } @@ -4244,6 +4426,7 @@ if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); + iflib_ctx_insert(ctx); ctx->ifc_flags |= IFC_INIT_DONE; return (0); fail_detach: @@ -4254,7 +4437,9 @@ fail_queues: /* XXX free queues */ fail: + CTX_LOCK(ctx); IFDI_DETACH(ctx); + CTX_UNLOCK(ctx); return (err); } @@ -4302,12 +4487,10 @@ iflib_netmap_detach(ifp); ether_ifdetach(ifp); - /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ - CTX_LOCK_DESTROY(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); if (txq->ift_task.gt_uniq != NULL) @@ -4321,13 +4504,16 @@ free(fl->ifl_rx_bitmap, M_IFLIB); } - tqg = qgroup_if_config_tqg; + tqg = qgroup_if_config; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); + CTX_LOCK(ctx); IFDI_DETACH(ctx); + CTX_UNLOCK(ctx); + CTX_LOCK_DESTROY(ctx); device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(dev); @@ -4348,6 +4534,7 @@ iflib_rx_structures_free(ctx); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); + iflib_ctx_remove(ctx); free(ctx, M_IFLIB); return (0); } @@ -4443,13 +4630,14 @@ * **********************************************************************/ -/* - * - Start a fast taskqueue thread for each core - * - Start a taskqueue for control operations - */ static int iflib_module_init(void) { + + iflib_timer_int = hz / 2; + TUNABLE_INT_FETCH("net.iflib.timer_int", &iflib_timer_int); + LIST_INIT(&ctx_list); + mtx_init(&ctx_list_lock, "ctx list", NULL, MTX_DEF); return (0); } @@ -4893,25 +5081,124 @@ return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } +#ifdef SMP static int -find_nth(if_ctx_t ctx, cpuset_t *cpus, int qid) +find_nth(if_ctx_t ctx, int qid) { + cpuset_t cpus; int i, cpuid, eqid, count; - CPU_COPY(&ctx->ifc_cpus, cpus); + CPU_COPY(&ctx->ifc_cpus, &cpus); count = CPU_COUNT(&ctx->ifc_cpus); eqid = qid % count; /* clear up to the qid'th bit */ for (i = 0; i < eqid; i++) { - cpuid = CPU_FFS(cpus); + cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); - CPU_CLR(cpuid-1, cpus); + CPU_CLR(cpuid-1, &cpus); } - cpuid = CPU_FFS(cpus); + cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); return (cpuid-1); } +static int +find_child_with_core(int cpu, struct cpu_group *grp) +{ + int i; + + if (grp->cg_children == 0) + return -1; + + MPASS(grp->cg_child); + for (i = 0; i < grp->cg_children; i++) { + if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) + return i; + } + + return -1; +} + +/* + * Find the nth thread on the specified core + */ 
+static int +find_thread(int cpu, int thread_num) +{ + struct cpu_group *grp; + int i; + cpuset_t cs; + + grp = smp_topo(); + if (grp == NULL) + return cpu; + i = 0; + while ((i = find_child_with_core(cpu, grp)) != -1) { + /* If the child only has one cpu, don't descend */ + if (grp->cg_child[i].cg_count <= 1) + break; + grp = &grp->cg_child[i]; + } + + /* If they don't share at least an L2 cache, use the same CPU */ + if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) + return cpu; + + /* Now pick one */ + CPU_COPY(&grp->cg_mask, &cs); + for (i = thread_num % grp->cg_count; i > 0; i--) { + MPASS(CPU_FFS(&cs)); + CPU_CLR(CPU_FFS(&cs) - 1, &cs); + } + MPASS(CPU_FFS(&cs)); + return CPU_FFS(&cs) - 1; +} + +static int +get_thread_num(if_ctx_t ctx, iflib_intr_type_t type, int qid) +{ + switch (type) { + case IFLIB_INTR_TX: + /* TX queues get threads on the same core as the corresponding RX queue */ + /* XXX handle multiple RX threads per core and more than two threads per core */ + return qid / CPU_COUNT(&ctx->ifc_cpus) + 1; + case IFLIB_INTR_RX: + case IFLIB_INTR_RXTX: + /* RX queues get the first thread on their core */ + return qid / CPU_COUNT(&ctx->ifc_cpus); + default: + return -1; + } +} +#else +#define get_thread_num(ctx, type, qid) 0 +#define find_thread(cpuid, tid) 0 +#define find_nth(ctx, gid) 0 +#endif + +/* Just to avoid copy/paste */ +static inline int +iflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid, + struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, char *name) +{ + int cpuid; + int err, tid; + + cpuid = find_nth(ctx, qid); + tid = get_thread_num(ctx, type, qid); + MPASS(tid >= 0); + cpuid = find_thread(cpuid, tid); + err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name); + if (err) { + device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err); + return (err); + } + if (cpuid > ctx->ifc_cpuid_highest) + ctx->ifc_cpuid_highest = cpuid; + MPASS(gtask->gt_taskqueue != NULL); + return 0; +} + int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, @@ -4920,9 +5207,8 @@ struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; - cpuset_t cpus; gtask_fn_t *fn; - int tqrid, err, cpuid; + int tqrid, err; driver_filter_t *intr_fast; void *q; @@ -4935,7 +5221,7 @@ q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_tx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); @@ -4944,16 +5230,16 @@ q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_rx; - intr_fast = iflib_fast_intr; + intr_fast = iflib_fast_intr_rx; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RXTX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_rx; intr_fast = iflib_fast_intr_rxtx; GROUPTASK_INIT(gtask, 0, fn, q); @@ -4963,7 +5249,7 @@ tqrid = -1; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; - tqg = qgroup_if_config_tqg; + tqg = qgroup_if_config; fn = _task_fn_admin; intr_fast = iflib_fast_intr_ctx; break; @@ -4985,8 +5271,9 @@ return (0); if (tqrid != -1) { - cpuid = find_nth(ctx, &cpus, qid); - taskqgroup_attach_cpu(tqg, gtask, q, cpuid, irq->ii_rid, name); + err = 
iflib_irq_set_affinity(ctx, rman_get_start(irq->ii_res), type, qid, gtask, tqg, q, name); + if (err) + return (err); } else { taskqgroup_attach(tqg, gtask, q, tqrid, name); } @@ -5001,24 +5288,25 @@ struct taskqgroup *tqg; gtask_fn_t *fn; void *q; + int err; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = &ctx->ifc_txqs[qid].ift_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_tx; break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_rx; break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; - tqg = qgroup_if_config_tqg; + tqg = qgroup_if_config; rid = -1; fn = _task_fn_iov; break; @@ -5026,7 +5314,14 @@ panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); - taskqgroup_attach(tqg, gtask, q, rid, name); + if (rid != -1) { + err = iflib_irq_set_affinity(ctx, rid, type, qid, gtask, tqg, q, name); + if (err) + taskqgroup_attach(tqg, gtask, q, rid, name); + } + else { + taskqgroup_attach(tqg, gtask, q, rid, name); + } } void @@ -5056,7 +5351,7 @@ q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; tqrid = irq->ii_rid = *rid; fn = _task_fn_rx; @@ -5073,7 +5368,7 @@ taskqgroup_attach(tqg, gtask, q, tqrid, name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); - taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, tqrid, "tx"); + taskqgroup_attach(qgroup_if_io, &txq->ift_task, txq, tqrid, "tx"); return (0); } @@ -5106,12 +5401,28 @@ struct grouptask *gtask; gtask = &ctx->ifc_admin_task; - MPASS(gtask->gt_taskqueue != NULL); + MPASS(gtask != NULL && gtask->gt_taskqueue != NULL); #endif GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } +/* CONFIG context only */ +static void +iflib_handle_reset(if_ctx_t ctx, void *arg) +{ + CTX_LOCK(ctx); + ctx->ifc_flags |= IFC_DO_RESET; + iflib_admin_intr_deferred(ctx); + CTX_UNLOCK(ctx); +} + +static void +iflib_admin_reset_deferred(if_ctx_t ctx) +{ + iflib_config_async_gtask_dispatch(ctx, iflib_handle_reset, "reset handler", NULL); +} + void iflib_iov_intr_deferred(if_ctx_t ctx) { @@ -5123,7 +5434,7 @@ iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) { - taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name); + taskqgroup_attach_cpu(qgroup_if_io, gt, uniq, cpu, -1, name); } void @@ -5132,14 +5443,104 @@ { GROUPTASK_INIT(gtask, 0, fn, ctx); - taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name); + taskqgroup_attach(qgroup_if_config, gtask, gtask, -1, name); } +static void +iflib_multi_set(if_ctx_t ctx, void *arg) +{ + CTX_LOCK(ctx); + IFDI_INTR_DISABLE(ctx); + IFDI_MULTI_SET(ctx); + IFDI_INTR_ENABLE(ctx); + CTX_UNLOCK(ctx); +} + +static void +iflib_flags_set(if_ctx_t ctx, void *arg) +{ + int reinit, err; + if_t ifp = ctx->ifc_ifp; + + err = reinit = 0; + CTX_LOCK(ctx); + if (if_getflags(ifp) & IFF_UP) { + if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { + if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & + (IFF_PROMISC | IFF_ALLMULTI)) { + err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); + } + } else + reinit = 1; + } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { + iflib_stop(ctx); + } + ctx->ifc_if_flags = if_getflags(ifp); + if (reinit) + iflib_if_init_locked(ctx); + CTX_UNLOCK(ctx); + if (err) + log(LOG_WARNING, "IFDI_PROMISC_SET returned %d\n", err); +} + +static void +async_gtask(void *ctx) +{ + struct async_task_arg *at_arg = ctx; + if_ctx_t if_ctx = at_arg->ata_ctx; + void *arg = 
at_arg->ata_arg; + + at_arg->ata_fn(if_ctx, arg); + taskqgroup_detach(qgroup_if_config, at_arg->ata_gtask); + free(at_arg->ata_gtask, M_IFLIB); +} + +static int +iflib_config_async_gtask_dispatch(if_ctx_t ctx, async_gtask_fn_t *fn, char *name, void *arg) +{ + struct grouptask *gtask; + struct async_task_arg *at_arg; + + if ((gtask = malloc(sizeof(struct grouptask) + sizeof(struct async_task_arg), M_IFLIB, M_NOWAIT|M_ZERO)) == NULL) + return (ENOMEM); + + at_arg = (struct async_task_arg *)(gtask + 1); + at_arg->ata_fn = fn; + at_arg->ata_ctx = ctx; + at_arg->ata_arg = arg; + at_arg->ata_gtask = gtask; + + GROUPTASK_INIT(gtask, 0, async_gtask, at_arg); + taskqgroup_attach(qgroup_if_config, gtask, gtask, -1, name); + GROUPTASK_ENQUEUE(gtask); + return (0); +} + +static int +async_if_ioctl(if_ctx_t ctx, u_long command, caddr_t data) +{ + int rc; + + switch (command) { + case SIOCADDMULTI: + case SIOCDELMULTI: + rc = iflib_config_async_gtask_dispatch(ctx, iflib_multi_set, "async_if_multi", NULL); + break; + case SIOCSIFFLAGS: + rc = iflib_config_async_gtask_dispatch(ctx, iflib_flags_set, "async_if_flags", NULL); + break; + default: + panic("unknown command %lx", command); + } + return (rc); +} + + void iflib_config_gtask_deinit(struct grouptask *gtask) { - taskqgroup_detach(qgroup_if_config_tqg, gtask); + taskqgroup_detach(qgroup_if_config, gtask); } void @@ -5206,11 +5607,11 @@ info, 0, iflib_sysctl_int_delay, "I", description); } -struct mtx * +struct sx * iflib_ctx_lock_get(if_ctx_t ctx) { - return (&ctx->ifc_mtx); + return (&ctx->ifc_sx); } static int @@ -5330,13 +5731,22 @@ rx_queues = min(rx_queues, tx_queues); } - device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); + device_printf(dev, "trying %d rx queues %d tx queues \n", rx_queues, tx_queues); - vectors = rx_queues + admincnt; + vectors = tx_queues + rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { device_printf(dev, "Using MSIX interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; + + if (vectors < tx_queues + rx_queues + admincnt) { + vectors -= admincnt; + if (vectors % 2 != 0) + vectors -= 1; + if (rx_queues > vectors / 2) + rx_queues = vectors / 2; + tx_queues = vectors - rx_queues; + } scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; @@ -5471,9 +5881,12 @@ SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); - SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", + SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, "disable MSIX (default 0)"); + SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", + CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, + "set the rx budget"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", @@ -5484,6 +5897,10 @@ CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of rx descriptors to use, 0 = use default #"); + + SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "watchdog_events", + CTLFLAG_RD, &ctx->ifc_watchdog_events, 0, + "Watchdog events seen since load"); } static void Index: sys/net/mp_ring.c =================================================================== --- sys/net/mp_ring.c +++ sys/net/mp_ring.c @@ -226,11 +226,15 @@ if (cidx != pidx && pending < 64 && total < budget) continue; critical_enter(); - do { + os.state = ns.state 
= r->state; + ns.cidx = cidx; + ns.flags = state_to_flags(ns, total >= budget); + while (atomic_cmpset_acq_64(&r->state, os.state, ns.state) == 0) { + cpu_spinwait(); os.state = ns.state = r->state; ns.cidx = cidx; ns.flags = state_to_flags(ns, total >= budget); - } while (atomic_cmpset_acq_64(&r->state, os.state, ns.state) == 0); + } critical_exit(); if (ns.flags == ABDICATED) @@ -454,18 +458,12 @@ do { os.state = ns.state = r->state; ns.pidx_tail = pidx_stop; - ns.flags = BUSY; + if (os.flags == IDLE) + ns.flags = ABDICATED; } while (atomic_cmpset_rel_64(&r->state, os.state, ns.state) == 0); critical_exit(); counter_u64_add(r->enqueues, n); - /* - * Turn into a consumer if some other thread isn't active as a consumer - * already. - */ - if (os.flags != BUSY) - drain_ring_lockless(r, ns, os.flags, budget); - return (0); } #endif @@ -476,7 +474,9 @@ union ring_state os, ns; os.state = r->state; - if (os.flags != STALLED || os.pidx_head != os.pidx_tail || r->can_drain(r) == 0) + if ((os.flags != STALLED && os.flags != ABDICATED) || // Only continue in STALLED and ABDICATED + os.pidx_head != os.pidx_tail || // Require work to be available + (os.flags != ABDICATED && r->can_drain(r) == 0)) // Can either drain, or everyone left return; MPASS(os.cidx != os.pidx_tail); /* implied by STALLED */ Index: sys/sys/gtaskqueue.h =================================================================== --- sys/sys/gtaskqueue.h +++ sys/sys/gtaskqueue.h @@ -58,7 +58,9 @@ void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask); struct taskqgroup *taskqgroup_create(char *name); void taskqgroup_destroy(struct taskqgroup *qgroup); -int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride); +int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri); +int taskqgroup_adjust_once(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri); +void taskqgroup_set_adjust(struct taskqgroup *qgroup, void (*adjust_func)(void*)); #define TASK_ENQUEUED 0x1 #define TASK_SKIP_WAKEUP 0x2 @@ -80,27 +82,40 @@ #define TASKQGROUP_DECLARE(name) \ extern struct taskqgroup *qgroup_##name -#define TASKQGROUP_DEFINE(name, cnt, stride) \ + +#define TASKQGROUP_DEFINE(name, cnt, stride, intr, pri) \ \ struct taskqgroup *qgroup_##name; \ \ static void \ -taskqgroup_define_##name(void *arg) \ +taskqgroup_adjust_##name(void *arg) \ { \ - qgroup_##name = taskqgroup_create(#name); \ + int max = (intr) ? 1 : (cnt); \ + if (arg != NULL) { \ + uintptr_t maxcpu = (uintptr_t) arg; \ + max = maxcpu; \ + } \ + \ + taskqgroup_adjust_once(qgroup_##name, max, (stride), (intr), (pri)); \ } \ \ -SYSINIT(taskqgroup_##name, SI_SUB_TASKQ, SI_ORDER_FIRST, \ - taskqgroup_define_##name, NULL); \ +SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \ + taskqgroup_adjust_##name, NULL); \ \ static void \ -taskqgroup_adjust_##name(void *arg) \ +taskqgroup_define_##name(void *arg) \ { \ - taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \ + qgroup_##name = taskqgroup_create(#name); \ + taskqgroup_set_adjust(qgroup_##name, taskqgroup_adjust_##name); \ } \ - \ -SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \ - taskqgroup_adjust_##name, NULL) +SYSINIT(taskqgroup_##name, SI_SUB_TASKQ, SI_ORDER_FIRST, \ + taskqgroup_define_##name, NULL) + + + + + + TASKQGROUP_DECLARE(net); TASKQGROUP_DECLARE(softirq);