Index: sys/dev/bnxt/if_bnxt.c =================================================================== --- sys/dev/bnxt/if_bnxt.c +++ sys/dev/bnxt/if_bnxt.c @@ -1640,7 +1640,8 @@ } for (i=0; i<scctx->isc_ntxqsets; i++) - iflib_softirq_alloc_generic(ctx, i + 1, IFLIB_INTR_TX, NULL, i, + /* TODO: Benchmark and see if tying to the RX irqs helps */ + iflib_softirq_alloc_generic(ctx, -1, IFLIB_INTR_TX, NULL, i, "tx_cp"); return rc; Index: sys/dev/e1000/e1000_80003es2lan.c =================================================================== --- sys/dev/e1000/e1000_80003es2lan.c +++ sys/dev/e1000/e1000_80003es2lan.c @@ -59,7 +59,6 @@ static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw); static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw); static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw); -static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex); static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw); static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw); @@ -68,7 +67,6 @@ static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, u16 data); static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw); -static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw); static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw); @@ -299,7 +297,7 @@ DEBUGFUNC("e1000_acquire_phy_80003es2lan"); mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; - return e1000_acquire_swfw_sync_80003es2lan(hw, mask); + return e1000_acquire_swfw_sync(hw, mask); } /** @@ -315,7 +313,7 @@ DEBUGFUNC("e1000_release_phy_80003es2lan"); mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; - e1000_release_swfw_sync_80003es2lan(hw, mask); + e1000_release_swfw_sync(hw, mask); } /** @@ -333,7 +331,7 @@ mask = E1000_SWFW_CSR_SM; - return e1000_acquire_swfw_sync_80003es2lan(hw, mask); + return e1000_acquire_swfw_sync(hw, mask); } /** @@ -350,7 +348,7 @@ mask = E1000_SWFW_CSR_SM; - e1000_release_swfw_sync_80003es2lan(hw, mask); + e1000_release_swfw_sync(hw, mask); } /** @@ -365,14 +363,14 @@ DEBUGFUNC("e1000_acquire_nvm_80003es2lan"); - ret_val = e1000_acquire_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); + ret_val = e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM); if (ret_val) return ret_val; ret_val = e1000_acquire_nvm_generic(hw); if (ret_val) - e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); return ret_val; } @@ -388,78 +386,7 @@ DEBUGFUNC("e1000_release_nvm_80003es2lan"); e1000_release_nvm_generic(hw); - e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); -} - -/** - * e1000_acquire_swfw_sync_80003es2lan - Acquire SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Acquire the SW/FW semaphore to access the PHY or NVM. The mask - * will also specify which port we're acquiring the lock for. 
- **/ -static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - u32 swmask = mask; - u32 fwmask = mask << 16; - s32 i = 0; - s32 timeout = 50; - - DEBUGFUNC("e1000_acquire_swfw_sync_80003es2lan"); - - while (i < timeout) { - if (e1000_get_hw_semaphore_generic(hw)) - return -E1000_ERR_SWFW_SYNC; - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - if (!(swfw_sync & (fwmask | swmask))) - break; - - /* Firmware currently using resource (fwmask) - * or other software thread using resource (swmask) - */ - e1000_put_hw_semaphore_generic(hw); - msec_delay_irq(5); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); - return -E1000_ERR_SWFW_SYNC; - } - - swfw_sync |= swmask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_release_swfw_sync_80003es2lan - Release SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Release the SW/FW semaphore used to access the PHY or NVM. The mask - * will also specify which port we're releasing the lock for. - **/ -static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - - DEBUGFUNC("e1000_release_swfw_sync_80003es2lan"); - - while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS) - ; /* Empty */ - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - swfw_sync &= ~mask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); } /** Index: sys/dev/e1000/e1000_82571.c =================================================================== --- sys/dev/e1000/e1000_82571.c +++ sys/dev/e1000/e1000_82571.c @@ -70,11 +70,8 @@ static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw); static s32 e1000_valid_led_default_82571(struct e1000_hw *hw, u16 *data); static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw); -static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw); static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw); static s32 e1000_get_phy_id_82571(struct e1000_hw *hw); -static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw); -static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw); static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw); static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw); static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, @@ -125,8 +122,8 @@ phy->ops.get_cable_length = e1000_get_cable_length_igp_2; phy->ops.read_reg = e1000_read_phy_reg_igp; phy->ops.write_reg = e1000_write_phy_reg_igp; - phy->ops.acquire = e1000_get_hw_semaphore_82571; - phy->ops.release = e1000_put_hw_semaphore_82571; + phy->ops.acquire = e1000_get_hw_semaphore; + phy->ops.release = e1000_put_hw_semaphore; break; case e1000_82573: phy->type = e1000_phy_m88; @@ -138,12 +135,11 @@ phy->ops.get_cable_length = e1000_get_cable_length_m88; phy->ops.read_reg = e1000_read_phy_reg_m88; phy->ops.write_reg = e1000_write_phy_reg_m88; - phy->ops.acquire = e1000_get_hw_semaphore_82571; - phy->ops.release = e1000_put_hw_semaphore_82571; + phy->ops.acquire = e1000_get_hw_semaphore; + phy->ops.release = e1000_put_hw_semaphore; break; case e1000_82574: case e1000_82583: - E1000_MUTEX_INIT(&hw->dev_spec._82571.swflag_mutex); phy->type = e1000_phy_bm; phy->ops.get_cfg_done = e1000_get_cfg_done_generic; @@ -506,99 +502,21 @@ } /** - * e1000_get_hw_semaphore_82571 - 
Acquire hardware semaphore - * @hw: pointer to the HW structure - * - * Acquire the HW semaphore to access the PHY or NVM - **/ -static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw) -{ - u32 swsm; - s32 sw_timeout = hw->nvm.word_size + 1; - s32 fw_timeout = hw->nvm.word_size + 1; - s32 i = 0; - - DEBUGFUNC("e1000_get_hw_semaphore_82571"); - - /* If we have timedout 3 times on trying to acquire - * the inter-port SMBI semaphore, there is old code - * operating on the other port, and it is not - * releasing SMBI. Modify the number of times that - * we try for the semaphore to interwork with this - * older code. - */ - if (hw->dev_spec._82571.smb_counter > 2) - sw_timeout = 1; - - /* Get the SW semaphore */ - while (i < sw_timeout) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - i++; - } - - if (i == sw_timeout) { - DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); - hw->dev_spec._82571.smb_counter++; - } - /* Get the FW semaphore. */ - for (i = 0; i < fw_timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); - - /* Semaphore acquired if bit latched */ - if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) - break; - - usec_delay(50); - } - - if (i == fw_timeout) { - /* Release semaphores */ - e1000_put_hw_semaphore_82571(hw); - DEBUGOUT("Driver can't access the NVM\n"); - return -E1000_ERR_NVM; - } - - return E1000_SUCCESS; -} - -/** - * e1000_put_hw_semaphore_82571 - Release hardware semaphore - * @hw: pointer to the HW structure - * - * Release hardware semaphore used to access the PHY or NVM - **/ -static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw) -{ - u32 swsm; - - DEBUGFUNC("e1000_put_hw_semaphore_generic"); - - swsm = E1000_READ_REG(hw, E1000_SWSM); - - swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); - - E1000_WRITE_REG(hw, E1000_SWSM, swsm); -} - -/** - * e1000_get_hw_semaphore_82573 - Acquire hardware semaphore + * e1000_get_hw_semaphore_82574 - Acquire hardware semaphore * @hw: pointer to the HW structure * * Acquire the HW semaphore during reset. * **/ -static s32 e1000_get_hw_semaphore_82573(struct e1000_hw *hw) +static s32 +e1000_get_hw_semaphore_82574(struct e1000_hw *hw) { u32 extcnf_ctrl; s32 i = 0; - + /* XXX assert that mutex is held */ DEBUGFUNC("e1000_get_hw_semaphore_82573"); + ASSERT_CTX_LOCK_HELD(hw); extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); do { extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; @@ -614,7 +532,7 @@ if (i == MDIO_OWNERSHIP_TIMEOUT) { /* Release semaphores */ - e1000_put_hw_semaphore_82573(hw); + e1000_put_hw_semaphore_82574(hw); DEBUGOUT("Driver can't access the PHY\n"); return -E1000_ERR_PHY; } @@ -623,17 +541,18 @@ } /** - * e1000_put_hw_semaphore_82573 - Release hardware semaphore + * e1000_put_hw_semaphore_82574 - Release hardware semaphore * @hw: pointer to the HW structure * * Release hardware semaphore used during reset. * **/ -static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw) +static void +e1000_put_hw_semaphore_82574(struct e1000_hw *hw) { u32 extcnf_ctrl; - DEBUGFUNC("e1000_put_hw_semaphore_82573"); + DEBUGFUNC("e1000_put_hw_semaphore_82574"); extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); extcnf_ctrl &= ~E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; @@ -641,41 +560,6 @@ } /** - * e1000_get_hw_semaphore_82574 - Acquire hardware semaphore - * @hw: pointer to the HW structure - * - * Acquire the HW semaphore to access the PHY or NVM. 
- * - **/ -static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw) -{ - s32 ret_val; - - DEBUGFUNC("e1000_get_hw_semaphore_82574"); - - E1000_MUTEX_LOCK(&hw->dev_spec._82571.swflag_mutex); - ret_val = e1000_get_hw_semaphore_82573(hw); - if (ret_val) - E1000_MUTEX_UNLOCK(&hw->dev_spec._82571.swflag_mutex); - return ret_val; -} - -/** - * e1000_put_hw_semaphore_82574 - Release hardware semaphore - * @hw: pointer to the HW structure - * - * Release hardware semaphore used to access the PHY or NVM - * - **/ -static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw) -{ - DEBUGFUNC("e1000_put_hw_semaphore_82574"); - - e1000_put_hw_semaphore_82573(hw); - E1000_MUTEX_UNLOCK(&hw->dev_spec._82571.swflag_mutex); -} - -/** * e1000_set_d0_lplu_state_82574 - Set Low Power Linkup D0 state * @hw: pointer to the HW structure * @active: TRUE to enable LPLU, FALSE to disable @@ -746,7 +630,7 @@ DEBUGFUNC("e1000_acquire_nvm_82571"); - ret_val = e1000_get_hw_semaphore_82571(hw); + ret_val = e1000_get_hw_semaphore(hw); if (ret_val) return ret_val; @@ -759,7 +643,7 @@ } if (ret_val) - e1000_put_hw_semaphore_82571(hw); + e1000_put_hw_semaphore(hw); return ret_val; } @@ -775,7 +659,7 @@ DEBUGFUNC("e1000_release_nvm_82571"); e1000_release_nvm_generic(hw); - e1000_put_hw_semaphore_82571(hw); + e1000_put_hw_semaphore(hw); } /** @@ -1092,8 +976,6 @@ */ switch (hw->mac.type) { case e1000_82573: - ret_val = e1000_get_hw_semaphore_82573(hw); - break; case e1000_82574: case e1000_82583: ret_val = e1000_get_hw_semaphore_82574(hw); @@ -1110,10 +992,6 @@ /* Must release MDIO ownership and mutex after MAC reset. */ switch (hw->mac.type) { case e1000_82573: - /* Release mutex only if the hw semaphore is acquired */ - if (!ret_val) - e1000_put_hw_semaphore_82573(hw); - break; case e1000_82574: case e1000_82583: /* Release mutex only if the hw semaphore is acquired */ @@ -1121,6 +999,7 @@ e1000_put_hw_semaphore_82574(hw); break; default: + panic("unknown mac type %x\n", hw->mac.type); break; } Index: sys/dev/e1000/e1000_82575.c =================================================================== --- sys/dev/e1000/e1000_82575.c +++ sys/dev/e1000/e1000_82575.c @@ -79,11 +79,9 @@ static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, u16 data); static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw); -static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask); static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, u16 *speed, u16 *duplex); static s32 e1000_get_phy_id_82575(struct e1000_hw *hw); -static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask); static bool e1000_sgmii_active_82575(struct e1000_hw *hw); static s32 e1000_reset_init_script_82575(struct e1000_hw *hw); static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw); @@ -511,12 +509,8 @@ /* link info */ mac->ops.get_link_up_info = e1000_get_link_up_info_82575; /* acquire SW_FW sync */ - mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_82575; - mac->ops.release_swfw_sync = e1000_release_swfw_sync_82575; - if (mac->type >= e1000_i210) { - mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_i210; - mac->ops.release_swfw_sync = e1000_release_swfw_sync_i210; - } + mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync; + mac->ops.release_swfw_sync = e1000_release_swfw_sync; /* set lan id for port to determine which phy lock to use */ hw->mac.ops.set_lan_id(hw); @@ -988,7 +982,7 @@ DEBUGFUNC("e1000_acquire_nvm_82575"); - ret_val = e1000_acquire_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); + ret_val = 
e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM); if (ret_val) goto out; @@ -1019,7 +1013,7 @@ ret_val = e1000_acquire_nvm_generic(hw); if (ret_val) - e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); out: return ret_val; @@ -1038,83 +1032,7 @@ e1000_release_nvm_generic(hw); - e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); -} - -/** - * e1000_acquire_swfw_sync_82575 - Acquire SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Acquire the SW/FW semaphore to access the PHY or NVM. The mask - * will also specify which port we're acquiring the lock for. - **/ -static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - u32 swmask = mask; - u32 fwmask = mask << 16; - s32 ret_val = E1000_SUCCESS; - s32 i = 0, timeout = 200; - - DEBUGFUNC("e1000_acquire_swfw_sync_82575"); - - while (i < timeout) { - if (e1000_get_hw_semaphore_generic(hw)) { - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - if (!(swfw_sync & (fwmask | swmask))) - break; - - /* - * Firmware currently using resource (fwmask) - * or other software thread using resource (swmask) - */ - e1000_put_hw_semaphore_generic(hw); - msec_delay_irq(5); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync |= swmask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); - -out: - return ret_val; -} - -/** - * e1000_release_swfw_sync_82575 - Release SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Release the SW/FW semaphore used to access the PHY or NVM. The mask - * will also specify which port we're releasing the lock for. 
- **/ -static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - - DEBUGFUNC("e1000_release_swfw_sync_82575"); - - while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS) - ; /* Empty */ - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - swfw_sync &= ~mask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); } /** Index: sys/dev/e1000/e1000_hw.h =================================================================== --- sys/dev/e1000/e1000_hw.h +++ sys/dev/e1000/e1000_hw.h @@ -934,7 +934,6 @@ struct e1000_dev_spec_82571 { bool laa_is_present; u32 smb_counter; - E1000_MUTEX swflag_mutex; }; struct e1000_dev_spec_80003es2lan { @@ -958,8 +957,6 @@ struct e1000_dev_spec_ich8lan { bool kmrn_lock_loss_workaround_enabled; struct e1000_shadow_ram shadow_ram[E1000_SHADOW_RAM_WORDS]; - E1000_MUTEX nvm_mutex; - E1000_MUTEX swflag_mutex; bool nvm_k1_enabled; bool disable_k1_off; bool eee_disable; Index: sys/dev/e1000/e1000_i210.h =================================================================== --- sys/dev/e1000/e1000_i210.h +++ sys/dev/e1000/e1000_i210.h @@ -43,8 +43,6 @@ u16 words, u16 *data); s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); -s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask); -void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask); s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data); s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, Index: sys/dev/e1000/e1000_i210.c =================================================================== --- sys/dev/e1000/e1000_i210.c +++ sys/dev/e1000/e1000_i210.c @@ -37,7 +37,6 @@ static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw); static void e1000_release_nvm_i210(struct e1000_hw *hw); -static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw); static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw); @@ -58,7 +57,7 @@ DEBUGFUNC("e1000_acquire_nvm_i210"); - ret_val = e1000_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); + ret_val = e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM); return ret_val; } @@ -74,152 +73,7 @@ { DEBUGFUNC("e1000_release_nvm_i210"); - e1000_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); -} - -/** - * e1000_acquire_swfw_sync_i210 - Acquire SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Acquire the SW/FW semaphore to access the PHY or NVM. The mask - * will also specify which port we're acquiring the lock for. 
- **/ -s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - u32 swmask = mask; - u32 fwmask = mask << 16; - s32 ret_val = E1000_SUCCESS; - s32 i = 0, timeout = 200; /* FIXME: find real value to use here */ - - DEBUGFUNC("e1000_acquire_swfw_sync_i210"); - - while (i < timeout) { - if (e1000_get_hw_semaphore_i210(hw)) { - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - if (!(swfw_sync & (fwmask | swmask))) - break; - - /* - * Firmware currently using resource (fwmask) - * or other software thread using resource (swmask) - */ - e1000_put_hw_semaphore_generic(hw); - msec_delay_irq(5); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync |= swmask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); - -out: - return ret_val; -} - -/** - * e1000_release_swfw_sync_i210 - Release SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Release the SW/FW semaphore used to access the PHY or NVM. The mask - * will also specify which port we're releasing the lock for. - **/ -void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - - DEBUGFUNC("e1000_release_swfw_sync_i210"); - - while (e1000_get_hw_semaphore_i210(hw) != E1000_SUCCESS) - ; /* Empty */ - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - swfw_sync &= ~mask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); -} - -/** - * e1000_get_hw_semaphore_i210 - Acquire hardware semaphore - * @hw: pointer to the HW structure - * - * Acquire the HW semaphore to access the PHY or NVM - **/ -static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw) -{ - u32 swsm; - s32 timeout = hw->nvm.word_size + 1; - s32 i = 0; - - DEBUGFUNC("e1000_get_hw_semaphore_i210"); - - /* Get the SW semaphore */ - while (i < timeout) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - i++; - } - - if (i == timeout) { - /* In rare circumstances, the SW semaphore may already be held - * unintentionally. Clear the semaphore once before giving up. - */ - if (hw->dev_spec._82575.clear_semaphore_once) { - hw->dev_spec._82575.clear_semaphore_once = FALSE; - e1000_put_hw_semaphore_generic(hw); - for (i = 0; i < timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - } - } - - /* If we do not have the semaphore here, we have to give up. */ - if (i == timeout) { - DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); - return -E1000_ERR_NVM; - } - } - - /* Get the FW semaphore. 
*/ - for (i = 0; i < timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); - - /* Semaphore acquired if bit latched */ - if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) - break; - - usec_delay(50); - } - - if (i == timeout) { - /* Release semaphores */ - e1000_put_hw_semaphore_generic(hw); - DEBUGOUT("Driver can't access the NVM\n"); - return -E1000_ERR_NVM; - } - - return E1000_SUCCESS; + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); } /** Index: sys/dev/e1000/e1000_ich8lan.c =================================================================== --- sys/dev/e1000/e1000_ich8lan.c +++ sys/dev/e1000/e1000_ich8lan.c @@ -694,9 +694,6 @@ dev_spec->shadow_ram[i].value = 0xFFFF; } - E1000_MUTEX_INIT(&dev_spec->nvm_mutex); - E1000_MUTEX_INIT(&dev_spec->swflag_mutex); - /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_ich8lan; nvm->ops.release = e1000_release_nvm_ich8lan; @@ -1847,7 +1844,7 @@ { DEBUGFUNC("e1000_acquire_nvm_ich8lan"); - E1000_MUTEX_LOCK(&hw->dev_spec.ich8lan.nvm_mutex); + ASSERT_CTX_LOCK_HELD(hw); return E1000_SUCCESS; } @@ -1862,9 +1859,7 @@ { DEBUGFUNC("e1000_release_nvm_ich8lan"); - E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.nvm_mutex); - - return; + ASSERT_CTX_LOCK_HELD(hw); } /** @@ -1881,7 +1876,7 @@ DEBUGFUNC("e1000_acquire_swflag_ich8lan"); - E1000_MUTEX_LOCK(&hw->dev_spec.ich8lan.swflag_mutex); + ASSERT_CTX_LOCK_HELD(hw); while (timeout) { extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); @@ -1922,9 +1917,6 @@ } out: - if (ret_val) - E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); - return ret_val; } @@ -1949,10 +1941,6 @@ } else { DEBUGOUT("Semaphore unexpectedly released by sw/fw/hw\n"); } - - E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); - - return; } /** @@ -5022,8 +5010,6 @@ E1000_WRITE_REG(hw, E1000_FEXTNVM3, reg); } - if (!ret_val) - E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); if (ctrl & E1000_CTRL_PHY_RST) { ret_val = hw->phy.ops.get_cfg_done(hw); Index: sys/dev/e1000/e1000_mac.h =================================================================== --- sys/dev/e1000/e1000_mac.h +++ sys/dev/e1000/e1000_mac.h @@ -60,7 +60,6 @@ s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw); void e1000_set_lan_id_single_port(struct e1000_hw *hw); void e1000_set_lan_id_multi_port_pci(struct e1000_hw *hw); -s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw); s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, u16 *duplex); s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw, @@ -85,11 +84,15 @@ void e1000_clear_vfta_generic(struct e1000_hw *hw); void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count); void e1000_pcix_mmrbc_workaround_generic(struct e1000_hw *hw); -void e1000_put_hw_semaphore_generic(struct e1000_hw *hw); s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw); void e1000_reset_adaptive_generic(struct e1000_hw *hw); void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop); void e1000_update_adaptive_generic(struct e1000_hw *hw); void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value); +s32 e1000_get_hw_semaphore(struct e1000_hw *hw); +void e1000_put_hw_semaphore(struct e1000_hw *hw); +s32 e1000_acquire_swfw_sync(struct e1000_hw *hw, u16 mask); +void e1000_release_swfw_sync(struct e1000_hw *hw, u16 mask); + #endif Index: sys/dev/e1000/e1000_mac.c =================================================================== --- sys/dev/e1000/e1000_mac.c +++ 
sys/dev/e1000/e1000_mac.c @@ -1707,76 +1707,6 @@ } /** - * e1000_get_hw_semaphore_generic - Acquire hardware semaphore - * @hw: pointer to the HW structure - * - * Acquire the HW semaphore to access the PHY or NVM - **/ -s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw) -{ - u32 swsm; - s32 timeout = hw->nvm.word_size + 1; - s32 i = 0; - - DEBUGFUNC("e1000_get_hw_semaphore_generic"); - - /* Get the SW semaphore */ - while (i < timeout) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); - return -E1000_ERR_NVM; - } - - /* Get the FW semaphore. */ - for (i = 0; i < timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); - - /* Semaphore acquired if bit latched */ - if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) - break; - - usec_delay(50); - } - - if (i == timeout) { - /* Release semaphores */ - e1000_put_hw_semaphore_generic(hw); - DEBUGOUT("Driver can't access the NVM\n"); - return -E1000_ERR_NVM; - } - - return E1000_SUCCESS; -} - -/** - * e1000_put_hw_semaphore_generic - Release hardware semaphore - * @hw: pointer to the HW structure - * - * Release hardware semaphore used to access the PHY or NVM - **/ -void e1000_put_hw_semaphore_generic(struct e1000_hw *hw) -{ - u32 swsm; - - DEBUGFUNC("e1000_put_hw_semaphore_generic"); - - swsm = E1000_READ_REG(hw, E1000_SWSM); - - swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); - - E1000_WRITE_REG(hw, E1000_SWSM, swsm); -} - -/** * e1000_get_auto_rd_done_generic - Check for auto read completion * @hw: pointer to the HW structure * @@ -2251,3 +2181,186 @@ return E1000_SUCCESS; } + +/** + * e1000_get_hw_semaphore - Acquire hardware semaphore + * @hw: pointer to the HW structure + * + * Acquire the HW semaphore to access the PHY or NVM + **/ +s32 e1000_get_hw_semaphore(struct e1000_hw *hw) +{ + u32 swsm; + s32 timeout = hw->nvm.word_size + 1; + s32 i = 0; + + DEBUGFUNC("e1000_get_hw_semaphore"); +#ifdef notyet + /* _82571 */ + /* If we have timedout 3 times on trying to acquire + * the inter-port SMBI semaphore, there is old code + * operating on the other port, and it is not + * releasing SMBI. Modify the number of times that + * we try for the semaphore to interwork with this + * older code. + */ + if (hw->dev_spec._82571.smb_counter > 2) + sw_timeout = 1; + +#endif + /* Get the SW semaphore */ + while (i < timeout) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + i++; + } + + if (i == timeout) { +#ifdef notyet + /* + * XXX This sounds more like a driver bug whereby we either + * recursed accidentally or missed clearing it previously + */ + /* In rare circumstances, the SW semaphore may already be held + * unintentionally. Clear the semaphore once before giving up. + */ + if (hw->dev_spec._82575.clear_semaphore_once) { + hw->dev_spec._82575.clear_semaphore_once = FALSE; + e1000_put_hw_semaphore_generic(hw); + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + } + } +#endif + + DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); + return -E1000_ERR_NVM; + } + + /* Get the FW semaphore. 
*/ + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); + + /* Semaphore acquired if bit latched */ + if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) + break; + + usec_delay(50); + } + + if (i == timeout) { + /* Release semaphores */ + e1000_put_hw_semaphore(hw); + DEBUGOUT("Driver can't access the NVM\n"); + return -E1000_ERR_NVM; + } + + return E1000_SUCCESS; +} + +/** + * e1000_put_hw_semaphore - Release hardware semaphore + * @hw: pointer to the HW structure + * + * Release hardware semaphore used to access the PHY or NVM + **/ +void e1000_put_hw_semaphore(struct e1000_hw *hw) +{ + u32 swsm; + + DEBUGFUNC("e1000_put_hw_semaphore"); + + swsm = E1000_READ_REG(hw, E1000_SWSM); + + swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); + + E1000_WRITE_REG(hw, E1000_SWSM, swsm); +} + + +/** + * e1000_acquire_swfw_sync - Acquire SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Acquire the SW/FW semaphore to access the PHY or NVM. The mask + * will also specify which port we're acquiring the lock for. + **/ +s32 +e1000_acquire_swfw_sync(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + u32 swmask = mask; + u32 fwmask = mask << 16; + s32 ret_val = E1000_SUCCESS; + s32 i = 0, timeout = 200; + + DEBUGFUNC("e1000_acquire_swfw_sync"); + ASSERT_NO_LOCKS(); + while (i < timeout) { + if (e1000_get_hw_semaphore(hw)) { + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + if (!(swfw_sync & (fwmask | swmask))) + break; + + /* + * Firmware currently using resource (fwmask) + * or other software thread using resource (swmask) + */ + e1000_put_hw_semaphore(hw); + msec_delay_irq(5); + i++; + } + + if (i == timeout) { + DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync |= swmask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore(hw); + +out: + return ret_val; +} + +/** + * e1000_release_swfw_sync - Release SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Release the SW/FW semaphore used to access the PHY or NVM. The mask + * will also specify which port we're releasing the lock for. 
+ **/ +void +e1000_release_swfw_sync(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + + DEBUGFUNC("e1000_release_swfw_sync"); + + while (e1000_get_hw_semaphore(hw) != E1000_SUCCESS) + ; /* Empty */ + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + swfw_sync &= ~mask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore(hw); +} + Index: sys/dev/e1000/e1000_osdep.h =================================================================== --- sys/dev/e1000/e1000_osdep.h +++ sys/dev/e1000/e1000_osdep.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,14 @@ #include #include #include + +#include +#include +#include +#include + + + #include #include #include @@ -58,11 +67,41 @@ #define ASSERT(x) if(!(x)) panic("EM: x") +#define us_scale(x) max(1, (x/(1000000/hz))) +static inline int +ms_scale(int x) { + if (hz == 1000) { + return (x); + } else if (hz > 1000) { + return (x*(hz/1000)); + } else { + return (max(1, x/(1000/hz))); + } +} +extern int cold; + +static inline void +safe_pause_us(int x) { + if (cold) { + DELAY(x); + } else { + pause("e1000_delay", max(1, x/(1000000/hz))); + } +} + +static inline void +safe_pause_ms(int x) { + if (cold) { + DELAY(x*1000); + } else { + pause("e1000_delay", ms_scale(x)); + } +} -#define usec_delay(x) DELAY(x) +#define usec_delay(x) safe_pause_us(x) #define usec_delay_irq(x) usec_delay(x) -#define msec_delay(x) DELAY(1000*(x)) -#define msec_delay_irq(x) DELAY(1000*(x)) +#define msec_delay(x) safe_pause_ms(x) +#define msec_delay_irq(x) msec_delay(x) /* Enable/disable debugging statements in shared code */ #define DBG 0 @@ -81,16 +120,6 @@ #define CMD_MEM_WRT_INVALIDATE 0x0010 /* BIT_4 */ #define PCI_COMMAND_REGISTER PCIR_COMMAND -/* Mutex used in the shared code */ -#define E1000_MUTEX struct mtx -#define E1000_MUTEX_INIT(mutex) mtx_init((mutex), #mutex, \ - MTX_NETWORK_LOCK, \ - MTX_DEF | MTX_DUPOK) -#define E1000_MUTEX_DESTROY(mutex) mtx_destroy(mutex) -#define E1000_MUTEX_LOCK(mutex) mtx_lock(mutex) -#define E1000_MUTEX_TRYLOCK(mutex) mtx_trylock(mutex) -#define E1000_MUTEX_UNLOCK(mutex) mtx_unlock(mutex) - typedef uint64_t u64; typedef uint32_t u32; typedef uint16_t u16; @@ -116,6 +145,12 @@ #endif #endif /*__FreeBSD_version < 800000 */ +#ifdef INVARIANTS +#define ASSERT_CTX_LOCK_HELD(hw) (sx_assert(iflib_ctx_lock_get(((struct e1000_osdep *)hw->back)->ctx), SX_XLOCKED)) +#else +#define ASSERT_CTX_LOCK_HELD(hw) +#endif + #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) @@ -135,6 +170,7 @@ bus_space_tag_t flash_bus_space_tag; bus_space_handle_t flash_bus_space_handle; device_t dev; + if_ctx_t ctx; }; #define E1000_REGISTER(hw, reg) (((hw)->mac.type >= e1000_82543) \ @@ -216,5 +252,22 @@ bus_space_write_2(((struct e1000_osdep *)(hw)->back)->flash_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->flash_bus_space_handle, reg, value) + +#if defined(INVARIANTS) +#include + +#define ASSERT_NO_LOCKS() \ + do { \ + int unknown_locks = curthread->td_locks - mtx_owned(&Giant); \ + if (unknown_locks > 0) { \ + WITNESS_WARN(WARN_GIANTOK|WARN_SLEEPOK|WARN_PANIC, NULL, "unexpected non-sleepable lock"); \ + } \ + MPASS(curthread->td_rw_rlocks == 0); \ + MPASS(curthread->td_lk_slocks == 0); \ + } while (0) +#else +#define ASSERT_NO_LOCKS() +#endif + #endif /* _FREEBSD_OS_H_ */ Index: sys/dev/e1000/em_txrx.c =================================================================== --- sys/dev/e1000/em_txrx.c +++ sys/dev/e1000/em_txrx.c @@ -66,6 +66,7 @@ static int 
em_determine_rsstype(u32 pkt_info); extern int em_intr(void *arg); + struct if_txrx em_txrx = { em_isc_txd_encap, em_isc_txd_flush, @@ -74,7 +75,7 @@ em_isc_rxd_pkt_get, em_isc_rxd_refill, em_isc_rxd_flush, - em_intr + em_intr, }; struct if_txrx lem_txrx = { @@ -85,7 +86,7 @@ lem_isc_rxd_pkt_get, lem_isc_rxd_refill, em_isc_rxd_flush, - em_intr + em_intr, }; extern if_shared_ctx_t em_sctx; @@ -523,8 +524,8 @@ for (i = 0, next_pidx = pidx; i < count; i++) { rxd = &rxr->rx_base[next_pidx]; rxd->read.buffer_addr = htole64(paddrs[i]); - /* DD bits must be cleared */ - rxd->wb.upper.status_error = 0; + /* Zero out rx desc status */ + rxd->wb.upper.status_error &= htole32(~0xFF); if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; @@ -551,14 +552,9 @@ struct e1000_rx_desc *rxd; u32 staterr = 0; int cnt, i; + budget = min(budget, scctx->isc_nrxd[0]); - if (budget == 1) { - rxd = (struct e1000_rx_desc *)&rxr->rx_base[idx]; - staterr = rxd->status; - return (staterr & E1000_RXD_STAT_DD); - } - - for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { + for (cnt = 0, i = idx; cnt <= budget;) { rxd = (struct e1000_rx_desc *)&rxr->rx_base[i]; staterr = rxd->status; @@ -571,6 +567,7 @@ if (staterr & E1000_RXD_STAT_EOP) cnt++; } + MPASS(cnt <= scctx->isc_nrxd[0]); return (cnt); } @@ -584,14 +581,9 @@ union e1000_rx_desc_extended *rxd; u32 staterr = 0; int cnt, i; + budget = min(budget, scctx->isc_nrxd[0]); - if (budget == 1) { - rxd = &rxr->rx_base[idx]; - staterr = le32toh(rxd->wb.upper.status_error); - return (staterr & E1000_RXD_STAT_DD); - } - - for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { + for (cnt = 0, i = idx; cnt <= budget;) { rxd = &rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); @@ -606,6 +598,7 @@ cnt++; } + MPASS(cnt <= scctx->isc_nrxd[0]); return (cnt); } @@ -694,7 +687,8 @@ pkt_info = le32toh(rxd->wb.lower.mrq); /* Error Checking then decrement count */ - MPASS ((staterr & E1000_RXD_STAT_DD) != 0); + KASSERT(staterr & E1000_RXD_STAT_DD, + ("cidx=%d i=%d iri_len=%d", cidx, i, ri->iri_len)); len = le16toh(rxd->wb.upper.length); ri->iri_len += len; Index: sys/dev/e1000/if_em.h =================================================================== --- sys/dev/e1000/if_em.h +++ sys/dev/e1000/if_em.h @@ -434,6 +434,7 @@ u32 eims; /* This queue's EIMS bit */ u32 me; struct tx_ring txr; + struct if_irq que_irq; }; struct em_rx_queue { @@ -443,7 +444,7 @@ u32 eims; struct rx_ring rxr; u64 irqs; - struct if_irq que_irq; + struct if_irq que_irq; }; /* Our adapter structure */ Index: sys/dev/e1000/if_em.c =================================================================== --- sys/dev/e1000/if_em.c +++ sys/dev/e1000/if_em.c @@ -483,7 +483,7 @@ .isc_vendor_info = em_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, - .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP, + .isc_flags = IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, @@ -511,7 +511,7 @@ .isc_vendor_info = igb_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, - .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP, + .isc_flags = IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, @@ -723,7 +723,7 @@ return (ENXIO); } - adapter->ctx = ctx; + adapter->ctx = adapter->osdep.ctx = ctx; adapter->dev = adapter->osdep.dev = dev; scctx = adapter->shared = iflib_get_softc_ctx(ctx); adapter->media = iflib_get_media(ctx); 
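[Note on the em_isc_rxd_available()/lem_isc_rxd_available() hunks above: the single-descriptor fast path is dropped and the scan is clamped to the caller's budget. Below is a minimal, self-contained C sketch of that counting pattern; the struct, constants, and function name are illustrative only and are not part of the patch, and an extra one-ring guard is added so the sketch cannot spin if EOP is never seen.]

#include <stdint.h>

struct toy_rxd {
	uint32_t status;		/* status written back by the NIC */
};

#define TOY_STAT_DD	0x01		/* descriptor done */
#define TOY_STAT_EOP	0x02		/* end of packet */

/*
 * Count complete packets ready for harvest, starting at 'idx' in a ring of
 * 'nrxd' descriptors.  Stop at the first descriptor the hardware has not
 * written back, once the count passes 'budget', or after scanning one full
 * ring (the last guard is extra here, for safety in the sketch).
 */
static int
toy_rxd_available(const struct toy_rxd *ring, int nrxd, int idx, int budget)
{
	uint32_t status;
	int cnt, i, scanned;

	if (budget > nrxd)
		budget = nrxd;
	for (cnt = 0, scanned = 0, i = idx;
	    cnt <= budget && scanned < nrxd; scanned++) {
		status = ring[i].status;
		if ((status & TOY_STAT_DD) == 0)
			break;			/* not yet completed */
		if (++i == nrxd)
			i = 0;			/* wrap the ring index */
		if (status & TOY_STAT_EOP)
			cnt++;			/* count packets, not fragments */
	}
	return (cnt);
}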
@@ -1405,7 +1405,9 @@ { struct adapter *adapter = arg; u32 reg_icr; + int is_igb; + is_igb = (adapter->hw.mac.type >= igb_mac_min); ++adapter->link_irq; MPASS(adapter->hw.back != NULL); reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); @@ -1413,26 +1415,29 @@ if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { - em_handle_link(adapter->ctx); + if (is_igb) { + if (reg_icr & E1000_ICR_LSC) + em_handle_link(adapter->ctx); + E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); } else { + if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { + em_handle_link(adapter->ctx); + } E1000_WRITE_REG(&adapter->hw, E1000_IMS, - EM_MSIX_LINK | E1000_IMS_LSC); - if (adapter->hw.mac.type >= igb_mac_min) - E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); - } + EM_MSIX_LINK | E1000_IMS_LSC); - /* - * Because we must read the ICR for this interrupt - * it may clear other causes using autoclear, for - * this reason we simply create a soft interrupt - * for all these vectors. - */ - if (reg_icr && adapter->hw.mac.type < igb_mac_min) { - E1000_WRITE_REG(&adapter->hw, - E1000_ICS, adapter->ims); + /* + * Because we must read the ICR for this interrupt + * it may clear other causes using autoclear, for + * this reason we simply create a soft interrupt + * for all these vectors. + */ + if (reg_icr) { + E1000_WRITE_REG(&adapter->hw, + E1000_ICS, adapter->ims); + } } - return (FILTER_HANDLED); } @@ -1670,13 +1675,6 @@ return; iflib_admin_intr_deferred(ctx); - /* Reset LAA into RAR[0] on 82571 */ - if ((adapter->hw.mac.type == e1000_82571) && - e1000_get_laa_state_82571(&adapter->hw)) - e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); - - if (adapter->hw.mac.type < em_mac_min) - lem_smartspeed(adapter); /* Mask to use in the irq trigger */ if (adapter->intr_type == IFLIB_INTR_MSIX) { @@ -1787,6 +1785,14 @@ } em_update_stats_counters(adapter); + /* Reset LAA into RAR[0] on 82571 */ + if ((adapter->hw.mac.type == e1000_82571) && + e1000_get_laa_state_82571(&adapter->hw)) + e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); + + if (adapter->hw.mac.type < em_mac_min) + lem_smartspeed(adapter); + E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); } @@ -1902,6 +1908,87 @@ return (0); } +static int +igb_intr_assign(if_ctx_t ctx, int msix) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct em_rx_queue *rx_que = adapter->rx_queues; + struct em_tx_queue *tx_que = adapter->tx_queues; + int error, rid, i, vector = 0, rx_vectors; + char buf[16]; + + /* First set up ring resources */ + for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) { + rid = vector + 1; + snprintf(buf, sizeof(buf), "rxq%d", i); + error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, + em_msix_que, rx_que, rx_que->me, buf); + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d\n", i, error); + adapter->rx_num_queues = i; + goto fail; + } + + rx_que->msix = vector; + + /* + * Set the bit to enable interrupt + * in E1000_IMS -- bits 20 and 21 + * are for RX0 and RX1, note this has + * NOTHING to do with the MSIX vector + */ + if (adapter->hw.mac.type == e1000_82574) { + rx_que->eims = 1 << (20 + i); + adapter->ims |= rx_que->eims; + adapter->ivars |= (8 | rx_que->msix) << (i * 4); + } else if (adapter->hw.mac.type == e1000_82575) + rx_que->eims = E1000_EICR_TX_QUEUE0 << vector; + else + rx_que->eims = 1 << vector; + } + 
rx_vectors = vector; + + vector = 0; + for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) { + snprintf(buf, sizeof(buf), "txq%d", i); + tx_que = &adapter->tx_queues[i]; + tx_que->msix = adapter->rx_queues[i % adapter->rx_num_queues].msix; + rid = rman_get_start(adapter->rx_queues[i % adapter->rx_num_queues].que_irq.ii_res); + iflib_softirq_alloc_generic(ctx, rid, IFLIB_INTR_TX, tx_que, tx_que->me, buf); + + if (adapter->hw.mac.type == e1000_82574) { + tx_que->eims = 1 << (22 + i); + adapter->ims |= tx_que->eims; + adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4)); + } else if (adapter->hw.mac.type == e1000_82575) { + tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->tx_num_queues); + } else { + tx_que->eims = 1 << (i % adapter->tx_num_queues); + } + } + + /* Link interrupt */ + rid = rx_vectors + 1; + error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq"); + + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); + goto fail; + } + adapter->linkvec = rx_vectors; + if (adapter->hw.mac.type < igb_mac_min) { + adapter->ivars |= (8 | rx_vectors) << 16; + adapter->ivars |= 0x80000000; + } + return (0); +fail: + iflib_irq_free(ctx, &adapter->irq); + rx_que = adapter->rx_queues; + for (int i = 0; i < adapter->rx_num_queues; i++, rx_que++) + iflib_irq_free(ctx, &rx_que->que_irq); + return (error); +} + /********************************************************************* * * Setup the MSIX Interrupt handlers @@ -1913,14 +2000,18 @@ struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rx_que = adapter->rx_queues; struct em_tx_queue *tx_que = adapter->tx_queues; - int error, rid, i, vector = 0, rx_vectors; + int error, rid, i, vector = 0; char buf[16]; + if (adapter->hw.mac.type >= igb_mac_min) { + return igb_intr_assign(ctx, msix); + } + /* First set up ring resources */ for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); - error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, em_msix_que, rx_que, rx_que->me, buf); + error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RX, em_msix_que, rx_que, rx_que->me, buf); if (error) { device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); adapter->rx_num_queues = i + 1; @@ -1944,16 +2035,19 @@ else rx_que->eims = 1 << vector; } - rx_vectors = vector; - vector = 0; for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "txq%d", i); tx_que = &adapter->tx_queues[i]; - iflib_softirq_alloc_generic(ctx, rid, IFLIB_INTR_TX, tx_que, tx_que->me, buf); - tx_que->msix = (vector % adapter->tx_num_queues); + error = iflib_irq_alloc_generic(ctx, &tx_que->que_irq, rid, IFLIB_INTR_TX, em_msix_que, tx_que, tx_que->me, buf); + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); + adapter->tx_num_queues = i + 1; + goto fail; + } + tx_que->msix = vector; /* * Set the bit to enable interrupt @@ -1966,23 +2060,24 @@ adapter->ims |= tx_que->eims; adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4)); } else if (adapter->hw.mac.type == e1000_82575) { - tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->tx_num_queues); + tx_que->eims = E1000_EICR_TX_QUEUE0 << vector; } else { - tx_que->eims = 1 << (i % adapter->tx_num_queues); + tx_que->eims = 1 << vector; } } /* Link interrupt */ - rid = rx_vectors + 1; + rid = 
vector + 1; error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); goto fail; } - adapter->linkvec = rx_vectors; + + adapter->linkvec = vector; if (adapter->hw.mac.type < igb_mac_min) { - adapter->ivars |= (8 | rx_vectors) << 16; + adapter->ivars |= (8 | vector) << 16; adapter->ivars |= 0x80000000; } return (0); @@ -2139,15 +2234,24 @@ em_free_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); - struct em_rx_queue *que = adapter->rx_queues; + struct em_rx_queue *rxque = adapter->rx_queues; + struct em_tx_queue *txque = adapter->tx_queues; device_t dev = iflib_get_dev(ctx); + int is_igb; + is_igb = (adapter->hw.mac.type >= igb_mac_min); /* Release all msix queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); - for (int i = 0; i < adapter->rx_num_queues; i++, que++) { - iflib_irq_free(ctx, &que->que_irq); + for (int i = 0; i < adapter->rx_num_queues; i++, rxque++) { + iflib_irq_free(ctx, &rxque->que_irq); + } + + if (!is_igb) { + for (int i = 0; i < adapter->tx_num_queues; i++, txque++) { + iflib_irq_free(ctx, &txque->que_irq); + } } /* First release all the interrupt resources */ Index: sys/kern/subr_gtaskqueue.c =================================================================== --- sys/kern/subr_gtaskqueue.c +++ sys/kern/subr_gtaskqueue.c @@ -48,17 +48,26 @@ #include #include -static MALLOC_DEFINE(M_GTASKQUEUE, "taskqueue", "Task Queues"); +static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues"); static void gtaskqueue_thread_enqueue(void *); static void gtaskqueue_thread_loop(void *arg); - -TASKQGROUP_DEFINE(softirq, mp_ncpus, 1); +static int _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri); +TASKQGROUP_DEFINE(softirq, mp_ncpus, 1, false, PI_SOFT); struct gtaskqueue_busy { struct gtask *tb_running; TAILQ_ENTRY(gtaskqueue_busy) tb_link; }; +struct gt_intr_thread { + int git_flags; /* (j) IT_* flags. */ + int git_need; /* Needs service. */ +}; + +/* Interrupt thread flags kept in it_flags */ +#define IT_DEAD 0x000001 /* Thread is waiting to exit. */ +#define IT_WAIT 0x000002 /* Thread is waiting for completion. 
*/ + static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1; struct gtaskqueue { @@ -69,6 +78,7 @@ TAILQ_HEAD(, gtaskqueue_busy) tq_active; struct mtx tq_mutex; struct thread **tq_threads; + struct gt_intr_thread *tq_gt_intrs; int tq_tcount; int tq_spin; int tq_flags; @@ -80,6 +90,7 @@ #define TQ_FLAGS_ACTIVE (1 << 0) #define TQ_FLAGS_BLOCKED (1 << 1) #define TQ_FLAGS_UNLOCKED_ENQUEUE (1 << 2) +#define TQ_FLAGS_INTR (1 << 3) #define DT_CALLOUT_ARMED (1 << 0) @@ -180,6 +191,32 @@ free(queue, M_GTASKQUEUE); } +static void +schedule_ithread(struct gtaskqueue *queue) +{ + struct proc *p; + struct thread *td; + struct gt_intr_thread *git; + + MPASS(queue->tq_tcount == 1); + td = queue->tq_threads[0]; + git = &queue->tq_gt_intrs[0]; + p = td->td_proc; + + atomic_store_rel_int(&git->git_need, 1); + thread_lock(td); + if (TD_AWAITING_INTR(td)) { + CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, + td->td_name); + TD_CLR_IWAIT(td); + sched_add(td, SRQ_INTR); + } else { + CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", + __func__, p->p_pid, td->td_name, git->git_need, td->td_state); + } + thread_unlock(td); +} + int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask) { @@ -197,8 +234,13 @@ STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link); gtask->ta_flags |= TASK_ENQUEUED; TQ_UNLOCK(queue); - if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) - queue->tq_enqueue(queue->tq_context); + if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) { + if (queue->tq_flags & TQ_FLAGS_INTR) { + schedule_ithread(queue); + } else { + queue->tq_enqueue(queue->tq_context); + } + } return (0); } @@ -403,7 +445,7 @@ static int _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, - cpuset_t *mask, const char *name, va_list ap) + cpuset_t *mask, bool intr, const char *name, va_list ap) { char ktname[MAXCOMLEN + 1]; struct thread *td; @@ -422,6 +464,12 @@ printf("%s: no memory for %s threads\n", __func__, ktname); return (ENOMEM); } + tq->tq_gt_intrs = malloc(sizeof(struct gt_intr_thread) * count, M_GTASKQUEUE, + M_NOWAIT | M_ZERO); + if (tq->tq_gt_intrs == NULL) { + printf("%s: no memory for %s intr info\n", __func__, ktname); + return (ENOMEM); + } for (i = 0; i < count; i++) { if (count == 1) @@ -439,6 +487,9 @@ } else tq->tq_tcount++; } + if (intr) + tq->tq_flags |= TQ_FLAGS_INTR; + for (i = 0; i < count; i++) { if (tq->tq_threads[i] == NULL) continue; @@ -458,7 +509,14 @@ } thread_lock(td); sched_prio(td, pri); - sched_add(td, SRQ_BORING); + if (intr) { + /* we need to schedule the thread from the interrupt handler for this to work */ + TD_SET_IWAIT(td); + sched_class(td, PRI_ITHD); + td->td_pflags |= TDP_ITHREAD; + } else { + sched_add(td, SRQ_BORING); + } thread_unlock(td); } @@ -467,13 +525,13 @@ static int gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, - const char *name, ...) + bool intr, const char *name, ...) 
{ va_list ap; int error; va_start(ap, name); - error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap); + error = _gtaskqueue_start_threads(tqp, count, pri, NULL, intr, name, ap); va_end(ap); return (error); } @@ -491,16 +549,58 @@ } static void -gtaskqueue_thread_loop(void *arg) +intr_thread_loop(struct gtaskqueue *tq) { - struct gtaskqueue **tqp, *tq; + struct gt_intr_thread *git; + struct thread *td; - tqp = arg; - tq = *tqp; - gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT); - TQ_LOCK(tq); + git = &tq->tq_gt_intrs[0]; + td = tq->tq_threads[0]; + MPASS(tq->tq_tcount == 1); + + while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) { + THREAD_NO_SLEEPING(); + while (atomic_cmpset_acq_int(&git->git_need, 1, 0) != 0) { + gtaskqueue_run_locked(tq); + } + THREAD_SLEEPING_OK(); + + /* + * Because taskqueue_run() can drop tq_mutex, we need to + * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the + * meantime, which means we missed a wakeup. + */ + if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0) + break; + + TQ_UNLOCK(tq); + WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread"); + mtx_assert(&Giant, MA_NOTOWNED); + thread_lock(td); + if (atomic_load_acq_int(&git->git_need) == 0 && + (git->git_flags & (IT_DEAD | IT_WAIT)) == 0) { + TD_SET_IWAIT(td); + mi_switch(SW_VOL | SWT_IWAIT, NULL); + } +#if 0 + /* XXX is this something we want? */ + if (git->git_flags & IT_WAIT) { + wake = 1; + git->git_flags &= ~IT_WAIT; + } +#endif + thread_unlock(td); + TQ_LOCK(tq); + } + THREAD_NO_SLEEPING(); + gtaskqueue_run_locked(tq); + THREAD_SLEEPING_OK(); +} + +static void +timeshare_thread_loop(struct gtaskqueue *tq) +{ while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) { - /* XXX ? */ gtaskqueue_run_locked(tq); /* * Because taskqueue_run() can drop tq_mutex, we need to @@ -512,6 +612,23 @@ TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0); } gtaskqueue_run_locked(tq); +} + +static void +gtaskqueue_thread_loop(void *arg) +{ + struct gtaskqueue **tqp, *tq; + + tqp = arg; + tq = *tqp; + gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT); + TQ_LOCK(tq); + if (curthread->td_pflags & TDP_ITHREAD) { + intr_thread_loop(tq); + } else { + timeshare_thread_loop(tq); + } + /* * This thread is on its way out, so just drop the lock temporarily * in order to call the shutdown callback. 
This allows the callback @@ -558,11 +675,17 @@ struct taskqgroup { struct taskqgroup_cpu tqg_queue[MAXCPU]; struct mtx tqg_lock; + void (*adjust_func)(void*); char * tqg_name; int tqg_adjusting; int tqg_stride; int tqg_cnt; + int tqg_pri; + int tqg_flags; + bool tqg_intr; }; +#define TQG_NEED_ADJUST 0x1 +#define TQG_ADJUSTED 0x2 struct taskq_bind_task { struct gtask bt_task; @@ -570,16 +693,16 @@ }; static void -taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu) +taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu, bool intr, int pri) { struct taskqgroup_cpu *qcpu; qcpu = &qgroup->tqg_queue[idx]; LIST_INIT(&qcpu->tgc_tasks); - qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK, + qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &qcpu->tgc_taskq); - gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT, - "%s_%d", qgroup->tqg_name, idx); + gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, pri, + intr, "%s_%d", qgroup->tqg_name, idx); qcpu->tgc_cpu = cpu; } @@ -663,12 +786,20 @@ void *uniq, int irq, char *name) { cpuset_t mask; - int qid; + int qid, error; gtask->gt_uniq = uniq; gtask->gt_name = name; gtask->gt_irq = irq; gtask->gt_cpu = -1; + + mtx_lock(&qgroup->tqg_lock); + qgroup->tqg_flags |= TQG_NEED_ADJUST; + mtx_unlock(&qgroup->tqg_lock); + + if (tqg_smp_started && !(qgroup->tqg_flags & TQG_ADJUSTED)) + qgroup->adjust_func(NULL); + mtx_lock(&qgroup->tqg_lock); qid = taskqgroup_find(qgroup, uniq); qgroup->tqg_queue[qid].tgc_cnt++; @@ -679,7 +810,9 @@ CPU_ZERO(&mask); CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask); mtx_unlock(&qgroup->tqg_lock); - intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); + error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); + if (error) + printf("taskqgroup_attach: setaffinity failed: %d\n", error); } else mtx_unlock(&qgroup->tqg_lock); } @@ -688,7 +821,7 @@ taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) { cpuset_t mask; - int qid, cpu; + int qid, cpu, error; mtx_lock(&qgroup->tqg_lock); qid = taskqgroup_find(qgroup, gtask->gt_uniq); @@ -698,9 +831,10 @@ CPU_ZERO(&mask); CPU_SET(cpu, &mask); - intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask); - + error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_INTRHANDLER, &mask); mtx_lock(&qgroup->tqg_lock); + if (error) + printf("taskqgroup_attach_deferred: setaffinity failed: %d\n", error); } qgroup->tqg_queue[qid].tgc_cnt++; @@ -711,27 +845,79 @@ mtx_unlock(&qgroup->tqg_lock); } +static int +taskqgroup_adjust_deferred(struct taskqgroup *qgroup, int cpu) +{ + int i, error = 0, cpu_max = -1; + + mtx_lock(&qgroup->tqg_lock); + for (i = 0; i < qgroup->tqg_cnt; i++) + if (qgroup->tqg_queue[i].tgc_cpu > cpu_max) + cpu_max = qgroup->tqg_queue[i].tgc_cpu; + if (cpu_max >= cpu) { + mtx_unlock(&qgroup->tqg_lock); + return (0); + } + MPASS(cpu <= mp_maxid); + error = _taskqgroup_adjust(qgroup, cpu + 1, qgroup->tqg_stride, + qgroup->tqg_intr, qgroup->tqg_pri); + if (error) { + printf("%s: _taskqgroup_adjust(%p, %d, %d, %d, %d) => %d\n\n", + __func__, qgroup, cpu + 1, qgroup->tqg_stride, qgroup->tqg_intr, + qgroup->tqg_pri, error); + goto out; + } + for (i = 0; i < qgroup->tqg_cnt; i++) + if (qgroup->tqg_queue[i].tgc_cpu > cpu_max) + cpu_max = qgroup->tqg_queue[i].tgc_cpu; + MPASS(cpu_max >= cpu); +out: + mtx_unlock(&qgroup->tqg_lock); + return (error); +} + int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, void *uniq, int cpu, int irq, char *name) { cpuset_t mask; - int i, qid; + int i, error, 
qid; qid = -1; gtask->gt_uniq = uniq; gtask->gt_name = name; gtask->gt_irq = irq; gtask->gt_cpu = cpu; + MPASS(cpu >= 0); + + mtx_lock(&qgroup->tqg_lock); + qgroup->tqg_flags |= TQG_NEED_ADJUST; + mtx_unlock(&qgroup->tqg_lock); + + if (tqg_smp_started && !(qgroup->tqg_flags & TQG_ADJUSTED)) { + uintptr_t cpuid = cpu + 1; + qgroup->adjust_func((void *)cpuid); + } + if ((error = taskqgroup_adjust_deferred(qgroup, cpu))) + return (error); + mtx_lock(&qgroup->tqg_lock); if (tqg_smp_started) { - for (i = 0; i < qgroup->tqg_cnt; i++) + for (i = 0; i < qgroup->tqg_cnt; i++) { if (qgroup->tqg_queue[i].tgc_cpu == cpu) { qid = i; break; } +#ifdef INVARIANTS + else + printf("qgroup->tqg_queue[%d].tgc_cpu=0x%x tgc_cnt=0x%x\n", + i, qgroup->tqg_queue[i].tgc_cpu, qgroup->tqg_queue[i].tgc_cnt); + +#endif + } if (qid == -1) { mtx_unlock(&qgroup->tqg_lock); + printf("%s: qid not found for cpu=%d\n", __func__, cpu); return (EINVAL); } } else @@ -744,8 +930,11 @@ CPU_ZERO(&mask); CPU_SET(cpu, &mask); - if (irq != -1 && tqg_smp_started) - intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); + if (irq != -1 && tqg_smp_started) { + error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); + if (error) + printf("taskqgroup_attach_cpu: setaffinity failed: %d\n", error); + } return (0); } @@ -753,13 +942,18 @@ taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) { cpuset_t mask; - int i, qid, irq, cpu; + int i, qid, irq, cpu, error; qid = -1; irq = gtask->gt_irq; cpu = gtask->gt_cpu; MPASS(tqg_smp_started); + + if ((error = taskqgroup_adjust_deferred(qgroup, cpu))) + return (error); mtx_lock(&qgroup->tqg_lock); + /* adjust as needed */ + MPASS(cpu <= mp_maxid); for (i = 0; i < qgroup->tqg_cnt; i++) if (qgroup->tqg_queue[i].tgc_cpu == cpu) { qid = i; break; } if (qid == -1) { mtx_unlock(&qgroup->tqg_lock); + printf("%s: qid not found for cpu=%d\n", __func__, cpu); return (EINVAL); } qgroup->tqg_queue[qid].tgc_cnt++; @@ -778,8 +973,11 @@ CPU_ZERO(&mask); CPU_SET(cpu, &mask); - if (irq != -1) - intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); + if (irq != -1) { + error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); + if (error) + printf("taskqgroup_attach_cpu: setaffinity failed: %d\n", error); + } return (0); } @@ -818,8 +1016,25 @@ printf("taskqgroup_binder: setaffinity failed: %d\n", error); free(gtask, M_DEVBUF); + } +static void +taskqgroup_ithread_binder(void *ctx) +{ + struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx; + cpuset_t mask; + int error; + + CPU_ZERO(&mask); + CPU_SET(gtask->bt_cpuid, &mask); + error = cpuset_setthread(curthread->td_tid, &mask); + if (error) + printf("taskqgroup_binder: setaffinity failed: %d\n", + error); + free(gtask, M_DEVBUF); + +} static void taskqgroup_bind(struct taskqgroup *qgroup) { @@ -835,7 +1050,10 @@ for (i = 0; i < qgroup->tqg_cnt; i++) { gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK); - GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask); + if (qgroup->tqg_intr) + GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_ithread_binder, gtask); + else + GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask); gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu; grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq, &gtask->bt_task); @@ -843,7 +1061,7 @@ } static int -_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) +_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) { LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL); struct grouptask *gtask; @@ -858,14 
+1076,22 @@ return (EINVAL); } if (qgroup->tqg_adjusting) { - printf("taskqgroup_adjust failed: adjusting\n"); + printf("%s: failed: adjusting\n", __func__); return (EBUSY); } + /* No work to be done */ + if (qgroup->tqg_cnt == cnt) + return (0); qgroup->tqg_adjusting = 1; old_cnt = qgroup->tqg_cnt; old_cpu = 0; - if (old_cnt < cnt) - old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu; + if (old_cnt < cnt) { + int old_max_idx = max(0, old_cnt-1); + old_cpu = qgroup->tqg_queue[old_max_idx].tgc_cpu; + if (old_cnt > 0) + for (k = 0; k < stride; k++) + old_cpu = CPU_NEXT(old_cpu); + } mtx_unlock(&qgroup->tqg_lock); /* * Set up queue for tasks added before boot. @@ -881,7 +1107,7 @@ */ cpu = old_cpu; for (i = old_cnt; i < cnt; i++) { - taskqgroup_cpu_create(qgroup, i, cpu); + taskqgroup_cpu_create(qgroup, i, cpu, ithread, pri); for (k = 0; k < stride; k++) cpu = CPU_NEXT(cpu); @@ -889,6 +1115,8 @@ mtx_lock(&qgroup->tqg_lock); qgroup->tqg_cnt = cnt; qgroup->tqg_stride = stride; + qgroup->tqg_intr = ithread; + qgroup->tqg_pri = pri; /* * Adjust drivers to use new taskqs. @@ -934,12 +1162,34 @@ } int -taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) +taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) { int error; mtx_lock(&qgroup->tqg_lock); - error = _taskqgroup_adjust(qgroup, cnt, stride); + error = _taskqgroup_adjust(qgroup, cnt, stride, ithread, pri); + mtx_unlock(&qgroup->tqg_lock); + + return (error); +} + +void +taskqgroup_set_adjust(struct taskqgroup *qgroup, void (*adjust_func)(void*)) +{ + qgroup-> adjust_func = adjust_func; +} + +int +taskqgroup_adjust_once(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) +{ + int error = 0; + + mtx_lock(&qgroup->tqg_lock); + if ((qgroup->tqg_flags & (TQG_ADJUSTED|TQG_NEED_ADJUST)) == TQG_NEED_ADJUST) { + qgroup->tqg_flags |= TQG_ADJUSTED; + error = _taskqgroup_adjust(qgroup, cnt, stride, ithread, pri); + MPASS(error == 0); + } mtx_unlock(&qgroup->tqg_lock); return (error); @@ -954,7 +1204,9 @@ mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF); qgroup->tqg_name = name; LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks); - + MPASS(qgroup->tqg_queue[0].tgc_cnt == 0); + MPASS(qgroup->tqg_queue[0].tgc_cpu == 0); + MPASS(qgroup->tqg_queue[0].tgc_taskq == 0); return (qgroup); } Index: sys/net/iflib.h =================================================================== --- sys/net/iflib.h +++ sys/net/iflib.h @@ -119,6 +119,7 @@ qidx_t ipi_pidx; /* start pidx for encap */ qidx_t ipi_new_pidx; /* next available pidx post-encap */ /* offload handling */ + caddr_t ipi_hdr_data; /* raw header */ uint8_t ipi_ehdrlen; /* ether header length */ uint8_t ipi_ip_hlen; /* ip header length */ uint8_t ipi_tcp_hlen; /* tcp header length */ @@ -183,6 +184,7 @@ void (*ift_rxd_refill) (void * , if_rxd_update_t iru); void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx); int (*ift_legacy_intr) (void *); + int (*ift_txd_errata) (void *, struct mbuf **mp); } *if_txrx_t; typedef struct if_softc_ctx { @@ -294,9 +296,9 @@ */ #define IFLIB_HAS_TXCQ 0x08 /* - * Interface does checksum in place + * */ -#define IFLIB_NEED_SCRATCH 0x10 +#define IFLIB_UNUSED___0 0x10 /* * Interface doesn't expect in_pseudo for th_sum */ @@ -305,6 +307,10 @@ * Interface doesn't align IP header */ #define IFLIB_DO_RX_FIXUP 0x40 +/* + * Driver needs csum zeroed for offloading + */ +#define IFLIB_NEED_ZERO_CSUM 0x80 @@ -381,7 +387,7 @@ void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count); -struct mtx 
*iflib_ctx_lock_get(if_ctx_t); +struct sx *iflib_ctx_lock_get(if_ctx_t); struct mtx *iflib_qset_lock_get(if_ctx_t, uint16_t); void iflib_led_create(if_ctx_t ctx); Index: sys/net/iflib.c =================================================================== --- sys/net/iflib.c +++ sys/net/iflib.c @@ -51,7 +51,6 @@ #include #include - #include #include #include @@ -157,7 +156,7 @@ if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; - struct mtx ifc_mtx; + struct sx ifc_sx; uint16_t ifc_nhwtxqs; uint16_t ifc_nhwrxqs; @@ -185,6 +184,8 @@ uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; + uint16_t ifc_cpuid_highest; + uint16_t ifc_sysctl_rx_budget; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; @@ -203,8 +204,66 @@ eventhandler_tag ifc_vlan_detach_event; uint8_t ifc_mac[ETHER_ADDR_LEN]; char ifc_mtx_name[16]; + LIST_ENTRY(iflib_ctx) ifc_next; }; +static LIST_HEAD(ctx_head, iflib_ctx) ctx_list; +static struct mtx ctx_list_lock; + +TASKQGROUP_DEFINE(if_io, mp_ncpus, 1, true, PI_NET); +TASKQGROUP_DEFINE(if_config, 1, 1, false, PI_SOFT); + +static void +iflib_ctx_apply(void (*fn)(if_ctx_t ctx, void *arg), void *arg) +{ + if_ctx_t ctx; + + mtx_lock(&ctx_list_lock); + LIST_FOREACH(ctx, &ctx_list, ifc_next) { + (fn)(ctx, arg); + } + mtx_unlock(&ctx_list_lock); +} + +static void +_iflib_cpuid_highest(if_ctx_t ctx, void *arg) { + int *cpuid = arg; + + if (*cpuid < ctx->ifc_cpuid_highest) + *cpuid = ctx->ifc_cpuid_highest; +} + +static int +iflib_cpuid_highest(void) +{ + int cpuid = 0; + + iflib_ctx_apply(_iflib_cpuid_highest, &cpuid); + return (cpuid); +} + +static void +iflib_ctx_insert(if_ctx_t ctx) +{ + mtx_lock(&ctx_list_lock); + LIST_INSERT_HEAD(&ctx_list, ctx, ifc_next); + mtx_unlock(&ctx_list_lock); +} + +static void +iflib_ctx_remove(if_ctx_t ctx) +{ + int max_cpuid_prev, max_cpuid_new; + + max_cpuid_prev = iflib_cpuid_highest(); + mtx_lock(&ctx_list_lock); + LIST_REMOVE(ctx, ifc_next); + mtx_unlock(&ctx_list_lock); + max_cpuid_new = max(1, iflib_cpuid_highest()); + if (max_cpuid_new < max_cpuid_prev) { + taskqgroup_adjust(qgroup_if_io, max_cpuid_new, 1, true, PI_NET); + } +} void * iflib_get_softc(if_ctx_t ctx) @@ -263,9 +322,11 @@ #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) #define RX_SW_DESC_MAP_CREATED (1 << 0) -#define TX_SW_DESC_MAP_CREATED (1 << 1) -#define RX_SW_DESC_INUSE (1 << 3) -#define TX_SW_DESC_MAPPED (1 << 4) +#define RX_SW_DESC_INUSE (1 << 1) +#define RX_NETMAP_INUSE (1 << 2) + +#define TX_SW_DESC_MAP_CREATED (1 << 0) +#define TX_SW_DESC_MAPPED (1 << 1) #define M_TOOBIG M_PROTO1 @@ -357,6 +418,7 @@ uint8_t ift_qstatus; uint8_t ift_closed; uint8_t ift_update_freq; + uint8_t ift_stall_count; struct iflib_filter_info ift_filter_info; bus_dma_tag_t ift_desc_tag; bus_dma_tag_t ift_tso_desc_tag; @@ -448,9 +510,11 @@ struct grouptask ifr_task; struct iflib_filter_info ifr_filter_info; iflib_dma_info_t ifr_ifdi; - + struct if_rxd_info ifr_ri; + struct if_rxd_update ifr_iru; /* dynamically allocate if any drivers need a value substantially larger than this */ struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); + #ifdef IFLIB_DIAGNOSTICS uint64_t ifr_cpu_exec_count[256]; #endif @@ -465,11 +529,11 @@ /* multiple of word size */ #ifdef __LP64__ -#define PKT_INFO_SIZE 6 +#define PKT_INFO_SIZE 7 #define RXD_INFO_SIZE 5 #define PKT_TYPE uint64_t #else -#define PKT_INFO_SIZE 11 +#define PKT_INFO_SIZE 12 #define RXD_INFO_SIZE 8 #define PKT_TYPE uint32_t #endif @@ -495,9 +559,10 @@ pi_pad = 
(if_pkt_info_pad_t)pi; pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; + pi_pad->pkt_val[6] = 0; #ifndef __LP64__ - pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; - pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; + pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; pi_pad->pkt_val[9] = 0; + pi_pad->pkt_val[10] = 0; pi_pad->pkt_val[11] = 0; #endif } @@ -525,14 +590,24 @@ #define MAX_SINGLE_PACKET_FRACTION 12 #define IF_BAD_DMA (bus_addr_t)-1 -#define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) +static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, + "iflib driver parameters"); + +static int iflib_timer_int; +SYSCTL_INT(_net_iflib, OID_AUTO, timer_int, CTLFLAG_RW, &iflib_timer_int, + 0, "interval at which to run per-queue timers (in ticks)"); + +static int force_busdma = 0; +SYSCTL_INT(_net_iflib, OID_AUTO, force_busdma, CTLFLAG_RDTUN, &force_busdma, + 1, "force busdma"); -#define CTX_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF) +#define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) -#define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_mtx) -#define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_mtx) -#define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx) +#define CTX_LOCK_INIT(_sc, _name) sx_init(&(_sc)->ifc_sx, _name) +#define CTX_LOCK(ctx) sx_xlock(&(ctx)->ifc_sx) +#define CTX_UNLOCK(ctx) sx_xunlock(&(ctx)->ifc_sx) +#define CTX_LOCK_DESTROY(ctx) sx_destroy(&(ctx)->ifc_sx) #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) @@ -553,9 +628,6 @@ MODULE_DEPEND(iflib, pci, 1, 1, 1); MODULE_DEPEND(iflib, ether, 1, 1, 1); -TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); -TASKQGROUP_DEFINE(if_config_tqg, 1, 1); - #ifndef IFLIB_DEBUG_COUNTERS #ifdef INVARIANTS #define IFLIB_DEBUG_COUNTERS 1 @@ -564,9 +636,6 @@ #endif /* !INVARIANTS */ #endif -static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, - "iflib driver parameters"); - /* * XXX need to ensure that this can't accidentally cause the head to be moved backwards */ @@ -689,7 +758,14 @@ static void iflib_debug_reset(void) {} #endif +typedef void async_gtask_fn_t(if_ctx_t ctx, void *arg); +struct async_task_arg { + async_gtask_fn_t *ata_fn; + if_ctx_t ata_ctx; + void *ata_arg; + struct grouptask *ata_gtask; +}; #define IFLIB_DEBUG 0 @@ -711,6 +787,12 @@ static void _iflib_pre_assert(if_softc_ctx_t scctx); static void iflib_stop(if_ctx_t ctx); static void iflib_if_init_locked(if_ctx_t ctx); +static int async_if_ioctl(if_ctx_t ctx, u_long command, caddr_t data); +static int iflib_config_async_gtask_dispatch(if_ctx_t ctx, async_gtask_fn_t *fn, char *name, void *arg); +static void iflib_admin_reset_deferred(if_ctx_t ctx); + + + #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m); #endif @@ -784,6 +866,94 @@ return (status); } +static void +iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) +{ + iflib_fl_t fl; + + fl = &rxq->ifr_fl[flid]; + iru->iru_paddrs = fl->ifl_bus_addrs; + iru->iru_vaddrs = &fl->ifl_vm_addrs[0]; + iru->iru_idxs = fl->ifl_rxd_idxs; + iru->iru_qsidx = rxq->ifr_id; + iru->iru_buf_size = fl->ifl_buf_size; + iru->iru_flidx = fl->ifl_id; +} + +static int +netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init) +{ + struct netmap_adapter *na = kring->na; + u_int const lim = kring->nkr_num_slots - 1; + u_int head = 
kring->rhead; + struct netmap_ring *ring = kring->ring; + bus_dmamap_t *map; + if_rxd_update_t iru; + if_ctx_t ctx = rxq->ifr_ctx; + iflib_fl_t fl = &rxq->ifr_fl[0]; + uint32_t refill_pidx, nic_i; + + iru = &rxq->ifr_iru; + iru_init(iru, rxq, 0 /* flid */); + map = fl->ifl_sds.ifsd_map; + refill_pidx = netmap_idx_k2n(kring, nm_i); + if (init && (nm_i == head)) + head = nm_prev(head, lim); + for (int tmp_pidx = 0; nm_i != head; tmp_pidx++) { + struct netmap_slot *slot = &ring->slot[nm_i]; + void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]); + uint32_t nic_i_dma = refill_pidx; + nic_i = netmap_idx_k2n(kring, nm_i); + + MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH); + + if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ + return netmap_ring_reinit(kring); + + fl->ifl_vm_addrs[tmp_pidx] = addr; + if (__predict_false(init) && map) { + netmap_load_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); + } else if (map && (slot->flags & NS_BUF_CHANGED)) { + /* buffer has changed, reload map */ + netmap_reload_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); + } + slot->flags &= ~NS_BUF_CHANGED; + + nm_i = nm_next(nm_i, lim); + fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim); + if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1) + continue; + + iru->iru_pidx = refill_pidx; + iru->iru_count = tmp_pidx+1; + ctx->isc_rxd_refill(ctx->ifc_softc, iru); + + tmp_pidx = 0; + refill_pidx = nic_i; + if (map == NULL) + continue; + + for (int n = 0; n < iru->iru_count; n++) { + bus_dmamap_sync(fl->ifl_ifdi->idi_tag, map[nic_i_dma], + BUS_DMASYNC_PREREAD); + /* XXX - change this to not use the netmap func*/ + nic_i_dma = nm_next(nic_i_dma, lim); + } + } + kring->nr_hwcur = head; + + if (map) + bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + /* + * IMPORTANT: we must leave one free slot in the ring, + * so move nic_i back by one unit + */ + nic_i = nm_prev(nic_i, lim); + ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); + return (0); +} + /* * Reconcile kernel and user view of the transmit ring. * @@ -941,18 +1111,20 @@ struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; uint32_t nm_i; /* index into the netmap ring */ - uint32_t nic_i, nic_i_start; /* index into the NIC ring */ + uint32_t nic_i; /* index into the NIC ring */ u_int i, n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - struct if_rxd_info ri; - struct if_rxd_update iru; + struct if_rxd_info *ri; + struct if_rxd_update *iru; struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; iflib_fl_t fl = rxq->ifr_fl; + ri = &rxq->ifr_ri; + iru = &rxq->ifr_iru; if (head > lim) return netmap_ring_reinit(kring); @@ -988,14 +1160,14 @@ nm_i = netmap_idx_n2k(kring, nic_i); avail = iflib_rxd_avail(ctx, rxq, nic_i, USHRT_MAX); for (n = 0; avail > 0; n++, avail--) { - rxd_info_zero(&ri); - ri.iri_frags = rxq->ifr_frags; - ri.iri_qsidx = kring->ring_id; - ri.iri_ifp = ctx->ifc_ifp; - ri.iri_cidx = nic_i; + rxd_info_zero(ri); + ri->iri_frags = rxq->ifr_frags; + ri->iri_qsidx = kring->ring_id; + ri->iri_ifp = ctx->ifc_ifp; + ri->iri_cidx = nic_i; - error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); - ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen; + error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, ri); + ring->slot[nm_i].len = error ? 
0 : ri->iri_len - crclen; ring->slot[nm_i].flags = slot_flags; if (fl->ifl_sds.ifsd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, @@ -1028,63 +1200,7 @@ if (nm_i == head) return (0); - iru.iru_paddrs = fl->ifl_bus_addrs; - iru.iru_vaddrs = &fl->ifl_vm_addrs[0]; - iru.iru_idxs = fl->ifl_rxd_idxs; - iru.iru_qsidx = rxq->ifr_id; - iru.iru_buf_size = fl->ifl_buf_size; - iru.iru_flidx = fl->ifl_id; - nic_i_start = nic_i = netmap_idx_k2n(kring, nm_i); - for (i = 0; nm_i != head; i++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[i]); - - if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ - goto ring_reset; - - fl->ifl_vm_addrs[i] = addr; - if (fl->ifl_sds.ifsd_map && (slot->flags & NS_BUF_CHANGED)) { - /* buffer has changed, reload map */ - netmap_reload_map(na, fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], addr); - } - slot->flags &= ~NS_BUF_CHANGED; - - nm_i = nm_next(nm_i, lim); - fl->ifl_rxd_idxs[i] = nic_i = nm_next(nic_i, lim); - if (nm_i != head && i < IFLIB_MAX_RX_REFRESH) - continue; - - iru.iru_pidx = nic_i_start; - iru.iru_count = i; - i = 0; - ctx->isc_rxd_refill(ctx->ifc_softc, &iru); - if (fl->ifl_sds.ifsd_map == NULL) { - nic_i_start = nic_i; - continue; - } - nic_i = nic_i_start; - for (n = 0; n < iru.iru_count; n++) { - bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], - BUS_DMASYNC_PREREAD); - nic_i = nm_next(nic_i, lim); - } - nic_i_start = nic_i; - } - kring->nr_hwcur = head; - - if (fl->ifl_sds.ifsd_map) - bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * IMPORTANT: we must leave one free slot in the ring, - * so move nic_i back by one unit - */ - nic_i = nm_prev(nic_i, lim); - ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); - return 0; - -ring_reset: - return netmap_ring_reinit(kring); + return (netmap_fl_refill(rxq, kring, nm_i, false)); } static void @@ -1093,13 +1209,12 @@ struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; - CTX_LOCK(ctx); + /* XXX - do we need synchronization here?*/ if (onoff) { IFDI_INTR_ENABLE(ctx); } else { IFDI_INTR_DISABLE(ctx); } - CTX_UNLOCK(ctx); } @@ -1156,55 +1271,15 @@ iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); + struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; struct netmap_slot *slot; - struct if_rxd_update iru; - iflib_fl_t fl; - bus_dmamap_t *map; - int nrxd; - uint32_t i, j, pidx_start; + uint32_t nm_i; slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); if (slot == NULL) return; - fl = &rxq->ifr_fl[0]; - map = fl->ifl_sds.ifsd_map; - nrxd = ctx->ifc_softc_ctx.isc_nrxd[0]; - iru.iru_paddrs = fl->ifl_bus_addrs; - iru.iru_vaddrs = &fl->ifl_vm_addrs[0]; - iru.iru_idxs = fl->ifl_rxd_idxs; - iru.iru_qsidx = rxq->ifr_id; - iru.iru_buf_size = rxq->ifr_fl[0].ifl_buf_size; - iru.iru_flidx = 0; - - for (pidx_start = i = j = 0; i < nrxd; i++, j++) { - int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i); - void *addr; - - fl->ifl_rxd_idxs[j] = i; - addr = fl->ifl_vm_addrs[j] = PNMB(na, slot + sj, &fl->ifl_bus_addrs[j]); - if (map) { - netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, *map, addr); - map++; - } - - if (j < IFLIB_MAX_RX_REFRESH && i < nrxd - 1) - continue; - - iru.iru_pidx = pidx_start; - pidx_start = i; - iru.iru_count = j; - j = 0; - MPASS(pidx_start + j <= nrxd); - /* Update descriptors and the cached value */ - ctx->isc_rxd_refill(ctx->ifc_softc, &iru); - } - /* preserve queue */ - if 
(ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; - int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); - ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, t); - } else - ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, nrxd-1); + nm_i = netmap_idx_n2k(kring, 0); + netmap_fl_refill(rxq, kring, nm_i, true); } #define iflib_netmap_detach(ifp) netmap_detach(ifp) @@ -1226,8 +1301,17 @@ { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } +static __inline void +prefetch2(void *x) +{ + __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); +#if (CACHE_LINE_SIZE < 128) + __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); +#endif +} #else #define prefetch(x) +#define prefetch2(x) #endif static void @@ -1343,6 +1427,25 @@ iflib_dma_free(*dmaiter); } +static void +txq_validate(iflib_txq_t txq) { +#ifdef INVARIANTS + uint32_t cidx = txq->ift_cidx; + struct mbuf **ifsd_m = txq->ift_sds.ifsd_m; + if (txq->ift_pidx > cidx) { + int i; + for (i = txq->ift_pidx; i < txq->ift_size; i++) + MPASS(ifsd_m[i] == NULL); + for (i = 0; i < cidx; i++) + MPASS(ifsd_m[i] == NULL); + } else if (txq->ift_pidx < cidx) { + int i; + for (i = txq->ift_pidx; i < cidx; i++) + MPASS(ifsd_m[i] == NULL); + } +#endif +} + #ifdef EARLY_AP_STARTUP static const int iflib_started = 1; #else @@ -1371,6 +1474,7 @@ { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; + if (!iflib_started) return (FILTER_HANDLED); @@ -1383,6 +1487,35 @@ } static int +iflib_fast_intr_rx(void *arg) +{ + iflib_filter_info_t info = arg; + struct grouptask *gtask = info->ifi_task; + iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; + if_ctx_t ctx; + int cidx; + + if (!iflib_started) + return (FILTER_HANDLED); + + DBG_COUNTER_INC(fast_intrs); + if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) + return (FILTER_HANDLED); + + ctx = rxq->ifr_ctx; + if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) + cidx = rxq->ifr_cq_cidx; + else + cidx = rxq->ifr_fl[0].ifl_cidx; + if (iflib_rxd_avail(ctx, rxq, cidx, 1)) + GROUPTASK_ENQUEUE(gtask); + else + IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); + return (FILTER_HANDLED); +} + + +static int iflib_fast_intr_rxtx(void *arg) { iflib_filter_info_t info = arg; @@ -1398,11 +1531,10 @@ if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); + ctx = rxq->ifr_ctx; for (i = 0; i < rxq->ifr_ntxqirq; i++) { qidx_t txqid = rxq->ifr_txqid[i]; - ctx = rxq->ifr_ctx; - if (!ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) { IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; @@ -1974,20 +2106,33 @@ if (*sd_cl != NULL) uma_zfree(fl->ifl_zone, *sd_cl); *sd_flags = 0; + } else if (*sd_flags & RX_NETMAP_INUSE) { + if (fl->ifl_sds.ifsd_map != NULL) { + bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i]; + bus_dmamap_unload(fl->ifl_desc_tag, sd_map); + bus_dmamap_destroy(fl->ifl_desc_tag, sd_map); + } + *sd_flags = 0; + MPASS(*sd_cl == NULL); + MPASS(*sd_m == NULL); } else { MPASS(*sd_cl == NULL); MPASS(*sd_m == NULL); } + #if MEMORY_LOGGING - fl->ifl_m_dequeued++; - fl->ifl_cl_dequeued++; + if (*sd_m != NULL) + fl->ifl_m_dequeued++; + if (*sd_cl != NULL) + fl->ifl_cl_dequeued++; #endif *sd_cl = NULL; *sd_m = NULL; } #ifdef INVARIANTS for (i = 0; i < fl->ifl_size; i++) { - MPASS(fl->ifl_sds.ifsd_flags[i] == 0); + KASSERT(fl->ifl_sds.ifsd_flags[i] == 0, 
("fl->ifl_sds.ifsd_flags[%d]=0x%x, expected 0", + i, fl->ifl_sds.ifsd_flags[i])); MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); MPASS(fl->ifl_sds.ifsd_m[i] == NULL); } @@ -2011,7 +2156,7 @@ if_ctx_t ctx = rxq->ifr_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; - bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size); + bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size-1); /* ** Free current RX buffer structs and their mbufs */ @@ -2090,6 +2235,19 @@ } } +/* CONFIG context only */ +static void +iflib_handle_hang(if_ctx_t ctx, void *arg __unused) +{ + + CTX_LOCK(ctx); + if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); + IFDI_WATCHDOG_RESET(ctx); + ctx->ifc_watchdog_events++; + iflib_if_init_locked(ctx); + CTX_UNLOCK(ctx); +} + /* * MI independent logic * @@ -2097,46 +2255,49 @@ static void iflib_timer(void *arg) { - iflib_txq_t txq = arg; + iflib_txq_t txq_i, txq = arg; if_ctx_t ctx = txq->ift_ctx; - if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; + /* handle any laggards */ + if (txq->ift_db_pending) + GROUPTASK_ENQUEUE(&txq->ift_task); + IFDI_TIMER(ctx, txq->ift_id); + + if (ifmp_ring_is_stalled(txq->ift_br) && + txq->ift_cleaned_prev == txq->ift_cleaned) + txq->ift_stall_count++; + txq->ift_cleaned_prev = txq->ift_cleaned; + if (txq->ift_stall_count > 2) { + txq->ift_qstatus = IFLIB_QUEUE_HUNG; + device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", + txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); + } + if (txq->ift_id != 0) { + if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) + callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, + txq, txq->ift_timer.c_cpu); + return; + } /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. 
*/ - IFDI_TIMER(ctx, txq->ift_id); - if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && - ((txq->ift_cleaned_prev == txq->ift_cleaned) || - (sctx->isc_pause_frames == 0))) - goto hung; + txq_i = ctx->ifc_txqs; + for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq_i++) { + if (txq_i->ift_qstatus == IFLIB_QUEUE_HUNG) { + iflib_config_async_gtask_dispatch(ctx, iflib_handle_hang, "hang handler", txq); + /* init will reset the callout */ + return; + } + } - if (ifmp_ring_is_stalled(txq->ift_br)) - txq->ift_qstatus = IFLIB_QUEUE_HUNG; - txq->ift_cleaned_prev = txq->ift_cleaned; - /* handle any laggards */ - if (txq->ift_db_pending) - GROUPTASK_ENQUEUE(&txq->ift_task); - sctx->isc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) - callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); - return; -hung: - CTX_LOCK(ctx); - if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); - device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", - txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); - - IFDI_WATCHDOG_RESET(ctx); - ctx->ifc_watchdog_events++; - - ctx->ifc_flags |= IFC_DO_RESET; - iflib_admin_intr_deferred(ctx); - CTX_UNLOCK(ctx); + callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, + txq, txq->ift_timer.c_cpu); } static void @@ -2148,8 +2309,10 @@ iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; - int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; + int i, j, tx_ip_csum_flags, tx_ip6_csum_flags, running, reset; + running = !!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); + reset = !!(ctx->ifc_flags & IFC_DO_RESET); if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); @@ -2173,19 +2336,20 @@ CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } - for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { - MPASS(rxq->ifr_id == i); - iflib_netmap_rxq_init(ctx, rxq); - } #ifdef INVARIANTS i = if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); + if (!running && reset) + return; for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { /* XXX this should really be done on a per-queue basis */ - if (if_getcapenable(ifp) & IFCAP_NETMAP) + if (if_getcapenable(ifp) & IFCAP_NETMAP) { + MPASS(rxq->ifr_id == i); + iflib_netmap_rxq_init(ctx, rxq); continue; + } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n"); @@ -2198,10 +2362,11 @@ IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) - callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, - txq->ift_timer.c_cpu); + callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, + txq, txq->ift_timer.c_cpu); } +/* CONFIG context only */ static int iflib_media_change(if_t ifp) { @@ -2215,17 +2380,19 @@ return (err); } +/* CONFIG context only */ static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); + iflib_admin_intr_deferred(ctx); CTX_LOCK(ctx); - IFDI_UPDATE_ADMIN_STATUS(ctx); IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } +/* CONFIG context only */ static void iflib_stop(if_ctx_t ctx) { @@ -2240,9 +2407,7 @@ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); - DELAY(1000); IFDI_STOP(ctx); - DELAY(1000); iflib_debug_reset(); /* Wait for current tx queue users to exit to disarm watchdog timer. 
*/ @@ -2255,11 +2420,13 @@ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } - txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; - txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; + /* XXX please rewrite to simply bzero this range */ + txq->ift_processed = txq->ift_cleaned = txq->ift_cleaned_prev = 0; + txq->ift_stall_count = txq->ift_cidx_processed = 0; + txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = 0; txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; - txq->ift_pullups = 0; + txq->ift_no_desc_avail = txq->ift_pullups = 0; ifmp_ring_reset_stats(txq->ift_br); for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); @@ -2402,6 +2569,9 @@ } cl = *sd->ifsd_cl; *sd->ifsd_cl = NULL; +#if MEMORY_LOGGING + sd->ifsd_fl->ifl_cl_dequeued++; +#endif /* Can these two be made one ? */ m_init(m, M_NOWAIT, MT_DATA, flags); @@ -2471,20 +2641,12 @@ * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ - struct mbuf *m, *mh, *mt; + struct mbuf *m, *mh, *mt, *mf; ifp = ctx->ifc_ifp; -#ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - u_int work = 0; - if (netmap_rx_irq(ifp, rxq->ifr_id, &work)) - return (FALSE); - } -#endif - mh = mt = NULL; MPASS(budget > 0); - rx_pkts = rx_bytes = 0; + rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else @@ -2547,11 +2709,14 @@ } /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) - __iflib_fl_refill_lt(ctx, fl, budget + 8); + __iflib_fl_refill_lt(ctx, fl, 2*budget + 8); lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); + mt = mf = NULL; while (mh != NULL) { m = mh; + if (mf == NULL) + mf = m; mh = mh->m_nextpkt; m->m_nextpkt = NULL; #ifndef __NO_STRICT_ALIGNMENT @@ -2561,15 +2726,25 @@ rx_bytes += m->m_pkthdr.len; rx_pkts++; #if defined(INET6) || defined(INET) - if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) + if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) { + if (mf == m) + mf = NULL; continue; + } #endif + if (mt != NULL) + mt->m_nextpkt = m; + mt = m; + } + if (mf != NULL) { + ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); - ifp->if_input(ifp, m); } - if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); - if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); + if (rx_pkts) { + if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); + if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); + } /* * Flush any outstanding LRO work @@ -2577,14 +2752,9 @@ #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); #endif - if (avail) - return true; - return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); + return (avail || iflib_rxd_avail(ctx, rxq, *cidxp, 1)); err: - CTX_LOCK(ctx); - ctx->ifc_flags |= IFC_DO_RESET; - iflib_admin_intr_deferred(ctx); - CTX_UNLOCK(ctx); + iflib_admin_reset_deferred(ctx); return (false); } @@ -2671,20 +2841,19 @@ static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { - if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; + if_ctx_t ctx = txq->ift_ctx; +#ifdef INET + if_shared_ctx_t sctx = ctx->ifc_sctx; +#endif + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; struct ether_vlan_header *eh; struct mbuf *m, *n; + int err; + if (scctx->isc_txrx->ift_txd_errata && + (err = 
scctx->isc_txrx->ift_txd_errata(ctx->ifc_softc, mp))) + return (err); n = m = *mp; - if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && - M_WRITABLE(m) == 0) { - if ((m = m_dup(m, M_NOWAIT)) == NULL) { - return (ENOMEM); - } else { - m_freem(*mp); - n = *mp = m; - } - } /* * Determine where frame payload starts. @@ -2705,6 +2874,10 @@ pi->ipi_ehdrlen = ETHER_HDR_LEN; } + if (if_getmtu(txq->ift_ctx->ifc_ifp) >= pi->ipi_len) { + pi->ipi_csum_flags &= ~(CSUM_IP_TSO|CSUM_IP6_TSO); + } + switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: @@ -2749,21 +2922,21 @@ pi->ipi_ipproto = ip->ip_p; pi->ipi_flags |= IPI_TX_IPV4; - if (pi->ipi_csum_flags & CSUM_IP) + if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) ip->ip_sum = 0; - if (pi->ipi_ipproto == IPPROTO_TCP) { - if (__predict_false(th == NULL)) { - txq->ift_pullups++; - if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) - return (ENOMEM); - th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); - } - pi->ipi_tcp_hflags = th->th_flags; - pi->ipi_tcp_hlen = th->th_off << 2; - pi->ipi_tcp_seq = th->th_seq; - } if (IS_TSO4(pi)) { + if (pi->ipi_ipproto == IPPROTO_TCP) { + if (__predict_false(th == NULL)) { + txq->ift_pullups++; + if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) + return (ENOMEM); + th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); + } + pi->ipi_tcp_hflags = th->th_flags; + pi->ipi_tcp_hlen = th->th_off << 2; + pi->ipi_tcp_seq = th->th_seq; + } if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); th->th_sum = in_pseudo(ip->ip_src.s_addr, @@ -2794,15 +2967,15 @@ pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_flags |= IPI_TX_IPV6; - if (pi->ipi_ipproto == IPPROTO_TCP) { - if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { - if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) - return (ENOMEM); - } - pi->ipi_tcp_hflags = th->th_flags; - pi->ipi_tcp_hlen = th->th_off << 2; - } if (IS_TSO6(pi)) { + if (pi->ipi_ipproto == IPPROTO_TCP) { + if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { + if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) + return (ENOMEM); + } + pi->ipi_tcp_hflags = th->th_flags; + pi->ipi_tcp_hlen = th->th_off << 2; + } if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); @@ -2911,9 +3084,9 @@ ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; pidx = txq->ift_pidx; - if (map != NULL) { + MPASS(ifsd_m[pidx] == NULL); + if (force_busdma || map != NULL) { uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags; - err = bus_dmamap_load_mbuf_sg(tag, map, *m0, segs, nsegs, BUS_DMA_NOWAIT); if (err) @@ -3066,7 +3239,8 @@ next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); prefetch(&txq->ift_sds.ifsd_flags[next]); } - } else if (txq->ift_sds.ifsd_map != NULL) + } + if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[pidx]; if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { @@ -3079,18 +3253,19 @@ m_head = *m_headp; pkt_info_zero(&pi); - pi.ipi_len = m_head->m_pkthdr.len; pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); - pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; - pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? 
m_head->m_pkthdr.ether_vtag : 0; pi.ipi_pidx = pidx; pi.ipi_qsidx = txq->ift_id; + pi.ipi_len = m_head->m_pkthdr.len; + pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; + pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0; /* deliberate bitwise OR to make one condition */ if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) return (err); m_head = *m_headp; + pi.ipi_hdr_data = mtod(m_head, caddr_t); } retry: @@ -3267,6 +3442,7 @@ gen = 0; } } + txq_validate(txq); txq->ift_cidx = cidx; txq->ift_gen = gen; } @@ -3316,10 +3492,10 @@ prefetch(items[(cidx + offset) & (size-1)]); if (remaining > 1) { - prefetch(&items[next]); - prefetch(items[(cidx + offset + 1) & (size-1)]); - prefetch(items[(cidx + offset + 2) & (size-1)]); - prefetch(items[(cidx + offset + 3) & (size-1)]); + prefetch2(&items[next]); + prefetch2(items[(cidx + offset + 1) & (size-1)]); + prefetch2(items[(cidx + offset + 2) & (size-1)]); + prefetch2(items[(cidx + offset + 3) & (size-1)]); } return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); } @@ -3500,7 +3676,7 @@ #endif if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; - if ((ifp->if_capenable & IFCAP_NETMAP)) { + if (if_getcapenable(ifp) & IFCAP_NETMAP) { if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)) netmap_tx_irq(ifp, txq->ift_id); IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); @@ -3508,8 +3684,7 @@ } if (txq->ift_db_pending) ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE); - else - ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); + ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { @@ -3525,6 +3700,7 @@ if_ctx_t ctx = rxq->ifr_ctx; bool more; int rc; + uint16_t budget; #ifdef IFLIB_DIAGNOSTICS rxq->ifr_cpu_exec_count[curcpu]++; @@ -3532,7 +3708,19 @@ DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; - if ((more = iflib_rxeof(rxq, 16 /* XXX */)) == false) { + more = true; +#ifdef DEV_NETMAP + if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) { + u_int work = 0; + if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work)) { + more = false; + } + } +#endif + budget = ctx->ifc_sysctl_rx_budget; + if (budget == 0) + budget = 16; /* XXX */ + if (more == false || (more = iflib_rxeof(rxq, budget)) == false) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { @@ -3547,43 +3735,44 @@ GROUPTASK_ENQUEUE(&rxq->ifr_task); } +/* CONFIG context only */ static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; - int i; - - if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) { - if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { - return; - } - } + int i, running; CTX_LOCK(ctx); + running = !!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); + for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } - IFDI_UPDATE_ADMIN_STATUS(ctx); - for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) - callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); - IFDI_LINK_INTR_ENABLE(ctx); + if (running) { + for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) + callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, + txq, txq->ift_timer.c_cpu); + 
IFDI_LINK_INTR_ENABLE(ctx); + } if (ctx->ifc_flags & IFC_DO_RESET) { - ctx->ifc_flags &= ~IFC_DO_RESET; iflib_if_init_locked(ctx); + ctx->ifc_flags &= ~IFC_DO_RESET; } + IFDI_UPDATE_ADMIN_STATUS(ctx); CTX_UNLOCK(ctx); - if (LINK_ACTIVE(ctx) == 0) + if (LINK_ACTIVE(ctx) == 0 || !running) return; for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } +/* CONFIG context only */ static void _task_fn_iov(void *context) { @@ -3698,21 +3887,20 @@ DBG_COUNTER_INC(tx_seen); err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE); + GROUPTASK_ENQUEUE(&txq->ift_task); if (err) { - GROUPTASK_ENQUEUE(&txq->ift_task); /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); m_freem(m); - } else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) { - GROUPTASK_ENQUEUE(&txq->ift_task); } return (err); } +/* CONFIG context only */ static void iflib_if_qflush(if_t ifp) { @@ -3796,29 +3984,12 @@ CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: - CTX_LOCK(ctx); - if (if_getflags(ifp) & IFF_UP) { - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & - (IFF_PROMISC | IFF_ALLMULTI)) { - err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); - } - } else - reinit = 1; - } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - iflib_stop(ctx); - } - ctx->ifc_if_flags = if_getflags(ifp); - CTX_UNLOCK(ctx); + err = async_if_ioctl(ctx, command, data); break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - CTX_LOCK(ctx); - IFDI_INTR_DISABLE(ctx); - IFDI_MULTI_SET(ctx); - IFDI_INTR_ENABLE(ctx); - CTX_UNLOCK(ctx); + err = async_if_ioctl(ctx, command, data); } break; case SIOCSIFMEDIA: @@ -3912,6 +4083,7 @@ * **********************************************************************/ +/* CONFIG context only */ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { @@ -3931,6 +4103,7 @@ CTX_UNLOCK(ctx); } +/* CONFIG context only */ static void iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) { @@ -3950,6 +4123,7 @@ CTX_UNLOCK(ctx); } +/* CONFIG context only */ static void iflib_led_func(void *arg, int onoff) { @@ -4094,8 +4268,10 @@ scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } } - - if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { + CTX_LOCK(ctx); + err = IFDI_ATTACH_PRE(ctx); + CTX_UNLOCK(ctx); + if (err) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); return (err); } @@ -4123,6 +4299,8 @@ /* set unconditionally for !x86 */ ctx->ifc_flags |= IFC_DMAR; #endif + if (force_busdma) + ctx->ifc_flags |= IFC_DMAR; msix_bar = scctx->isc_msix_bar; main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; @@ -4135,6 +4313,7 @@ if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? 
*/ device_printf(dev, "# rx descriptors must be a power of 2\n"); + err = EINVAL; goto fail; } @@ -4173,7 +4352,7 @@ GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ - taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin"); + taskqgroup_attach(qgroup_if_config, &ctx->ifc_admin_task, ctx, -1, "admin"); /* ** Now setup MSI or MSI/X, should ** return us the number of supported @@ -4232,7 +4411,10 @@ } } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac); - if ((err = IFDI_ATTACH_POST(ctx)) != 0) { + CTX_LOCK(ctx); + err = IFDI_ATTACH_POST(ctx); + CTX_UNLOCK(ctx); + if (err) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } @@ -4244,6 +4426,7 @@ if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); + iflib_ctx_insert(ctx); ctx->ifc_flags |= IFC_INIT_DONE; return (0); fail_detach: @@ -4254,7 +4437,9 @@ fail_queues: /* XXX free queues */ fail: + CTX_LOCK(ctx); IFDI_DETACH(ctx); + CTX_UNLOCK(ctx); return (err); } @@ -4302,12 +4487,10 @@ iflib_netmap_detach(ifp); ether_ifdetach(ifp); - /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ - CTX_LOCK_DESTROY(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); if (txq->ift_task.gt_uniq != NULL) @@ -4321,13 +4504,16 @@ free(fl->ifl_rx_bitmap, M_IFLIB); } - tqg = qgroup_if_config_tqg; + tqg = qgroup_if_config; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); + CTX_LOCK(ctx); IFDI_DETACH(ctx); + CTX_UNLOCK(ctx); + CTX_LOCK_DESTROY(ctx); device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(dev); @@ -4348,6 +4534,7 @@ iflib_rx_structures_free(ctx); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); + iflib_ctx_remove(ctx); free(ctx, M_IFLIB); return (0); } @@ -4443,13 +4630,14 @@ * **********************************************************************/ -/* - * - Start a fast taskqueue thread for each core - * - Start a taskqueue for control operations - */ static int iflib_module_init(void) { + + iflib_timer_int = hz / 2; + TUNABLE_INT_FETCH("net.iflib.timer_int", &iflib_timer_int); + LIST_INIT(&ctx_list); + mtx_init(&ctx_list_lock, "ctx list", NULL, MTX_DEF); return (0); } @@ -4893,25 +5081,124 @@ return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } +#ifdef SMP static int -find_nth(if_ctx_t ctx, cpuset_t *cpus, int qid) +find_nth(if_ctx_t ctx, int qid) { + cpuset_t cpus; int i, cpuid, eqid, count; - CPU_COPY(&ctx->ifc_cpus, cpus); + CPU_COPY(&ctx->ifc_cpus, &cpus); count = CPU_COUNT(&ctx->ifc_cpus); eqid = qid % count; /* clear up to the qid'th bit */ for (i = 0; i < eqid; i++) { - cpuid = CPU_FFS(cpus); + cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); - CPU_CLR(cpuid-1, cpus); + CPU_CLR(cpuid-1, &cpus); } - cpuid = CPU_FFS(cpus); + cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); return (cpuid-1); } +static int +find_child_with_core(int cpu, struct cpu_group *grp) +{ + int i; + + if (grp->cg_children == 0) + return -1; + + MPASS(grp->cg_child); + for (i = 0; i < grp->cg_children; i++) { + if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) + return i; + } + + return -1; +} + +/* + * Find the nth thread on the specified core + */ 
+static int +find_thread(int cpu, int thread_num) +{ + struct cpu_group *grp; + int i; + cpuset_t cs; + + grp = smp_topo(); + if (grp == NULL) + return cpu; + i = 0; + while ((i = find_child_with_core(cpu, grp)) != -1) { + /* If the child only has one cpu, don't descend */ + if (grp->cg_child[i].cg_count <= 1) + break; + grp = &grp->cg_child[i]; + } + + /* If they don't share at least an L2 cache, use the same CPU */ + if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) + return cpu; + + /* Now pick one */ + CPU_COPY(&grp->cg_mask, &cs); + for (i = thread_num % grp->cg_count; i > 0; i--) { + MPASS(CPU_FFS(&cs)); + CPU_CLR(CPU_FFS(&cs) - 1, &cs); + } + MPASS(CPU_FFS(&cs)); + return CPU_FFS(&cs) - 1; +} + +static int +get_thread_num(if_ctx_t ctx, iflib_intr_type_t type, int qid) +{ + switch (type) { + case IFLIB_INTR_TX: + /* TX queues get threads on the same core as the corresponding RX queue */ + /* XXX handle multiple RX threads per core and more than two threads per core */ + return qid / CPU_COUNT(&ctx->ifc_cpus) + 1; + case IFLIB_INTR_RX: + case IFLIB_INTR_RXTX: + /* RX queues get the first thread on their core */ + return qid / CPU_COUNT(&ctx->ifc_cpus); + default: + return -1; + } +} +#else +#define get_thread_num(ctx, type, qid) 0 +#define find_thread(cpuid, tid) 0 +#define find_nth(ctx, gid) 0 +#endif + +/* Just to avoid copy/paste */ +static inline int +iflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid, + struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, char *name) +{ + int cpuid; + int err, tid; + + cpuid = find_nth(ctx, qid); + tid = get_thread_num(ctx, type, qid); + MPASS(tid >= 0); + cpuid = find_thread(cpuid, tid); + err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name); + if (err) { + device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err); + return (err); + } + if (cpuid > ctx->ifc_cpuid_highest) + ctx->ifc_cpuid_highest = cpuid; + MPASS(gtask->gt_taskqueue != NULL); + return 0; +} + int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, @@ -4920,9 +5207,8 @@ struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; - cpuset_t cpus; gtask_fn_t *fn; - int tqrid, err, cpuid; + int tqrid, err; driver_filter_t *intr_fast; void *q; @@ -4935,7 +5221,7 @@ q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_tx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); @@ -4944,16 +5230,16 @@ q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_rx; - intr_fast = iflib_fast_intr; + intr_fast = iflib_fast_intr_rx; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RXTX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_rx; intr_fast = iflib_fast_intr_rxtx; GROUPTASK_INIT(gtask, 0, fn, q); @@ -4963,7 +5249,7 @@ tqrid = -1; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; - tqg = qgroup_if_config_tqg; + tqg = qgroup_if_config; fn = _task_fn_admin; intr_fast = iflib_fast_intr_ctx; break; @@ -4985,8 +5271,9 @@ return (0); if (tqrid != -1) { - cpuid = find_nth(ctx, &cpus, qid); - taskqgroup_attach_cpu(tqg, gtask, q, cpuid, irq->ii_rid, name); + err = 
iflib_irq_set_affinity(ctx, rman_get_start(irq->ii_res), type, qid, gtask, tqg, q, name); + if (err) + return (err); } else { taskqgroup_attach(tqg, gtask, q, tqrid, name); } @@ -5001,24 +5288,25 @@ struct taskqgroup *tqg; gtask_fn_t *fn; void *q; + int err; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = &ctx->ifc_txqs[qid].ift_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_tx; break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_rx; break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; - tqg = qgroup_if_config_tqg; + tqg = qgroup_if_config; rid = -1; fn = _task_fn_iov; break; @@ -5026,7 +5314,14 @@ panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); - taskqgroup_attach(tqg, gtask, q, rid, name); + if (rid != -1) { + err = iflib_irq_set_affinity(ctx, rid, type, qid, gtask, tqg, q, name); + if (err) + taskqgroup_attach(tqg, gtask, q, rid, name); + } + else { + taskqgroup_attach(tqg, gtask, q, rid, name); + } } void @@ -5056,7 +5351,7 @@ q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; tqrid = irq->ii_rid = *rid; fn = _task_fn_rx; @@ -5073,7 +5368,7 @@ taskqgroup_attach(tqg, gtask, q, tqrid, name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); - taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, tqrid, "tx"); + taskqgroup_attach(qgroup_if_io, &txq->ift_task, txq, tqrid, "tx"); return (0); } @@ -5106,12 +5401,28 @@ struct grouptask *gtask; gtask = &ctx->ifc_admin_task; - MPASS(gtask->gt_taskqueue != NULL); + MPASS(gtask != NULL && gtask->gt_taskqueue != NULL); #endif GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } +/* CONFIG context only */ +static void +iflib_handle_reset(if_ctx_t ctx, void *arg) +{ + CTX_LOCK(ctx); + ctx->ifc_flags |= IFC_DO_RESET; + iflib_admin_intr_deferred(ctx); + CTX_UNLOCK(ctx); +} + +static void +iflib_admin_reset_deferred(if_ctx_t ctx) +{ + iflib_config_async_gtask_dispatch(ctx, iflib_handle_reset, "reset handler", NULL); +} + void iflib_iov_intr_deferred(if_ctx_t ctx) { @@ -5123,7 +5434,7 @@ iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) { - taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name); + taskqgroup_attach_cpu(qgroup_if_io, gt, uniq, cpu, -1, name); } void @@ -5132,14 +5443,104 @@ { GROUPTASK_INIT(gtask, 0, fn, ctx); - taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name); + taskqgroup_attach(qgroup_if_config, gtask, gtask, -1, name); } +static void +iflib_multi_set(if_ctx_t ctx, void *arg) +{ + CTX_LOCK(ctx); + IFDI_INTR_DISABLE(ctx); + IFDI_MULTI_SET(ctx); + IFDI_INTR_ENABLE(ctx); + CTX_UNLOCK(ctx); +} + +static void +iflib_flags_set(if_ctx_t ctx, void *arg) +{ + int reinit, err; + if_t ifp = ctx->ifc_ifp; + + err = reinit = 0; + CTX_LOCK(ctx); + if (if_getflags(ifp) & IFF_UP) { + if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { + if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & + (IFF_PROMISC | IFF_ALLMULTI)) { + err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); + } + } else + reinit = 1; + } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { + iflib_stop(ctx); + } + ctx->ifc_if_flags = if_getflags(ifp); + if (reinit) + iflib_if_init_locked(ctx); + CTX_UNLOCK(ctx); + if (err) + log(LOG_WARNING, "IFDI_PROMISC_SET returned %d\n", err); +} + +static void +async_gtask(void *ctx) +{ + struct async_task_arg *at_arg = ctx; + if_ctx_t if_ctx = at_arg->ata_ctx; + void *arg = 
at_arg->ata_arg; + + at_arg->ata_fn(if_ctx, arg); + taskqgroup_detach(qgroup_if_config, at_arg->ata_gtask); + free(at_arg->ata_gtask, M_IFLIB); +} + +static int +iflib_config_async_gtask_dispatch(if_ctx_t ctx, async_gtask_fn_t *fn, char *name, void *arg) +{ + struct grouptask *gtask; + struct async_task_arg *at_arg; + + if ((gtask = malloc(sizeof(struct grouptask) + sizeof(struct async_task_arg), M_IFLIB, M_NOWAIT|M_ZERO)) == NULL) + return (ENOMEM); + + at_arg = (struct async_task_arg *)(gtask + 1); + at_arg->ata_fn = fn; + at_arg->ata_ctx = ctx; + at_arg->ata_arg = arg; + at_arg->ata_gtask = gtask; + + GROUPTASK_INIT(gtask, 0, async_gtask, at_arg); + taskqgroup_attach(qgroup_if_config, gtask, gtask, -1, name); + GROUPTASK_ENQUEUE(gtask); + return (0); +} + +static int +async_if_ioctl(if_ctx_t ctx, u_long command, caddr_t data) +{ + int rc; + + switch (command) { + case SIOCADDMULTI: + case SIOCDELMULTI: + rc = iflib_config_async_gtask_dispatch(ctx, iflib_multi_set, "async_if_multi", NULL); + break; + case SIOCSIFFLAGS: + rc = iflib_config_async_gtask_dispatch(ctx, iflib_flags_set, "async_if_flags", NULL); + break; + default: + panic("unknown command %lx", command); + } + return (rc); +} + + void iflib_config_gtask_deinit(struct grouptask *gtask) { - taskqgroup_detach(qgroup_if_config_tqg, gtask); + taskqgroup_detach(qgroup_if_config, gtask); } void @@ -5206,11 +5607,11 @@ info, 0, iflib_sysctl_int_delay, "I", description); } -struct mtx * +struct sx * iflib_ctx_lock_get(if_ctx_t ctx) { - return (&ctx->ifc_mtx); + return (&ctx->ifc_sx); } static int @@ -5330,13 +5731,22 @@ rx_queues = min(rx_queues, tx_queues); } - device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); + device_printf(dev, "trying %d rx queues %d tx queues \n", rx_queues, tx_queues); - vectors = rx_queues + admincnt; + vectors = tx_queues + rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { device_printf(dev, "Using MSIX interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; + + if (vectors < tx_queues + rx_queues + admincnt) { + vectors -= admincnt; + if (vectors % 2 != 0) + vectors -= 1; + if (rx_queues > vectors / 2) + rx_queues = vectors / 2; + tx_queues = vectors - rx_queues; + } scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; @@ -5471,9 +5881,12 @@ SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); - SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", + SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, "disable MSIX (default 0)"); + SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", + CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, + "set the rx budget"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", @@ -5484,6 +5897,10 @@ CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of rx descriptors to use, 0 = use default #"); + + SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "watchdog_events", + CTLFLAG_RD, &ctx->ifc_watchdog_events, 0, + "Watchdog events seen since load"); } static void Index: sys/net/mp_ring.c =================================================================== --- sys/net/mp_ring.c +++ sys/net/mp_ring.c @@ -226,11 +226,15 @@ if (cidx != pidx && pending < 64 && total < budget) continue; critical_enter(); - do { + os.state = ns.state 
= r->state; + ns.cidx = cidx; + ns.flags = state_to_flags(ns, total >= budget); + while (atomic_cmpset_acq_64(&r->state, os.state, ns.state) == 0) { + cpu_spinwait(); os.state = ns.state = r->state; ns.cidx = cidx; ns.flags = state_to_flags(ns, total >= budget); - } while (atomic_cmpset_acq_64(&r->state, os.state, ns.state) == 0); + } critical_exit(); if (ns.flags == ABDICATED) @@ -454,18 +458,12 @@ do { os.state = ns.state = r->state; ns.pidx_tail = pidx_stop; - ns.flags = BUSY; + if (os.flags == IDLE) + ns.flags = ABDICATED; } while (atomic_cmpset_rel_64(&r->state, os.state, ns.state) == 0); critical_exit(); counter_u64_add(r->enqueues, n); - /* - * Turn into a consumer if some other thread isn't active as a consumer - * already. - */ - if (os.flags != BUSY) - drain_ring_lockless(r, ns, os.flags, budget); - return (0); } #endif @@ -476,7 +474,9 @@ union ring_state os, ns; os.state = r->state; - if (os.flags != STALLED || os.pidx_head != os.pidx_tail || r->can_drain(r) == 0) + if ((os.flags != STALLED && os.flags != ABDICATED) || // Only continue in STALLED and ABDICATED + os.pidx_head != os.pidx_tail || // Require work to be available + (os.flags != ABDICATED && r->can_drain(r) == 0)) // Can either drain, or everyone left return; MPASS(os.cidx != os.pidx_tail); /* implied by STALLED */ Index: sys/sys/gtaskqueue.h =================================================================== --- sys/sys/gtaskqueue.h +++ sys/sys/gtaskqueue.h @@ -58,7 +58,9 @@ void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask); struct taskqgroup *taskqgroup_create(char *name); void taskqgroup_destroy(struct taskqgroup *qgroup); -int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride); +int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri); +int taskqgroup_adjust_once(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri); +void taskqgroup_set_adjust(struct taskqgroup *qgroup, void (*adjust_func)(void*)); #define TASK_ENQUEUED 0x1 #define TASK_SKIP_WAKEUP 0x2 @@ -80,27 +82,40 @@ #define TASKQGROUP_DECLARE(name) \ extern struct taskqgroup *qgroup_##name -#define TASKQGROUP_DEFINE(name, cnt, stride) \ + +#define TASKQGROUP_DEFINE(name, cnt, stride, intr, pri) \ \ struct taskqgroup *qgroup_##name; \ \ static void \ -taskqgroup_define_##name(void *arg) \ +taskqgroup_adjust_##name(void *arg) \ { \ - qgroup_##name = taskqgroup_create(#name); \ + int max = (intr) ? 1 : (cnt); \ + if (arg != NULL) { \ + uintptr_t maxcpu = (uintptr_t) arg; \ + max = maxcpu; \ + } \ + \ + taskqgroup_adjust_once(qgroup_##name, max, (stride), (intr), (pri)); \ } \ \ -SYSINIT(taskqgroup_##name, SI_SUB_TASKQ, SI_ORDER_FIRST, \ - taskqgroup_define_##name, NULL); \ +SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \ + taskqgroup_adjust_##name, NULL); \ \ static void \ -taskqgroup_adjust_##name(void *arg) \ +taskqgroup_define_##name(void *arg) \ { \ - taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \ + qgroup_##name = taskqgroup_create(#name); \ + taskqgroup_set_adjust(qgroup_##name, taskqgroup_adjust_##name); \ } \ - \ -SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \ - taskqgroup_adjust_##name, NULL) +SYSINIT(taskqgroup_##name, SI_SUB_TASKQ, SI_ORDER_FIRST, \ + taskqgroup_define_##name, NULL) + + + + + + TASKQGROUP_DECLARE(net); TASKQGROUP_DECLARE(softirq);