diff --git a/sys/compat/linuxkpi/common/src/linux_80211.c b/sys/compat/linuxkpi/common/src/linux_80211.c index 42fb7a60ce57..ffc28e2685de 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211.c +++ b/sys/compat/linuxkpi/common/src/linux_80211.c @@ -1,5330 +1,5330 @@ /*- * Copyright (c) 2020-2023 The FreeBSD Foundation * Copyright (c) 2020-2022 Bjoern A. Zeeb * * This software was developed by Björn Zeeb under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Public functions are called linuxkpi_*(). * Internal (static) functions are called lkpi_*(). * * The internal structures holding metadata over public structures are also * called lkpi_xxx (usually with a member at the end called xxx). * Note: we do not replicate the structure names but the general variable names * for these (e.g., struct hw -> struct lkpi_hw, struct sta -> struct lkpi_sta). * There are macros to access one from the other. * We call the internal versions lxxx (e.g., hw -> lhw, sta -> lsta). */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LINUXKPI_NET80211 #include #include #include "linux_80211.h" #define LKPI_80211_WME /* #define LKPI_80211_HW_CRYPTO */ static MALLOC_DEFINE(M_LKPI80211, "lkpi80211", "LinuxKPI 80211 compat"); /* XXX-BZ really want this and others in queue.h */ #define TAILQ_ELEM_INIT(elm, field) do { \ (elm)->field.tqe_next = NULL; \ (elm)->field.tqe_prev = NULL; \ } while (0) /* -------------------------------------------------------------------------- */ /* Keep public for as long as header files are using it too. 
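*/

/*
 * Illustrative sketch (not part of this change): the lkpi_xxx wrapper
 * convention described above embeds the public structure in a private
 * one and converts between the two with offsetof()-based macros, much
 * like the LHW_TO_HW()/HW_TO_LHW() pairs used throughout this file.
 * All names below are local stand-ins, not the real linuxkpi types.
 */
#if 0
#include <stddef.h>
#include <stdio.h>

struct hw {				/* "public" structure */
	int queues;
};

struct lkpi_hw {			/* private wrapper; member named hw */
	int scan_flags;
	struct hw hw;
};

#define	LHW_TO_HW(_lhw)	(&(_lhw)->hw)
#define	HW_TO_LHW(_hw)							\
	((struct lkpi_hw *)((char *)(_hw) - offsetof(struct lkpi_hw, hw)))

int
main(void)
{
	struct lkpi_hw lhw = { .scan_flags = 1, .hw = { .queues = 4 } };
	struct hw *hw = LHW_TO_HW(&lhw);

	/* Round-trip: recover the wrapper from the embedded public struct. */
	printf("scan_flags %d queues %d\n",
	    HW_TO_LHW(hw)->scan_flags, hw->queues);
	return (0);
}
#endif

/*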
*/ int linuxkpi_debug_80211; #ifdef LINUXKPI_DEBUG_80211 SYSCTL_DECL(_compat_linuxkpi); SYSCTL_NODE(_compat_linuxkpi, OID_AUTO, 80211, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "LinuxKPI 802.11 compatibility layer"); SYSCTL_INT(_compat_linuxkpi_80211, OID_AUTO, debug, CTLFLAG_RWTUN, &linuxkpi_debug_80211, 0, "LinuxKPI 802.11 debug level"); #define UNIMPLEMENTED if (linuxkpi_debug_80211 & D80211_TODO) \ printf("XXX-TODO %s:%d: UNIMPLEMENTED\n", __func__, __LINE__) #define TRACEOK() if (linuxkpi_debug_80211 & D80211_TRACEOK) \ printf("XXX-TODO %s:%d: TRACEPOINT\n", __func__, __LINE__) #else #define UNIMPLEMENTED do { } while (0) #define TRACEOK() do { } while (0) #endif /* #define PREP_TX_INFO_DURATION (IEEE80211_TRANS_WAIT * 1000) */ #ifndef PREP_TX_INFO_DURATION #define PREP_TX_INFO_DURATION 0 /* Let the driver do its thing. */ #endif /* This is DSAP | SSAP | CTRL | ProtoID/OrgCode{3}. */ const uint8_t rfc1042_header[6] = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; /* IEEE 802.11-05/0257r1 */ const uint8_t bridge_tunnel_header[6] = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; /* IEEE 802.11e Table 20i-UP-to-AC mappings. */ static const uint8_t ieee80211e_up_to_ac[] = { IEEE80211_AC_BE, IEEE80211_AC_BK, IEEE80211_AC_BK, IEEE80211_AC_BE, IEEE80211_AC_VI, IEEE80211_AC_VI, IEEE80211_AC_VO, IEEE80211_AC_VO, #if 0 IEEE80211_AC_VO, /* We treat MGMT as TID 8, which is set as AC_VO */ #endif }; const struct cfg80211_ops linuxkpi_mac80211cfgops = { /* * XXX TODO need a "glue layer" to link cfg80211 ops to * mac80211 and to the driver or net80211. * Can we pass some on 1:1? Need to compare the (*f)(). */ }; static struct lkpi_sta *lkpi_find_lsta_by_ni(struct lkpi_vif *, struct ieee80211_node *); static void lkpi_80211_txq_task(void *, int); static void lkpi_ieee80211_free_skb_mbuf(void *); #ifdef LKPI_80211_WME static int lkpi_wme_update(struct lkpi_hw *, struct ieee80211vap *, bool); #endif static void lkpi_lsta_dump(struct lkpi_sta *lsta, struct ieee80211_node *ni, const char *_f, int _l) { #ifdef LINUXKPI_DEBUG_80211 if ((linuxkpi_debug_80211 & D80211_TRACE_STA) == 0) return; if (lsta == NULL) return; printf("%s:%d lsta %p ni %p sta %p\n", _f, _l, lsta, ni, &lsta->sta); if (ni != NULL) ieee80211_dump_node(NULL, ni); printf("\ttxq_task txq len %d mtx\n", mbufq_len(&lsta->txq)); printf("\tkc %p state %d added_to_drv %d in_mgd %d\n", lsta->kc, lsta->state, lsta->added_to_drv, lsta->in_mgd); #endif } static void lkpi_lsta_remove(struct lkpi_sta *lsta, struct lkpi_vif *lvif) { struct ieee80211_node *ni; IMPROVE("XXX-BZ remove tqe_prev check once ni-sta-state-sync is fixed"); ni = lsta->ni; LKPI_80211_LVIF_LOCK(lvif); if (lsta->lsta_entry.tqe_prev != NULL) TAILQ_REMOVE(&lvif->lsta_head, lsta, lsta_entry); LKPI_80211_LVIF_UNLOCK(lvif); lsta->ni = NULL; ni->ni_drv_data = NULL; if (ni != NULL) ieee80211_free_node(ni); IMPROVE("more from lkpi_ic_node_free() should happen here."); free(lsta, M_LKPI80211); } static struct lkpi_sta * lkpi_lsta_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN], struct ieee80211_hw *hw, struct ieee80211_node *ni) { struct lkpi_sta *lsta; struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct ieee80211_sta *sta; int band, i, tid; lsta = malloc(sizeof(*lsta) + hw->sta_data_size, M_LKPI80211, M_NOWAIT | M_ZERO); if (lsta == NULL) return (NULL); lsta->added_to_drv = false; lsta->state = IEEE80211_STA_NOTEXIST; #if 0 /* * This needs to be done in node_init() as ieee80211_alloc_node() * will initialise the refcount after us. 
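*/

/*
 * Illustrative sketch (not part of this change): how the
 * ieee80211e_up_to_ac[] table above is meant to be used.  A TID is
 * reduced to its 3-bit user priority (tid & 7) and mapped to one of the
 * four access categories.  Enum values and names are local stand-ins.
 */
#if 0
#include <stdio.h>

enum ac { AC_VO, AC_VI, AC_BE, AC_BK };

static const enum ac up_to_ac[8] = {
	AC_BE, AC_BK, AC_BK, AC_BE, AC_VI, AC_VI, AC_VO, AC_VO,
};

static const char *ac_name[] = { "VO", "VI", "BE", "BK" };

int
main(void)
{
	int tid;

	/* TIDs 8..15 fold back onto UP 0..7 via the & 7 mask. */
	for (tid = 0; tid < 16; tid++)
		printf("tid %2d -> AC_%s\n", tid, ac_name[up_to_ac[tid & 7]]);
	return (0);
}
#endif

/*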
*/ lsta->ni = ieee80211_ref_node(ni); #endif /* The back-pointer "drv_data" to net80211_node lets us get lsta. */ ni->ni_drv_data = lsta; lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); sta = LSTA_TO_STA(lsta); IEEE80211_ADDR_COPY(sta->addr, mac); /* TXQ */ for (tid = 0; tid < nitems(sta->txq); tid++) { struct lkpi_txq *ltxq; /* We are not limiting ourselves to hw.queues here. */ ltxq = malloc(sizeof(*ltxq) + hw->txq_data_size, M_LKPI80211, M_NOWAIT | M_ZERO); if (ltxq == NULL) goto cleanup; /* iwlwifi//mvm/sta.c::tid_to_mac80211_ac[] */ if (tid == IEEE80211_NUM_TIDS) { if (!ieee80211_hw_check(hw, STA_MMPDU_TXQ)) { free(ltxq, M_LKPI80211); continue; } IMPROVE("AP/if we support non-STA here too"); ltxq->txq.ac = IEEE80211_AC_VO; } else { ltxq->txq.ac = ieee80211e_up_to_ac[tid & 7]; } ltxq->seen_dequeue = false; ltxq->stopped = false; ltxq->txq.vif = vif; ltxq->txq.tid = tid; ltxq->txq.sta = sta; TAILQ_ELEM_INIT(ltxq, txq_entry); skb_queue_head_init(&ltxq->skbq); sta->txq[tid] = &ltxq->txq; } /* Deflink information. */ for (band = 0; band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *supband; supband = hw->wiphy->bands[band]; if (supband == NULL) continue; for (i = 0; i < supband->n_bitrates; i++) { IMPROVE("Further supband->bitrates[i]* checks?"); /* or should we get them from the ni? */ sta->deflink.supp_rates[band] |= BIT(i); } } sta->deflink.smps_mode = IEEE80211_SMPS_OFF; IMPROVE("ht, vht, he, ... bandwidth, smps_mode, .."); /* bandwidth = IEEE80211_STA_RX_... */ /* Link configuration. */ IEEE80211_ADDR_COPY(sta->deflink.addr, sta->addr); sta->link[0] = &sta->deflink; for (i = 1; i < nitems(sta->link); i++) { IMPROVE("more links; only link[0] = deflink currently."); } /* Deferred TX path. */ mtx_init(&lsta->txq_mtx, "lsta_txq", NULL, MTX_DEF); TASK_INIT(&lsta->txq_task, 0, lkpi_80211_txq_task, lsta); mbufq_init(&lsta->txq, IFQ_MAXLEN); return (lsta); cleanup: for (; tid >= 0; tid--) free(sta->txq[tid], M_LKPI80211); free(lsta, M_LKPI80211); return (NULL); } static enum nl80211_band lkpi_net80211_chan_to_nl80211_band(struct ieee80211_channel *c) { if (IEEE80211_IS_CHAN_2GHZ(c)) return (NL80211_BAND_2GHZ); else if (IEEE80211_IS_CHAN_5GHZ(c)) return (NL80211_BAND_5GHZ); #ifdef __notyet__ else if () return (NL80211_BAND_6GHZ); else if () return (NL80211_BAND_60GHZ); else if (IEEE80211_IS_CHAN_GSM(c)) return (NL80211_BAND_XXX); #endif else panic("%s: unsupported band. c %p flags %#x\n", __func__, c, c->ic_flags); } static uint32_t lkpi_nl80211_band_to_net80211_band(enum nl80211_band band) { /* XXX-BZ this is just silly; net80211 is too convoluted. */ /* IEEE80211_CHAN_A / _G / .. doesn't really work either. 
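*/

/*
 * Illustrative sketch (not part of this change): the partial-allocation
 * unwind used by lkpi_lsta_alloc() above.  On a mid-loop failure, walk
 * back from the failed index and free exactly the slots already filled;
 * free(NULL) on the failed slot itself is harmless.  Everything here is
 * a local stand-in.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>

#define	NQ	9

static int
alloc_all(int *q[NQ])
{
	int i;

	for (i = 0; i < NQ; i++) {
		q[i] = malloc(sizeof(int));
		if (q[i] == NULL)
			goto cleanup;
		*q[i] = i;
	}
	return (0);

cleanup:
	for (; i >= 0; i--)	/* i still indexes the failed (NULL) slot. */
		free(q[i]);
	return (-1);
}

int
main(void)
{
	int *q[NQ];

	if (alloc_all(q) == 0)
		for (int i = 0; i < NQ; i++)
			free(q[i]);
	return (0);
}
#endif

/*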
*/ switch (band) { case NL80211_BAND_2GHZ: return (IEEE80211_CHAN_2GHZ); break; case NL80211_BAND_5GHZ: return (IEEE80211_CHAN_5GHZ); break; case NL80211_BAND_60GHZ: break; case NL80211_BAND_6GHZ: break; default: panic("%s: unsupported band %u\n", __func__, band); break; } IMPROVE(); return (0x00); } #if 0 static enum ieee80211_ac_numbers lkpi_ac_net_to_l80211(int ac) { switch (ac) { case WME_AC_VO: return (IEEE80211_AC_VO); case WME_AC_VI: return (IEEE80211_AC_VI); case WME_AC_BE: return (IEEE80211_AC_BE); case WME_AC_BK: return (IEEE80211_AC_BK); default: printf("%s: invalid WME_AC_* input: ac = %d\n", __func__, ac); return (IEEE80211_AC_BE); } } #endif static enum nl80211_iftype lkpi_opmode_to_vif_type(enum ieee80211_opmode opmode) { switch (opmode) { case IEEE80211_M_IBSS: return (NL80211_IFTYPE_ADHOC); break; case IEEE80211_M_STA: return (NL80211_IFTYPE_STATION); break; case IEEE80211_M_WDS: return (NL80211_IFTYPE_WDS); break; case IEEE80211_M_HOSTAP: return (NL80211_IFTYPE_AP); break; case IEEE80211_M_MONITOR: return (NL80211_IFTYPE_MONITOR); break; case IEEE80211_M_MBSS: return (NL80211_IFTYPE_MESH_POINT); break; case IEEE80211_M_AHDEMO: /* FALLTHROUGH */ default: printf("ERROR: %s: unsupported opmode %d\n", __func__, opmode); /* FALLTHROUGH */ } return (NL80211_IFTYPE_UNSPECIFIED); } #ifdef LKPI_80211_HW_CRYPTO static uint32_t lkpi_l80211_to_net80211_cyphers(uint32_t wlan_cipher_suite) { switch (wlan_cipher_suite) { case WLAN_CIPHER_SUITE_WEP40: return (IEEE80211_CRYPTO_WEP); case WLAN_CIPHER_SUITE_TKIP: return (IEEE80211_CRYPTO_TKIP); case WLAN_CIPHER_SUITE_CCMP: return (IEEE80211_CIPHER_AES_CCM); case WLAN_CIPHER_SUITE_WEP104: return (IEEE80211_CRYPTO_WEP); case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: case WLAN_CIPHER_SUITE_BIP_CMAC_256: printf("%s: unsupported WLAN Cipher Suite %#08x | %u\n", __func__, wlan_cipher_suite >> 8, wlan_cipher_suite & 0xff); break; default: printf("%s: unknown WLAN Cipher Suite %#08x | %u\n", __func__, wlan_cipher_suite >> 8, wlan_cipher_suite & 0xff); } return (0); } static uint32_t lkpi_net80211_to_l80211_cipher_suite(uint32_t cipher, uint8_t keylen) { switch (cipher) { case IEEE80211_CIPHER_TKIP: return (WLAN_CIPHER_SUITE_TKIP); case IEEE80211_CIPHER_AES_CCM: return (WLAN_CIPHER_SUITE_CCMP); case IEEE80211_CIPHER_WEP: if (keylen < 8) return (WLAN_CIPHER_SUITE_WEP40); else return (WLAN_CIPHER_SUITE_WEP104); break; case IEEE80211_CIPHER_AES_OCB: case IEEE80211_CIPHER_TKIPMIC: case IEEE80211_CIPHER_CKIP: case IEEE80211_CIPHER_NONE: printf("%s: unsupported cipher %#010x\n", __func__, cipher); break; default: printf("%s: unknown cipher %#010x\n", __func__, cipher); }; return (0); } #endif #ifdef __notyet__ static enum ieee80211_sta_state lkpi_net80211_state_to_sta_state(enum ieee80211_state state) { /* * XXX-BZ The net80211 states are "try to ..", the lkpi80211 states are * "done". Also ASSOC/AUTHORIZED are both "RUN" then? 
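*/

/*
 * Illustrative sketch (not part of this change): WEP comes in two suite
 * flavours chosen purely by key length, as in
 * lkpi_net80211_to_l80211_cipher_suite() above: 40-bit (5 byte) keys map
 * to WEP40, 104-bit (13 byte) keys to WEP104.  The selector constants
 * below are the usual 802.11 values but should be treated as
 * illustrative here.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define	SUITE_WEP40	0x000FAC01
#define	SUITE_WEP104	0x000FAC05

static uint32_t
wep_suite(uint8_t keylen)
{
	return (keylen < 8 ? SUITE_WEP40 : SUITE_WEP104);
}

int
main(void)
{
	printf("5-byte key  -> %#010x\n", wep_suite(5));
	printf("13-byte key -> %#010x\n", wep_suite(13));
	return (0);
}
#endif

/*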
*/ switch (state) { case IEEE80211_S_INIT: return (IEEE80211_STA_NOTEXIST); case IEEE80211_S_SCAN: return (IEEE80211_STA_NONE); case IEEE80211_S_AUTH: return (IEEE80211_STA_AUTH); case IEEE80211_S_ASSOC: return (IEEE80211_STA_ASSOC); case IEEE80211_S_RUN: return (IEEE80211_STA_AUTHORIZED); case IEEE80211_S_CAC: case IEEE80211_S_CSA: case IEEE80211_S_SLEEP: default: UNIMPLEMENTED; }; return (IEEE80211_STA_NOTEXIST); } #endif static struct linuxkpi_ieee80211_channel * lkpi_find_lkpi80211_chan(struct lkpi_hw *lhw, struct ieee80211_channel *c) { struct ieee80211_hw *hw; struct linuxkpi_ieee80211_channel *channels; enum nl80211_band band; int i, nchans; hw = LHW_TO_HW(lhw); band = lkpi_net80211_chan_to_nl80211_band(c); if (hw->wiphy->bands[band] == NULL) return (NULL); nchans = hw->wiphy->bands[band]->n_channels; if (nchans <= 0) return (NULL); channels = hw->wiphy->bands[band]->channels; for (i = 0; i < nchans; i++) { if (channels[i].hw_value == c->ic_ieee) return (&channels[i]); } return (NULL); } static struct linuxkpi_ieee80211_channel * lkpi_get_lkpi80211_chan(struct ieee80211com *ic, struct ieee80211_node *ni) { struct linuxkpi_ieee80211_channel *chan; struct ieee80211_channel *c; struct lkpi_hw *lhw; chan = NULL; if (ni != NULL && ni->ni_chan != IEEE80211_CHAN_ANYC) c = ni->ni_chan; else if (ic->ic_bsschan != IEEE80211_CHAN_ANYC) c = ic->ic_bsschan; else if (ic->ic_curchan != IEEE80211_CHAN_ANYC) c = ic->ic_curchan; else c = NULL; if (c != NULL && c != IEEE80211_CHAN_ANYC) { lhw = ic->ic_softc; chan = lkpi_find_lkpi80211_chan(lhw, c); } return (chan); } struct linuxkpi_ieee80211_channel * linuxkpi_ieee80211_get_channel(struct wiphy *wiphy, uint32_t freq) { enum nl80211_band band; for (band = 0; band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *supband; struct linuxkpi_ieee80211_channel *channels; int i; supband = wiphy->bands[band]; if (supband == NULL || supband->n_channels == 0) continue; channels = supband->channels; for (i = 0; i < supband->n_channels; i++) { if (channels[i].center_freq == freq) return (&channels[i]); } } return (NULL); } #ifdef LKPI_80211_HW_CRYPTO static int _lkpi_iv_key_set_delete(struct ieee80211vap *vap, const struct ieee80211_key *k, enum set_key_cmd cmd) { struct ieee80211com *ic; struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct ieee80211_sta *sta; struct ieee80211_node *ni; struct ieee80211_key_conf *kc; int error; /* XXX TODO Check (k->wk_flags & IEEE80211_KEY_SWENCRYPT) and don't upload to driver/hw? 
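*/

/*
 * Illustrative sketch (not part of this change): channel lookup as a
 * linear scan over per-band channel arrays, the same shape as
 * linuxkpi_ieee80211_get_channel() above.  Two stand-in bands with a few
 * channels each; all data is made up for illustration.
 */
#if 0
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct chan { uint32_t center_freq; };
struct band { struct chan *channels; int n_channels; };

static struct chan b2[] = { { 2412 }, { 2437 }, { 2462 } };
static struct chan b5[] = { { 5180 }, { 5500 } };
static struct band bands[] = { { b2, 3 }, { b5, 2 } };

static struct chan *
get_channel(uint32_t freq)
{
	size_t b;
	int i;

	for (b = 0; b < sizeof(bands) / sizeof(bands[0]); b++)
		for (i = 0; i < bands[b].n_channels; i++)
			if (bands[b].channels[i].center_freq == freq)
				return (&bands[b].channels[i]);
	return (NULL);
}

int
main(void)
{
	printf("5500 %s\n", get_channel(5500) ? "found" : "missing");
	printf("2467 %s\n", get_channel(2467) ? "found" : "missing");
	return (0);
}
#endif

/*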
*/ ic = vap->iv_ic; lhw = ic->ic_softc; hw = LHW_TO_HW(lhw); lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); memset(&kc, 0, sizeof(kc)); kc = malloc(sizeof(*kc) + k->wk_keylen, M_LKPI80211, M_WAITOK | M_ZERO); kc->cipher = lkpi_net80211_to_l80211_cipher_suite( k->wk_cipher->ic_cipher, k->wk_keylen); kc->keyidx = k->wk_keyix; #if 0 kc->hw_key_idx = /* set by hw and needs to be passed for TX */; #endif atomic64_set(&kc->tx_pn, k->wk_keytsc); kc->keylen = k->wk_keylen; memcpy(kc->key, k->wk_key, k->wk_keylen); switch (kc->cipher) { case WLAN_CIPHER_SUITE_CCMP: kc->iv_len = k->wk_cipher->ic_header; kc->icv_len = k->wk_cipher->ic_trailer; break; case WLAN_CIPHER_SUITE_TKIP: default: IMPROVE(); return (0); }; ni = vap->iv_bss; sta = ieee80211_find_sta(vif, ni->ni_bssid); if (sta != NULL) { struct lkpi_sta *lsta; lsta = STA_TO_LSTA(sta); lsta->kc = kc; } error = lkpi_80211_mo_set_key(hw, cmd, vif, sta, kc); if (error != 0) { /* XXX-BZ leaking kc currently */ ic_printf(ic, "%s: set_key failed: %d\n", __func__, error); return (0); } else { ic_printf(ic, "%s: set_key succeeded: keyidx %u hw_key_idx %u " "flags %#10x\n", __func__, kc->keyidx, kc->hw_key_idx, kc->flags); return (1); } } static int lkpi_iv_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k) { /* XXX-BZ one day we should replace this iterating over VIFs, or node list? */ return (_lkpi_iv_key_set_delete(vap, k, DISABLE_KEY)); } static int lkpi_iv_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) { return (_lkpi_iv_key_set_delete(vap, k, SET_KEY)); } #endif static u_int lkpi_ic_update_mcast_copy(void *arg, struct sockaddr_dl *sdl, u_int cnt) { struct netdev_hw_addr_list *mc_list; struct netdev_hw_addr *addr; KASSERT(arg != NULL && sdl != NULL, ("%s: arg %p sdl %p cnt %u\n", __func__, arg, sdl, cnt)); mc_list = arg; /* If it is on the list already skip it. */ netdev_hw_addr_list_for_each(addr, mc_list) { if (!memcmp(addr->addr, LLADDR(sdl), sdl->sdl_alen)) return (0); } addr = malloc(sizeof(*addr), M_LKPI80211, M_NOWAIT | M_ZERO); if (addr == NULL) return (0); INIT_LIST_HEAD(&addr->addr_list); memcpy(addr->addr, LLADDR(sdl), sdl->sdl_alen); /* XXX this should be a netdev function? */ list_add(&addr->addr_list, &mc_list->addr_list); mc_list->count++; #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE) printf("%s:%d: mc_list count %d: added %6D\n", __func__, __LINE__, mc_list->count, addr->addr, ":"); #endif return (1); } static void lkpi_update_mcast_filter(struct ieee80211com *ic, bool force) { struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct netdev_hw_addr_list mc_list; struct list_head *le, *next; struct netdev_hw_addr *addr; struct ieee80211vap *vap; u64 mc; unsigned int changed_flags, total_flags; lhw = ic->ic_softc; if (lhw->ops->prepare_multicast == NULL || lhw->ops->configure_filter == NULL) return; if (!lhw->update_mc && !force) return; changed_flags = total_flags = 0; mc_list.count = 0; INIT_LIST_HEAD(&mc_list.addr_list); if (ic->ic_allmulti == 0) { TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) if_foreach_llmaddr(vap->iv_ifp, lkpi_ic_update_mcast_copy, &mc_list); } else { changed_flags |= FIF_ALLMULTI; } hw = LHW_TO_HW(lhw); mc = lkpi_80211_mo_prepare_multicast(hw, &mc_list); /* * XXX-BZ make sure to get this sorted what is a change, * what gets all set; what was already set? 
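*/

/*
 * Illustrative sketch (not part of this change): the multicast copy
 * callback above skips addresses already on the list before appending,
 * so each link-layer address ends up on the driver list at most once.
 * A minimal fixed-size version of that dedupe-append; 6-byte addresses,
 * everything local to the example.
 */
#if 0
#include <stdio.h>
#include <string.h>

#define	ADDR_LEN	6
#define	MAXADDRS	8

struct addr_list {
	unsigned char addr[MAXADDRS][ADDR_LEN];
	int count;
};

static int
addr_list_add(struct addr_list *l, const unsigned char *a)
{
	int i;

	for (i = 0; i < l->count; i++)
		if (memcmp(l->addr[i], a, ADDR_LEN) == 0)
			return (0);		/* already there; skip */
	if (l->count == MAXADDRS)
		return (0);			/* full; treat as no-op */
	memcpy(l->addr[l->count++], a, ADDR_LEN);
	return (1);
}

int
main(void)
{
	struct addr_list l = { .count = 0 };
	const unsigned char a[ADDR_LEN] = { 1, 0, 0x5e, 0, 0, 1 };

	addr_list_add(&l, a);
	addr_list_add(&l, a);			/* duplicate: not added */
	printf("count %d\n", l.count);		/* prints 1 */
	return (0);
}
#endif

/*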
*/ total_flags = changed_flags; lkpi_80211_mo_configure_filter(hw, changed_flags, &total_flags, mc); #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE) printf("%s: changed_flags %#06x count %d total_flags %#010x\n", __func__, changed_flags, mc_list.count, total_flags); #endif if (mc_list.count != 0) { list_for_each_safe(le, next, &mc_list.addr_list) { addr = list_entry(le, struct netdev_hw_addr, addr_list); free(addr, M_LKPI80211); mc_list.count--; } } KASSERT(mc_list.count == 0, ("%s: mc_list %p count %d != 0\n", __func__, &mc_list, mc_list.count)); } static enum ieee80211_bss_changed lkpi_update_dtim_tsf(struct ieee80211_vif *vif, struct ieee80211_node *ni, struct ieee80211vap *vap, const char *_f, int _l) { enum ieee80211_bss_changed bss_changed; bss_changed = 0; #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE) printf("%s:%d [%s:%d] assoc %d aid %d beacon_int %u " "dtim_period %u sync_dtim_count %u sync_tsf %ju " "sync_device_ts %u bss_changed %#08x\n", __func__, __LINE__, _f, _l, vif->cfg.assoc, vif->cfg.aid, vif->bss_conf.beacon_int, vif->bss_conf.dtim_period, vif->bss_conf.sync_dtim_count, (uintmax_t)vif->bss_conf.sync_tsf, vif->bss_conf.sync_device_ts, bss_changed); #endif if (vif->bss_conf.beacon_int != ni->ni_intval) { vif->bss_conf.beacon_int = ni->ni_intval; /* iwlwifi FW bug workaround; iwl_mvm_mac_sta_state. */ if (vif->bss_conf.beacon_int < 16) vif->bss_conf.beacon_int = 16; bss_changed |= BSS_CHANGED_BEACON_INT; } if (vif->bss_conf.dtim_period != vap->iv_dtim_period && vap->iv_dtim_period > 0) { vif->bss_conf.dtim_period = vap->iv_dtim_period; bss_changed |= BSS_CHANGED_BEACON_INFO; } vif->bss_conf.sync_dtim_count = vap->iv_dtim_count; vif->bss_conf.sync_tsf = le64toh(ni->ni_tstamp.tsf); /* vif->bss_conf.sync_device_ts = set in linuxkpi_ieee80211_rx. */ #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE) printf("%s:%d [%s:%d] assoc %d aid %d beacon_int %u " "dtim_period %u sync_dtim_count %u sync_tsf %ju " "sync_device_ts %u bss_changed %#08x\n", __func__, __LINE__, _f, _l, vif->cfg.assoc, vif->cfg.aid, vif->bss_conf.beacon_int, vif->bss_conf.dtim_period, vif->bss_conf.sync_dtim_count, (uintmax_t)vif->bss_conf.sync_tsf, vif->bss_conf.sync_device_ts, bss_changed); #endif return (bss_changed); } static void lkpi_stop_hw_scan(struct lkpi_hw *lhw, struct ieee80211_vif *vif) { struct ieee80211_hw *hw; int error; bool cancel; LKPI_80211_LHW_SCAN_LOCK(lhw); cancel = (lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0; LKPI_80211_LHW_SCAN_UNLOCK(lhw); if (!cancel) return; hw = LHW_TO_HW(lhw); IEEE80211_UNLOCK(lhw->ic); LKPI_80211_LHW_LOCK(lhw); /* Need to cancel the scan. */ lkpi_80211_mo_cancel_hw_scan(hw, vif); LKPI_80211_LHW_UNLOCK(lhw); /* Need to make sure we see ieee80211_scan_completed. 
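*/

/*
 * Illustrative sketch (not part of this change): lkpi_update_dtim_tsf()
 * above only sets a BSS_CHANGED_* bit when a value actually changed, and
 * clamps the beacon interval to a minimum of 16 TU (the iwlwifi firmware
 * workaround).  Same logic on stand-in types; flag values are arbitrary.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define	CH_BEACON_INT	0x1
#define	CH_BEACON_INFO	0x2

struct conf { uint16_t beacon_int; uint8_t dtim_period; };

static uint32_t
sync_conf(struct conf *c, uint16_t intval, uint8_t dtim)
{
	uint32_t changed = 0;

	if (c->beacon_int != intval) {
		c->beacon_int = intval < 16 ? 16 : intval;	/* clamp */
		changed |= CH_BEACON_INT;
	}
	if (c->dtim_period != dtim && dtim > 0) {
		c->dtim_period = dtim;
		changed |= CH_BEACON_INFO;
	}
	return (changed);
}

int
main(void)
{
	struct conf c = { 100, 1 };

	printf("changed %#x beacon_int %u\n",
	    sync_conf(&c, 10, 1), c.beacon_int);	/* clamped to 16 */
	return (0);
}
#endif

/*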
*/ LKPI_80211_LHW_SCAN_LOCK(lhw); if ((lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0) error = msleep(lhw, &lhw->scan_mtx, 0, "lhwscanstop", hz/2); cancel = (lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0; LKPI_80211_LHW_SCAN_UNLOCK(lhw); IEEE80211_LOCK(lhw->ic); if (cancel) ic_printf(lhw->ic, "%s: failed to cancel scan: %d (%p, %p)\n", __func__, error, lhw, vif); } static void lkpi_hw_conf_idle(struct ieee80211_hw *hw, bool new) { struct lkpi_hw *lhw; int error; bool old; old = hw->conf.flags & IEEE80211_CONF_IDLE; if (old == new) return; hw->conf.flags ^= IEEE80211_CONF_IDLE; error = lkpi_80211_mo_config(hw, IEEE80211_CONF_CHANGE_IDLE); if (error != 0 && error != EOPNOTSUPP) { lhw = HW_TO_LHW(hw); ic_printf(lhw->ic, "ERROR: %s: config %#0x returned %d\n", __func__, IEEE80211_CONF_CHANGE_IDLE, error); } } static void lkpi_disassoc(struct ieee80211_sta *sta, struct ieee80211_vif *vif, struct lkpi_hw *lhw) { sta->aid = 0; if (vif->cfg.assoc) { struct ieee80211_hw *hw; enum ieee80211_bss_changed changed; lhw->update_mc = true; lkpi_update_mcast_filter(lhw->ic, true); changed = 0; vif->cfg.assoc = false; vif->cfg.aid = 0; changed |= BSS_CHANGED_ASSOC; /* * This will remove the sta from firmware for iwlwifi. * So confusing that they use state and flags and ... ^%$%#%$^. */ IMPROVE(); hw = LHW_TO_HW(lhw); lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, changed); lkpi_hw_conf_idle(hw, true); } } static void lkpi_wake_tx_queues(struct ieee80211_hw *hw, struct ieee80211_sta *sta, bool dequeue_seen, bool no_emptyq) { struct lkpi_txq *ltxq; int tid; /* Wake up all queues to know they are allocated in the driver. */ for (tid = 0; tid < nitems(sta->txq); tid++) { if (tid == IEEE80211_NUM_TIDS) { IMPROVE("station specific?"); if (!ieee80211_hw_check(hw, STA_MMPDU_TXQ)) continue; } else if (tid >= hw->queues) continue; if (sta->txq[tid] == NULL) continue; ltxq = TXQ_TO_LTXQ(sta->txq[tid]); if (dequeue_seen && !ltxq->seen_dequeue) continue; if (no_emptyq && skb_queue_empty(&ltxq->skbq)) continue; lkpi_80211_mo_wake_tx_queue(hw, sta->txq[tid]); } } /* -------------------------------------------------------------------------- */ static int lkpi_sta_state_do_nada(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { return (0); } /* lkpi_iv_newstate() handles the stop scan case generally. */ #define lkpi_sta_scan_to_init(_v, _n, _a) lkpi_sta_state_do_nada(_v, _n, _a) static int lkpi_sta_scan_to_auth(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct linuxkpi_ieee80211_channel *chan; struct lkpi_chanctx *lchanctx; struct ieee80211_chanctx_conf *conf; struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct ieee80211_node *ni; struct lkpi_sta *lsta; enum ieee80211_bss_changed bss_changed; struct ieee80211_prep_tx_info prep_tx_info; uint32_t changed; int error; chan = lkpi_get_lkpi80211_chan(vap->iv_ic, vap->iv_bss); if (chan == NULL) { ic_printf(vap->iv_ic, "%s: failed to get channel\n", __func__); return (ESRCH); } lhw = vap->iv_ic->ic_softc; hw = LHW_TO_HW(lhw); lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); ni = ieee80211_ref_node(vap->iv_bss); IEEE80211_UNLOCK(vap->iv_ic); LKPI_80211_LHW_LOCK(lhw); /* Add chanctx (or if exists, change it). */ if (vif->chanctx_conf != NULL) { conf = vif->chanctx_conf; lchanctx = CHANCTX_CONF_TO_LCHANCTX(conf); IMPROVE("diff changes for changed, working on live copy, rcu"); } else { /* Keep separate alloc as in Linux this is rcu managed? 
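*/

/*
 * Illustrative sketch (not part of this change): the scan teardown above
 * asks the driver to cancel and then msleep()s, bounded by a timeout,
 * until the RUNNING flag is cleared by the ieee80211_scan_completed()
 * path.  Userland analogy of that handshake with a mutex/condvar
 * standing in for msleep()/wakeup(); the flag name is made up.
 */
#if 0
#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int scan_running = 1;

static void *
scan_done(void *arg)
{
	pthread_mutex_lock(&mtx);
	scan_running = 0;		/* like ieee80211_scan_completed() */
	pthread_cond_broadcast(&cv);	/* like wakeup(lhw) */
	pthread_mutex_unlock(&mtx);
	return (arg);
}

int
main(void)
{
	pthread_t tid;
	struct timespec ts;

	pthread_create(&tid, NULL, scan_done, NULL);

	clock_gettime(CLOCK_REALTIME, &ts);
	ts.tv_sec += 1;			/* like the hz/2 bound on msleep() */

	pthread_mutex_lock(&mtx);
	while (scan_running &&
	    pthread_cond_timedwait(&cv, &mtx, &ts) == 0)
		;
	printf("cancelled: %s\n", scan_running ? "no" : "yes");
	pthread_mutex_unlock(&mtx);
	pthread_join(tid, NULL);
	return (0);
}
#endif

/*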
*/ lchanctx = malloc(sizeof(*lchanctx) + hw->chanctx_data_size, M_LKPI80211, M_WAITOK | M_ZERO); conf = &lchanctx->conf; } conf->rx_chains_dynamic = 1; conf->rx_chains_static = 1; conf->radar_enabled = (chan->flags & IEEE80211_CHAN_RADAR) ? true : false; conf->def.chan = chan; conf->def.width = NL80211_CHAN_WIDTH_20_NOHT; conf->def.center_freq1 = chan->center_freq; conf->def.center_freq2 = 0; /* Responder ... */ conf->min_def.chan = chan; conf->min_def.width = NL80211_CHAN_WIDTH_20_NOHT; conf->min_def.center_freq1 = chan->center_freq; conf->min_def.center_freq2 = 0; IMPROVE("currently 20_NOHT only"); /* Set bss info (bss_info_changed). */ bss_changed = 0; vif->bss_conf.bssid = ni->ni_bssid; bss_changed |= BSS_CHANGED_BSSID; vif->bss_conf.txpower = ni->ni_txpower; bss_changed |= BSS_CHANGED_TXPOWER; vif->cfg.idle = false; bss_changed |= BSS_CHANGED_IDLE; /* vif->bss_conf.basic_rates ? Where exactly? */ /* Should almost assert it is this. */ vif->cfg.assoc = false; vif->cfg.aid = 0; bss_changed |= lkpi_update_dtim_tsf(vif, ni, vap, __func__, __LINE__); error = 0; if (vif->chanctx_conf != NULL) { changed = IEEE80211_CHANCTX_CHANGE_MIN_WIDTH; changed |= IEEE80211_CHANCTX_CHANGE_RADAR; changed |= IEEE80211_CHANCTX_CHANGE_RX_CHAINS; changed |= IEEE80211_CHANCTX_CHANGE_WIDTH; lkpi_80211_mo_change_chanctx(hw, conf, changed); } else { error = lkpi_80211_mo_add_chanctx(hw, conf); if (error == 0 || error == EOPNOTSUPP) { vif->bss_conf.chandef.chan = conf->def.chan; vif->bss_conf.chandef.width = conf->def.width; vif->bss_conf.chandef.center_freq1 = conf->def.center_freq1; vif->bss_conf.chandef.center_freq2 = conf->def.center_freq2; } else { ic_printf(vap->iv_ic, "%s:%d: mo_add_chanctx " "failed: %d\n", __func__, __LINE__, error); goto out; } vif->bss_conf.chanctx_conf = conf; /* Assign vif chanctx. */ if (error == 0) error = lkpi_80211_mo_assign_vif_chanctx(hw, vif, &vif->bss_conf, conf); if (error == EOPNOTSUPP) error = 0; if (error != 0) { ic_printf(vap->iv_ic, "%s:%d: mo_assign_vif_chanctx " "failed: %d\n", __func__, __LINE__, error); lkpi_80211_mo_remove_chanctx(hw, conf); lchanctx = CHANCTX_CONF_TO_LCHANCTX(conf); free(lchanctx, M_LKPI80211); goto out; } } IMPROVE("update radiotap chan fields too"); /* RATES */ IMPROVE("bss info: not all needs to come now and rates are missing"); lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, bss_changed); /* * This is a bandaid for now. If we went through (*iv_update_bss)() * and then removed the lsta we end up here without a lsta and have * to manually allocate and link it in as lkpi_ic_node_alloc()/init() * would normally do. * XXX-BZ I do not like this but currently we have no good way of * intercepting the bss swap and state changes and packets going out * workflow so live with this. It is a compat layer after all. */ if (ni->ni_drv_data == NULL) { lsta = lkpi_lsta_alloc(vap, ni->ni_macaddr, hw, ni); if (lsta == NULL) { error = ENOMEM; ic_printf(vap->iv_ic, "%s:%d: lkpi_lsta_alloc " "failed: %d\n", __func__, __LINE__, error); goto out; } lsta->ni = ieee80211_ref_node(ni); } else { lsta = ni->ni_drv_data; } /* Insert the [l]sta into the list of known stations. */ LKPI_80211_LVIF_LOCK(lvif); TAILQ_INSERT_TAIL(&lvif->lsta_head, lsta, lsta_entry); LKPI_80211_LVIF_UNLOCK(lvif); /* Add (or adjust) sta and change state (from NOTEXIST) to NONE. 
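*/

/*
 * Illustrative sketch (not part of this change): scan_to_auth above
 * performs a two-step setup: add the channel context, then assign it to
 * the vif, and on a failed second step it rolls the first one back
 * (mo_remove_chanctx) before bailing out.  Skeleton of that rollback
 * shape with stub steps; EOPNOTSUPP from an optional method is treated
 * as success, as in the code above, and its value here is illustrative.
 */
#if 0
#include <stdio.h>

#define	EOPNOTSUPP	45

static int add_ctx(void)     { return (0); }
static int assign_ctx(void)  { return (EOPNOTSUPP); }	/* optional op */
static void remove_ctx(void) { }

static int
setup(void)
{
	int error;

	error = add_ctx();
	if (error != 0 && error != EOPNOTSUPP)
		return (error);

	error = assign_ctx();
	if (error == EOPNOTSUPP)	/* optional method: not an error */
		error = 0;
	if (error != 0) {
		remove_ctx();		/* undo step one */
		return (error);
	}
	return (0);
}

int
main(void)
{
	printf("setup -> %d\n", setup());
	return (0);
}
#endif

/*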
*/ KASSERT(lsta != NULL, ("%s: ni %p lsta is NULL\n", __func__, ni)); KASSERT(lsta->state == IEEE80211_STA_NOTEXIST, ("%s: lsta %p state not " "NOTEXIST: %#x\n", __func__, lsta, lsta->state)); error = lkpi_80211_mo_sta_state(hw, vif, lsta, IEEE80211_STA_NONE); if (error != 0) { IMPROVE("do we need to undo the chan ctx?"); ic_printf(vap->iv_ic, "%s:%d: mo_sta_state(NONE) " "failed: %d\n", __func__, __LINE__, error); goto out; } #if 0 lsta->added_to_drv = true; /* mo manages. */ #endif lkpi_lsta_dump(lsta, ni, __func__, __LINE__); /* * Wakeup all queues now that sta is there so we have as much time to * possibly prepare the queue in the driver to be ready for the 1st * packet; lkpi_80211_txq_tx_one() still has a workaround as there * is no guarantee or way to check. * XXX-BZ and by now we know that this does not work on all drivers * for all queues. */ lkpi_wake_tx_queues(hw, LSTA_TO_STA(lsta), false, false); /* Start mgd_prepare_tx. */ memset(&prep_tx_info, 0, sizeof(prep_tx_info)); prep_tx_info.duration = PREP_TX_INFO_DURATION; lkpi_80211_mo_mgd_prepare_tx(hw, vif, &prep_tx_info); lsta->in_mgd = true; /* * What is going to happen next: * - .. we should end up in "auth_to_assoc" * - event_callback * - update sta_state (NONE to AUTH) * - mgd_complete_tx * (ideally we'd do that on a callback for something else ...) */ out: LKPI_80211_LHW_UNLOCK(lhw); IEEE80211_LOCK(vap->iv_ic); if (ni != NULL) ieee80211_free_node(ni); return (error); } static int lkpi_sta_auth_to_scan(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct ieee80211_node *ni; struct lkpi_sta *lsta; struct ieee80211_sta *sta; struct ieee80211_prep_tx_info prep_tx_info; int error; lhw = vap->iv_ic->ic_softc; hw = LHW_TO_HW(lhw); lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); /* Keep ni around. */ ni = ieee80211_ref_node(vap->iv_bss); lsta = ni->ni_drv_data; sta = LSTA_TO_STA(lsta); lkpi_lsta_dump(lsta, ni, __func__, __LINE__); IEEE80211_UNLOCK(vap->iv_ic); LKPI_80211_LHW_LOCK(lhw); /* flush, drop. */ lkpi_80211_mo_flush(hw, vif, nitems(sta->txq), true); /* Wake tx queues to get packet(s) out. */ lkpi_wake_tx_queues(hw, sta, true, true); /* flush, no drop */ lkpi_80211_mo_flush(hw, vif, nitems(sta->txq), false); /* End mgd_complete_tx. */ if (lsta->in_mgd) { memset(&prep_tx_info, 0, sizeof(prep_tx_info)); prep_tx_info.success = false; lkpi_80211_mo_mgd_complete_tx(hw, vif, &prep_tx_info); lsta->in_mgd = false; } /* sync_rx_queues */ lkpi_80211_mo_sync_rx_queues(hw); /* sta_pre_rcu_remove */ lkpi_80211_mo_sta_pre_rcu_remove(hw, vif, sta); /* Take the station down. */ /* Adjust sta and change state (from NONE) to NOTEXIST. */ KASSERT(lsta != NULL, ("%s: ni %p lsta is NULL\n", __func__, ni)); KASSERT(lsta->state == IEEE80211_STA_NONE, ("%s: lsta %p state not " "NONE: %#x, nstate %d arg %d\n", __func__, lsta, lsta->state, nstate, arg)); error = lkpi_80211_mo_sta_state(hw, vif, lsta, IEEE80211_STA_NOTEXIST); if (error != 0) { IMPROVE("do we need to undo the chan ctx?"); ic_printf(vap->iv_ic, "%s:%d: mo_sta_state(NOTEXIST) " "failed: %d\n", __func__, __LINE__, error); goto out; } #if 0 lsta->added_to_drv = false; /* mo manages. */ #endif lkpi_lsta_dump(lsta, ni, __func__, __LINE__); lkpi_lsta_remove(lsta, lvif); /* conf_tx */ /* Take the chan ctx down. */ if (vif->chanctx_conf != NULL) { struct lkpi_chanctx *lchanctx; struct ieee80211_chanctx_conf *conf; conf = vif->chanctx_conf; /* Remove vif context. 
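*/

/*
 * Illustrative sketch (not part of this change): every mo_sta_state()
 * call above is bracketed by a KASSERT that the station is in the
 * expected previous state; the driver callback then advances it one
 * step.  Tiny version of that "assert old, move to new" helper; states
 * and names are local stand-ins.
 */
#if 0
#include <assert.h>
#include <stdio.h>

enum sta_state { NOTEXIST, NONE, AUTH, ASSOC, AUTHORIZED };

static int
sta_advance(enum sta_state *cur, enum sta_state expect, enum sta_state next)
{
	assert(*cur == expect);		/* like the KASSERTs above */
	*cur = next;			/* like mo_sta_state() succeeding */
	return (0);
}

int
main(void)
{
	enum sta_state s = NOTEXIST;

	sta_advance(&s, NOTEXIST, NONE);
	sta_advance(&s, NONE, AUTH);
	printf("state %d\n", s);	/* AUTH */
	return (0);
}
#endif

/*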
*/ lkpi_80211_mo_unassign_vif_chanctx(hw, vif, &vif->bss_conf, &vif->chanctx_conf); /* NB: vif->chanctx_conf is NULL now. */ /* Remove chan ctx. */ lkpi_80211_mo_remove_chanctx(hw, conf); lchanctx = CHANCTX_CONF_TO_LCHANCTX(conf); free(lchanctx, M_LKPI80211); } out: LKPI_80211_LHW_UNLOCK(lhw); IEEE80211_LOCK(vap->iv_ic); if (ni != NULL) ieee80211_free_node(ni); return (error); } static int lkpi_sta_auth_to_init(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { int error; error = lkpi_sta_auth_to_scan(vap, nstate, arg); if (error == 0) error = lkpi_sta_scan_to_init(vap, nstate, arg); return (error); } static int lkpi_sta_auth_to_assoc(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct ieee80211_node *ni; struct lkpi_sta *lsta; struct ieee80211_prep_tx_info prep_tx_info; int error; lhw = vap->iv_ic->ic_softc; hw = LHW_TO_HW(lhw); lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); IEEE80211_UNLOCK(vap->iv_ic); LKPI_80211_LHW_LOCK(lhw); ni = NULL; /* Finish auth. */ IMPROVE("event callback"); /* Update sta_state (NONE to AUTH). */ ni = ieee80211_ref_node(vap->iv_bss); lsta = ni->ni_drv_data; KASSERT(lsta != NULL, ("%s: ni %p lsta is NULL\n", __func__, ni)); KASSERT(lsta->state == IEEE80211_STA_NONE, ("%s: lsta %p state not " "NONE: %#x\n", __func__, lsta, lsta->state)); error = lkpi_80211_mo_sta_state(hw, vif, lsta, IEEE80211_STA_AUTH); if (error != 0) { ic_printf(vap->iv_ic, "%s:%d: mo_sta_state(AUTH) " "failed: %d\n", __func__, __LINE__, error); goto out; } /* End mgd_complete_tx. */ if (lsta->in_mgd) { memset(&prep_tx_info, 0, sizeof(prep_tx_info)); prep_tx_info.success = true; lkpi_80211_mo_mgd_complete_tx(hw, vif, &prep_tx_info); lsta->in_mgd = false; } /* Now start assoc. */ /* Start mgd_prepare_tx. */ if (!lsta->in_mgd) { memset(&prep_tx_info, 0, sizeof(prep_tx_info)); prep_tx_info.duration = PREP_TX_INFO_DURATION; lkpi_80211_mo_mgd_prepare_tx(hw, vif, &prep_tx_info); lsta->in_mgd = true; } /* Wake tx queue to get packet out. */ lkpi_wake_tx_queues(hw, LSTA_TO_STA(lsta), true, true); /* * .. we end up in "assoc_to_run" * - update sta_state (AUTH to ASSOC) * - conf_tx [all] * - bss_info_changed (assoc, aid, ssid, ..) * - change_chanctx (if needed) * - event_callback * - mgd_complete_tx */ out: LKPI_80211_LHW_UNLOCK(lhw); IEEE80211_LOCK(vap->iv_ic); if (ni != NULL) ieee80211_free_node(ni); return (error); } /* auth_to_auth, assoc_to_assoc. */ static int lkpi_sta_a_to_a(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct ieee80211_node *ni; struct lkpi_sta *lsta; struct ieee80211_prep_tx_info prep_tx_info; lhw = vap->iv_ic->ic_softc; hw = LHW_TO_HW(lhw); lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); ni = ieee80211_ref_node(vap->iv_bss); IEEE80211_UNLOCK(vap->iv_ic); LKPI_80211_LHW_LOCK(lhw); lsta = ni->ni_drv_data; IMPROVE("event callback?"); /* End mgd_complete_tx. */ if (lsta->in_mgd) { memset(&prep_tx_info, 0, sizeof(prep_tx_info)); prep_tx_info.success = false; lkpi_80211_mo_mgd_complete_tx(hw, vif, &prep_tx_info); lsta->in_mgd = false; } /* Now start assoc. */ /* Start mgd_prepare_tx. 
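*/

/*
 * Illustrative sketch (not part of this change): the mgd prepare_tx /
 * complete_tx calls above are kept balanced through the lsta->in_mgd
 * flag: prepare only when no bracket is open, complete only when one is.
 * Reduced to the guard itself; names are stand-ins.
 */
#if 0
#include <stdbool.h>
#include <stdio.h>

static bool in_mgd = false;

static void
mgd_prepare(void)
{
	if (!in_mgd) {			/* do not open a second bracket */
		printf("prepare_tx\n");
		in_mgd = true;
	}
}

static void
mgd_complete(bool success)
{
	if (in_mgd) {			/* only close an open bracket */
		printf("complete_tx success=%d\n", success);
		in_mgd = false;
	}
}

int
main(void)
{
	mgd_prepare();
	mgd_prepare();			/* no-op: already open */
	mgd_complete(true);
	return (0);
}
#endif

/*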
*/ if (!lsta->in_mgd) { memset(&prep_tx_info, 0, sizeof(prep_tx_info)); prep_tx_info.duration = PREP_TX_INFO_DURATION; lkpi_80211_mo_mgd_prepare_tx(hw, vif, &prep_tx_info); lsta->in_mgd = true; } LKPI_80211_LHW_UNLOCK(lhw); IEEE80211_LOCK(vap->iv_ic); if (ni != NULL) ieee80211_free_node(ni); return (0); } static int _lkpi_sta_assoc_to_down(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct ieee80211_node *ni; struct lkpi_sta *lsta; struct ieee80211_sta *sta; struct ieee80211_prep_tx_info prep_tx_info; enum ieee80211_bss_changed bss_changed; int error; lhw = vap->iv_ic->ic_softc; hw = LHW_TO_HW(lhw); lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); /* Keep ni around. */ ni = ieee80211_ref_node(vap->iv_bss); lsta = ni->ni_drv_data; sta = LSTA_TO_STA(lsta); lkpi_lsta_dump(lsta, ni, __func__, __LINE__); IEEE80211_UNLOCK(vap->iv_ic); LKPI_80211_LHW_LOCK(lhw); /* flush, drop. */ lkpi_80211_mo_flush(hw, vif, nitems(sta->txq), true); IMPROVE("What are the proper conditions for DEAUTH_NEED_MGD_TX_PREP?"); if (ieee80211_hw_check(hw, DEAUTH_NEED_MGD_TX_PREP) && !lsta->in_mgd) { memset(&prep_tx_info, 0, sizeof(prep_tx_info)); prep_tx_info.duration = PREP_TX_INFO_DURATION; lkpi_80211_mo_mgd_prepare_tx(hw, vif, &prep_tx_info); lsta->in_mgd = true; } LKPI_80211_LHW_UNLOCK(lhw); IEEE80211_LOCK(vap->iv_ic); /* Call iv_newstate first so we get potential DISASSOC packet out. */ error = lvif->iv_newstate(vap, nstate, arg); if (error != 0) { ic_printf(vap->iv_ic, "%s:%d: iv_newstate(%p, %d, %d) " "failed: %d\n", __func__, __LINE__, vap, nstate, arg, error); goto outni; } IEEE80211_UNLOCK(vap->iv_ic); LKPI_80211_LHW_LOCK(lhw); lkpi_lsta_dump(lsta, ni, __func__, __LINE__); /* Wake tx queues to get packet(s) out. */ lkpi_wake_tx_queues(hw, sta, true, true); /* flush, no drop */ lkpi_80211_mo_flush(hw, vif, nitems(sta->txq), false); /* End mgd_complete_tx. */ if (lsta->in_mgd) { memset(&prep_tx_info, 0, sizeof(prep_tx_info)); prep_tx_info.success = false; lkpi_80211_mo_mgd_complete_tx(hw, vif, &prep_tx_info); lsta->in_mgd = false; } /* sync_rx_queues */ lkpi_80211_mo_sync_rx_queues(hw); /* sta_pre_rcu_remove */ lkpi_80211_mo_sta_pre_rcu_remove(hw, vif, sta); /* Take the station down. */ /* Update sta and change state (from AUTH) to NONE. */ KASSERT(lsta != NULL, ("%s: ni %p lsta is NULL\n", __func__, ni)); KASSERT(lsta->state == IEEE80211_STA_AUTH, ("%s: lsta %p state not " "AUTH: %#x\n", __func__, lsta, lsta->state)); error = lkpi_80211_mo_sta_state(hw, vif, lsta, IEEE80211_STA_NONE); if (error != 0) { ic_printf(vap->iv_ic, "%s:%d: mo_sta_state(NONE) " "failed: %d\n", __func__, __LINE__, error); goto out; } lkpi_lsta_dump(lsta, ni, __func__, __LINE__); /* Update bss info (bss_info_changed) (assoc, aid, ..). */ /* * We need to do this now, before sta changes to IEEE80211_STA_NOTEXIST * as otherwise drivers (iwlwifi at least) will silently not remove * the sta from the firmware and when we will add a new one trigger * a fw assert. */ lkpi_disassoc(sta, vif, lhw); /* Adjust sta and change state (from NONE) to NOTEXIST. 
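*/

/*
 * Illustrative sketch (not part of this change): _lkpi_sta_assoc_to_down
 * above is careful about ordering.  The net80211 parent iv_newstate()
 * runs first so the DISASSOC/DEAUTH frame can still go out over a live
 * station; only then are the queues drained and the driver state torn
 * down.  The order, with stubs:
 */
#if 0
#include <stdio.h>

static void
send_mgmt_frame(void)
{
	printf("1: parent iv_newstate -> DISASSOC goes out\n");
}

static void
drain_queues(void)
{
	printf("2: wake + flush tx queues\n");
}

static void
driver_teardown(void)
{
	printf("3: sta state down, remove from firmware\n");
}

int
main(void)
{
	/* Reversing 1 and 3 would try to transmit over a dead station. */
	send_mgmt_frame();
	drain_queues();
	driver_teardown();
	return (0);
}
#endif

/*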
*/ KASSERT(lsta != NULL, ("%s: ni %p lsta is NULL\n", __func__, ni)); KASSERT(lsta->state == IEEE80211_STA_NONE, ("%s: lsta %p state not " "NONE: %#x, nstate %d arg %d\n", __func__, lsta, lsta->state, nstate, arg)); error = lkpi_80211_mo_sta_state(hw, vif, lsta, IEEE80211_STA_NOTEXIST); if (error != 0) { IMPROVE("do we need to undo the chan ctx?"); ic_printf(vap->iv_ic, "%s:%d: mo_sta_state(NOTEXIST) " "failed: %d\n", __func__, __LINE__, error); goto out; } lkpi_lsta_dump(lsta, ni, __func__, __LINE__); /* sta no longer safe to use. */ IMPROVE("Any bss_info changes to announce?"); bss_changed = 0; vif->bss_conf.qos = 0; bss_changed |= BSS_CHANGED_QOS; vif->cfg.ssid_len = 0; memset(vif->cfg.ssid, '\0', sizeof(vif->cfg.ssid)); bss_changed |= BSS_CHANGED_BSSID; lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, bss_changed); lkpi_lsta_remove(lsta, lvif); /* conf_tx */ /* Take the chan ctx down. */ if (vif->chanctx_conf != NULL) { struct lkpi_chanctx *lchanctx; struct ieee80211_chanctx_conf *conf; conf = vif->chanctx_conf; /* Remove vif context. */ lkpi_80211_mo_unassign_vif_chanctx(hw, vif, &vif->bss_conf, &vif->chanctx_conf); /* NB: vif->chanctx_conf is NULL now. */ /* Remove chan ctx. */ lkpi_80211_mo_remove_chanctx(hw, conf); lchanctx = CHANCTX_CONF_TO_LCHANCTX(conf); free(lchanctx, M_LKPI80211); } error = EALREADY; out: LKPI_80211_LHW_UNLOCK(lhw); IEEE80211_LOCK(vap->iv_ic); outni: if (ni != NULL) ieee80211_free_node(ni); return (error); } static int lkpi_sta_assoc_to_auth(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { int error; error = _lkpi_sta_assoc_to_down(vap, nstate, arg); if (error != 0 && error != EALREADY) return (error); /* At this point iv_bss has long been replaced by a new node! */ error |= lkpi_sta_scan_to_auth(vap, nstate, 0); return (error); } static int lkpi_sta_assoc_to_scan(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { int error; error = _lkpi_sta_assoc_to_down(vap, nstate, arg); return (error); } static int lkpi_sta_assoc_to_init(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { int error; error = _lkpi_sta_assoc_to_down(vap, nstate, arg); return (error); } static int lkpi_sta_assoc_to_run(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct ieee80211_node *ni; struct lkpi_sta *lsta; struct ieee80211_sta *sta; struct ieee80211_prep_tx_info prep_tx_info; enum ieee80211_bss_changed bss_changed; int error; lhw = vap->iv_ic->ic_softc; hw = LHW_TO_HW(lhw); lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); IEEE80211_UNLOCK(vap->iv_ic); LKPI_80211_LHW_LOCK(lhw); ni = NULL; IMPROVE("ponder some of this moved to ic_newassoc, scan_assoc_success, " "and to a lesser extent ieee80211_notify_node_join"); /* Finish assoc. */ /* Update sta_state (AUTH to ASSOC) and set aid. 
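*/

/*
 * Illustrative sketch (not part of this change): several transitions
 * above are compositions.  assoc_to_auth first runs the teardown half
 * (_lkpi_sta_assoc_to_down) and, unless that failed with a real error,
 * continues with the buildup half (scan_to_auth).  EALREADY is the
 * sentinel meaning "done, and iv_newstate was already called"; it is
 * tolerated between the halves.  Values and names are stand-ins.
 */
#if 0
#include <stdio.h>

#define	EALREADY	37

static int down_half(void) { return (EALREADY); }
static int up_half(void)   { return (0); }

static int
composite(void)
{
	int error;

	error = down_half();
	if (error != 0 && error != EALREADY)
		return (error);		/* real failure: stop here */
	return (up_half());		/* otherwise continue rebuilding */
}

int
main(void)
{
	printf("composite -> %d\n", composite());
	return (0);
}
#endif

/*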
*/ ni = ieee80211_ref_node(vap->iv_bss); lsta = ni->ni_drv_data; KASSERT(lsta != NULL, ("%s: ni %p lsta is NULL\n", __func__, ni)); KASSERT(lsta->state == IEEE80211_STA_AUTH, ("%s: lsta %p state not " "AUTH: %#x\n", __func__, lsta, lsta->state)); sta = LSTA_TO_STA(lsta); sta->aid = IEEE80211_NODE_AID(ni); #ifdef LKPI_80211_WME if (vap->iv_flags & IEEE80211_F_WME) sta->wme = true; #endif error = lkpi_80211_mo_sta_state(hw, vif, lsta, IEEE80211_STA_ASSOC); if (error != 0) { ic_printf(vap->iv_ic, "%s:%d: mo_sta_state(ASSOC) " "failed: %d\n", __func__, __LINE__, error); goto out; } IMPROVE("wme / conf_tx [all]"); /* Update bss info (bss_info_changed) (assoc, aid, ..). */ bss_changed = 0; #ifdef LKPI_80211_WME bss_changed |= lkpi_wme_update(lhw, vap, true); #endif if (!vif->cfg.assoc || vif->cfg.aid != IEEE80211_NODE_AID(ni)) { vif->cfg.assoc = true; vif->cfg.aid = IEEE80211_NODE_AID(ni); bss_changed |= BSS_CHANGED_ASSOC; } /* We set SSID but this is not BSSID! */ vif->cfg.ssid_len = ni->ni_esslen; memcpy(vif->cfg.ssid, ni->ni_essid, ni->ni_esslen); if ((vap->iv_flags & IEEE80211_F_SHPREAMBLE) != vif->bss_conf.use_short_preamble) { vif->bss_conf.use_short_preamble ^= 1; /* bss_changed |= BSS_CHANGED_??? */ } if ((vap->iv_flags & IEEE80211_F_SHSLOT) != vif->bss_conf.use_short_slot) { vif->bss_conf.use_short_slot ^= 1; /* bss_changed |= BSS_CHANGED_??? */ } if ((ni->ni_flags & IEEE80211_NODE_QOS) != vif->bss_conf.qos) { vif->bss_conf.qos ^= 1; bss_changed |= BSS_CHANGED_QOS; } bss_changed |= lkpi_update_dtim_tsf(vif, ni, vap, __func__, __LINE__); lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, bss_changed); /* - change_chanctx (if needed) * - event_callback */ /* End mgd_complete_tx. */ if (lsta->in_mgd) { memset(&prep_tx_info, 0, sizeof(prep_tx_info)); prep_tx_info.success = true; lkpi_80211_mo_mgd_complete_tx(hw, vif, &prep_tx_info); lsta->in_mgd = false; } lkpi_hw_conf_idle(hw, false); /* * And then: * - (more packets)? * - set_key * - set_default_unicast_key * - set_key (?) * - ipv6_addr_change (?) */ /* Prepare_multicast && configure_filter. */ lhw->update_mc = true; lkpi_update_mcast_filter(vap->iv_ic, true); if (!ieee80211_node_is_authorized(ni)) { IMPROVE("net80211 does not consider node authorized"); } /* Update sta_state (ASSOC to AUTHORIZED). */ KASSERT(lsta != NULL, ("%s: ni %p lsta is NULL\n", __func__, ni)); KASSERT(lsta->state == IEEE80211_STA_ASSOC, ("%s: lsta %p state not " "ASSOC: %#x\n", __func__, lsta, lsta->state)); error = lkpi_80211_mo_sta_state(hw, vif, lsta, IEEE80211_STA_AUTHORIZED); if (error != 0) { IMPROVE("undo some changes?"); ic_printf(vap->iv_ic, "%s:%d: mo_sta_state(AUTHORIZED) " "failed: %d\n", __func__, __LINE__, error); goto out; } /* - drv_config (?) * - bss_info_changed * - set_rekey_data (?) * * And now we should be passing packets. 
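*/

/*
 * Illustrative sketch (not part of this change): assoc_to_run above
 * reconciles net80211 flag bits with mac80211 bools by comparing and
 * toggling (e.g. bss_conf.use_short_slot ^= 1) rather than assigning,
 * accumulating a "changed" mask as it goes.  Same shape here; flag and
 * mask values are made up.
 */
#if 0
#include <stdbool.h>
#include <stdio.h>

#define	F_SHSLOT	0x1		/* net80211-style flag bit */
#define	CH_ERP_SLOT	0x1		/* mac80211-style changed bit */

static unsigned
sync_slot(unsigned iv_flags, bool *use_short_slot)
{
	unsigned changed = 0;

	if (((iv_flags & F_SHSLOT) != 0) != *use_short_slot) {
		*use_short_slot = !*use_short_slot;	/* toggle, not set */
		changed |= CH_ERP_SLOT;
	}
	return (changed);
}

int
main(void)
{
	bool short_slot = false;

	printf("changed %#x short_slot %d\n",
	    sync_slot(F_SHSLOT, &short_slot), short_slot);
	return (0);
}
#endif

/*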
*/ IMPROVE("Need that bssid setting, and the keys"); bss_changed = 0; bss_changed |= lkpi_update_dtim_tsf(vif, ni, vap, __func__, __LINE__); lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, bss_changed); out: LKPI_80211_LHW_UNLOCK(lhw); IEEE80211_LOCK(vap->iv_ic); if (ni != NULL) ieee80211_free_node(ni); return (error); } static int lkpi_sta_auth_to_run(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { int error; error = lkpi_sta_auth_to_assoc(vap, nstate, arg); if (error == 0) error = lkpi_sta_assoc_to_run(vap, nstate, arg); return (error); } static int lkpi_sta_run_to_assoc(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct ieee80211_node *ni; struct lkpi_sta *lsta; struct ieee80211_sta *sta; struct ieee80211_prep_tx_info prep_tx_info; #if 0 enum ieee80211_bss_changed bss_changed; #endif int error; lhw = vap->iv_ic->ic_softc; hw = LHW_TO_HW(lhw); lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); /* Keep ni around. */ ni = ieee80211_ref_node(vap->iv_bss); lsta = ni->ni_drv_data; sta = LSTA_TO_STA(lsta); lkpi_lsta_dump(lsta, ni, __func__, __LINE__); IEEE80211_UNLOCK(vap->iv_ic); LKPI_80211_LHW_LOCK(lhw); /* flush, drop. */ lkpi_80211_mo_flush(hw, vif, nitems(sta->txq), true); IMPROVE("What are the proper conditions for DEAUTH_NEED_MGD_TX_PREP?"); if (ieee80211_hw_check(hw, DEAUTH_NEED_MGD_TX_PREP) && !lsta->in_mgd) { memset(&prep_tx_info, 0, sizeof(prep_tx_info)); prep_tx_info.duration = PREP_TX_INFO_DURATION; lkpi_80211_mo_mgd_prepare_tx(hw, vif, &prep_tx_info); lsta->in_mgd = true; } LKPI_80211_LHW_UNLOCK(lhw); IEEE80211_LOCK(vap->iv_ic); /* Call iv_newstate first so we get potential DISASSOC packet out. */ error = lvif->iv_newstate(vap, nstate, arg); if (error != 0) { ic_printf(vap->iv_ic, "%s:%d: iv_newstate(%p, %d, %d) " "failed: %d\n", __func__, __LINE__, vap, nstate, arg, error); goto outni; } IEEE80211_UNLOCK(vap->iv_ic); LKPI_80211_LHW_LOCK(lhw); lkpi_lsta_dump(lsta, ni, __func__, __LINE__); /* Wake tx queues to get packet(s) out. */ lkpi_wake_tx_queues(hw, sta, true, true); /* flush, no drop */ lkpi_80211_mo_flush(hw, vif, nitems(sta->txq), false); /* End mgd_complete_tx. */ if (lsta->in_mgd) { memset(&prep_tx_info, 0, sizeof(prep_tx_info)); prep_tx_info.success = false; lkpi_80211_mo_mgd_complete_tx(hw, vif, &prep_tx_info); lsta->in_mgd = false; } #if 0 /* sync_rx_queues */ lkpi_80211_mo_sync_rx_queues(hw); /* sta_pre_rcu_remove */ lkpi_80211_mo_sta_pre_rcu_remove(hw, vif, sta); #endif /* Take the station down. */ /* Adjust sta and change state (from AUTHORIZED) to ASSOC. */ KASSERT(lsta != NULL, ("%s: ni %p lsta is NULL\n", __func__, ni)); KASSERT(lsta->state == IEEE80211_STA_AUTHORIZED, ("%s: lsta %p state not " "AUTHORIZED: %#x\n", __func__, lsta, lsta->state)); error = lkpi_80211_mo_sta_state(hw, vif, lsta, IEEE80211_STA_ASSOC); if (error != 0) { ic_printf(vap->iv_ic, "%s:%d: mo_sta_state(ASSOC) " "failed: %d\n", __func__, __LINE__, error); goto out; } lkpi_lsta_dump(lsta, ni, __func__, __LINE__); /* Update sta_state (ASSOC to AUTH). 
*/ KASSERT(lsta != NULL, ("%s: ni %p lsta is NULL\n", __func__, ni)); KASSERT(lsta->state == IEEE80211_STA_ASSOC, ("%s: lsta %p state not " "ASSOC: %#x\n", __func__, lsta, lsta->state)); error = lkpi_80211_mo_sta_state(hw, vif, lsta, IEEE80211_STA_AUTH); if (error != 0) { ic_printf(vap->iv_ic, "%s:%d: mo_sta_state(AUTH) " "failed: %d\n", __func__, __LINE__, error); goto out; } lkpi_lsta_dump(lsta, ni, __func__, __LINE__); #if 0 /* Update bss info (bss_info_changed) (assoc, aid, ..). */ lkpi_disassoc(sta, vif, lhw); #endif error = EALREADY; out: LKPI_80211_LHW_UNLOCK(lhw); IEEE80211_LOCK(vap->iv_ic); outni: if (ni != NULL) ieee80211_free_node(ni); return (error); } static int lkpi_sta_run_to_init(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct ieee80211_node *ni; struct lkpi_sta *lsta; struct ieee80211_sta *sta; struct ieee80211_prep_tx_info prep_tx_info; enum ieee80211_bss_changed bss_changed; int error; lhw = vap->iv_ic->ic_softc; hw = LHW_TO_HW(lhw); lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); /* Keep ni around. */ ni = ieee80211_ref_node(vap->iv_bss); lsta = ni->ni_drv_data; sta = LSTA_TO_STA(lsta); lkpi_lsta_dump(lsta, ni, __func__, __LINE__); IEEE80211_UNLOCK(vap->iv_ic); LKPI_80211_LHW_LOCK(lhw); /* flush, drop. */ lkpi_80211_mo_flush(hw, vif, nitems(sta->txq), true); IMPROVE("What are the proper conditions for DEAUTH_NEED_MGD_TX_PREP?"); if (ieee80211_hw_check(hw, DEAUTH_NEED_MGD_TX_PREP) && !lsta->in_mgd) { memset(&prep_tx_info, 0, sizeof(prep_tx_info)); prep_tx_info.duration = PREP_TX_INFO_DURATION; lkpi_80211_mo_mgd_prepare_tx(hw, vif, &prep_tx_info); lsta->in_mgd = true; } LKPI_80211_LHW_UNLOCK(lhw); IEEE80211_LOCK(vap->iv_ic); /* Call iv_newstate first so we get potential DISASSOC packet out. */ error = lvif->iv_newstate(vap, nstate, arg); if (error != 0) { ic_printf(vap->iv_ic, "%s:%d: iv_newstate(%p, %d, %d) " "failed: %d\n", __func__, __LINE__, vap, nstate, arg, error); goto outni; } IEEE80211_UNLOCK(vap->iv_ic); LKPI_80211_LHW_LOCK(lhw); lkpi_lsta_dump(lsta, ni, __func__, __LINE__); /* Wake tx queues to get packet(s) out. */ lkpi_wake_tx_queues(hw, sta, true, true); /* flush, no drop */ lkpi_80211_mo_flush(hw, vif, nitems(sta->txq), false); /* End mgd_complete_tx. */ if (lsta->in_mgd) { memset(&prep_tx_info, 0, sizeof(prep_tx_info)); prep_tx_info.success = false; lkpi_80211_mo_mgd_complete_tx(hw, vif, &prep_tx_info); lsta->in_mgd = false; } /* sync_rx_queues */ lkpi_80211_mo_sync_rx_queues(hw); /* sta_pre_rcu_remove */ lkpi_80211_mo_sta_pre_rcu_remove(hw, vif, sta); /* Take the station down. */ /* Adjust sta and change state (from AUTHORIZED) to ASSOC. */ KASSERT(lsta != NULL, ("%s: ni %p lsta is NULL\n", __func__, ni)); KASSERT(lsta->state == IEEE80211_STA_AUTHORIZED, ("%s: lsta %p state not " "AUTHORIZED: %#x\n", __func__, lsta, lsta->state)); error = lkpi_80211_mo_sta_state(hw, vif, lsta, IEEE80211_STA_ASSOC); if (error != 0) { ic_printf(vap->iv_ic, "%s:%d: mo_sta_state(ASSOC) " "failed: %d\n", __func__, __LINE__, error); goto out; } lkpi_lsta_dump(lsta, ni, __func__, __LINE__); /* Update sta_state (ASSOC to AUTH). 
*/ KASSERT(lsta != NULL, ("%s: ni %p lsta is NULL\n", __func__, ni)); KASSERT(lsta->state == IEEE80211_STA_ASSOC, ("%s: lsta %p state not " "ASSOC: %#x\n", __func__, lsta, lsta->state)); error = lkpi_80211_mo_sta_state(hw, vif, lsta, IEEE80211_STA_AUTH); if (error != 0) { ic_printf(vap->iv_ic, "%s:%d: mo_sta_state(AUTH) " "failed: %d\n", __func__, __LINE__, error); goto out; } lkpi_lsta_dump(lsta, ni, __func__, __LINE__); /* Update sta and change state (from AUTH) to NONE. */ KASSERT(lsta != NULL, ("%s: ni %p lsta is NULL\n", __func__, ni)); KASSERT(lsta->state == IEEE80211_STA_AUTH, ("%s: lsta %p state not " "AUTH: %#x\n", __func__, lsta, lsta->state)); error = lkpi_80211_mo_sta_state(hw, vif, lsta, IEEE80211_STA_NONE); if (error != 0) { ic_printf(vap->iv_ic, "%s:%d: mo_sta_state(NONE) " "failed: %d\n", __func__, __LINE__, error); goto out; } lkpi_lsta_dump(lsta, ni, __func__, __LINE__); /* Update bss info (bss_info_changed) (assoc, aid, ..). */ /* * One would expect this to happen when going off AUTHORIZED. * See comment there; removes the sta from fw. */ lkpi_disassoc(sta, vif, lhw); /* Adjust sta and change state (from NONE) to NOTEXIST. */ KASSERT(lsta != NULL, ("%s: ni %p lsta is NULL\n", __func__, ni)); KASSERT(lsta->state == IEEE80211_STA_NONE, ("%s: lsta %p state not " "NONE: %#x, nstate %d arg %d\n", __func__, lsta, lsta->state, nstate, arg)); error = lkpi_80211_mo_sta_state(hw, vif, lsta, IEEE80211_STA_NOTEXIST); if (error != 0) { IMPROVE("do we need to undo the chan ctx?"); ic_printf(vap->iv_ic, "%s:%d: mo_sta_state(NOTEXIST) " "failed: %d\n", __func__, __LINE__, error); goto out; } lkpi_lsta_dump(lsta, ni, __func__, __LINE__); /* sta no longer safe to use. */ IMPROVE("Any bss_info changes to announce?"); bss_changed = 0; vif->bss_conf.qos = 0; bss_changed |= BSS_CHANGED_QOS; vif->cfg.ssid_len = 0; memset(vif->cfg.ssid, '\0', sizeof(vif->cfg.ssid)); bss_changed |= BSS_CHANGED_BSSID; lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, bss_changed); lkpi_lsta_remove(lsta, lvif); /* conf_tx */ /* Take the chan ctx down. */ if (vif->chanctx_conf != NULL) { struct lkpi_chanctx *lchanctx; struct ieee80211_chanctx_conf *conf; conf = vif->chanctx_conf; /* Remove vif context. */ lkpi_80211_mo_unassign_vif_chanctx(hw, vif, &vif->bss_conf, &vif->chanctx_conf); /* NB: vif->chanctx_conf is NULL now. */ /* Remove chan ctx. */ lkpi_80211_mo_remove_chanctx(hw, conf); lchanctx = CHANCTX_CONF_TO_LCHANCTX(conf); free(lchanctx, M_LKPI80211); } error = EALREADY; out: LKPI_80211_LHW_UNLOCK(lhw); IEEE80211_LOCK(vap->iv_ic); outni: if (ni != NULL) ieee80211_free_node(ni); return (error); } static int lkpi_sta_run_to_scan(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { return (lkpi_sta_run_to_init(vap, nstate, arg)); } static int lkpi_sta_run_to_auth(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { int error; error = lkpi_sta_run_to_init(vap, nstate, arg); if (error != 0 && error != EALREADY) return (error); /* At this point iv_bss has long been replaced by a new node! */ error |= lkpi_sta_scan_to_auth(vap, nstate, 0); return (error); } /* -------------------------------------------------------------------------- */ /* * This matches the documented state changes in net80211::sta_newstate(). * XXX (1) without CSA and SLEEP yet, * XXX (2) not all unhandled cases * are "invalid" (so there is room for failure here). 
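*/

/*
 * Illustrative sketch (not part of this change): the shape of the
 * (ostate, nstate) -> handler dispatch used by the sta_state_fsm[] table
 * that follows: a linear scan over rows, a NULL handler terminating the
 * table, and "no row" meaning an unsupported transition.  States and
 * handlers below are stand-ins.
 */
#if 0
#include <stdio.h>

enum state { S_INIT, S_SCAN, S_AUTH };

static int nada(int arg) { (void)arg; return (0); }

static const struct row {
	enum state ostate, nstate;
	int (*handler)(int);
} fsm[] = {
	{ S_INIT, S_SCAN, nada },
	{ S_SCAN, S_AUTH, nada },
	{ S_INIT, S_INIT, NULL },	/* terminator */
};

static int
dispatch(enum state o, enum state n, int arg)
{
	const struct row *r;

	for (r = fsm; r->handler != NULL; r++)
		if (r->ostate == o && r->nstate == n)
			return (r->handler(arg));
	return (-1);			/* unsupported transition */
}

int
main(void)
{
	printf("INIT->SCAN %d, SCAN->INIT %d\n",
	    dispatch(S_INIT, S_SCAN, 0), dispatch(S_SCAN, S_INIT, 0));
	return (0);
}
#endif

/*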
*/ struct fsm_state { /* INIT, SCAN, AUTH, ASSOC, CAC, RUN, CSA, SLEEP */ enum ieee80211_state ostate; enum ieee80211_state nstate; int (*handler)(struct ieee80211vap *, enum ieee80211_state, int); } sta_state_fsm[] = { { IEEE80211_S_INIT, IEEE80211_S_INIT, lkpi_sta_state_do_nada }, { IEEE80211_S_SCAN, IEEE80211_S_INIT, lkpi_sta_state_do_nada }, /* scan_to_init */ { IEEE80211_S_AUTH, IEEE80211_S_INIT, lkpi_sta_auth_to_init }, /* not explicitly in sta_newstate() */ { IEEE80211_S_ASSOC, IEEE80211_S_INIT, lkpi_sta_assoc_to_init }, /* Send DEAUTH. */ { IEEE80211_S_RUN, IEEE80211_S_INIT, lkpi_sta_run_to_init }, /* Send DISASSOC. */ { IEEE80211_S_INIT, IEEE80211_S_SCAN, lkpi_sta_state_do_nada }, { IEEE80211_S_SCAN, IEEE80211_S_SCAN, lkpi_sta_state_do_nada }, { IEEE80211_S_AUTH, IEEE80211_S_SCAN, lkpi_sta_auth_to_scan }, { IEEE80211_S_ASSOC, IEEE80211_S_SCAN, lkpi_sta_assoc_to_scan }, { IEEE80211_S_RUN, IEEE80211_S_SCAN, lkpi_sta_run_to_scan }, /* Beacon miss. */ { IEEE80211_S_INIT, IEEE80211_S_AUTH, lkpi_sta_scan_to_auth }, /* Send AUTH. */ { IEEE80211_S_SCAN, IEEE80211_S_AUTH, lkpi_sta_scan_to_auth }, /* Send AUTH. */ { IEEE80211_S_AUTH, IEEE80211_S_AUTH, lkpi_sta_a_to_a }, /* Send ?AUTH. */ { IEEE80211_S_ASSOC, IEEE80211_S_AUTH, lkpi_sta_assoc_to_auth }, /* Send ?AUTH. */ { IEEE80211_S_RUN, IEEE80211_S_AUTH, lkpi_sta_run_to_auth }, /* Send ?AUTH. */ { IEEE80211_S_AUTH, IEEE80211_S_ASSOC, lkpi_sta_auth_to_assoc }, /* Send ASSOCREQ. */ { IEEE80211_S_ASSOC, IEEE80211_S_ASSOC, lkpi_sta_a_to_a }, /* Send ASSOCREQ. */ { IEEE80211_S_RUN, IEEE80211_S_ASSOC, lkpi_sta_run_to_assoc }, /* Send ASSOCREQ/REASSOCREQ. */ { IEEE80211_S_AUTH, IEEE80211_S_RUN, lkpi_sta_auth_to_run }, { IEEE80211_S_ASSOC, IEEE80211_S_RUN, lkpi_sta_assoc_to_run }, { IEEE80211_S_RUN, IEEE80211_S_RUN, lkpi_sta_state_do_nada }, /* Dummy at the end without handler. */ { IEEE80211_S_INIT, IEEE80211_S_INIT, NULL }, }; static int lkpi_iv_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct ieee80211com *ic; struct lkpi_hw *lhw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct fsm_state *s; enum ieee80211_state ostate; int error; ic = vap->iv_ic; IEEE80211_LOCK_ASSERT(ic); ostate = vap->iv_state; #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE) ic_printf(vap->iv_ic, "%s:%d: vap %p nstate %#x arg %#x\n", __func__, __LINE__, vap, nstate, arg); #endif if (vap->iv_opmode == IEEE80211_M_STA) { lhw = ic->ic_softc; lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); /* No need to replicate this in most state handlers. 
*/ if (ostate == IEEE80211_S_SCAN && nstate != IEEE80211_S_SCAN) lkpi_stop_hw_scan(lhw, vif); s = sta_state_fsm; } else { ic_printf(vap->iv_ic, "%s: only station mode currently supported: " "cap %p iv_opmode %d\n", __func__, vap, vap->iv_opmode); return (ENOSYS); } error = 0; for (; s->handler != NULL; s++) { if (ostate == s->ostate && nstate == s->nstate) { #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE) ic_printf(vap->iv_ic, "%s: new state %d (%s) ->" " %d (%s): arg %d.\n", __func__, ostate, ieee80211_state_name[ostate], nstate, ieee80211_state_name[nstate], arg); #endif error = s->handler(vap, nstate, arg); break; } } IEEE80211_LOCK_ASSERT(vap->iv_ic); if (s->handler == NULL) { IMPROVE("turn this into a KASSERT\n"); ic_printf(vap->iv_ic, "%s: unsupported state transition " "%d (%s) -> %d (%s)\n", __func__, ostate, ieee80211_state_name[ostate], nstate, ieee80211_state_name[nstate]); return (ENOSYS); } if (error == EALREADY) { #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE) ic_printf(vap->iv_ic, "%s: state transition %d (%s) -> " "%d (%s): iv_newstate already handled: %d.\n", __func__, ostate, ieee80211_state_name[ostate], nstate, ieee80211_state_name[nstate], error); #endif return (0); } if (error != 0) { ic_printf(vap->iv_ic, "%s: error %d during state transition " "%d (%s) -> %d (%s)\n", __func__, error, ostate, ieee80211_state_name[ostate], nstate, ieee80211_state_name[nstate]); return (error); } #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE) ic_printf(vap->iv_ic, "%s:%d: vap %p nstate %#x arg %#x " "calling net80211 parent\n", __func__, __LINE__, vap, nstate, arg); #endif return (lvif->iv_newstate(vap, nstate, arg)); } /* -------------------------------------------------------------------------- */ /* * We overload (*iv_update_bss) as otherwise we have cases in, e.g., * net80211::ieee80211_sta_join1() where vap->iv_bss gets replaced by a * new node without us knowing and thus our ni/lsta are out of sync. */ static struct ieee80211_node * lkpi_iv_update_bss(struct ieee80211vap *vap, struct ieee80211_node *ni) { struct lkpi_vif *lvif; struct ieee80211_node *obss; struct lkpi_sta *lsta; struct ieee80211_sta *sta; obss = vap->iv_bss; #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE) ic_printf(vap->iv_ic, "%s: obss %p ni_drv_data %p " "ni %p ni_drv_data %p\n", __func__, obss, (obss != NULL) ? obss->ni_drv_data : NULL, ni, (ni != NULL) ? ni->ni_drv_data : NULL); #endif /* Nothing to copy from. Just return. */ if (obss == NULL || obss->ni_drv_data == NULL) goto out; /* Nothing to copy to. Just return. */ IMPROVE("clearing the obss might still be needed?"); if (ni == NULL) goto out; /* Nothing changed? panic? 
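*/

/*
 * Illustrative sketch (not part of this change): lkpi_iv_newstate()
 * above treats EALREADY from a handler as "the handler already invoked
 * the parent iv_newstate; report success and do not call it again",
 * while 0 means "now forward to the parent".  The dispatcher's tail,
 * reduced to stubs; the errno value is illustrative.
 */
#if 0
#include <stdio.h>

#define	EALREADY	37

static int handler(void)   { return (EALREADY); }
static int parent_cb(void) { printf("parent iv_newstate\n"); return (0); }

static int
newstate(void)
{
	int error;

	error = handler();
	if (error == EALREADY)
		return (0);		/* parent already ran inside handler */
	if (error != 0)
		return (error);
	return (parent_cb());		/* normal path: call the parent */
}

int
main(void)
{
	printf("newstate -> %d\n", newstate());
	return (0);
}
#endif

/*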
*/ if (obss == ni) goto out; lsta = obss->ni_drv_data; obss->ni_drv_data = ni->ni_drv_data; ni->ni_drv_data = lsta; if (lsta != NULL) { lsta->ni = ni; sta = LSTA_TO_STA(lsta); IEEE80211_ADDR_COPY(sta->addr, lsta->ni->ni_macaddr); IEEE80211_ADDR_COPY(sta->deflink.addr, sta->addr); } lsta = obss->ni_drv_data; if (lsta != NULL) { lsta->ni = obss; sta = LSTA_TO_STA(lsta); IEEE80211_ADDR_COPY(sta->addr, lsta->ni->ni_macaddr); IEEE80211_ADDR_COPY(sta->deflink.addr, sta->addr); } out: lvif = VAP_TO_LVIF(vap); return (lvif->iv_update_bss(vap, ni)); } #ifdef LKPI_80211_WME static int lkpi_wme_update(struct lkpi_hw *lhw, struct ieee80211vap *vap, bool planned) { struct ieee80211com *ic; struct ieee80211_hw *hw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct chanAccParams chp; struct wmeParams wmeparr[WME_NUM_AC]; struct ieee80211_tx_queue_params txqp; enum ieee80211_bss_changed changed; int error; uint16_t ac; IMPROVE(); KASSERT(WME_NUM_AC == IEEE80211_NUM_ACS, ("%s: WME_NUM_AC %d != " "IEEE80211_NUM_ACS %d\n", __func__, WME_NUM_AC, IEEE80211_NUM_ACS)); if (vap == NULL) return (0); if ((vap->iv_flags & IEEE80211_F_WME) == 0) return (0); if (lhw->ops->conf_tx == NULL) return (0); if (!planned && (vap->iv_state != IEEE80211_S_RUN)) { lhw->update_wme = true; return (0); } lhw->update_wme = false; ic = lhw->ic; ieee80211_wme_ic_getparams(ic, &chp); IEEE80211_LOCK(ic); for (ac = 0; ac < WME_NUM_AC; ac++) wmeparr[ac] = chp.cap_wmeParams[ac]; IEEE80211_UNLOCK(ic); hw = LHW_TO_HW(lhw); lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); /* Configure tx queues (conf_tx) & send BSS_CHANGED_QOS. */ LKPI_80211_LHW_LOCK(lhw); for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { struct wmeParams *wmep; wmep = &wmeparr[ac]; bzero(&txqp, sizeof(txqp)); txqp.cw_min = wmep->wmep_logcwmin; txqp.cw_max = wmep->wmep_logcwmax; txqp.txop = wmep->wmep_txopLimit; txqp.aifs = wmep->wmep_aifsn; error = lkpi_80211_mo_conf_tx(hw, vif, /* link_id */0, ac, &txqp); if (error != 0) ic_printf(ic, "%s: conf_tx ac %u failed %d\n", __func__, ac, error); } LKPI_80211_LHW_UNLOCK(lhw); changed = BSS_CHANGED_QOS; if (!planned) lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, changed); return (changed); } #endif static int lkpi_ic_wme_update(struct ieee80211com *ic) { #ifdef LKPI_80211_WME struct ieee80211vap *vap; struct lkpi_hw *lhw; IMPROVE("Use the per-VAP callback in net80211."); vap = TAILQ_FIRST(&ic->ic_vaps); if (vap == NULL) return (0); lhw = ic->ic_softc; lkpi_wme_update(lhw, vap, false); #endif return (0); /* unused */ } static struct ieee80211vap * lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct lkpi_vif *lvif; struct ieee80211vap *vap; struct ieee80211_vif *vif; struct ieee80211_tx_queue_params txqp; enum ieee80211_bss_changed changed; size_t len; int error, i; uint16_t ac; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* 1 so far. Add once this works. 
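	 * Note: only a single VAP is supported for now; supporting more
	 * presumably needs further per-VIF state handling first.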
	 */
		return (NULL);

	lhw = ic->ic_softc;
	hw = LHW_TO_HW(lhw);

	len = sizeof(*lvif);
	len += hw->vif_data_size;	/* vif->drv_priv */
	lvif = malloc(len, M_80211_VAP, M_WAITOK | M_ZERO);
	mtx_init(&lvif->mtx, "lvif", NULL, MTX_DEF);
	TAILQ_INIT(&lvif->lsta_head);
	vap = LVIF_TO_VAP(lvif);

	vif = LVIF_TO_VIF(lvif);
	memcpy(vif->addr, mac, IEEE80211_ADDR_LEN);
	vif->p2p = false;
	vif->probe_req_reg = false;
	vif->type = lkpi_opmode_to_vif_type(opmode);
	lvif->wdev.iftype = vif->type;
	/* Need to fill in other fields as well. */
	IMPROVE();

	/* XXX-BZ hardcoded for now! */
#if 1
	vif->chanctx_conf = NULL;
	vif->bss_conf.vif = vif;
	/* vap->iv_myaddr is not set until net80211::vap_setup or vap_attach. */
	IEEE80211_ADDR_COPY(vif->bss_conf.addr, mac);
	vif->bss_conf.link_id = 0;	/* Non-MLO operation. */
	vif->bss_conf.chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
	vif->bss_conf.use_short_preamble = false;	/* vap->iv_flags IEEE80211_F_SHPREAMBLE */
	vif->bss_conf.use_short_slot = false;	/* vap->iv_flags IEEE80211_F_SHSLOT */
	vif->bss_conf.qos = false;
	vif->bss_conf.use_cts_prot = false;	/* vap->iv_protmode */
	vif->bss_conf.ht_operation_mode = IEEE80211_HT_OP_MODE_PROTECTION_NONE;
	vif->cfg.aid = 0;
	vif->cfg.assoc = false;
	vif->cfg.idle = true;
	vif->cfg.ps = false;
	IMPROVE("Check other fields and then figure out what is left elsewhere of them");
	/*
	 * We need to initialize it to something as the bss_info_changed call
	 * will try to copy from it in iwlwifi and NULL is a panic.
	 * We will set the proper one in scan_to_auth() before being assoc.
	 */
	vif->bss_conf.bssid = ieee80211broadcastaddr;
#endif
#if 0
	vif->bss_conf.dtim_period = 0;	/* IEEE80211_DTIM_DEFAULT ; must stay 0. */
	IEEE80211_ADDR_COPY(vif->bss_conf.bssid, bssid);
	vif->bss_conf.beacon_int = ic->ic_bintval;
	/* iwlwifi bug. */
	if (vif->bss_conf.beacon_int < 16)
		vif->bss_conf.beacon_int = 16;
#endif

	/* Link Config */
	vif->link_conf[0] = &vif->bss_conf;
	for (i = 0; i < nitems(vif->link_conf); i++) {
		IMPROVE("more than 1 link one day");
	}

	/* Setup queue defaults; driver may override in (*add_interface). */
	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
		if (ieee80211_hw_check(hw, QUEUE_CONTROL))
			vif->hw_queue[i] = IEEE80211_INVAL_HW_QUEUE;
		else if (hw->queues >= IEEE80211_NUM_ACS)
			vif->hw_queue[i] = i;
		else
			vif->hw_queue[i] = 0;

		/* Initialize the queue to running. Stopped? */
		lvif->hw_queue_stopped[i] = false;
	}
	vif->cab_queue = IEEE80211_INVAL_HW_QUEUE;

	IMPROVE();

	error = lkpi_80211_mo_start(hw);
	if (error != 0) {
		ic_printf(ic, "%s: failed to start hw: %d\n", __func__, error);
		mtx_destroy(&lvif->mtx);
		free(lvif, M_80211_VAP);
		return (NULL);
	}

	error = lkpi_80211_mo_add_interface(hw, vif);
	if (error != 0) {
		IMPROVE();	/* XXX-BZ mo_stop()? */
		ic_printf(ic, "%s: failed to add interface: %d\n", __func__,
		    error);
		mtx_destroy(&lvif->mtx);
		free(lvif, M_80211_VAP);
		return (NULL);
	}

	LKPI_80211_LHW_LVIF_LOCK(lhw);
	TAILQ_INSERT_TAIL(&lhw->lvif_head, lvif, lvif_entry);
	LKPI_80211_LHW_LVIF_UNLOCK(lhw);

	/* Set bss_info. */
	changed = 0;
	lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, changed);

	/* Configure tx queues (conf_tx), default WME & send BSS_CHANGED_QOS.
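	 * One conservative set of parameters is applied to all ACs here;
	 * lkpi_wme_update() installs the real per-AC values once WME
	 * parameters are known.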
*/ IMPROVE("Hardcoded values; to fix see 802.11-2016, 9.4.2.29 EDCA Parameter Set element"); LKPI_80211_LHW_LOCK(lhw); for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { bzero(&txqp, sizeof(txqp)); txqp.cw_min = 15; txqp.cw_max = 1023; txqp.txop = 0; txqp.aifs = 2; error = lkpi_80211_mo_conf_tx(hw, vif, /* link_id */0, ac, &txqp); if (error != 0) ic_printf(ic, "%s: conf_tx ac %u failed %d\n", __func__, ac, error); } LKPI_80211_LHW_UNLOCK(lhw); changed = BSS_CHANGED_QOS; lkpi_80211_mo_bss_info_changed(hw, vif, &vif->bss_conf, changed); /* Force MC init. */ lkpi_update_mcast_filter(ic, true); IMPROVE(); ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); /* Override with LinuxKPI method so we can drive mac80211/cfg80211. */ lvif->iv_newstate = vap->iv_newstate; vap->iv_newstate = lkpi_iv_newstate; lvif->iv_update_bss = vap->iv_update_bss; vap->iv_update_bss = lkpi_iv_update_bss; /* Key management. */ if (lhw->ops->set_key != NULL) { #ifdef LKPI_80211_HW_CRYPTO vap->iv_key_set = lkpi_iv_key_set; vap->iv_key_delete = lkpi_iv_key_delete; #endif } ieee80211_ratectl_init(vap); /* Complete setup. */ ieee80211_vap_attach(vap, ieee80211_media_change, ieee80211_media_status, mac); if (hw->max_listen_interval == 0) hw->max_listen_interval = 7 * (ic->ic_lintval / ic->ic_bintval); hw->conf.listen_interval = hw->max_listen_interval; ic->ic_set_channel(ic); /* XXX-BZ do we need to be able to update these? */ hw->wiphy->frag_threshold = vap->iv_fragthreshold; lkpi_80211_mo_set_frag_threshold(hw, vap->iv_fragthreshold); hw->wiphy->rts_threshold = vap->iv_rtsthreshold; lkpi_80211_mo_set_rts_threshold(hw, vap->iv_rtsthreshold); /* any others? */ IMPROVE(); return (vap); } void linuxkpi_ieee80211_unregister_hw(struct ieee80211_hw *hw) { wiphy_unregister(hw->wiphy); linuxkpi_ieee80211_ifdetach(hw); IMPROVE(); } void linuxkpi_ieee80211_restart_hw(struct ieee80211_hw *hw) { TODO(); } static void lkpi_ic_vap_delete(struct ieee80211vap *vap) { struct ieee80211com *ic; struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); ic = vap->iv_ic; lhw = ic->ic_softc; hw = LHW_TO_HW(lhw); LKPI_80211_LHW_LVIF_LOCK(lhw); TAILQ_REMOVE(&lhw->lvif_head, lvif, lvif_entry); LKPI_80211_LHW_LVIF_UNLOCK(lhw); ieee80211_ratectl_deinit(vap); ieee80211_vap_detach(vap); IMPROVE("clear up other bits in this state"); lkpi_80211_mo_remove_interface(hw, vif); /* Single VAP, so we can do this here. */ lkpi_80211_mo_stop(hw); mtx_destroy(&lvif->mtx); free(lvif, M_80211_VAP); } static void lkpi_ic_update_mcast(struct ieee80211com *ic) { lkpi_update_mcast_filter(ic, false); TRACEOK(); } static void lkpi_ic_update_promisc(struct ieee80211com *ic) { UNIMPLEMENTED; } static void lkpi_ic_update_chw(struct ieee80211com *ic) { UNIMPLEMENTED; } /* Start / stop device. 
 */
static void
lkpi_ic_parent(struct ieee80211com *ic)
{
	struct lkpi_hw *lhw;
#ifdef HW_START_STOP
	struct ieee80211_hw *hw;
	int error;
#endif
	bool start_all;

	IMPROVE();

	lhw = ic->ic_softc;
#ifdef HW_START_STOP
	hw = LHW_TO_HW(lhw);
#endif
	start_all = false;

	/* IEEE80211_UNLOCK(ic); */
	LKPI_80211_LHW_LOCK(lhw);
	if (ic->ic_nrunning > 0) {
#ifdef HW_START_STOP
		error = lkpi_80211_mo_start(hw);
		if (error == 0)
#endif
			start_all = true;
	} else {
#ifdef HW_START_STOP
		lkpi_80211_mo_stop(hw);
#endif
	}
	LKPI_80211_LHW_UNLOCK(lhw);
	/* IEEE80211_LOCK(ic); */

	if (start_all)
		ieee80211_start_all(ic);
}

bool
linuxkpi_ieee80211_is_ie_id_in_ie_buf(const u8 ie, const u8 *ie_ids,
    size_t ie_ids_len)
{
	int i;

	for (i = 0; i < ie_ids_len; i++) {
		if (ie == ie_ids[i])
			return (true);
	}

	return (false);
}

/* Return true if skipped; false if error. */
bool
linuxkpi_ieee80211_ie_advance(size_t *xp, const u8 *ies, size_t ies_len)
{
	size_t x;
	uint8_t l;

	x = *xp;
	KASSERT(x < ies_len, ("%s: x %zu ies_len %zu ies %p\n",
	    __func__, x, ies_len, ies));
	l = ies[x + 1];
	x += 2 + l;

	if (x > ies_len)
		return (false);

	*xp = x;
	return (true);
}

static uint8_t *
lkpi_scan_ies_add(uint8_t *p, struct ieee80211_scan_ies *scan_ies,
    uint32_t band_mask, struct ieee80211vap *vap, struct ieee80211_hw *hw)
{
	struct ieee80211_supported_band *supband;
	struct linuxkpi_ieee80211_channel *channels;
	struct ieee80211com *ic;
	const struct ieee80211_channel *chan;
	const struct ieee80211_rateset *rs;
	uint8_t *pb;
	int band, i;

	ic = vap->iv_ic;
	for (band = 0; band < NUM_NL80211_BANDS; band++) {
		if ((band_mask & (1 << band)) == 0)
			continue;

		supband = hw->wiphy->bands[band];
		/*
		 * This should not happen;
		 * band_mask is a bitmask of valid bands to scan on.
		 */
		if (supband == NULL || supband->n_channels == 0)
			continue;

		/* Find a first channel to get the mode and rates from. */
		channels = supband->channels;
		chan = NULL;
		for (i = 0; i < supband->n_channels; i++) {

			if (channels[i].flags & IEEE80211_CHAN_DISABLED)
				continue;

			chan = ieee80211_find_channel(ic,
			    channels[i].center_freq, 0);
			if (chan != NULL)
				break;
		}

		/* This really should not happen.
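		 * All channels of this band were either disabled or are not
		 * known to net80211; skip the band.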
		 */
		if (chan == NULL)
			continue;

		pb = p;
		rs = ieee80211_get_suprates(ic, chan);	/* calls chan2mode */
		p = ieee80211_add_rates(p, rs);
		p = ieee80211_add_xrates(p, rs);

#if defined(LKPI_80211_HT)
		if ((vap->iv_flags_ht & IEEE80211_FHT_HT) != 0) {
			struct ieee80211_channel *c;

			c = ieee80211_ht_adjust_channel(ic, ic->ic_curchan,
			    vap->iv_flags_ht);
			p = ieee80211_add_htcap_ch(p, vap, c);
		}
#endif
#if defined(LKPI_80211_VHT)
		if ((vap->iv_vht_flags & IEEE80211_FVHT_VHT) != 0) {
			struct ieee80211_channel *c;

			c = ieee80211_ht_adjust_channel(ic, ic->ic_curchan,
			    vap->iv_flags_ht);
			c = ieee80211_vht_adjust_channel(ic, c,
			    vap->iv_vht_flags);
			p = ieee80211_add_vhtcap_ch(p, vap, c);
		}
#endif

		scan_ies->ies[band] = pb;
		scan_ies->len[band] = p - pb;
	}

	/* Add common_ies */
	pb = p;
	if ((vap->iv_flags & IEEE80211_F_WPA1) != 0 &&
	    vap->iv_wpa_ie != NULL) {
		memcpy(p, vap->iv_wpa_ie, 2 + vap->iv_wpa_ie[1]);
		p += 2 + vap->iv_wpa_ie[1];
	}
	if (vap->iv_appie_probereq != NULL) {
		memcpy(p, vap->iv_appie_probereq->ie_data,
		    vap->iv_appie_probereq->ie_len);
		p += vap->iv_appie_probereq->ie_len;
	}
	scan_ies->common_ies = pb;
	scan_ies->common_ie_len = p - pb;

	return (p);
}

static void
lkpi_ic_scan_start(struct ieee80211com *ic)
{
	struct lkpi_hw *lhw;
	struct ieee80211_hw *hw;
	struct lkpi_vif *lvif;
	struct ieee80211_vif *vif;
	struct ieee80211_scan_state *ss;
	struct ieee80211vap *vap;
	int error;
	bool is_hw_scan;

	lhw = ic->ic_softc;
	LKPI_80211_LHW_SCAN_LOCK(lhw);
	if ((lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0) {
		/* A scan is still running. */
		LKPI_80211_LHW_SCAN_UNLOCK(lhw);
		return;
	}
	is_hw_scan = (lhw->scan_flags & LKPI_LHW_SCAN_HW) != 0;
	LKPI_80211_LHW_SCAN_UNLOCK(lhw);

	ss = ic->ic_scan;
	vap = ss->ss_vap;
	if (vap->iv_state != IEEE80211_S_SCAN) {
		IMPROVE("We need to be able to scan if not in S_SCAN");
		return;
	}

	hw = LHW_TO_HW(lhw);
	if (!is_hw_scan) {
		/* If hw_scan is cleared clear FEXT_SCAN_OFFLOAD too. */
		vap->iv_flags_ext &= ~IEEE80211_FEXT_SCAN_OFFLOAD;
sw_scan:
		lvif = VAP_TO_LVIF(vap);
		vif = LVIF_TO_VIF(lvif);

		if (vap->iv_state == IEEE80211_S_SCAN)
			lkpi_hw_conf_idle(hw, false);

		lkpi_80211_mo_sw_scan_start(hw, vif, vif->addr);
		/* net80211::scan_start() handled PS for us. */
		IMPROVE();
		/* XXX Also means it is too late to flush queues?
		 * need to check iv_sta_ps or overload? */
		/* XXX want to adjust ss end time/ maxdwell? */

	} else {
		struct ieee80211_channel *c;
		struct ieee80211_scan_request *hw_req;
		struct linuxkpi_ieee80211_channel *lc, **cpp;
		struct cfg80211_ssid *ssids;
		struct cfg80211_scan_6ghz_params *s6gp;
		size_t chan_len, nchan, ssids_len, s6ghzlen;
		int band, i, ssid_count, common_ie_len;
		uint32_t band_mask;
		uint8_t *ie, *ieend;
		bool running;

		ssid_count = min(ss->ss_nssid, hw->wiphy->max_scan_ssids);
		ssids_len = ssid_count * sizeof(*ssids);
		s6ghzlen = 0 * (sizeof(*s6gp));		/* XXX-BZ */

		band_mask = 0;
		nchan = 0;
		for (i = ss->ss_next; i < ss->ss_last; i++) {
			nchan++;
			band = lkpi_net80211_chan_to_nl80211_band(
			    ss->ss_chans[i]);
			band_mask |= (1 << band);
		}

		if (!ieee80211_hw_check(hw, SINGLE_SCAN_ON_ALL_BANDS)) {
			IMPROVE("individual band scans not yet supported, only scanning first band");
			/* In theory net80211 should drive this. */
			/* Probably we need to add local logic for now;
			 * need to deal with scan_complete
			 * and cancel_scan and keep local state.
			 * Also cut the nchan down above.
			 */
			/* XXX-BZ ath10k does not set this but still does it?
&$%^ */ } chan_len = nchan * (sizeof(lc) + sizeof(*lc)); common_ie_len = 0; if ((vap->iv_flags & IEEE80211_F_WPA1) != 0 && vap->iv_wpa_ie != NULL) common_ie_len += vap->iv_wpa_ie[1]; if (vap->iv_appie_probereq != NULL) common_ie_len += vap->iv_appie_probereq->ie_len; /* We would love to check this at an earlier stage... */ if (common_ie_len > hw->wiphy->max_scan_ie_len) { ic_printf(ic, "WARNING: %s: common_ie_len %d > " "wiphy->max_scan_ie_len %d\n", __func__, common_ie_len, hw->wiphy->max_scan_ie_len); } hw_req = malloc(sizeof(*hw_req) + ssids_len + s6ghzlen + chan_len + lhw->supbands * lhw->scan_ie_len + common_ie_len, M_LKPI80211, M_WAITOK | M_ZERO); hw_req->req.flags = 0; /* XXX ??? */ /* hw_req->req.wdev */ hw_req->req.wiphy = hw->wiphy; hw_req->req.no_cck = false; /* XXX */ #if 0 /* This seems to pessimise default scanning behaviour. */ hw_req->req.duration_mandatory = TICKS_2_USEC(ss->ss_mindwell); hw_req->req.duration = TICKS_2_USEC(ss->ss_maxdwell); #endif #ifdef __notyet__ hw_req->req.flags |= NL80211_SCAN_FLAG_RANDOM_ADDR; memcpy(hw_req->req.mac_addr, xxx, IEEE80211_ADDR_LEN); memset(hw_req->req.mac_addr_mask, 0xxx, IEEE80211_ADDR_LEN); #endif eth_broadcast_addr(hw_req->req.bssid); hw_req->req.n_channels = nchan; cpp = (struct linuxkpi_ieee80211_channel **)(hw_req + 1); lc = (struct linuxkpi_ieee80211_channel *)(cpp + nchan); for (i = 0; i < nchan; i++) { *(cpp + i) = (struct linuxkpi_ieee80211_channel *)(lc + i); } for (i = 0; i < nchan; i++) { c = ss->ss_chans[ss->ss_next + i]; lc->hw_value = c->ic_ieee; lc->center_freq = c->ic_freq; /* XXX */ /* lc->flags */ lc->band = lkpi_net80211_chan_to_nl80211_band(c); lc->max_power = c->ic_maxpower; /* lc-> ... */ lc++; } hw_req->req.n_ssids = ssid_count; if (hw_req->req.n_ssids > 0) { ssids = (struct cfg80211_ssid *)lc; hw_req->req.ssids = ssids; for (i = 0; i < ssid_count; i++) { ssids->ssid_len = ss->ss_ssid[i].len; memcpy(ssids->ssid, ss->ss_ssid[i].ssid, ss->ss_ssid[i].len); ssids++; } s6gp = (struct cfg80211_scan_6ghz_params *)ssids; } else { s6gp = (struct cfg80211_scan_6ghz_params *)lc; } /* 6GHz one day. */ hw_req->req.n_6ghz_params = 0; hw_req->req.scan_6ghz_params = NULL; hw_req->req.scan_6ghz = false; /* Weird boolean; not what you think. */ /* s6gp->... */ ie = ieend = (uint8_t *)s6gp; /* Copy per-band IEs, copy common IEs */ ieend = lkpi_scan_ies_add(ie, &hw_req->ies, band_mask, vap, hw); hw_req->req.ie = ie; hw_req->req.ie_len = ieend - ie; lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); LKPI_80211_LHW_SCAN_LOCK(lhw); /* Re-check under lock. */ running = (lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0; if (!running) { KASSERT(lhw->hw_req == NULL, ("%s: ic %p lhw %p hw_req %p " "!= NULL\n", __func__, ic, lhw, lhw->hw_req)); lhw->scan_flags |= LKPI_LHW_SCAN_RUNNING; lhw->hw_req = hw_req; } LKPI_80211_LHW_SCAN_UNLOCK(lhw); if (running) { free(hw_req, M_LKPI80211); return; } error = lkpi_80211_mo_hw_scan(hw, vif, hw_req); if (error != 0) { ieee80211_cancel_scan(vap); /* * ieee80211_scan_completed must be called in either * case of error or none. So let the free happen there * and only there. * That would be fine in theory but in practice drivers * behave differently: * ath10k does not return hw_scan until after scan_complete * and can then still return an error. * rtw88 can return 1 or -EBUSY without scan_complete * iwlwifi can return various errors before scan starts * ... * So we cannot rely on that behaviour and have to check * and balance between both code paths. 
*/ LKPI_80211_LHW_SCAN_LOCK(lhw); if ((lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) != 0) { free(lhw->hw_req, M_LKPI80211); lhw->hw_req = NULL; lhw->scan_flags &= ~LKPI_LHW_SCAN_RUNNING; } LKPI_80211_LHW_SCAN_UNLOCK(lhw); /* * XXX-SIGH magic number. * rtw88 has a magic "return 1" if offloading scan is * not possible. Fall back to sw scan in that case. */ if (error == 1) { LKPI_80211_LHW_SCAN_LOCK(lhw); lhw->scan_flags &= ~LKPI_LHW_SCAN_HW; LKPI_80211_LHW_SCAN_UNLOCK(lhw); /* * XXX If we clear this now and later a driver * thinks it * can do a hw_scan again, we will * currently not re-enable it? */ vap->iv_flags_ext &= ~IEEE80211_FEXT_SCAN_OFFLOAD; ieee80211_start_scan(vap, IEEE80211_SCAN_ACTIVE | IEEE80211_SCAN_NOPICK | IEEE80211_SCAN_ONCE, IEEE80211_SCAN_FOREVER, ss->ss_mindwell ? ss->ss_mindwell : msecs_to_ticks(20), ss->ss_maxdwell ? ss->ss_maxdwell : msecs_to_ticks(200), vap->iv_des_nssid, vap->iv_des_ssid); goto sw_scan; } ic_printf(ic, "ERROR: %s: hw_scan returned %d\n", __func__, error); } } } static void lkpi_ic_scan_end(struct ieee80211com *ic) { struct lkpi_hw *lhw; bool is_hw_scan; lhw = ic->ic_softc; LKPI_80211_LHW_SCAN_LOCK(lhw); if ((lhw->scan_flags & LKPI_LHW_SCAN_RUNNING) == 0) { LKPI_80211_LHW_SCAN_UNLOCK(lhw); return; } is_hw_scan = (lhw->scan_flags & LKPI_LHW_SCAN_HW) != 0; LKPI_80211_LHW_SCAN_UNLOCK(lhw); if (!is_hw_scan) { struct ieee80211_scan_state *ss; struct ieee80211vap *vap; struct ieee80211_hw *hw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; ss = ic->ic_scan; vap = ss->ss_vap; hw = LHW_TO_HW(lhw); lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); lkpi_80211_mo_sw_scan_complete(hw, vif); /* Send PS to stop buffering if n80211 does not for us? */ if (vap->iv_state == IEEE80211_S_SCAN) lkpi_hw_conf_idle(hw, true); } } static void lkpi_ic_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { struct lkpi_hw *lhw; bool is_hw_scan; lhw = ss->ss_ic->ic_softc; LKPI_80211_LHW_SCAN_LOCK(lhw); is_hw_scan = (lhw->scan_flags & LKPI_LHW_SCAN_HW) != 0; LKPI_80211_LHW_SCAN_UNLOCK(lhw); if (!is_hw_scan) lhw->ic_scan_curchan(ss, maxdwell); } static void lkpi_ic_scan_mindwell(struct ieee80211_scan_state *ss) { struct lkpi_hw *lhw; bool is_hw_scan; lhw = ss->ss_ic->ic_softc; LKPI_80211_LHW_SCAN_LOCK(lhw); is_hw_scan = (lhw->scan_flags & LKPI_LHW_SCAN_HW) != 0; LKPI_80211_LHW_SCAN_UNLOCK(lhw); if (!is_hw_scan) lhw->ic_scan_mindwell(ss); } static void lkpi_ic_set_channel(struct ieee80211com *ic) { struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct ieee80211_channel *c; struct linuxkpi_ieee80211_channel *chan; int error; bool hw_scan_running; lhw = ic->ic_softc; /* If we do not support (*config)() save us the work. */ if (lhw->ops->config == NULL) return; /* If we have a hw_scan running do not switch channels. */ LKPI_80211_LHW_SCAN_LOCK(lhw); hw_scan_running = (lhw->scan_flags & (LKPI_LHW_SCAN_RUNNING|LKPI_LHW_SCAN_HW)) == (LKPI_LHW_SCAN_RUNNING|LKPI_LHW_SCAN_HW); LKPI_80211_LHW_SCAN_UNLOCK(lhw); if (hw_scan_running) return; c = ic->ic_curchan; if (c == NULL || c == IEEE80211_CHAN_ANYC) { ic_printf(ic, "%s: c %p ops->config %p\n", __func__, c, lhw->ops->config); return; } chan = lkpi_find_lkpi80211_chan(lhw, c); if (chan == NULL) { ic_printf(ic, "%s: c %p chan %p\n", __func__, c, chan); return; } /* XXX max power for scanning? 
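	 * For now only the chandef is updated below; TX power is left
	 * untouched.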
*/ IMPROVE(); hw = LHW_TO_HW(lhw); cfg80211_chandef_create(&hw->conf.chandef, chan, NL80211_CHAN_NO_HT); error = lkpi_80211_mo_config(hw, IEEE80211_CONF_CHANGE_CHANNEL); if (error != 0 && error != EOPNOTSUPP) { ic_printf(ic, "ERROR: %s: config %#0x returned %d\n", __func__, IEEE80211_CONF_CHANGE_CHANNEL, error); /* XXX should we unroll to the previous chandef? */ IMPROVE(); } else { /* Update radiotap channels as well. */ lhw->rtap_tx.wt_chan_freq = htole16(c->ic_freq); lhw->rtap_tx.wt_chan_flags = htole16(c->ic_flags); lhw->rtap_rx.wr_chan_freq = htole16(c->ic_freq); lhw->rtap_rx.wr_chan_flags = htole16(c->ic_flags); } /* Currently PS is hard coded off! Not sure it belongs here. */ IMPROVE(); if (ieee80211_hw_check(hw, SUPPORTS_PS) && (hw->conf.flags & IEEE80211_CONF_PS) != 0) { hw->conf.flags &= ~IEEE80211_CONF_PS; error = lkpi_80211_mo_config(hw, IEEE80211_CONF_CHANGE_PS); if (error != 0 && error != EOPNOTSUPP) ic_printf(ic, "ERROR: %s: config %#0x returned " "%d\n", __func__, IEEE80211_CONF_CHANGE_PS, error); } } static struct ieee80211_node * lkpi_ic_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { struct ieee80211com *ic; struct lkpi_hw *lhw; struct ieee80211_node *ni; struct ieee80211_hw *hw; struct lkpi_sta *lsta; ic = vap->iv_ic; lhw = ic->ic_softc; /* We keep allocations de-coupled so we can deal with the two worlds. */ if (lhw->ic_node_alloc == NULL) return (NULL); ni = lhw->ic_node_alloc(vap, mac); if (ni == NULL) return (NULL); hw = LHW_TO_HW(lhw); lsta = lkpi_lsta_alloc(vap, mac, hw, ni); if (lsta == NULL) { if (lhw->ic_node_free != NULL) lhw->ic_node_free(ni); return (NULL); } return (ni); } static int lkpi_ic_node_init(struct ieee80211_node *ni) { struct ieee80211com *ic; struct lkpi_hw *lhw; struct lkpi_sta *lsta; int error; ic = ni->ni_ic; lhw = ic->ic_softc; if (lhw->ic_node_init != NULL) { error = lhw->ic_node_init(ni); if (error != 0) return (error); } lsta = ni->ni_drv_data; /* Now take the reference before linking it to the table. */ lsta->ni = ieee80211_ref_node(ni); /* XXX-BZ Sync other state over. */ IMPROVE(); return (0); } static void lkpi_ic_node_cleanup(struct ieee80211_node *ni) { struct ieee80211com *ic; struct lkpi_hw *lhw; ic = ni->ni_ic; lhw = ic->ic_softc; /* XXX-BZ remove from driver, ... */ IMPROVE(); if (lhw->ic_node_cleanup != NULL) lhw->ic_node_cleanup(ni); } static void lkpi_ic_node_free(struct ieee80211_node *ni) { struct ieee80211com *ic; struct lkpi_hw *lhw; struct lkpi_sta *lsta; ic = ni->ni_ic; lhw = ic->ic_softc; lsta = ni->ni_drv_data; if (lsta == NULL) goto out; /* XXX-BZ free resources, ... */ IMPROVE(); /* Flush mbufq (make sure to release ni refs!). */ #ifdef __notyet__ - KASSERT(mbufq_len(&lsta->txq) == 0, ("%s: lsta %p has txq len %d != 0\n", + KASSERT(mbufq_empty(&lsta->txq), ("%s: lsta %p has txq len %d != 0\n", __func__, lsta, mbufq_len(&lsta->txq))); #endif /* Drain taskq. */ /* Drain sta->txq[] */ mtx_destroy(&lsta->txq_mtx); /* Remove lsta if added_to_drv. */ /* Remove lsta from vif */ /* Remove ref from lsta node... */ /* Free lsta. */ lkpi_lsta_remove(lsta, VAP_TO_LVIF(ni->ni_vap)); out: if (lhw->ic_node_free != NULL) lhw->ic_node_free(ni); } static int lkpi_ic_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params __unused) { struct lkpi_sta *lsta; lsta = ni->ni_drv_data; /* Queue the packet and enqueue the task to handle it. 
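	 * TX is thus deferred:
	 *   lkpi_ic_raw_xmit() -> mbufq_enqueue(&lsta->txq, m)
	 *                      -> taskqueue_enqueue(taskqueue_thread, ...)
	 *   lkpi_80211_txq_task() -> lkpi_80211_txq_tx_one() per mbuf,
	 * running in taskqueue(9) thread context where drivers may sleep.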
*/ LKPI_80211_LSTA_LOCK(lsta); mbufq_enqueue(&lsta->txq, m); LKPI_80211_LSTA_UNLOCK(lsta); #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE_TX) printf("%s:%d lsta %p ni %p %6D mbuf_qlen %d\n", __func__, __LINE__, lsta, ni, ni->ni_macaddr, ":", mbufq_len(&lsta->txq)); #endif taskqueue_enqueue(taskqueue_thread, &lsta->txq_task); return (0); } static void lkpi_80211_txq_tx_one(struct lkpi_sta *lsta, struct mbuf *m) { struct ieee80211_node *ni; #ifndef LKPI_80211_HW_CRYPTO struct ieee80211_frame *wh; #endif struct ieee80211_key *k; struct sk_buff *skb; struct ieee80211com *ic; struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct ieee80211_channel *c; struct ieee80211_tx_control control; struct ieee80211_tx_info *info; struct ieee80211_sta *sta; struct ieee80211_hdr *hdr; void *buf; uint8_t ac, tid; M_ASSERTPKTHDR(m); #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE_TX_DUMP) hexdump(mtod(m, const void *), m->m_len, "RAW TX (plain) ", 0); #endif ni = lsta->ni; k = NULL; #ifndef LKPI_80211_HW_CRYPTO /* Encrypt the frame if need be; XXX-BZ info->control.hw_key. */ wh = mtod(m, struct ieee80211_frame *); if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { /* Retrieve key for TX && do software encryption. */ k = ieee80211_crypto_encap(ni, m); if (k == NULL) { ieee80211_free_node(ni); m_freem(m); return; } } #endif ic = ni->ni_ic; lhw = ic->ic_softc; hw = LHW_TO_HW(lhw); c = ni->ni_chan; if (ieee80211_radiotap_active_vap(ni->ni_vap)) { struct lkpi_radiotap_tx_hdr *rtap; rtap = &lhw->rtap_tx; rtap->wt_flags = 0; if (k != NULL) rtap->wt_flags |= IEEE80211_RADIOTAP_F_WEP; if (m->m_flags & M_FRAG) rtap->wt_flags |= IEEE80211_RADIOTAP_F_FRAG; IMPROVE(); rtap->wt_rate = 0; if (c != NULL && c != IEEE80211_CHAN_ANYC) { rtap->wt_chan_freq = htole16(c->ic_freq); rtap->wt_chan_flags = htole16(c->ic_flags); } ieee80211_radiotap_tx(ni->ni_vap, m); } /* * net80211 should handle hw->extra_tx_headroom. * Though for as long as we are copying we don't mind. * XXX-BZ rtw88 asks for too much headroom for ipv6+tcp: * https://lists.freebsd.org/archives/freebsd-transport/2022-February/000012.html */ skb = dev_alloc_skb(hw->extra_tx_headroom + m->m_pkthdr.len); if (skb == NULL) { ic_printf(ic, "ERROR %s: skb alloc failed\n", __func__); ieee80211_free_node(ni); m_freem(m); return; } skb_reserve(skb, hw->extra_tx_headroom); /* XXX-BZ we need a SKB version understanding mbuf. */ /* Save the mbuf for ieee80211_tx_complete(). */ skb->m_free_func = lkpi_ieee80211_free_skb_mbuf; skb->m = m; #if 0 skb_put_data(skb, m->m_data, m->m_pkthdr.len); #else buf = skb_put(skb, m->m_pkthdr.len); m_copydata(m, 0, m->m_pkthdr.len, buf); #endif /* Save the ni. */ m->m_pkthdr.PH_loc.ptr = ni; lvif = VAP_TO_LVIF(ni->ni_vap); vif = LVIF_TO_VIF(lvif); hdr = (void *)skb->data; tid = linuxkpi_ieee80211_get_tid(hdr, true); if (tid == IEEE80211_NONQOS_TID) { /* == IEEE80211_NUM_TIDS */ skb->priority = 0; ac = IEEE80211_AC_BE; } else { skb->priority = tid & IEEE80211_QOS_CTL_TID_MASK; ac = ieee80211e_up_to_ac[tid & 7]; } skb_set_queue_mapping(skb, ac); info = IEEE80211_SKB_CB(skb); info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; /* Slight delay; probably only happens on scanning so fine? 
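	 * ni_chan may still be IEEE80211_CHAN_ANYC at this point; fall back
	 * to ic_curchan below so the band for the TX info can be derived.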
	 */
	if (c == NULL || c == IEEE80211_CHAN_ANYC)
		c = ic->ic_curchan;
	info->band = lkpi_net80211_chan_to_nl80211_band(c);
	info->hw_queue = vif->hw_queue[ac];
	if (m->m_flags & M_EAPOL)
		info->control.flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO;
	info->control.vif = vif;
	/* XXX-BZ info->control.rates */

	lsta = lkpi_find_lsta_by_ni(lvif, ni);
	if (lsta != NULL) {
		sta = LSTA_TO_STA(lsta);
#ifdef LKPI_80211_HW_CRYPTO
		info->control.hw_key = lsta->kc;
#endif
	} else {
		sta = NULL;
	}

	IMPROVE();

	if (sta != NULL) {
		struct lkpi_txq *ltxq;

		ltxq = NULL;
		if (!ieee80211_is_data_present(hdr->frame_control)) {
			if (vif->type == NL80211_IFTYPE_STATION &&
			    lsta->added_to_drv &&
			    sta->txq[IEEE80211_NUM_TIDS] != NULL)
				ltxq = TXQ_TO_LTXQ(sta->txq[IEEE80211_NUM_TIDS]);
		} else if (lsta->added_to_drv &&
		    sta->txq[skb->priority] != NULL) {
			ltxq = TXQ_TO_LTXQ(sta->txq[skb->priority]);
		}
		if (ltxq == NULL)
			goto ops_tx;

		KASSERT(ltxq != NULL, ("%s: lsta %p sta %p m %p skb %p "
		    "ltxq %p != NULL\n", __func__, lsta, sta, m, skb, ltxq));

		skb_queue_tail(&ltxq->skbq, skb);
#ifdef LINUXKPI_DEBUG_80211
		if (linuxkpi_debug_80211 & D80211_TRACE_TX)
			printf("%s:%d mo_wake_tx_queue :: %d %u lsta %p sta %p "
			    "ni %p %6D skb %p ltxq %p { qlen %u, ac %d tid %u } "
			    "WAKE_TX_Q ac %d prio %u qmap %u\n",
			    __func__, __LINE__,
			    curthread->td_tid, (unsigned int)ticks,
			    lsta, sta, ni, ni->ni_macaddr, ":", skb, ltxq,
			    skb_queue_len(&ltxq->skbq), ltxq->txq.ac,
			    ltxq->txq.tid, ac, skb->priority, skb->qmap);
#endif
		lkpi_80211_mo_wake_tx_queue(hw, &ltxq->txq);
		return;
	}

ops_tx:
#ifdef LINUXKPI_DEBUG_80211
	if (linuxkpi_debug_80211 & D80211_TRACE_TX)
		printf("%s:%d mo_tx :: lsta %p sta %p ni %p %6D skb %p "
		    "TX ac %d prio %u qmap %u\n",
		    __func__, __LINE__, lsta, sta, ni, ni->ni_macaddr, ":",
		    skb, ac, skb->priority, skb->qmap);
#endif
	memset(&control, 0, sizeof(control));
	control.sta = sta;

	lkpi_80211_mo_tx(hw, &control, skb);
	return;
}

static void
lkpi_80211_txq_task(void *ctx, int pending)
{
	struct lkpi_sta *lsta;
	struct mbufq mq;
	struct mbuf *m;

	lsta = ctx;

#ifdef LINUXKPI_DEBUG_80211
	if (linuxkpi_debug_80211 & D80211_TRACE_TX)
		printf("%s:%d lsta %p ni %p %6D pending %d mbuf_qlen %d\n",
		    __func__, __LINE__, lsta, lsta->ni, lsta->ni->ni_macaddr, ":",
		    pending, mbufq_len(&lsta->txq));
#endif

	mbufq_init(&mq, IFQ_MAXLEN);

	LKPI_80211_LSTA_LOCK(lsta);
	mbufq_concat(&mq, &lsta->txq);
	LKPI_80211_LSTA_UNLOCK(lsta);

	m = mbufq_dequeue(&mq);
	while (m != NULL) {
		lkpi_80211_txq_tx_one(lsta, m);
		m = mbufq_dequeue(&mq);
	}
}

static int
lkpi_ic_transmit(struct ieee80211com *ic, struct mbuf *m)
{

	/* XXX TODO */
	IMPROVE();

	/* Quick and dirty cheating hack. */
	struct ieee80211_node *ni;

	ni = (struct ieee80211_node *)m->m_pkthdr.rcvif;
	return (lkpi_ic_raw_xmit(ni, m, NULL));
}

static void
lkpi_ic_getradiocaps(struct ieee80211com *ic, int maxchan,
    int *n, struct ieee80211_channel *c)
{
	struct lkpi_hw *lhw;
	struct ieee80211_hw *hw;
	struct linuxkpi_ieee80211_channel *channels;
	uint8_t bands[IEEE80211_MODE_BYTES];
	int chan_flags, error, i, nchans;

	/* Channels */
	lhw = ic->ic_softc;
	hw = LHW_TO_HW(lhw);

	/* NL80211_BAND_2GHZ */
	nchans = 0;
	if (hw->wiphy->bands[NL80211_BAND_2GHZ] != NULL)
		nchans = hw->wiphy->bands[NL80211_BAND_2GHZ]->n_channels;
	if (nchans > 0) {
		memset(bands, 0, sizeof(bands));
		chan_flags = 0;
		setbit(bands, IEEE80211_MODE_11B);
		/* XXX-BZ unclear how to check for 11g.
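		 * Assume any 2GHz capable device also supports 11g and
		 * advertise it unconditionally.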
		 */
		setbit(bands, IEEE80211_MODE_11G);
#ifdef __notyet__
		if (hw->wiphy->bands[NL80211_BAND_2GHZ]->ht_cap.ht_supported) {
			setbit(bands, IEEE80211_MODE_11NG);
			chan_flags |= NET80211_CBW_FLAG_HT40;
		}
#endif

		channels = hw->wiphy->bands[NL80211_BAND_2GHZ]->channels;
		for (i = 0; i < nchans && *n < maxchan; i++) {
			uint32_t nflags = 0;
			int cflags = chan_flags;

			if (channels[i].flags & IEEE80211_CHAN_DISABLED) {
				ic_printf(ic, "%s: Skipping disabled chan "
				    "[%u/%u/%#x]\n", __func__,
				    channels[i].hw_value,
				    channels[i].center_freq, channels[i].flags);
				continue;
			}
			if (channels[i].flags & IEEE80211_CHAN_NO_IR)
				nflags |= (IEEE80211_CHAN_NOADHOC|IEEE80211_CHAN_PASSIVE);
			if (channels[i].flags & IEEE80211_CHAN_RADAR)
				nflags |= IEEE80211_CHAN_DFS;
			if (channels[i].flags & IEEE80211_CHAN_NO_160MHZ)
				cflags &= ~(NET80211_CBW_FLAG_VHT160|NET80211_CBW_FLAG_VHT80P80);
			if (channels[i].flags & IEEE80211_CHAN_NO_80MHZ)
				cflags &= ~NET80211_CBW_FLAG_VHT80;
			/* XXX how to map the remaining enum ieee80211_channel_flags? */
			if (channels[i].flags & IEEE80211_CHAN_NO_HT40)
				cflags &= ~NET80211_CBW_FLAG_HT40;

			error = ieee80211_add_channel_cbw(c, maxchan, n,
			    channels[i].hw_value, channels[i].center_freq,
			    channels[i].max_power,
			    nflags, bands, cflags);
			/* net80211::ENOBUFS: *n >= maxchans */
			if (error != 0 && error != ENOBUFS)
				ic_printf(ic, "%s: Adding chan %u/%u/%#x/%#x/%#x/%#x "
				    "returned error %d\n",
				    __func__, channels[i].hw_value,
				    channels[i].center_freq, channels[i].flags,
				    nflags, chan_flags, cflags, error);
			if (error != 0)
				break;
		}
	}

	/* NL80211_BAND_5GHZ */
	nchans = 0;
	if (hw->wiphy->bands[NL80211_BAND_5GHZ] != NULL)
		nchans = hw->wiphy->bands[NL80211_BAND_5GHZ]->n_channels;
	if (nchans > 0) {
		memset(bands, 0, sizeof(bands));
		chan_flags = 0;
		setbit(bands, IEEE80211_MODE_11A);
#ifdef __not_yet__
		if (hw->wiphy->bands[NL80211_BAND_5GHZ]->ht_cap.ht_supported) {
			setbit(bands, IEEE80211_MODE_11NA);
			chan_flags |= NET80211_CBW_FLAG_HT40;
		}
		if (hw->wiphy->bands[NL80211_BAND_5GHZ]->vht_cap.vht_supported){

			ic->ic_flags_ext |= IEEE80211_FEXT_VHT;
			ic->ic_vht_cap.vht_cap_info =
			    hw->wiphy->bands[NL80211_BAND_5GHZ]->vht_cap.cap;

			setbit(bands, IEEE80211_MODE_VHT_5GHZ);
			chan_flags |= NET80211_CBW_FLAG_VHT80;
			if (IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_IS_160MHZ(
			    ic->ic_vht_cap.vht_cap_info))
				chan_flags |= NET80211_CBW_FLAG_VHT160;
			if (IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_IS_160_80P80MHZ(
			    ic->ic_vht_cap.vht_cap_info))
				chan_flags |= NET80211_CBW_FLAG_VHT80P80;
		}
#endif

		channels = hw->wiphy->bands[NL80211_BAND_5GHZ]->channels;
		for (i = 0; i < nchans && *n < maxchan; i++) {
			uint32_t nflags = 0;
			int cflags = chan_flags;

			if (channels[i].flags & IEEE80211_CHAN_DISABLED) {
				ic_printf(ic, "%s: Skipping disabled chan "
				    "[%u/%u/%#x]\n", __func__,
				    channels[i].hw_value,
				    channels[i].center_freq, channels[i].flags);
				continue;
			}
			if (channels[i].flags & IEEE80211_CHAN_NO_IR)
				nflags |= (IEEE80211_CHAN_NOADHOC|IEEE80211_CHAN_PASSIVE);
			if (channels[i].flags & IEEE80211_CHAN_RADAR)
				nflags |= IEEE80211_CHAN_DFS;
			if (channels[i].flags & IEEE80211_CHAN_NO_160MHZ)
				cflags &= ~(NET80211_CBW_FLAG_VHT160|NET80211_CBW_FLAG_VHT80P80);
			if (channels[i].flags & IEEE80211_CHAN_NO_80MHZ)
				cflags &= ~NET80211_CBW_FLAG_VHT80;
			/* XXX how to map the remaining enum ieee80211_channel_flags?
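			 * (Most of the remaining flags do not seem to have a
			 * direct net80211 counterpart.)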
*/ if (channels[i].flags & IEEE80211_CHAN_NO_HT40) cflags &= ~NET80211_CBW_FLAG_HT40; error = ieee80211_add_channel_cbw(c, maxchan, n, channels[i].hw_value, channels[i].center_freq, channels[i].max_power, nflags, bands, cflags); /* net80211::ENOBUFS: *n >= maxchans */ if (error != 0 && error != ENOBUFS) ic_printf(ic, "%s: Adding chan %u/%u/%#x/%#x/%#x/%#x " "returned error %d\n", __func__, channels[i].hw_value, channels[i].center_freq, channels[i].flags, nflags, chan_flags, cflags, error); if (error != 0) break; } } } static void * lkpi_ieee80211_ifalloc(void) { struct ieee80211com *ic; ic = malloc(sizeof(*ic), M_LKPI80211, M_WAITOK | M_ZERO); if (ic == NULL) return (NULL); /* Setting these happens later when we have device information. */ ic->ic_softc = NULL; ic->ic_name = "linuxkpi"; return (ic); } struct ieee80211_hw * linuxkpi_ieee80211_alloc_hw(size_t priv_len, const struct ieee80211_ops *ops) { struct ieee80211_hw *hw; struct lkpi_hw *lhw; struct wiphy *wiphy; int ac; /* Get us and the driver data also allocated. */ wiphy = wiphy_new(&linuxkpi_mac80211cfgops, sizeof(*lhw) + priv_len); if (wiphy == NULL) return (NULL); lhw = wiphy_priv(wiphy); lhw->ops = ops; LKPI_80211_LHW_LOCK_INIT(lhw); LKPI_80211_LHW_SCAN_LOCK_INIT(lhw); sx_init_flags(&lhw->lvif_sx, "lhw-lvif", SX_RECURSE | SX_DUPOK); TAILQ_INIT(&lhw->lvif_head); for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { lhw->txq_generation[ac] = 1; TAILQ_INIT(&lhw->scheduled_txqs[ac]); } /* * XXX-BZ TODO make sure there is a "_null" function to all ops * not initialized. */ hw = LHW_TO_HW(lhw); hw->wiphy = wiphy; hw->conf.flags |= IEEE80211_CONF_IDLE; hw->priv = (void *)(lhw + 1); /* BSD Specific. */ lhw->ic = lkpi_ieee80211_ifalloc(); if (lhw->ic == NULL) { ieee80211_free_hw(hw); return (NULL); } IMPROVE(); return (hw); } void linuxkpi_ieee80211_iffree(struct ieee80211_hw *hw) { struct lkpi_hw *lhw; lhw = HW_TO_LHW(hw); free(lhw->ic, M_LKPI80211); lhw->ic = NULL; /* Cleanup more of lhw here or in wiphy_free()? */ sx_destroy(&lhw->lvif_sx); LKPI_80211_LHW_LOCK_DESTROY(lhw); LKPI_80211_LHW_SCAN_LOCK_DESTROY(lhw); IMPROVE(); } void linuxkpi_set_ieee80211_dev(struct ieee80211_hw *hw, char *name) { struct lkpi_hw *lhw; struct ieee80211com *ic; lhw = HW_TO_LHW(hw); ic = lhw->ic; /* Now set a proper name before ieee80211_ifattach(). */ ic->ic_softc = lhw; ic->ic_name = name; /* XXX-BZ do we also need to set wiphy name? */ } struct ieee80211_hw * linuxkpi_wiphy_to_ieee80211_hw(struct wiphy *wiphy) { struct lkpi_hw *lhw; lhw = wiphy_priv(wiphy); return (LHW_TO_HW(lhw)); } static void lkpi_radiotap_attach(struct lkpi_hw *lhw) { struct ieee80211com *ic; ic = lhw->ic; ieee80211_radiotap_attach(ic, &lhw->rtap_tx.wt_ihdr, sizeof(lhw->rtap_tx), LKPI_RTAP_TX_FLAGS_PRESENT, &lhw->rtap_rx.wr_ihdr, sizeof(lhw->rtap_rx), LKPI_RTAP_RX_FLAGS_PRESENT); } int linuxkpi_ieee80211_ifattach(struct ieee80211_hw *hw) { struct ieee80211com *ic; struct lkpi_hw *lhw; int band, i; lhw = HW_TO_LHW(hw); ic = lhw->ic; /* We do it this late as wiphy->dev should be set for the name. */ lhw->workq = alloc_ordered_workqueue(wiphy_name(hw->wiphy), 0); if (lhw->workq == NULL) return (-EAGAIN); /* XXX-BZ figure this out how they count his... */ if (!is_zero_ether_addr(hw->wiphy->perm_addr)) { IEEE80211_ADDR_COPY(ic->ic_macaddr, hw->wiphy->perm_addr); } else if (hw->wiphy->n_addresses > 0) { /* We take the first one. 
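		 * of the addresses the driver registered in
		 * wiphy->addresses[].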
*/ IEEE80211_ADDR_COPY(ic->ic_macaddr, hw->wiphy->addresses[0].addr); } else { ic_printf(ic, "%s: warning, no hardware address!\n", __func__); } #ifdef __not_yet__ /* See comment in lkpi_80211_txq_tx_one(). */ ic->ic_headroom = hw->extra_tx_headroom; #endif ic->ic_phytype = IEEE80211_T_OFDM; /* not only, but not used */ ic->ic_opmode = IEEE80211_M_STA; /* Set device capabilities. */ /* XXX-BZ we need to get these from linux80211/drivers and convert. */ ic->ic_caps = IEEE80211_C_STA | IEEE80211_C_MONITOR | IEEE80211_C_WPA | /* WPA/RSN */ #ifdef LKPI_80211_WME IEEE80211_C_WME | #endif #if 0 IEEE80211_C_PMGT | #endif IEEE80211_C_SHSLOT | /* short slot time supported */ IEEE80211_C_SHPREAMBLE /* short preamble supported */ ; #if 0 /* Scanning is a different kind of beast to re-work. */ ic->ic_caps |= IEEE80211_C_BGSCAN; #endif if (lhw->ops->hw_scan) { /* * Advertise full-offload scanning. * * Not limiting to SINGLE_SCAN_ON_ALL_BANDS here as otherwise * we essentially disable hw_scan for all drivers not setting * the flag. */ ic->ic_flags_ext |= IEEE80211_FEXT_SCAN_OFFLOAD; lhw->scan_flags |= LKPI_LHW_SCAN_HW; } #ifdef __notyet__ ic->ic_htcaps = IEEE80211_HTC_HT /* HT operation */ | IEEE80211_HTC_AMPDU /* A-MPDU tx/rx */ | IEEE80211_HTC_AMSDU /* A-MSDU tx/rx */ | IEEE80211_HTCAP_MAXAMSDU_3839 /* max A-MSDU length */ | IEEE80211_HTCAP_SMPS_OFF; /* SM power save off */ ic->ic_htcaps |= IEEE80211_HTCAP_SHORTGI20; ic->ic_htcaps |= IEEE80211_HTCAP_CHWIDTH40 | IEEE80211_HTCAP_SHORTGI40; ic->ic_htcaps |= IEEE80211_HTCAP_TXSTBC; #endif /* * The wiphy variables report bitmasks of avail antennas. * (*get_antenna) get the current bitmask sets which can be * altered by (*set_antenna) for some drivers. * XXX-BZ will the count alone do us much good long-term in net80211? */ if (hw->wiphy->available_antennas_rx || hw->wiphy->available_antennas_tx) { uint32_t rxs, txs; if (lkpi_80211_mo_get_antenna(hw, &txs, &rxs) == 0) { ic->ic_rxstream = bitcount32(rxs); ic->ic_txstream = bitcount32(txs); } } ic->ic_cryptocaps = 0; #ifdef LKPI_80211_HW_CRYPTO if (hw->wiphy->n_cipher_suites > 0) { for (i = 0; i < hw->wiphy->n_cipher_suites; i++) ic->ic_cryptocaps |= lkpi_l80211_to_net80211_cyphers( hw->wiphy->cipher_suites[i]); } #endif lkpi_ic_getradiocaps(ic, IEEE80211_CHAN_MAX, &ic->ic_nchans, ic->ic_channels); ieee80211_ifattach(ic); ic->ic_update_mcast = lkpi_ic_update_mcast; ic->ic_update_promisc = lkpi_ic_update_promisc; ic->ic_update_chw = lkpi_ic_update_chw; ic->ic_parent = lkpi_ic_parent; ic->ic_scan_start = lkpi_ic_scan_start; ic->ic_scan_end = lkpi_ic_scan_end; ic->ic_set_channel = lkpi_ic_set_channel; ic->ic_transmit = lkpi_ic_transmit; ic->ic_raw_xmit = lkpi_ic_raw_xmit; ic->ic_vap_create = lkpi_ic_vap_create; ic->ic_vap_delete = lkpi_ic_vap_delete; ic->ic_getradiocaps = lkpi_ic_getradiocaps; ic->ic_wme.wme_update = lkpi_ic_wme_update; lhw->ic_scan_curchan = ic->ic_scan_curchan; ic->ic_scan_curchan = lkpi_ic_scan_curchan; lhw->ic_scan_mindwell = ic->ic_scan_mindwell; ic->ic_scan_mindwell = lkpi_ic_scan_mindwell; lhw->ic_node_alloc = ic->ic_node_alloc; ic->ic_node_alloc = lkpi_ic_node_alloc; lhw->ic_node_init = ic->ic_node_init; ic->ic_node_init = lkpi_ic_node_init; lhw->ic_node_cleanup = ic->ic_node_cleanup; ic->ic_node_cleanup = lkpi_ic_node_cleanup; lhw->ic_node_free = ic->ic_node_free; ic->ic_node_free = lkpi_ic_node_free; lkpi_radiotap_attach(lhw); /* * Assign the first possible channel for now; seems Realtek drivers * expect one. 
* Also remember the amount of bands we support and the most rates * in any band so we can scale [(ext) sup rates] IE(s) accordingly. */ lhw->supbands = lhw->max_rates = 0; for (band = 0; band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *supband; struct linuxkpi_ieee80211_channel *channels; supband = hw->wiphy->bands[band]; if (supband == NULL || supband->n_channels == 0) continue; lhw->supbands++; lhw->max_rates = max(lhw->max_rates, supband->n_bitrates); /* If we have a channel, we need to keep counting supbands. */ if (hw->conf.chandef.chan != NULL) continue; channels = supband->channels; for (i = 0; i < supband->n_channels; i++) { if (channels[i].flags & IEEE80211_CHAN_DISABLED) continue; cfg80211_chandef_create(&hw->conf.chandef, &channels[i], NL80211_CHAN_NO_HT); break; } } IMPROVE("see net80211::ieee80211_chan_init vs. wiphy->bands[].bitrates possibly in lkpi_ic_getradiocaps?"); /* Make sure we do not support more than net80211 is willing to take. */ if (lhw->max_rates > IEEE80211_RATE_MAXSIZE) { ic_printf(ic, "%s: limiting max_rates %d to %d!\n", __func__, lhw->max_rates, IEEE80211_RATE_MAXSIZE); lhw->max_rates = IEEE80211_RATE_MAXSIZE; } /* * The maximum supported bitrates on any band + size for * DSSS Parameter Set give our per-band IE size. * XXX-BZ FIXME add HT VHT ... later * SSID is the responsibility of the driver and goes on the side. * The user specified bits coming from the vap go into the * "common ies" fields. */ lhw->scan_ie_len = 2 + IEEE80211_RATE_SIZE; if (lhw->max_rates > IEEE80211_RATE_SIZE) lhw->scan_ie_len += 2 + (lhw->max_rates - IEEE80211_RATE_SIZE); if (hw->wiphy->features & NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) { /* * net80211 does not seem to support the DSSS Parameter Set but * some of the drivers insert it so calculate the extra fixed * space in. */ lhw->scan_ie_len += 2 + 1; } /* Reduce the max_scan_ie_len "left" by the amount we consume already. */ if (hw->wiphy->max_scan_ie_len > 0) { if (lhw->scan_ie_len > hw->wiphy->max_scan_ie_len) goto err; hw->wiphy->max_scan_ie_len -= lhw->scan_ie_len; } if (bootverbose) ieee80211_announce(ic); return (0); err: IMPROVE("TODO FIXME CLEANUP"); return (-EAGAIN); } void linuxkpi_ieee80211_ifdetach(struct ieee80211_hw *hw) { struct lkpi_hw *lhw; struct ieee80211com *ic; lhw = HW_TO_LHW(hw); ic = lhw->ic; ieee80211_ifdetach(ic); } void linuxkpi_ieee80211_iterate_interfaces(struct ieee80211_hw *hw, enum ieee80211_iface_iter flags, void(*iterfunc)(void *, uint8_t *, struct ieee80211_vif *), void *arg) { struct lkpi_hw *lhw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; bool active, atomic, nin_drv; lhw = HW_TO_LHW(hw); if (flags & ~(IEEE80211_IFACE_ITER_NORMAL| IEEE80211_IFACE_ITER_RESUME_ALL| IEEE80211_IFACE_SKIP_SDATA_NOT_IN_DRIVER| IEEE80211_IFACE_ITER_ACTIVE|IEEE80211_IFACE_ITER__ATOMIC)) { ic_printf(lhw->ic, "XXX TODO %s flags(%#x) not yet supported.\n", __func__, flags); } active = (flags & IEEE80211_IFACE_ITER_ACTIVE) != 0; atomic = (flags & IEEE80211_IFACE_ITER__ATOMIC) != 0; nin_drv = (flags & IEEE80211_IFACE_SKIP_SDATA_NOT_IN_DRIVER) != 0; if (atomic) LKPI_80211_LHW_LVIF_LOCK(lhw); TAILQ_FOREACH(lvif, &lhw->lvif_head, lvif_entry) { struct ieee80211vap *vap; vif = LVIF_TO_VIF(lvif); /* * If we want "active" interfaces, we need to distinguish on * whether the driver knows about them or not to be able to * handle the "resume" case correctly. Skip the ones the * driver does not know about. 
*/ if (active && !lvif->added_to_drv && (flags & IEEE80211_IFACE_ITER_RESUME_ALL) != 0) continue; /* * If we shall skip interfaces not added to the driver do so * if we haven't yet. */ if (nin_drv && !lvif->added_to_drv) continue; /* * Run the iterator function if we are either not asking * asking for active only or if the VAP is "running". */ /* XXX-BZ probably should have state in the lvif as well. */ vap = LVIF_TO_VAP(lvif); if (!active || (vap->iv_state != IEEE80211_S_INIT)) iterfunc(arg, vif->addr, vif); } if (atomic) LKPI_80211_LHW_LVIF_UNLOCK(lhw); } void linuxkpi_ieee80211_iterate_keys(struct ieee80211_hw *hw, struct ieee80211_vif *vif, void(*iterfunc)(struct ieee80211_hw *, struct ieee80211_vif *, struct ieee80211_sta *, struct ieee80211_key_conf *, void *), void *arg) { UNIMPLEMENTED; } void linuxkpi_ieee80211_iterate_chan_contexts(struct ieee80211_hw *hw, void(*iterfunc)(struct ieee80211_hw *, struct ieee80211_chanctx_conf *, void *), void *arg) { struct lkpi_hw *lhw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct lkpi_chanctx *lchanctx; KASSERT(hw != NULL && iterfunc != NULL, ("%s: hw %p iterfunc %p arg %p\n", __func__, hw, iterfunc, arg)); lhw = HW_TO_LHW(hw); IMPROVE("lchanctx should be its own list somewhere"); LKPI_80211_LHW_LVIF_LOCK(lhw); TAILQ_FOREACH(lvif, &lhw->lvif_head, lvif_entry) { vif = LVIF_TO_VIF(lvif); if (vif->chanctx_conf == NULL) continue; lchanctx = CHANCTX_CONF_TO_LCHANCTX(vif->chanctx_conf); if (!lchanctx->added_to_drv) continue; iterfunc(hw, &lchanctx->conf, arg); } LKPI_80211_LHW_LVIF_UNLOCK(lhw); } void linuxkpi_ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw, void (*iterfunc)(void *, struct ieee80211_sta *), void *arg) { struct lkpi_hw *lhw; struct lkpi_vif *lvif; struct lkpi_sta *lsta; struct ieee80211_sta *sta; KASSERT(hw != NULL && iterfunc != NULL, ("%s: hw %p iterfunc %p arg %p\n", __func__, hw, iterfunc, arg)); lhw = HW_TO_LHW(hw); LKPI_80211_LHW_LVIF_LOCK(lhw); TAILQ_FOREACH(lvif, &lhw->lvif_head, lvif_entry) { LKPI_80211_LVIF_LOCK(lvif); TAILQ_FOREACH(lsta, &lvif->lsta_head, lsta_entry) { if (!lsta->added_to_drv) continue; sta = LSTA_TO_STA(lsta); iterfunc(arg, sta); } LKPI_80211_LVIF_UNLOCK(lvif); } LKPI_80211_LHW_LVIF_UNLOCK(lhw); } struct linuxkpi_ieee80211_regdomain * lkpi_get_linuxkpi_ieee80211_regdomain(size_t n) { struct linuxkpi_ieee80211_regdomain *regd; regd = kzalloc(sizeof(*regd) + n * sizeof(struct ieee80211_reg_rule), GFP_KERNEL); return (regd); } int linuxkpi_regulatory_set_wiphy_regd_sync(struct wiphy *wiphy, struct linuxkpi_ieee80211_regdomain *regd) { struct lkpi_hw *lhw; struct ieee80211com *ic; struct ieee80211_regdomain *rd; lhw = wiphy_priv(wiphy); ic = lhw->ic; rd = &ic->ic_regdomain; if (rd->isocc[0] == '\0') { rd->isocc[0] = regd->alpha2[0]; rd->isocc[1] = regd->alpha2[1]; } TODO(); /* XXX-BZ finish the rest. */ return (0); } void linuxkpi_ieee80211_scan_completed(struct ieee80211_hw *hw, struct cfg80211_scan_info *info) { struct lkpi_hw *lhw; struct ieee80211com *ic; struct ieee80211_scan_state *ss; lhw = wiphy_priv(hw->wiphy); ic = lhw->ic; ss = ic->ic_scan; ieee80211_scan_done(ss->ss_vap); LKPI_80211_LHW_SCAN_LOCK(lhw); free(lhw->hw_req, M_LKPI80211); lhw->hw_req = NULL; lhw->scan_flags &= ~LKPI_LHW_SCAN_RUNNING; wakeup(lhw); LKPI_80211_LHW_SCAN_UNLOCK(lhw); return; } /* For %list see comment towards the end of the function. 
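 * (mt76 would currently be the only consumer of the list-RX path; we
 * simply hand frames to net80211 directly instead.)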
*/ void linuxkpi_ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_sta *sta, struct napi_struct *napi __unused, struct list_head *list __unused) { struct epoch_tracker et; struct lkpi_hw *lhw; struct ieee80211com *ic; struct mbuf *m; struct skb_shared_info *shinfo; struct ieee80211_rx_status *rx_status; struct ieee80211_rx_stats rx_stats; struct ieee80211_node *ni; struct ieee80211vap *vap; struct ieee80211_hdr *hdr; struct lkpi_sta *lsta; int i, offset, ok; int8_t rssi; bool is_beacon; if (skb->len < 2) { /* Need 80211 stats here. */ IMPROVE(); goto err; } /* * For now do the data copy; we can later improve things. Might even * have an mbuf backing the skb data then? */ m = m_get2(skb->len, M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) goto err; m_copyback(m, 0, skb->tail - skb->data, skb->data); shinfo = skb_shinfo(skb); offset = m->m_len; for (i = 0; i < shinfo->nr_frags; i++) { m_copyback(m, offset, shinfo->frags[i].size, (uint8_t *)linux_page_address(shinfo->frags[i].page) + shinfo->frags[i].offset); offset += shinfo->frags[i].size; } rx_status = IEEE80211_SKB_RXCB(skb); hdr = (void *)skb->data; is_beacon = ieee80211_is_beacon(hdr->frame_control); #ifdef LINUXKPI_DEBUG_80211 if (is_beacon && (linuxkpi_debug_80211 & D80211_TRACE_RX_BEACONS) == 0) goto no_trace_beacons; if (linuxkpi_debug_80211 & D80211_TRACE_RX) printf("TRACE-RX: %s: skb %p a/l/d/t-len (%u/%u/%u/%u) " "h %p d %p t %p e %p sh %p (%u) m %p plen %u len %u%s\n", __func__, skb, skb->_alloc_len, skb->len, skb->data_len, skb->truesize, skb->head, skb->data, skb->tail, skb->end, shinfo, shinfo->nr_frags, m, m->m_pkthdr.len, m->m_len, is_beacon ? " beacon" : ""); if (linuxkpi_debug_80211 & D80211_TRACE_RX_DUMP) hexdump(mtod(m, const void *), m->m_len, "RX (raw) ", 0); /* Implement a dump_rxcb() !!! */ if (linuxkpi_debug_80211 & D80211_TRACE_RX) printf("TRACE %s: RXCB: %ju %ju %u, %#0x, %u, %#0x, %#0x, " "%u band %u, %u { %d %d %d %d }, %d, %#x %#x %#x %#x %u %u %u\n", __func__, (uintmax_t)rx_status->boottime_ns, (uintmax_t)rx_status->mactime, rx_status->device_timestamp, rx_status->flag, rx_status->freq, rx_status->bw, rx_status->encoding, rx_status->ampdu_reference, rx_status->band, rx_status->chains, rx_status->chain_signal[0], rx_status->chain_signal[1], rx_status->chain_signal[2], rx_status->chain_signal[3], rx_status->signal, rx_status->enc_flags, rx_status->he_dcm, rx_status->he_gi, rx_status->he_ru, rx_status->zero_length_psdu_type, rx_status->nss, rx_status->rate_idx); no_trace_beacons: #endif memset(&rx_stats, 0, sizeof(rx_stats)); rx_stats.r_flags = IEEE80211_R_NF | IEEE80211_R_RSSI; /* XXX-BZ correct hardcoded rssi and noise floor, how? survey? */ rx_stats.c_nf = -96; if (ieee80211_hw_check(hw, SIGNAL_DBM) && !(rx_status->flag & RX_FLAG_NO_SIGNAL_VAL)) rssi = rx_status->signal; else rssi = rx_stats.c_nf; /* * net80211 signal strength data are in .5 dBm units relative to * the current noise floor (see comment in ieee80211_node.h). */ rssi -= rx_stats.c_nf; rx_stats.c_rssi = rssi * 2; rx_stats.r_flags |= IEEE80211_R_BAND; rx_stats.c_band = lkpi_nl80211_band_to_net80211_band(rx_status->band); rx_stats.r_flags |= IEEE80211_R_FREQ | IEEE80211_R_IEEE; rx_stats.c_freq = rx_status->freq; rx_stats.c_ieee = ieee80211_mhz2ieee(rx_stats.c_freq, rx_stats.c_band); /* XXX (*sta_statistics)() to get to some of that? */ /* XXX-BZ dump the FreeBSD version of rx_stats as well! 
*/ lhw = HW_TO_LHW(hw); ic = lhw->ic; ok = ieee80211_add_rx_params(m, &rx_stats); if (ok == 0) { m_freem(m); counter_u64_add(ic->ic_ierrors, 1); goto err; } if (sta != NULL) { lsta = STA_TO_LSTA(sta); ni = ieee80211_ref_node(lsta->ni); } else { struct ieee80211_frame_min *wh; wh = mtod(m, struct ieee80211_frame_min *); ni = ieee80211_find_rxnode(ic, wh); if (ni != NULL) lsta = ni->ni_drv_data; } if (ni != NULL) vap = ni->ni_vap; else /* * XXX-BZ can we improve this by looking at the frame hdr * or other meta-data passed up? */ vap = TAILQ_FIRST(&ic->ic_vaps); #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE_RX) printf("TRACE %s: sta %p lsta %p state %d ni %p vap %p%s\n", __func__, sta, lsta, (lsta != NULL) ? lsta->state : -1, ni, vap, is_beacon ? " beacon" : ""); #endif if (ni != NULL && vap != NULL && is_beacon && rx_status->device_timestamp > 0 && m->m_pkthdr.len >= sizeof(struct ieee80211_frame)) { struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct ieee80211_frame *wh; wh = mtod(m, struct ieee80211_frame *); if (!IEEE80211_ADDR_EQ(wh->i_addr2, ni->ni_bssid)) goto skip_device_ts; lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); IMPROVE("TIMING_BEACON_ONLY?"); /* mac80211 specific (not net80211) so keep it here. */ vif->bss_conf.sync_device_ts = rx_status->device_timestamp; /* * net80211 should take care of the other information (sync_tsf, * sync_dtim_count) as otherwise we need to parse the beacon. */ } skip_device_ts: if (vap != NULL && vap->iv_state > IEEE80211_S_INIT && ieee80211_radiotap_active_vap(vap)) { struct lkpi_radiotap_rx_hdr *rtap; rtap = &lhw->rtap_rx; rtap->wr_tsft = rx_status->device_timestamp; rtap->wr_flags = 0; if (rx_status->enc_flags & RX_ENC_FLAG_SHORTPRE) rtap->wr_flags |= IEEE80211_RADIOTAP_F_SHORTPRE; if (rx_status->enc_flags & RX_ENC_FLAG_SHORT_GI) rtap->wr_flags |= IEEE80211_RADIOTAP_F_SHORTGI; #if 0 /* .. or it does not given we strip it below. */ if (ieee80211_hw_check(hw, RX_INCLUDES_FCS)) rtap->wr_flags |= IEEE80211_RADIOTAP_F_FCS; #endif if (rx_status->flag & RX_FLAG_FAILED_FCS_CRC) rtap->wr_flags |= IEEE80211_RADIOTAP_F_BADFCS; rtap->wr_rate = 0; IMPROVE(); /* XXX TODO status->encoding / rate_index / bw */ rtap->wr_chan_freq = htole16(rx_stats.c_freq); if (ic->ic_curchan->ic_ieee == rx_stats.c_ieee) rtap->wr_chan_flags = htole16(ic->ic_curchan->ic_flags); rtap->wr_dbm_antsignal = rssi; rtap->wr_dbm_antnoise = rx_stats.c_nf; } if (ieee80211_hw_check(hw, RX_INCLUDES_FCS)) m_adj(m, -IEEE80211_CRC_LEN); #if 0 if (list != NULL) { /* * Normally this would be queued up and delivered by * netif_receive_skb_list(), napi_gro_receive(), or the like. * See mt76::mac80211.c as only current possible consumer. */ IMPROVE("we simply pass the packet to net80211 to deal with."); } #endif NET_EPOCH_ENTER(et); if (ni != NULL) { ok = ieee80211_input_mimo(ni, m); ieee80211_free_node(ni); if (ok < 0) m_freem(m); } else { ok = ieee80211_input_mimo_all(ic, m); /* mbuf got consumed. 
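		 * by ieee80211_input_mimo_all() regardless of the return
		 * value; do not free it here.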
*/ } NET_EPOCH_EXIT(et); #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE_RX) printf("TRACE %s: handled frame type %#0x\n", __func__, ok); #endif IMPROVE(); err: /* The skb is ours so we can free it :-) */ kfree_skb(skb); } uint8_t linuxkpi_ieee80211_get_tid(struct ieee80211_hdr *hdr, bool nonqos_ok) { const struct ieee80211_frame *wh; uint8_t tid; /* Linux seems to assume this is a QOS-Data-Frame */ KASSERT(nonqos_ok || ieee80211_is_data_qos(hdr->frame_control), ("%s: hdr %p fc %#06x not qos_data\n", __func__, hdr, hdr->frame_control)); wh = (const struct ieee80211_frame *)hdr; tid = ieee80211_gettid(wh); KASSERT(nonqos_ok || tid == (tid & IEEE80211_QOS_TID), ("%s: tid %u " "not expected (%u?)\n", __func__, tid, IEEE80211_NONQOS_TID)); return (tid); } struct wiphy * linuxkpi_wiphy_new(const struct cfg80211_ops *ops, size_t priv_len) { struct lkpi_wiphy *lwiphy; lwiphy = kzalloc(sizeof(*lwiphy) + priv_len, GFP_KERNEL); if (lwiphy == NULL) return (NULL); lwiphy->ops = ops; /* XXX TODO */ return (LWIPHY_TO_WIPHY(lwiphy)); } void linuxkpi_wiphy_free(struct wiphy *wiphy) { struct lkpi_wiphy *lwiphy; if (wiphy == NULL) return; lwiphy = WIPHY_TO_LWIPHY(wiphy); kfree(lwiphy); } uint32_t linuxkpi_ieee80211_channel_to_frequency(uint32_t channel, enum nl80211_band band) { switch (band) { case NL80211_BAND_2GHZ: return (ieee80211_ieee2mhz(channel, IEEE80211_CHAN_2GHZ)); break; case NL80211_BAND_5GHZ: return (ieee80211_ieee2mhz(channel, IEEE80211_CHAN_5GHZ)); break; default: /* XXX abort, retry, error, panic? */ break; } return (0); } uint32_t linuxkpi_ieee80211_frequency_to_channel(uint32_t freq, uint32_t flags __unused) { return (ieee80211_mhz2ieee(freq, 0)); } static struct lkpi_sta * lkpi_find_lsta_by_ni(struct lkpi_vif *lvif, struct ieee80211_node *ni) { struct lkpi_sta *lsta, *temp; LKPI_80211_LVIF_LOCK(lvif); TAILQ_FOREACH_SAFE(lsta, &lvif->lsta_head, lsta_entry, temp) { if (lsta->ni == ni) { LKPI_80211_LVIF_UNLOCK(lvif); return (lsta); } } LKPI_80211_LVIF_UNLOCK(lvif); return (NULL); } struct ieee80211_sta * linuxkpi_ieee80211_find_sta(struct ieee80211_vif *vif, const u8 *peer) { struct lkpi_vif *lvif; struct lkpi_sta *lsta, *temp; struct ieee80211_sta *sta; lvif = VIF_TO_LVIF(vif); LKPI_80211_LVIF_LOCK(lvif); TAILQ_FOREACH_SAFE(lsta, &lvif->lsta_head, lsta_entry, temp) { sta = LSTA_TO_STA(lsta); if (IEEE80211_ADDR_EQ(sta->addr, peer)) { LKPI_80211_LVIF_UNLOCK(lvif); return (sta); } } LKPI_80211_LVIF_UNLOCK(lvif); return (NULL); } struct ieee80211_sta * linuxkpi_ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, const uint8_t *addr, const uint8_t *ourvifaddr) { struct lkpi_hw *lhw; struct lkpi_vif *lvif; struct lkpi_sta *lsta; struct ieee80211_vif *vif; struct ieee80211_sta *sta; lhw = wiphy_priv(hw->wiphy); sta = NULL; LKPI_80211_LHW_LVIF_LOCK(lhw); TAILQ_FOREACH(lvif, &lhw->lvif_head, lvif_entry) { /* XXX-BZ check our address from the vif. 
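		 * If ourvifaddr is given, only VIFs whose address matches
		 * are searched for the station.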
*/ vif = LVIF_TO_VIF(lvif); if (ourvifaddr != NULL && !IEEE80211_ADDR_EQ(vif->addr, ourvifaddr)) continue; sta = linuxkpi_ieee80211_find_sta(vif, addr); if (sta != NULL) break; } LKPI_80211_LHW_LVIF_UNLOCK(lhw); if (sta != NULL) { lsta = STA_TO_LSTA(sta); if (!lsta->added_to_drv) return (NULL); } return (sta); } struct sk_buff * linuxkpi_ieee80211_tx_dequeue(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { struct lkpi_txq *ltxq; struct lkpi_vif *lvif; struct sk_buff *skb; skb = NULL; ltxq = TXQ_TO_LTXQ(txq); ltxq->seen_dequeue = true; if (ltxq->stopped) goto stopped; lvif = VIF_TO_LVIF(ltxq->txq.vif); if (lvif->hw_queue_stopped[ltxq->txq.ac]) { ltxq->stopped = true; goto stopped; } skb = skb_dequeue(&ltxq->skbq); stopped: return (skb); } void linuxkpi_ieee80211_txq_get_depth(struct ieee80211_txq *txq, unsigned long *frame_cnt, unsigned long *byte_cnt) { struct lkpi_txq *ltxq; struct sk_buff *skb; unsigned long fc, bc; ltxq = TXQ_TO_LTXQ(txq); fc = bc = 0; skb_queue_walk(&ltxq->skbq, skb) { fc++; bc += skb->len; } if (frame_cnt) *frame_cnt = fc; if (byte_cnt) *byte_cnt = bc; /* Validate that this is doing the correct thing. */ /* Should we keep track on en/dequeue? */ IMPROVE(); } /* * We are called from ieee80211_free_txskb() or ieee80211_tx_status(). * The latter tries to derive the success status from the info flags * passed back from the driver. rawx_mit() saves the ni on the m and the * m on the skb for us to be able to give feedback to net80211. */ static void _lkpi_ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb, int status) { struct ieee80211_node *ni; struct mbuf *m; m = skb->m; skb->m = NULL; if (m != NULL) { ni = m->m_pkthdr.PH_loc.ptr; /* Status: 0 is ok, != 0 is error. */ ieee80211_tx_complete(ni, m, status); /* ni & mbuf were consumed. */ } } void linuxkpi_ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb, int status) { _lkpi_ieee80211_free_txskb(hw, skb, status); kfree_skb(skb); } void linuxkpi_ieee80211_tx_status_ext(struct ieee80211_hw *hw, struct ieee80211_tx_status *txstat) { struct sk_buff *skb; struct ieee80211_tx_info *info; struct ieee80211_ratectl_tx_status txs; struct ieee80211_node *ni; int status; skb = txstat->skb; if (skb->m != NULL) { struct mbuf *m; m = skb->m; ni = m->m_pkthdr.PH_loc.ptr; memset(&txs, 0, sizeof(txs)); } else { ni = NULL; } info = txstat->info; if (info->flags & IEEE80211_TX_STAT_ACK) { status = 0; /* No error. */ txs.status = IEEE80211_RATECTL_TX_SUCCESS; } else { status = 1; txs.status = IEEE80211_RATECTL_TX_FAIL_UNSPECIFIED; } if (ni != NULL) { int ridx __unused; #ifdef LINUXKPI_DEBUG_80211 int old_rate; old_rate = ni->ni_vap->iv_bss->ni_txrate; #endif txs.pktlen = skb->len; txs.flags |= IEEE80211_RATECTL_STATUS_PKTLEN; if (info->status.rates[0].count > 1) { txs.long_retries = info->status.rates[0].count - 1; /* 1 + retries in drivers. */ txs.flags |= IEEE80211_RATECTL_STATUS_LONG_RETRY; } #if 0 /* Unused in net80211 currently. */ /* XXX-BZ convert check .flags for MCS/VHT/.. */ txs.final_rate = info->status.rates[0].idx; txs.flags |= IEEE80211_RATECTL_STATUS_FINAL_RATE; #endif if (info->status.flags & IEEE80211_TX_STATUS_ACK_SIGNAL_VALID) { txs.rssi = info->status.ack_signal; /* XXX-BZ CONVERT?
*/ txs.flags |= IEEE80211_RATECTL_STATUS_RSSI; } IMPROVE("only update of rate matches but that requires us to get a proper rate"); ieee80211_ratectl_tx_complete(ni, &txs); ridx = ieee80211_ratectl_rate(ni->ni_vap->iv_bss, NULL, 0); #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE_TX) { printf("TX-RATE: %s: old %d new %d ridx %d, " "long_retries %d\n", __func__, old_rate, ni->ni_vap->iv_bss->ni_txrate, ridx, txs.long_retries); } #endif } #ifdef LINUXKPI_DEBUG_80211 if (linuxkpi_debug_80211 & D80211_TRACE_TX) printf("TX-STATUS: %s: hw %p skb %p status %d : flags %#x " "band %u hw_queue %u tx_time_est %d : " "rates [ %u %u %#x, %u %u %#x, %u %u %#x, %u %u %#x ] " "ack_signal %u ampdu_ack_len %u ampdu_len %u antenna %u " "tx_time %u flags %#x " "status_driver_data [ %p %p ]\n", __func__, hw, skb, status, info->flags, info->band, info->hw_queue, info->tx_time_est, info->status.rates[0].idx, info->status.rates[0].count, info->status.rates[0].flags, info->status.rates[1].idx, info->status.rates[1].count, info->status.rates[1].flags, info->status.rates[2].idx, info->status.rates[2].count, info->status.rates[2].flags, info->status.rates[3].idx, info->status.rates[3].count, info->status.rates[3].flags, info->status.ack_signal, info->status.ampdu_ack_len, info->status.ampdu_len, info->status.antenna, info->status.tx_time, info->status.flags, info->status.status_driver_data[0], info->status.status_driver_data[1]); #endif if (txstat->free_list) { _lkpi_ieee80211_free_txskb(hw, skb, status); list_add_tail(&skb->list, txstat->free_list); } else { linuxkpi_ieee80211_free_txskb(hw, skb, status); } } void linuxkpi_ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) { struct ieee80211_tx_status status; memset(&status, 0, sizeof(status)); status.info = IEEE80211_SKB_CB(skb); status.skb = skb; /* sta, n_rates, rates, free_list? */ ieee80211_tx_status_ext(hw, &status); } /* * This is an internal bandaid for the moment for the way we glue * skbs and mbufs together for TX. Once we have skbs backed by * mbufs this should go away. * This is a public function but kept on the private KPI (lkpi_) * and is not exposed by a header file. */ static void lkpi_ieee80211_free_skb_mbuf(void *p) { struct ieee80211_node *ni; struct mbuf *m; if (p == NULL) return; m = (struct mbuf *)p; M_ASSERTPKTHDR(m); ni = m->m_pkthdr.PH_loc.ptr; m->m_pkthdr.PH_loc.ptr = NULL; if (ni != NULL) ieee80211_free_node(ni); m_freem(m); } void linuxkpi_ieee80211_queue_delayed_work(struct ieee80211_hw *hw, struct delayed_work *w, int delay) { struct lkpi_hw *lhw; /* Need to make sure hw is in a stable (non-suspended) state. */ IMPROVE(); lhw = HW_TO_LHW(hw); queue_delayed_work(lhw->workq, w, delay); } void linuxkpi_ieee80211_queue_work(struct ieee80211_hw *hw, struct work_struct *w) { struct lkpi_hw *lhw; /* Need to make sure hw is in a stable (non-suspended) state. 
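 * Hypothetical driver-side usage (names invented for illustration;
 * mac80211 drivers reach this via ieee80211_queue_work()):
 *
 *	INIT_WORK(&sc->restart_work, mydrv_restart_worker);
 *	ieee80211_queue_work(sc->hw, &sc->restart_work);
 *
 * The queued work ends up on lhw->workq below.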
*/ IMPROVE(); lhw = HW_TO_LHW(hw); queue_work(lhw->workq, w); } struct sk_buff * linuxkpi_ieee80211_probereq_get(struct ieee80211_hw *hw, uint8_t *addr, uint8_t *ssid, size_t ssid_len, size_t tailroom) { struct sk_buff *skb; struct ieee80211_frame *wh; uint8_t *p; size_t len; len = sizeof(*wh); len += 2 + ssid_len; skb = dev_alloc_skb(hw->extra_tx_headroom + len + tailroom); if (skb == NULL) return (NULL); skb_reserve(skb, hw->extra_tx_headroom); wh = skb_put_zero(skb, sizeof(*wh)); wh->i_fc[0] = IEEE80211_FC0_VERSION_0; wh->i_fc[0] |= IEEE80211_FC0_SUBTYPE_PROBE_REQ | IEEE80211_FC0_TYPE_MGT; IEEE80211_ADDR_COPY(wh->i_addr1, ieee80211broadcastaddr); IEEE80211_ADDR_COPY(wh->i_addr2, addr); IEEE80211_ADDR_COPY(wh->i_addr3, ieee80211broadcastaddr); p = skb_put(skb, 2 + ssid_len); *p++ = IEEE80211_ELEMID_SSID; *p++ = ssid_len; if (ssid_len > 0) memcpy(p, ssid, ssid_len); return (skb); } struct sk_buff * linuxkpi_ieee80211_pspoll_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct lkpi_vif *lvif; struct ieee80211vap *vap; struct sk_buff *skb; struct ieee80211_frame_pspoll *psp; uint16_t v; skb = dev_alloc_skb(hw->extra_tx_headroom + sizeof(*psp)); if (skb == NULL) return (NULL); skb_reserve(skb, hw->extra_tx_headroom); lvif = VIF_TO_LVIF(vif); vap = LVIF_TO_VAP(lvif); psp = skb_put_zero(skb, sizeof(*psp)); psp->i_fc[0] = IEEE80211_FC0_VERSION_0; psp->i_fc[0] |= IEEE80211_FC0_SUBTYPE_PS_POLL | IEEE80211_FC0_TYPE_CTL; v = htole16(vif->cfg.aid | 1<<15 | 1<<14); memcpy(&psp->i_aid, &v, sizeof(v)); IEEE80211_ADDR_COPY(psp->i_bssid, vap->iv_bss->ni_macaddr); IEEE80211_ADDR_COPY(psp->i_ta, vif->addr); return (skb); } struct sk_buff * linuxkpi_ieee80211_nullfunc_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, int linkid, bool qos) { struct lkpi_vif *lvif; struct ieee80211vap *vap; struct sk_buff *skb; struct ieee80211_frame *nullf; IMPROVE("linkid"); skb = dev_alloc_skb(hw->extra_tx_headroom + sizeof(*nullf)); if (skb == NULL) return (NULL); skb_reserve(skb, hw->extra_tx_headroom); lvif = VIF_TO_LVIF(vif); vap = LVIF_TO_VAP(lvif); nullf = skb_put_zero(skb, sizeof(*nullf)); nullf->i_fc[0] = IEEE80211_FC0_VERSION_0; nullf->i_fc[0] |= IEEE80211_FC0_SUBTYPE_NODATA | IEEE80211_FC0_TYPE_DATA; nullf->i_fc[1] = IEEE80211_FC1_DIR_TODS; IEEE80211_ADDR_COPY(nullf->i_addr1, vap->iv_bss->ni_bssid); IEEE80211_ADDR_COPY(nullf->i_addr2, vif->addr); IEEE80211_ADDR_COPY(nullf->i_addr3, vap->iv_bss->ni_macaddr); return (skb); } struct wireless_dev * linuxkpi_ieee80211_vif_to_wdev(struct ieee80211_vif *vif) { struct lkpi_vif *lvif; lvif = VIF_TO_LVIF(vif); return (&lvif->wdev); } void linuxkpi_ieee80211_connection_loss(struct ieee80211_vif *vif) { struct lkpi_vif *lvif; struct ieee80211vap *vap; enum ieee80211_state nstate; int arg; lvif = VIF_TO_LVIF(vif); vap = LVIF_TO_VAP(lvif); /* * Go to init; otherwise we need to elaborately check state and * handle accordingly, e.g., if in RUN we could call iv_bmiss. * Let the state machine handle all necessary changes. */ nstate = IEEE80211_S_INIT; arg = 0; /* Not a valid reason.
*/ ic_printf(vap->iv_ic, "%s: vif %p vap %p state %s\n", __func__, vif, vap, ieee80211_state_name[vap->iv_state]); ieee80211_new_state(vap, nstate, arg); } void linuxkpi_ieee80211_beacon_loss(struct ieee80211_vif *vif) { struct lkpi_vif *lvif; struct ieee80211vap *vap; lvif = VIF_TO_LVIF(vif); vap = LVIF_TO_VAP(lvif); ic_printf(vap->iv_ic, "%s: vif %p vap %p state %s\n", __func__, vif, vap, ieee80211_state_name[vap->iv_state]); ieee80211_beacon_miss(vap->iv_ic); } /* -------------------------------------------------------------------------- */ void linuxkpi_ieee80211_stop_queue(struct ieee80211_hw *hw, int qnum) { struct lkpi_hw *lhw; struct lkpi_vif *lvif; struct ieee80211_vif *vif; int ac_count, ac; KASSERT(qnum < hw->queues, ("%s: qnum %d >= hw->queues %d, hw %p\n", __func__, qnum, hw->queues, hw)); lhw = wiphy_priv(hw->wiphy); /* See lkpi_ic_vap_create(). */ if (hw->queues >= IEEE80211_NUM_ACS) ac_count = IEEE80211_NUM_ACS; else ac_count = 1; LKPI_80211_LHW_LVIF_LOCK(lhw); TAILQ_FOREACH(lvif, &lhw->lvif_head, lvif_entry) { vif = LVIF_TO_VIF(lvif); for (ac = 0; ac < ac_count; ac++) { IMPROVE_TXQ("LOCKING"); if (qnum == vif->hw_queue[ac]) { #ifdef LINUXKPI_DEBUG_80211 /* * For now log this to better understand * how this is supposed to work. */ if (lvif->hw_queue_stopped[ac] && (linuxkpi_debug_80211 & D80211_IMPROVE_TXQ) != 0) ic_printf(lhw->ic, "%s:%d: lhw %p hw %p " "lvif %p vif %p ac %d qnum %d already " "stopped\n", __func__, __LINE__, lhw, hw, lvif, vif, ac, qnum); #endif lvif->hw_queue_stopped[ac] = true; } } } LKPI_80211_LHW_LVIF_UNLOCK(lhw); } void linuxkpi_ieee80211_stop_queues(struct ieee80211_hw *hw) { int i; IMPROVE_TXQ("Locking; do we need further info?"); for (i = 0; i < hw->queues; i++) linuxkpi_ieee80211_stop_queue(hw, i); } static void lkpi_ieee80211_wake_queues(struct ieee80211_hw *hw, int hwq) { struct lkpi_hw *lhw; struct lkpi_vif *lvif; struct lkpi_sta *lsta; int ac_count, ac, tid; /* See lkpi_ic_vap_create(). */ if (hw->queues >= IEEE80211_NUM_ACS) ac_count = IEEE80211_NUM_ACS; else ac_count = 1; lhw = wiphy_priv(hw->wiphy); IMPROVE_TXQ("Locking"); LKPI_80211_LHW_LVIF_LOCK(lhw); TAILQ_FOREACH(lvif, &lhw->lvif_head, lvif_entry) { struct ieee80211_vif *vif; vif = LVIF_TO_VIF(lvif); for (ac = 0; ac < ac_count; ac++) { if (hwq == vif->hw_queue[ac]) { /* XXX-BZ what about software scan? */ #ifdef LINUXKPI_DEBUG_80211 /* * For now log this to better understand * how this is supposed to work. */ if (!lvif->hw_queue_stopped[ac] && (linuxkpi_debug_80211 & D80211_IMPROVE_TXQ) != 0) ic_printf(lhw->ic, "%s:%d: lhw %p hw %p " "lvif %p vif %p ac %d hw_q not stopped\n", __func__, __LINE__, lhw, hw, lvif, vif, ac); #endif lvif->hw_queue_stopped[ac] = false; LKPI_80211_LVIF_LOCK(lvif); TAILQ_FOREACH(lsta, &lvif->lsta_head, lsta_entry) { struct ieee80211_sta *sta; sta = LSTA_TO_STA(lsta); for (tid = 0; tid < nitems(sta->txq); tid++) { struct lkpi_txq *ltxq; if (sta->txq[tid] == NULL) continue; if (sta->txq[tid]->ac != ac) continue; ltxq = TXQ_TO_LTXQ(sta->txq[tid]); if (!ltxq->stopped) continue; ltxq->stopped = false; /* XXX-BZ see when this explodes with all the locking. taskq? 
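 * (Context for the note: the LVIF lock is held across this driver call,
 * so a wake_tx_queue method that sleeps would be the failure mode;
 * deferring to a taskq would avoid calling into the driver under the
 * lock.)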
*/ lkpi_80211_mo_wake_tx_queue(hw, sta->txq[tid]); } } LKPI_80211_LVIF_UNLOCK(lvif); } } } LKPI_80211_LHW_LVIF_UNLOCK(lhw); } void linuxkpi_ieee80211_wake_queues(struct ieee80211_hw *hw) { int i; IMPROVE_TXQ("Is this all/enough here?"); for (i = 0; i < hw->queues; i++) lkpi_ieee80211_wake_queues(hw, i); } void linuxkpi_ieee80211_wake_queue(struct ieee80211_hw *hw, int qnum) { KASSERT(qnum < hw->queues, ("%s: qnum %d >= hw->queues %d, hw %p\n", __func__, qnum, hw->queues, hw)); lkpi_ieee80211_wake_queues(hw, qnum); } /* This is just hardware queues. */ void linuxkpi_ieee80211_txq_schedule_start(struct ieee80211_hw *hw, uint8_t ac) { struct lkpi_hw *lhw; lhw = HW_TO_LHW(hw); IMPROVE_TXQ("Are there reasons why we wouldn't schedule?"); IMPROVE_TXQ("LOCKING"); if (++lhw->txq_generation[ac] == 0) lhw->txq_generation[ac]++; } struct ieee80211_txq * linuxkpi_ieee80211_next_txq(struct ieee80211_hw *hw, uint8_t ac) { struct lkpi_hw *lhw; struct ieee80211_txq *txq; struct lkpi_txq *ltxq; lhw = HW_TO_LHW(hw); txq = NULL; IMPROVE_TXQ("LOCKING"); /* Check that we are scheduled. */ if (lhw->txq_generation[ac] == 0) goto out; ltxq = TAILQ_FIRST(&lhw->scheduled_txqs[ac]); if (ltxq == NULL) goto out; if (ltxq->txq_generation == lhw->txq_generation[ac]) goto out; ltxq->txq_generation = lhw->txq_generation[ac]; TAILQ_REMOVE(&lhw->scheduled_txqs[ac], ltxq, txq_entry); txq = &ltxq->txq; TAILQ_ELEM_INIT(ltxq, txq_entry); out: return (txq); } void linuxkpi_ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq, bool withoutpkts) { struct lkpi_hw *lhw; struct lkpi_txq *ltxq; ltxq = TXQ_TO_LTXQ(txq); IMPROVE_TXQ("LOCKING"); /* Only schedule if work to do or asked to anyway. */ if (!withoutpkts && skb_queue_empty(&ltxq->skbq)) goto out; /* Make sure we do not double-schedule. */ if (ltxq->txq_entry.tqe_next != NULL) goto out; lhw = HW_TO_LHW(hw); TAILQ_INSERT_TAIL(&lhw->scheduled_txqs[txq->ac], ltxq, txq_entry); out: return; } /* -------------------------------------------------------------------------- */ struct lkpi_cfg80211_bss { u_int refcnt; struct cfg80211_bss bss; }; struct lkpi_cfg80211_get_bss_iter_lookup { struct wiphy *wiphy; struct linuxkpi_ieee80211_channel *chan; const uint8_t *bssid; const uint8_t *ssid; size_t ssid_len; enum ieee80211_bss_type bss_type; enum ieee80211_privacy privacy; /* * Something to store a copy of the result as the net80211 scan cache * is not refcounted so a scan entry might go away any time. */ bool match; struct cfg80211_bss *bss; }; static void lkpi_cfg80211_get_bss_iterf(void *arg, const struct ieee80211_scan_entry *se) { struct lkpi_cfg80211_get_bss_iter_lookup *lookup; size_t ielen; lookup = arg; /* Do not try to find another match. */ if (lookup->match) return; /* Nowhere to store the result.
*/ if (lookup->bss == NULL) return; if (lookup->privacy != IEEE80211_PRIVACY_ANY) { /* if (se->se_capinfo & IEEE80211_CAPINFO_PRIVACY) */ /* We have no idea what to compare to as the drivers only request ANY */ return; } if (lookup->bss_type != IEEE80211_BSS_TYPE_ANY) { /* if (se->se_capinfo & (IEEE80211_CAPINFO_IBSS|IEEE80211_CAPINFO_ESS)) */ /* We have no idea what to compare to as the drivers only request ANY */ return; } if (lookup->chan != NULL) { struct linuxkpi_ieee80211_channel *chan; chan = linuxkpi_ieee80211_get_channel(lookup->wiphy, se->se_chan->ic_freq); if (chan == NULL || chan != lookup->chan) return; } if (lookup->bssid && !IEEE80211_ADDR_EQ(lookup->bssid, se->se_bssid)) return; if (lookup->ssid) { if (lookup->ssid_len != se->se_ssid[1] || se->se_ssid[1] == 0) return; if (memcmp(lookup->ssid, se->se_ssid+2, lookup->ssid_len) != 0) return; } ielen = se->se_ies.len; lookup->bss->ies = malloc(sizeof(*lookup->bss->ies) + ielen, M_LKPI80211, M_NOWAIT | M_ZERO); if (lookup->bss->ies == NULL) return; lookup->bss->ies->data = (uint8_t *)lookup->bss->ies + sizeof(*lookup->bss->ies); lookup->bss->ies->len = ielen; if (ielen) memcpy(lookup->bss->ies->data, se->se_ies.data, ielen); lookup->match = true; } struct cfg80211_bss * linuxkpi_cfg80211_get_bss(struct wiphy *wiphy, struct linuxkpi_ieee80211_channel *chan, const uint8_t *bssid, const uint8_t *ssid, size_t ssid_len, enum ieee80211_bss_type bss_type, enum ieee80211_privacy privacy) { struct lkpi_cfg80211_bss *lbss; struct lkpi_cfg80211_get_bss_iter_lookup lookup; struct lkpi_hw *lhw; struct ieee80211vap *vap; lhw = wiphy_priv(wiphy); /* Let's hope we can alloc. */ lbss = malloc(sizeof(*lbss), M_LKPI80211, M_NOWAIT | M_ZERO); if (lbss == NULL) { ic_printf(lhw->ic, "%s: alloc failed.\n", __func__); return (NULL); } lookup.wiphy = wiphy; lookup.chan = chan; lookup.bssid = bssid; lookup.ssid = ssid; lookup.ssid_len = ssid_len; lookup.bss_type = bss_type; lookup.privacy = privacy; lookup.match = false; lookup.bss = &lbss->bss; IMPROVE("Iterate over all VAPs comparing perm_addr and addresses?"); vap = TAILQ_FIRST(&lhw->ic->ic_vaps); ieee80211_scan_iterate(vap, lkpi_cfg80211_get_bss_iterf, &lookup); if (!lookup.match) { free(lbss, M_LKPI80211); return (NULL); } refcount_init(&lbss->refcnt, 1); return (&lbss->bss); } void linuxkpi_cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *bss) { struct lkpi_cfg80211_bss *lbss; lbss = container_of(bss, struct lkpi_cfg80211_bss, bss); /* Free everything again on refcount ... */ if (refcount_release(&lbss->refcnt)) { free(lbss->bss.ies, M_LKPI80211); free(lbss, M_LKPI80211); } } void linuxkpi_cfg80211_bss_flush(struct wiphy *wiphy) { struct lkpi_hw *lhw; struct ieee80211com *ic; struct ieee80211vap *vap; lhw = wiphy_priv(wiphy); ic = lhw->ic; /* * If we haven't called ieee80211_ifattach() yet * or there is no VAP, there are no scans to flush. */ if (ic == NULL || (lhw->sc_flags & LKPI_MAC80211_DRV_STARTED) == 0) return; /* Should only happen on the current one? Not seen it late enough. 
*/ IEEE80211_LOCK(ic); TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) ieee80211_scan_flush(vap); IEEE80211_UNLOCK(ic); } /* -------------------------------------------------------------------------- */ MODULE_VERSION(linuxkpi_wlan, 1); MODULE_DEPEND(linuxkpi_wlan, linuxkpi, 1, 1, 1); MODULE_DEPEND(linuxkpi_wlan, wlan, 1, 1, 1); diff --git a/sys/dev/cxgb/cxgb_sge.c b/sys/dev/cxgb/cxgb_sge.c index ca5ff3f1d417..f57494065aec 100644 --- a/sys/dev/cxgb/cxgb_sge.c +++ b/sys/dev/cxgb/cxgb_sge.c @@ -1,3723 +1,3723 @@ /************************************************************************** SPDX-License-Identifier: BSD-2-Clause Copyright (c) 2007-2009, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
***************************************************************************/ #include #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int txq_fills = 0; int multiq_tx_enable = 1; #ifdef TCP_OFFLOAD CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS); #endif extern struct sysctl_oid_list sysctl__hw_cxgb_children; int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, "size of per-queue mbuf ring"); static int cxgb_tx_coalesce_force = 0; SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN, &cxgb_tx_coalesce_force, 0, "coalesce small packets into a single work request regardless of ring state"); #define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1 #define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3)) #define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2 #define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5 #define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5 #define TX_RECLAIM_MAX TX_ETH_Q_SIZE>>2 #define TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT; SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN, &cxgb_tx_coalesce_enable_start, 0, "coalesce enable threshold"); static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT; SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN, &cxgb_tx_coalesce_enable_stop, 0, "coalesce disable threshold"); static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN, &cxgb_tx_reclaim_threshold, 0, "tx cleaning minimum threshold"); /* * XXX don't re-enable this until TOE stops assuming * we have an m_ext */ static int recycle_enable = 0; extern int cxgb_use_16k_clusters; extern int nmbjumbop; extern int nmbjumbo9; extern int nmbjumbo16; #define USE_GTS 0 #define SGE_RX_SM_BUF_SIZE 1536 #define SGE_RX_DROP_THRES 16 #define SGE_RX_COPY_THRES 128 /* * Period of the Tx buffer reclaim timer. This timer does not need to run * frequently as Tx buffers are usually reclaimed by new Tx packets. 
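 * For example, at the default hz of 1000 the value below is 500 ticks,
 * i.e. the reclaim timer fires roughly twice a second.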
*/ #define TX_RECLAIM_PERIOD (hz >> 1) /* * Values for sge_txq.flags */ enum { TXQ_RUNNING = 1 << 0, /* fetch engine is running */ TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ }; struct tx_desc { uint64_t flit[TX_DESC_FLITS]; } __packed; struct rx_desc { uint32_t addr_lo; uint32_t len_gen; uint32_t gen2; uint32_t addr_hi; } __packed; struct rsp_desc { /* response queue descriptor */ struct rss_header rss_hdr; uint32_t flags; uint32_t len_cq; uint8_t imm_data[47]; uint8_t intr_gen; } __packed; #define RX_SW_DESC_MAP_CREATED (1 << 0) #define TX_SW_DESC_MAP_CREATED (1 << 1) #define RX_SW_DESC_INUSE (1 << 3) #define TX_SW_DESC_MAPPED (1 << 4) #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) struct tx_sw_desc { /* SW state per Tx descriptor */ struct mbuf *m; bus_dmamap_t map; int flags; }; struct rx_sw_desc { /* SW state per Rx descriptor */ caddr_t rxsd_cl; struct mbuf *m; bus_dmamap_t map; int flags; }; struct txq_state { unsigned int compl; unsigned int gen; unsigned int pidx; }; struct refill_fl_cb_arg { int error; bus_dma_segment_t seg; int nseg; }; /* * Maps a number of flits to the number of Tx descriptors that can hold them. * The formula is * * desc = 1 + (flits - 2) / (WR_FLITS - 1). * * HW allows up to 4 descriptors to be combined into a WR. */ static uint8_t flit_desc_map[] = { 0, #if SGE_NUM_GENBITS == 1 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 #elif SGE_NUM_GENBITS == 2 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, #else # error "SGE_NUM_GENBITS must be 1 or 2" #endif }; #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED) #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock) #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock) #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock) #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) #define TXQ_RING_NEEDS_ENQUEUE(qs) \ drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \ drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg) #define TXQ_RING_DEQUEUE(qs) \ drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) int cxgb_debug = 0; static void sge_timer_cb(void *arg); static void sge_timer_reclaim(void *arg, int ncount); static void sge_txq_reclaim_handler(void *arg, int ncount); static void cxgb_start_locked(struct sge_qset *qs); /* * XXX need to cope with bursty scheduling by looking at a wider * window than we are now for determining the need for coalescing * */ static __inline uint64_t check_pkt_coalesce(struct sge_qset *qs) { struct adapter *sc; struct sge_txq *txq; uint8_t *fill; if (__predict_false(cxgb_tx_coalesce_force)) return (1); txq = &qs->txq[TXQ_ETH]; sc = qs->port->adapter; fill = &sc->tunq_fill[qs->idx]; if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX) cxgb_tx_coalesce_enable_start = COALESCE_START_MAX; if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN) cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN; /* * if the hardware transmit queue is more than 1/8 full * we mark it as coalescing - we drop back from coalescing *
when we go below 1/32 full and there are no packets enqueued, * this provides us with some degree of hysteresis */ if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) && TXQ_RING_EMPTY(qs) && (qs->coalescing == 0)) *fill = 0; else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start)) *fill = 1; return (sc->tunq_coalesce); } #ifdef __LP64__ static void set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) { uint64_t wr_hilo; #if _BYTE_ORDER == _LITTLE_ENDIAN wr_hilo = wr_hi; wr_hilo |= (((uint64_t)wr_lo)<<32); #else wr_hilo = wr_lo; wr_hilo |= (((uint64_t)wr_hi)<<32); #endif wrp->wrh_hilo = wr_hilo; } #else static void set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) { wrp->wrh_hi = wr_hi; wmb(); wrp->wrh_lo = wr_lo; } #endif struct coalesce_info { int count; int nbytes; int noncoal; }; static int coalesce_check(struct mbuf *m, void *arg) { struct coalesce_info *ci = arg; if ((m->m_next != NULL) || ((mtod(m, vm_offset_t) & PAGE_MASK) + m->m_len > PAGE_SIZE)) ci->noncoal = 1; if ((ci->count == 0) || (ci->noncoal == 0 && (ci->count < 7) && (ci->nbytes + m->m_len <= 10500))) { ci->count++; ci->nbytes += m->m_len; return (1); } return (0); } static struct mbuf * cxgb_dequeue(struct sge_qset *qs) { struct mbuf *m, *m_head, *m_tail; struct coalesce_info ci; if (check_pkt_coalesce(qs) == 0) return TXQ_RING_DEQUEUE(qs); m_head = m_tail = NULL; ci.count = ci.nbytes = ci.noncoal = 0; do { m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci); if (m_head == NULL) { m_tail = m_head = m; } else if (m != NULL) { m_tail->m_nextpkt = m; m_tail = m; } } while (m != NULL); if (ci.count > 7) panic("trying to coalesce %d packets in to one WR", ci.count); return (m_head); } /** * reclaim_completed_tx - reclaims completed Tx descriptors * @adapter: the adapter * @q: the Tx queue to reclaim completed descriptors from * * Reclaims Tx descriptors that the SGE has indicated it has processed, * and frees the associated buffers if possible. Called with the Tx * queue's lock held. */ static __inline int reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) { struct sge_txq *q = &qs->txq[queue]; int reclaim = desc_reclaimable(q); if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) || (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN)) cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; if (reclaim < reclaim_min) return (0); mtx_assert(&qs->lock, MA_OWNED); if (reclaim > 0) { t3_free_tx_desc(qs, reclaim, queue); q->cleaned += reclaim; q->in_use -= reclaim; } if (isset(&qs->txq_stopped, TXQ_ETH)) clrbit(&qs->txq_stopped, TXQ_ETH); return (reclaim); } #ifdef DEBUGNET int cxgb_debugnet_poll_tx(struct sge_qset *qs) { return (reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH)); } #endif /** * should_restart_tx - are there enough resources to restart a Tx queue? * @q: the Tx queue * * Checks if there are enough descriptors to restart a suspended Tx queue. */ static __inline int should_restart_tx(const struct sge_txq *q) { unsigned int r = q->processed - q->cleaned; return q->in_use - r < (q->size >> 1); } /** * t3_sge_init - initialize SGE * @adap: the adapter * @p: the SGE parameters * * Performs SGE initialization needed every time after a chip reset. * We do not initialize any of the queue sets here, instead the driver * top-level must request those individually. We also do not enable DMA * here, that should be done after the queues have been set up. 
*/ void t3_sge_init(adapter_t *adap, struct sge_params *p) { u_int ctrl, ups; ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; #if SGE_NUM_GENBITS == 1 ctrl |= F_EGRGENCTRL; #endif if (adap->params.rev > 0) { if (!(adap->flags & (USING_MSIX | USING_MSI))) ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; } t3_write_reg(adap, A_SG_CONTROL, ctrl); t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | V_LORCQDRBTHRSH(512)); t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | V_TIMEOUT(200 * core_ticks_per_usec(adap))); t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, adap->params.rev < T3_REV_C ? 1000 : 500); t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); } /** * sgl_len - calculates the size of an SGL of the given capacity * @n: the number of SGL entries * * Calculates the number of flits needed for a scatter/gather list that * can hold the given number of entries. */ static __inline unsigned int sgl_len(unsigned int n) { return ((3 * n) / 2 + (n & 1)); } /** * get_imm_packet - return the next ingress packet buffer from a response * @resp: the response descriptor containing the packet data * * Return a packet containing the immediate data of the given response. */ static int get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) { if (resp->rss_hdr.opcode == CPL_RX_DATA) { const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0]; m->m_len = sizeof(*cpl) + ntohs(cpl->len); } else if (resp->rss_hdr.opcode == CPL_RX_PKT) { const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0]; m->m_len = sizeof(*cpl) + ntohs(cpl->len); } else m->m_len = IMMED_PKT_SIZE; m->m_ext.ext_buf = NULL; m->m_ext.ext_type = 0; memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len); return (0); } static __inline u_int flits_to_desc(u_int n) { return (flit_desc_map[n]); } #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ F_HIRCQPARITYERROR) #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ F_RSPQDISABLED) /** * t3_sge_err_intr_handler - SGE async event interrupt handler * @adapter: the adapter * * Interrupt handler for SGE asynchronous (non-data) events. 
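 * Fatal conditions (SGE_FATALERR) additionally escalate to
 * t3_fatal_err() after the cause register has been cleared.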
*/ void t3_sge_err_intr_handler(adapter_t *adapter) { unsigned int v, status; status = t3_read_reg(adapter, A_SG_INT_CAUSE); if (status & SGE_PARERR) CH_ALERT(adapter, "SGE parity error (0x%x)\n", status & SGE_PARERR); if (status & SGE_FRAMINGERR) CH_ALERT(adapter, "SGE framing error (0x%x)\n", status & SGE_FRAMINGERR); if (status & F_RSPQCREDITOVERFOW) CH_ALERT(adapter, "SGE response queue credit overflow\n"); if (status & F_RSPQDISABLED) { v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); CH_ALERT(adapter, "packet delivered to disabled response queue (0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff); } t3_write_reg(adapter, A_SG_INT_CAUSE, status); if (status & SGE_FATALERR) t3_fatal_err(adapter); } void t3_sge_prep(adapter_t *adap, struct sge_params *p) { int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size; nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus); nqsets *= adap->params.nports; fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); while (!powerof2(fl_q_size)) fl_q_size--; use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters : is_offload(adap); if (use_16k) { jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); jumbo_buf_size = MJUM16BYTES; } else { jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); jumbo_buf_size = MJUM9BYTES; } while (!powerof2(jumbo_q_size)) jumbo_q_size--; if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2)) device_printf(adap->dev, "Insufficient clusters and/or jumbo buffers.\n"); p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data); for (i = 0; i < SGE_QSETS; ++i) { struct qset_params *q = p->qset + i; if (adap->params.nports > 2) { q->coalesce_usecs = 50; } else { #ifdef INVARIANTS q->coalesce_usecs = 10; #else q->coalesce_usecs = 5; #endif } q->polling = 0; q->rspq_size = RSPQ_Q_SIZE; q->fl_size = fl_q_size; q->jumbo_size = jumbo_q_size; q->jumbo_buf_size = jumbo_buf_size; q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16; q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE; q->cong_thres = 0; } } int t3_sge_alloc(adapter_t *sc) { /* The parent tag. */ if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ BUS_SPACE_UNRESTRICTED, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lock, lockarg */ &sc->parent_dmat)) { device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); return (ENOMEM); } /* * DMA tag for normal sized RX frames */ if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); return (ENOMEM); } /* * DMA tag for jumbo sized RX frames. */ if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); return (ENOMEM); } /* * DMA tag for TX frames. 
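 * Allows up to TX_MAX_SEGS segments and TX_MAX_SIZE bytes per mapping;
 * the per-descriptor entry tags created in alloc_ring() reuse the same
 * limits.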
*/ if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, TX_MAX_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->tx_dmat)) { device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); return (ENOMEM); } return (0); } int t3_sge_free(struct adapter * sc) { if (sc->tx_dmat != NULL) bus_dma_tag_destroy(sc->tx_dmat); if (sc->rx_jumbo_dmat != NULL) bus_dma_tag_destroy(sc->rx_jumbo_dmat); if (sc->rx_dmat != NULL) bus_dma_tag_destroy(sc->rx_dmat); if (sc->parent_dmat != NULL) bus_dma_tag_destroy(sc->parent_dmat); return (0); } void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) { qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); qs->rspq.polling = 0 /* p->polling */; } #if !defined(__i386__) && !defined(__amd64__) static void refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct refill_fl_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } #endif /** * refill_fl - refill an SGE free-buffer list * @sc: the controller softc * @q: the free-list to refill * @n: the number of new buffers to allocate * * (Re)populate an SGE free-buffer list with up to @n new packet buffers. * The caller must assure that @n does not exceed the queue's capacity. */ static void refill_fl(adapter_t *sc, struct sge_fl *q, int n) { struct rx_sw_desc *sd = &q->sdesc[q->pidx]; struct rx_desc *d = &q->desc[q->pidx]; struct refill_fl_cb_arg cb_arg; struct mbuf *m; caddr_t cl; int err; cb_arg.error = 0; while (n--) { /* * We allocate an uninitialized mbuf + cluster, mbuf is * initialized after rx. */ if (q->zone == zone_pack) { if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL) break; cl = m->m_ext.ext_buf; } else { if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL) break; if ((m = m_gethdr_raw(M_NOWAIT, 0)) == NULL) { uma_zfree(q->zone, cl); break; } } if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); uma_zfree(q->zone, cl); goto done; } sd->flags |= RX_SW_DESC_MAP_CREATED; } #if !defined(__i386__) && !defined(__amd64__) err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size, refill_fl_cb, &cb_arg, 0); if (err != 0 || cb_arg.error) { if (q->zone != zone_pack) uma_zfree(q->zone, cl); m_free(m); goto done; } #else cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl); #endif sd->flags |= RX_SW_DESC_INUSE; sd->rxsd_cl = cl; sd->m = m; d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); d->len_gen = htobe32(V_FLD_GEN1(q->gen)); d->gen2 = htobe32(V_FLD_GEN2(q->gen)); d++; sd++; if (++q->pidx == q->size) { q->pidx = 0; q->gen ^= 1; sd = q->sdesc; d = q->desc; } q->credits++; q->db_pending++; } done: if (q->db_pending >= 32) { q->db_pending = 0; t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); } } /** * free_rx_bufs - free the Rx buffers on an SGE free list * @sc: the controller softc * @q: the SGE free list to clean up * * Release the buffers on an SGE free-buffer Rx queue. HW fetching from * this queue should be stopped before calling this function.
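 * (The assumption is that the free list's context has been disabled
 * first, so the hardware can no longer DMA from the buffers being
 * released.)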
*/ static void free_rx_bufs(adapter_t *sc, struct sge_fl *q) { u_int cidx = q->cidx; while (q->credits--) { struct rx_sw_desc *d = &q->sdesc[cidx]; if (d->flags & RX_SW_DESC_INUSE) { bus_dmamap_unload(q->entry_tag, d->map); bus_dmamap_destroy(q->entry_tag, d->map); if (q->zone == zone_pack) { m_init(d->m, M_NOWAIT, MT_DATA, M_EXT); uma_zfree(zone_pack, d->m); } else { m_init(d->m, M_NOWAIT, MT_DATA, 0); m_free_raw(d->m); uma_zfree(q->zone, d->rxsd_cl); } } d->rxsd_cl = NULL; d->m = NULL; if (++cidx == q->size) cidx = 0; } } static __inline void __refill_fl(adapter_t *adap, struct sge_fl *fl) { refill_fl(adap, fl, min(16U, fl->size - fl->credits)); } static __inline void __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) { uint32_t reclaimable = fl->size - fl->credits; if (reclaimable > 0) refill_fl(adap, fl, min(max, reclaimable)); } /** * recycle_rx_buf - recycle a receive buffer * @adapter: the adapter * @q: the SGE free list * @idx: index of buffer to recycle * * Recycles the specified buffer on the given free list by adding it at * the next available slot on the list. */ static void recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) { struct rx_desc *from = &q->desc[idx]; struct rx_desc *to = &q->desc[q->pidx]; q->sdesc[q->pidx] = q->sdesc[idx]; to->addr_lo = from->addr_lo; // already big endian to->addr_hi = from->addr_hi; // likewise wmb(); /* necessary ? */ to->len_gen = htobe32(V_FLD_GEN1(q->gen)); to->gen2 = htobe32(V_FLD_GEN2(q->gen)); q->credits++; if (++q->pidx == q->size) { q->pidx = 0; q->gen ^= 1; } t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); } static void alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { uint32_t *addr; addr = arg; *addr = segs[0].ds_addr; } static int alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) { size_t len = nelem * elem_size; void *s = NULL; void *p = NULL; int err; if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor tag\n"); return (ENOMEM); } if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, map)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor memory\n"); return (ENOMEM); } bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); bzero(p, len); *(void **)desc = p; if (sw_size) { len = nelem * sw_size; s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); *(void **)sdesc = s; } if (parent_entry_tag == NULL) return (0); if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, TX_MAX_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, entry_tag)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); return (ENOMEM); } return (0); } static void sge_slow_intr_handler(void *arg, int ncount) { adapter_t *sc = arg; t3_slow_intr_handler(sc); t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask); (void) t3_read_reg(sc, A_PL_INT_ENABLE0); } /** * sge_timer_cb - perform periodic maintenance of an SGE qset * @data: the SGE queue set to maintain * * Runs periodically from a timer to perform maintenance of an SGE queue * set. It performs the following tasks: * * a) Cleans up any completed Tx descriptors that may still be pending.
* Normal descriptor cleanup happens when new packets are added to a Tx * queue so this timer is relatively infrequent and does any cleanup only * if the Tx queue has not seen any new packets in a while. We make a * best effort attempt to reclaim descriptors, in that we don't wait * around if we cannot get a queue's lock (which most likely is because * someone else is queueing new packets and so will also handle the clean * up). Since control queues use immediate data exclusively we don't * bother cleaning them up here. * * b) Replenishes Rx queues that have run out due to memory shortage. * Normally new Rx buffers are added when existing ones are consumed but * when out of memory a queue can become empty. We try to add only a few * buffers here, the queue will be replenished fully as these new buffers * are used up if memory shortage has subsided. * * c) Return coalesced response queue credits in case a response queue is * starved. * * d) Ring doorbells for T304 tunnel queues since we have seen doorbell * fifo overflows and the FW doesn't implement any recovery scheme yet. */ static void sge_timer_cb(void *arg) { adapter_t *sc = arg; if ((sc->flags & USING_MSIX) == 0) { struct port_info *pi; struct sge_qset *qs; struct sge_txq *txq; int i, j; int reclaim_ofl, refill_rx; if (sc->open_device_map == 0) return; for (i = 0; i < sc->params.nports; i++) { pi = &sc->port[i]; for (j = 0; j < pi->nqsets; j++) { qs = &sc->sge.qs[pi->first_qset + j]; txq = &qs->txq[0]; reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || (qs->fl[1].credits < qs->fl[1].size)); if (reclaim_ofl || refill_rx) { taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); break; } } } } if (sc->params.nports > 2) { int i; for_each_port(sc, i) { struct port_info *pi = &sc->port[i]; t3_write_reg(sc, A_SG_KDOORBELL, F_SELEGRCNTX | (FW_TUNNEL_SGEEC_START + pi->first_qset)); } } if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) && sc->open_device_map != 0) callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); } /* * This is meant to be a catch-all function to keep sge state private * to sge.c * */ int t3_sge_init_adapter(adapter_t *sc) { callout_init(&sc->sge_timer_ch, 1); callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); return (0); } int t3_sge_reset_adapter(adapter_t *sc) { callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); return (0); } int t3_sge_init_port(struct port_info *pi) { TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); return (0); } /** * refill_rspq - replenish an SGE response queue * @adapter: the adapter * @q: the response queue to replenish * @credits: how many new responses to make available * * Replenishes a response queue by making the supplied number of responses * available to HW. */ static __inline void refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) { /* mbufs are allocated on demand when a rspq entry is processed. 
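 * Returning credits is therefore a single register write below; the
 * response ring itself needs no buffer refill.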
*/ t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); } static void sge_txq_reclaim_handler(void *arg, int ncount) { struct sge_qset *qs = arg; int i; for (i = 0; i < 3; i++) reclaim_completed_tx(qs, 16, i); } static void sge_timer_reclaim(void *arg, int ncount) { struct port_info *pi = arg; int i, nqsets = pi->nqsets; adapter_t *sc = pi->adapter; struct sge_qset *qs; struct mtx *lock; KASSERT((sc->flags & USING_MSIX) == 0, ("can't call timer reclaim for msi-x")); for (i = 0; i < nqsets; i++) { qs = &sc->sge.qs[pi->first_qset + i]; reclaim_completed_tx(qs, 16, TXQ_OFLD); lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : &sc->sge.qs[0].rspq.lock; if (mtx_trylock(lock)) { /* XXX currently assume that we are *NOT* polling */ uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); if (qs->fl[0].credits < qs->fl[0].size - 16) __refill_fl(sc, &qs->fl[0]); if (qs->fl[1].credits < qs->fl[1].size - 16) __refill_fl(sc, &qs->fl[1]); if (status & (1 << qs->rspq.cntxt_id)) { if (qs->rspq.credits) { refill_rspq(sc, &qs->rspq, 1); qs->rspq.credits--; t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 1 << qs->rspq.cntxt_id); } } mtx_unlock(lock); } } } /** * init_qset_cntxt - initialize an SGE queue set context info * @qs: the queue set * @id: the queue set id * * Initializes the TIDs and context ids for the queues of a queue set. */ static void init_qset_cntxt(struct sge_qset *qs, u_int id) { qs->rspq.cntxt_id = id; qs->fl[0].cntxt_id = 2 * id; qs->fl[1].cntxt_id = 2 * id + 1; qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; /* XXX: a sane limit is needed instead of INT_MAX */ mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX); mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX); mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX); } static void txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) { txq->in_use += ndesc; /* * XXX we don't handle stopping of queue * presumably start handles this when we bump against the end */ txqs->gen = txq->gen; txq->unacked += ndesc; txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5); txq->unacked &= 31; txqs->pidx = txq->pidx; txq->pidx += ndesc; #ifdef INVARIANTS if (((txqs->pidx > txq->cidx) && (txq->pidx < txqs->pidx) && (txq->pidx >= txq->cidx)) || ((txqs->pidx < txq->cidx) && (txq->pidx >= txq-> cidx)) || ((txqs->pidx < txq->cidx) && (txq->cidx < txqs->pidx))) panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", txqs->pidx, txq->pidx, txq->cidx); #endif if (txq->pidx >= txq->size) { txq->pidx -= txq->size; txq->gen ^= 1; } } /** * calc_tx_descs - calculate the number of Tx descriptors for a packet * @m: the packet mbufs * @nsegs: the number of segments * * Returns the number of Tx descriptors needed for the given Ethernet * packet. Ethernet packets require addition of WR and CPL headers. */ static __inline unsigned int calc_tx_descs(const struct mbuf *m, int nsegs) { unsigned int flits; if (m->m_pkthdr.len <= PIO_LEN) return 1; flits = sgl_len(nsegs) + 2; if (m->m_pkthdr.csum_flags & CSUM_TSO) flits++; return flits_to_desc(flits); } /** * make_sgl - populate a scatter/gather list for a packet * @sgp: the SGL to populate * @segs: the packet dma segments * @nsegs: the number of segments * * Generates a scatter/gather list for the buffers that make up a packet * and returns the SGL size in 8-byte words. 
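 * (Buffers pack two per struct sg_ent -- two 32-bit lengths and two
 * 64-bit addresses, i.e. 3 flits per pair -- which is where sgl_len()'s
 * 3n/2-rounded-up count comes from.)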
The caller must size the SGL * appropriately. */ static __inline void make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) { int i, idx; for (idx = 0, i = 0; i < nsegs; i++) { /* * firmware doesn't like empty segments */ if (segs[i].ds_len == 0) continue; if (i && idx == 0) ++sgp; sgp->len[idx] = htobe32(segs[i].ds_len); sgp->addr[idx] = htobe64(segs[i].ds_addr); idx ^= 1; } if (idx) { sgp->len[idx] = 0; sgp->addr[idx] = 0; } } /** * check_ring_tx_db - check and potentially ring a Tx queue's doorbell * @adap: the adapter * @q: the Tx queue * * Ring the doorbell if a Tx queue is asleep. There is a natural race, * where the HW is going to sleep just after we checked, however, * then the interrupt handler will detect the outstanding TX packet * and ring the doorbell for us. * * When GTS is disabled we unconditionally ring the doorbell. */ static __inline void check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring) { #if USE_GTS clear_bit(TXQ_LAST_PKT_DB, &q->flags); if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { set_bit(TXQ_LAST_PKT_DB, &q->flags); #ifdef T3_TRACE T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", q->cntxt_id); #endif t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } #else if (mustring || ++q->db_pending >= 32) { wmb(); /* write descriptors before telling HW */ t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); q->db_pending = 0; } #endif } static __inline void wr_gen2(struct tx_desc *d, unsigned int gen) { #if SGE_NUM_GENBITS == 2 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); #endif } /** * write_wr_hdr_sgl - write a WR header and, optionally, SGL * @ndesc: number of Tx descriptors spanned by the SGL * @txd: first Tx descriptor to be written * @txqs: txq state (generation and producer index) * @txq: the SGE Tx queue * @sgl: the SGL * @flits: number of flits to the start of the SGL in the first descriptor * @sgl_flits: the SGL size in flits * @wr_hi: top 32 bits of WR header based on WR type (big endian) * @wr_lo: low 32 bits of WR header based on WR type (big endian) * * Write a work request header and an associated SGL. If the SGL is * small enough to fit into one Tx descriptor it has already been written * and we just need to write the WR header. Otherwise we distribute the * SGL across the number of descriptors it spans. 
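 * Worked example against flit_desc_map above (SGE_NUM_GENBITS == 2): a
 * WR of 20 flits -- say 2 header flits plus an 18-flit SGL covering 12
 * buffers -- gives flits_to_desc(20) == 2, so the SGL is split across
 * 2 descriptors.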
*/ static void write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) { struct work_request_hdr *wrp = (struct work_request_hdr *)txd; if (__predict_true(ndesc == 1)) { set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | wr_hi, htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) | wr_lo); wr_gen2(txd, txqs->gen); } else { unsigned int ogen = txqs->gen; const uint64_t *fp = (const uint64_t *)sgl; struct work_request_hdr *wp = wrp; wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | wr_hi; while (sgl_flits) { unsigned int avail = WR_FLITS - flits; if (avail > sgl_flits) avail = sgl_flits; memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); sgl_flits -= avail; ndesc--; if (!sgl_flits) break; fp += avail; txd++; if (++txqs->pidx == txq->size) { txqs->pidx = 0; txqs->gen ^= 1; txd = txq->desc; } /* * when the head of the mbuf chain * is freed all clusters will be freed * with it */ wrp = (struct work_request_hdr *)txd; wrp->wrh_hi = htonl(V_WR_DATATYPE(1) | V_WR_SGLSFLT(1)) | wr_hi; wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS, sgl_flits + 1)) | V_WR_GEN(txqs->gen)) | wr_lo; wr_gen2(txd, txqs->gen); flits = 1; } wrp->wrh_hi |= htonl(F_WR_EOP); wmb(); wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; wr_gen2((struct tx_desc *)wp, ogen); } } /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */ #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20) #define GET_VTAG(cntrl, m) \ do { \ if ((m)->m_flags & M_VLANTAG) \ cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ } while (0) static int t3_encap(struct sge_qset *qs, struct mbuf **m) { adapter_t *sc; struct mbuf *m0; struct sge_txq *txq; struct txq_state txqs; struct port_info *pi; unsigned int ndesc, flits, cntrl, mlen; int err, nsegs, tso_info = 0; struct work_request_hdr *wrp; struct tx_sw_desc *txsd; struct sg_ent *sgp, *sgl; uint32_t wr_hi, wr_lo, sgl_flits; bus_dma_segment_t segs[TX_MAX_SEGS]; struct tx_desc *txd; pi = qs->port; sc = pi->adapter; txq = &qs->txq[TXQ_ETH]; txd = &txq->desc[txq->pidx]; txsd = &txq->sdesc[txq->pidx]; sgl = txq->txq_sgl; prefetch(txd); m0 = *m; mtx_assert(&qs->lock, MA_OWNED); cntrl = V_TXPKT_INTF(pi->txpkt_intf); KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n")); if (m0->m_nextpkt == NULL && m0->m_next != NULL && m0->m_pkthdr.csum_flags & (CSUM_TSO)) tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); if (m0->m_nextpkt != NULL) { busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs); ndesc = 1; mlen = 0; } else { if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map, &m0, segs, &nsegs))) { if (cxgb_debug) printf("failed ... 
err=%d\n", err); return (err); } mlen = m0->m_pkthdr.len; ndesc = calc_tx_descs(m0, nsegs); } txq_prod(txq, ndesc, &txqs); KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); txsd->m = m0; if (m0->m_nextpkt != NULL) { struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; int i, fidx; if (nsegs > 7) panic("trying to coalesce %d packets in to one WR", nsegs); txq->txq_coalesced += nsegs; wrp = (struct work_request_hdr *)txd; flits = nsegs*2 + 1; for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { struct cpl_tx_pkt_batch_entry *cbe; uint64_t flit; uint32_t *hflit = (uint32_t *)&flit; int cflags = m0->m_pkthdr.csum_flags; cntrl = V_TXPKT_INTF(pi->txpkt_intf); GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); if (__predict_false(!(cflags & CSUM_IP))) cntrl |= F_TXPKT_IPCSUM_DIS; if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6)))) cntrl |= F_TXPKT_L4CSUM_DIS; hflit[0] = htonl(cntrl); hflit[1] = htonl(segs[i].ds_len | 0x80000000); flit |= htobe64(1 << 24); cbe = &cpl_batch->pkt_entry[i]; cbe->cntrl = hflit[0]; cbe->len = hflit[1]; cbe->addr = htobe64(segs[i].ds_addr); } wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); set_wr_hdr(wrp, wr_hi, wr_lo); wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq, 0); return (0); } else if (tso_info) { uint16_t eth_type; struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; struct ether_header *eh; void *l3hdr; struct tcphdr *tcp; txd->flit[2] = 0; GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); hdr->cntrl = htonl(cntrl); hdr->len = htonl(mlen | 0x80000000); if (__predict_false(mlen < TCPPKTHDRSIZE)) { printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x", m0, mlen, m0->m_pkthdr.tso_segsz, (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags); panic("tx tso packet too small"); } /* Make sure that ether, ip, tcp headers are all in m0 */ if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { m0 = m_pullup(m0, TCPPKTHDRSIZE); if (__predict_false(m0 == NULL)) { /* XXX panic probably an overreaction */ panic("couldn't fit header into mbuf"); } } eh = mtod(m0, struct ether_header *); eth_type = eh->ether_type; if (eth_type == htons(ETHERTYPE_VLAN)) { struct ether_vlan_header *evh = (void *)eh; tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN); l3hdr = evh + 1; eth_type = evh->evl_proto; } else { tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II); l3hdr = eh + 1; } if (eth_type == htons(ETHERTYPE_IP)) { struct ip *ip = l3hdr; tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl); tcp = (struct tcphdr *)(ip + 1); } else if (eth_type == htons(ETHERTYPE_IPV6)) { struct ip6_hdr *ip6 = l3hdr; KASSERT(ip6->ip6_nxt == IPPROTO_TCP, ("%s: CSUM_TSO with ip6_nxt %d", __func__, ip6->ip6_nxt)); tso_info |= F_LSO_IPV6; tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2); tcp = (struct tcphdr *)(ip6 + 1); } else panic("%s: CSUM_TSO but neither ip nor ip6", __func__); tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off); hdr->lso_info = htonl(tso_info); if (__predict_false(mlen <= PIO_LEN)) { /* * pkt not undersized but fits in PIO_LEN * Indicates a TSO bug at the higher levels. 
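 * In that case the frame is sent as immediate data anyway: the whole
 * packet is copied into the descriptor flits below and the mbuf chain
 * is freed right away.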
*/ txsd->m = NULL; m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); flits = (mlen + 7) / 8 + 3; wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | F_WR_SOP | F_WR_EOP | txqs.compl); wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); set_wr_hdr(&hdr->wr, wr_hi, wr_lo); wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq, 0); m_freem(m0); return (0); } flits = 3; } else { struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) cntrl |= F_TXPKT_IPCSUM_DIS; if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6)))) cntrl |= F_TXPKT_L4CSUM_DIS; cpl->cntrl = htonl(cntrl); cpl->len = htonl(mlen | 0x80000000); if (mlen <= PIO_LEN) { txsd->m = NULL; m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); flits = (mlen + 7) / 8 + 2; wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | F_WR_SOP | F_WR_EOP | txqs.compl); wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); set_wr_hdr(&cpl->wr, wr_hi, wr_lo); wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq, 0); m_freem(m0); return (0); } flits = 2; } wrp = (struct work_request_hdr *)txd; sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; make_sgl(sgp, segs, nsegs); sgl_flits = sgl_len(nsegs); ETHER_BPF_MTAP(pi->ifp, m0); KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); wr_lo = htonl(V_WR_TID(txq->token)); write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); check_ring_tx_db(sc, txq, 0); return (0); } #ifdef DEBUGNET int cxgb_debugnet_encap(struct sge_qset *qs, struct mbuf **m) { int error; error = t3_encap(qs, m); if (error == 0) check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1); else if (*m != NULL) { m_freem(*m); *m = NULL; } return (error); } #endif void cxgb_tx_watchdog(void *arg) { struct sge_qset *qs = arg; struct sge_txq *txq = &qs->txq[TXQ_ETH]; if (qs->coalescing != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) && TXQ_RING_EMPTY(qs)) qs->coalescing = 0; else if (qs->coalescing == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start)) qs->coalescing = 1; if (TXQ_TRYLOCK(qs)) { qs->qs_flags |= QS_FLUSHING; cxgb_start_locked(qs); qs->qs_flags &= ~QS_FLUSHING; TXQ_UNLOCK(qs); } if (if_getdrvflags(qs->port->ifp) & IFF_DRV_RUNNING) callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, qs, txq->txq_watchdog.c_cpu); } static void cxgb_tx_timeout(void *arg) { struct sge_qset *qs = arg; struct sge_txq *txq = &qs->txq[TXQ_ETH]; if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) qs->coalescing = 1; if (TXQ_TRYLOCK(qs)) { qs->qs_flags |= QS_TIMEOUT; cxgb_start_locked(qs); qs->qs_flags &= ~QS_TIMEOUT; TXQ_UNLOCK(qs); } } static void cxgb_start_locked(struct sge_qset *qs) { struct mbuf *m_head = NULL; struct sge_txq *txq = &qs->txq[TXQ_ETH]; struct port_info *pi = qs->port; if_t ifp = pi->ifp; if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT)) reclaim_completed_tx(qs, 0, TXQ_ETH); if (!pi->link_config.link_ok) { TXQ_RING_FLUSH(qs); return; } TXQ_LOCK_ASSERT(qs); while (!TXQ_RING_EMPTY(qs) && (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && pi->link_config.link_ok) { reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); if (txq->size - txq->in_use <= TX_MAX_DESC) break; if ((m_head = cxgb_dequeue(qs)) == NULL) break; /* 
* Encapsulation can modify our pointer, and/or make it * NULL on failure. In that event, we can't requeue. */ if (t3_encap(qs, &m_head) || m_head == NULL) break; m_head = NULL; } if (txq->db_pending) check_ring_tx_db(pi->adapter, txq, 1); if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && pi->link_config.link_ok) callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, qs, txq->txq_timer.c_cpu); if (m_head != NULL) m_freem(m_head); } static int cxgb_transmit_locked(if_t ifp, struct sge_qset *qs, struct mbuf *m) { struct port_info *pi = qs->port; struct sge_txq *txq = &qs->txq[TXQ_ETH]; struct buf_ring *br = txq->txq_mr; int error, avail; avail = txq->size - txq->in_use; TXQ_LOCK_ASSERT(qs); /* * We can only do a direct transmit if the following are true: * - we aren't coalescing (ring < 3/4 full) * - the link is up -- checked in caller * - there are no packets enqueued already * - there is space in the hardware transmit queue */ if (check_pkt_coalesce(qs) == 0 && !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) { if (t3_encap(qs, &m)) { if (m != NULL && (error = drbr_enqueue(ifp, br, m)) != 0) return (error); } else { if (txq->db_pending) check_ring_tx_db(pi->adapter, txq, 1); /* * We've bypassed the buf ring so we need to update * the stats directly */ txq->txq_direct_packets++; txq->txq_direct_bytes += m->m_pkthdr.len; } } else if ((error = drbr_enqueue(ifp, br, m)) != 0) return (error); reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok && (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7))) cxgb_start_locked(qs); else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer)) callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, qs, txq->txq_timer.c_cpu); return (0); } int cxgb_transmit(if_t ifp, struct mbuf *m) { struct sge_qset *qs; struct port_info *pi = if_getsoftc(ifp); int error, qidx = pi->first_qset; if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || (!pi->link_config.link_ok)) { m_freem(m); return (0); } /* check if flowid is set */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset; qs = &pi->adapter->sge.qs[qidx]; if (TXQ_TRYLOCK(qs)) { /* XXX running */ error = cxgb_transmit_locked(ifp, qs, m); TXQ_UNLOCK(qs); } else error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m); return (error); } void cxgb_qflush(if_t ifp) { /* * Flush any enqueued mbufs in the buf_rings * and in the transmit queues; a no-op for now. */ return; } /** * write_imm - write a packet into a Tx descriptor as immediate data * @d: the Tx descriptor to write * @src: the packet * @len: the length of packet data to write as immediate data * @gen: the generation bit value to write * * Writes a packet as immediate data into a Tx descriptor. The packet * contains a work request at its beginning. We must write the packet * carefully so the SGE doesn't accidentally read it before it's written in * its entirety.
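* (Concretely, in the body below: the payload flits are copied first, set_wr_hdr() then fills in the header, and wr_gen2() sets the generation bit only after a wmb(), so the SGE never sees a valid generation bit on a half-written descriptor.)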
*/ static __inline void write_imm(struct tx_desc *d, caddr_t src, unsigned int len, unsigned int gen) { struct work_request_hdr *from = (struct work_request_hdr *)src; struct work_request_hdr *to = (struct work_request_hdr *)d; uint32_t wr_hi, wr_lo; KASSERT(len <= WR_LEN && len >= sizeof(*from), ("%s: invalid len %d", __func__, len)); memcpy(&to[1], &from[1], len - sizeof(*from)); wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | V_WR_BCNTLFLT(len & 7)); wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8)); set_wr_hdr(to, wr_hi, wr_lo); wmb(); wr_gen2(d, gen); } /** * check_desc_avail - check descriptor availability on a send queue * @adap: the adapter * @q: the TX queue * @m: the packet needing the descriptors * @ndesc: the number of Tx descriptors needed * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) * * Checks if the requested number of Tx descriptors is available on an * SGE send queue. If the queue is already suspended or not enough * descriptors are available the packet is queued for later transmission. * Must be called with the Tx queue locked. * * Returns 0 if enough descriptors are available, 1 if there aren't * enough descriptors and the packet has been queued, and 2 if the caller * needs to retry because there weren't enough descriptors at the * beginning of the call but some freed up in the meantime. */ static __inline int check_desc_avail(adapter_t *adap, struct sge_txq *q, struct mbuf *m, unsigned int ndesc, unsigned int qid) { /* * XXX We currently only use this for checking the control queue; * the control queue is only used for binding qsets, which happens * at init time, so we are guaranteed enough descriptors. */ - if (__predict_false(mbufq_len(&q->sendq))) { + if (__predict_false(!mbufq_empty(&q->sendq))) { addq_exit: (void)mbufq_enqueue(&q->sendq, m); return 1; } if (__predict_false(q->size - q->in_use < ndesc)) { struct sge_qset *qs = txq_to_qset(q, qid); setbit(&qs->txq_stopped, qid); if (should_restart_tx(q) && test_and_clear_bit(qid, &qs->txq_stopped)) return 2; q->stops++; goto addq_exit; } return 0; } /** * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs * @q: the SGE control Tx queue * * This is a variant of reclaim_completed_tx() that is used for Tx queues * that send only immediate data (presently just the control queues) and * thus do not have any mbufs. */ static __inline void reclaim_completed_tx_imm(struct sge_txq *q) { unsigned int reclaim = q->processed - q->cleaned; q->in_use -= reclaim; q->cleaned += reclaim; } /** * ctrl_xmit - send a packet through an SGE control Tx queue * @adap: the adapter * @qs: the queue set containing the control queue * @m: the packet * * Send a packet through an SGE control Tx queue. Packets sent through * a control queue must fit entirely as immediate data in a single Tx * descriptor and have no page fragments.
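* (Calling convention, as used by t3_mgmt_tx() below: build the work request in a single mbuf with m->m_len <= WR_LEN and pass it in; an ENOSPC return means the packet was queued on the sendq for a later restart rather than dropped.)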
*/ static int ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) { int ret; struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); struct sge_txq *q = &qs->txq[TXQ_CTRL]; KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__)); wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP); wrp->wrh_lo = htonl(V_WR_TID(q->token)); TXQ_LOCK(qs); again: reclaim_completed_tx_imm(q); ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); if (__predict_false(ret)) { if (ret == 1) { TXQ_UNLOCK(qs); return (ENOSPC); } goto again; } write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen); q->in_use++; if (++q->pidx >= q->size) { q->pidx = 0; q->gen ^= 1; } TXQ_UNLOCK(qs); wmb(); t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); m_free(m); return (0); } /** * restart_ctrlq - restart a suspended control queue * @qs: the queue set containing the control queue * * Resumes transmission on a suspended Tx control queue. */ static void restart_ctrlq(void *data, int npending) { struct mbuf *m; struct sge_qset *qs = (struct sge_qset *)data; struct sge_txq *q = &qs->txq[TXQ_CTRL]; adapter_t *adap = qs->port->adapter; TXQ_LOCK(qs); again: reclaim_completed_tx_imm(q); while (q->in_use < q->size && (m = mbufq_dequeue(&q->sendq)) != NULL) { write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen); m_free(m); if (++q->pidx >= q->size) { q->pidx = 0; q->gen ^= 1; } q->in_use++; } - if (mbufq_len(&q->sendq)) { + if (!mbufq_empty(&q->sendq)) { setbit(&qs->txq_stopped, TXQ_CTRL); if (should_restart_tx(q) && test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) goto again; q->stops++; } TXQ_UNLOCK(qs); t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } /* * Send a management message through control queue 0. */ int t3_mgmt_tx(struct adapter *adap, struct mbuf *m) { return ctrl_xmit(adap, &adap->sge.qs[0], m); } /** * free_qset - free the resources of an SGE queue set * @sc: the controller owning the queue set * @q: the queue set * * Release the HW and SW resources associated with an SGE queue set, such * as HW contexts, packet buffers, and descriptor rings. Traffic to the * queue set must be quiesced prior to calling this.
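* Expects to be entered with the queue set lock held (t3_free_sge_resources() takes it before calling here); the lock is released and destroyed as part of the teardown.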
*/ static void t3_free_qset(adapter_t *sc, struct sge_qset *q) { int i; reclaim_completed_tx(q, 0, TXQ_ETH); if (q->txq[TXQ_ETH].txq_mr != NULL) buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF); if (q->txq[TXQ_ETH].txq_ifq != NULL) { ifq_delete(q->txq[TXQ_ETH].txq_ifq); free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF); } for (i = 0; i < SGE_RXQ_PER_SET; ++i) { if (q->fl[i].desc) { mtx_lock_spin(&sc->sge.reg_lock); t3_sge_disable_fl(sc, q->fl[i].cntxt_id); mtx_unlock_spin(&sc->sge.reg_lock); bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, q->fl[i].desc_map); bus_dma_tag_destroy(q->fl[i].desc_tag); bus_dma_tag_destroy(q->fl[i].entry_tag); } if (q->fl[i].sdesc) { free_rx_bufs(sc, &q->fl[i]); free(q->fl[i].sdesc, M_DEVBUF); } } mtx_unlock(&q->lock); MTX_DESTROY(&q->lock); for (i = 0; i < SGE_TXQ_PER_SET; i++) { if (q->txq[i].desc) { mtx_lock_spin(&sc->sge.reg_lock); t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); mtx_unlock_spin(&sc->sge.reg_lock); bus_dmamap_unload(q->txq[i].desc_tag, q->txq[i].desc_map); bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, q->txq[i].desc_map); bus_dma_tag_destroy(q->txq[i].desc_tag); bus_dma_tag_destroy(q->txq[i].entry_tag); } if (q->txq[i].sdesc) { free(q->txq[i].sdesc, M_DEVBUF); } } if (q->rspq.desc) { mtx_lock_spin(&sc->sge.reg_lock); t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); mtx_unlock_spin(&sc->sge.reg_lock); bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, q->rspq.desc_map); bus_dma_tag_destroy(q->rspq.desc_tag); MTX_DESTROY(&q->rspq.lock); } #if defined(INET6) || defined(INET) tcp_lro_free(&q->lro.ctrl); #endif bzero(q, sizeof(*q)); } /** * t3_free_sge_resources - free SGE resources * @sc: the adapter softc * * Frees resources used by the SGE queue sets. */ void t3_free_sge_resources(adapter_t *sc, int nqsets) { int i; for (i = 0; i < nqsets; ++i) { TXQ_LOCK(&sc->sge.qs[i]); t3_free_qset(sc, &sc->sge.qs[i]); } } /** * t3_sge_start - enable SGE * @sc: the controller softc * * Enables the SGE for DMAs. This is the last step in starting packet * transfers. */ void t3_sge_start(adapter_t *sc) { t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); } /** * t3_sge_stop - disable SGE operation * @sc: the adapter * * Disables the DMA engine. This can be called in emergencies (e.g., * from error interrupts) or from normal process context. In the latter * case it also disables any pending queue restart tasklets. Note that * if it is called in interrupt context it cannot disable the restart * tasklets as it cannot wait; however, the tasklets will have no effect * since the doorbells are disabled and the driver will call this again * later from process context, at which time the tasklets will be stopped * if they are still running. */ void t3_sge_stop(adapter_t *sc) { t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); } /** * t3_free_tx_desc - reclaims Tx descriptors and their buffers * @qs: the queue set that owns the Tx queue * @reclaimable: the number of descriptors to reclaim * @queue: the index of the Tx queue within the queue set (e.g., TXQ_ETH) * * Reclaims Tx descriptors from an SGE Tx queue and frees the associated * Tx buffers. Called with the Tx queue lock held.
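* (The prefetch index arithmetic below, (cidx + n) & (size - 1), presumably relies on the queue size being a power of two; the reclaim walk itself wraps cidx explicitly.)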
* * The reclaimed buffers are freed in place; the function itself returns * nothing. */ void t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) { struct tx_sw_desc *txsd; unsigned int cidx, mask; struct sge_txq *q = &qs->txq[queue]; #ifdef T3_TRACE T3_TRACE2(sc->tb[q->cntxt_id & 7], "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); #endif cidx = q->cidx; mask = q->size - 1; txsd = &q->sdesc[cidx]; mtx_assert(&qs->lock, MA_OWNED); while (reclaimable--) { prefetch(q->sdesc[(cidx + 1) & mask].m); prefetch(q->sdesc[(cidx + 2) & mask].m); if (txsd->m != NULL) { if (txsd->flags & TX_SW_DESC_MAPPED) { bus_dmamap_unload(q->entry_tag, txsd->map); txsd->flags &= ~TX_SW_DESC_MAPPED; } m_freem_list(txsd->m); txsd->m = NULL; } else q->txq_skipped++; ++txsd; if (++cidx == q->size) { cidx = 0; txsd = q->sdesc; } } q->cidx = cidx; } /** * is_new_response - check if a response is newly written * @r: the response descriptor * @q: the response queue * * Returns true if a response descriptor contains a yet unprocessed * response. */ static __inline int is_new_response(const struct rsp_desc *r, const struct sge_rspq *q) { return (r->intr_gen & F_RSPD_GEN2) == q->gen; } #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ #define NOMEM_INTR_DELAY 2500 #ifdef TCP_OFFLOAD /** * write_ofld_wr - write an offload work request * @adap: the adapter * @m: the packet to send * @q: the Tx queue * @pidx: index of the first Tx descriptor to write * @gen: the generation value to use * @ndesc: number of descriptors the packet will occupy * * Write an offload work request to send the supplied packet. The packet * data already carry the work request with most fields populated. */ static void write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q, unsigned int pidx, unsigned int gen, unsigned int ndesc) { unsigned int sgl_flits, flits; int i, idx, nsegs, wrlen; struct work_request_hdr *from; struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1]; struct tx_desc *d = &q->desc[pidx]; struct txq_state txqs; struct sglist_seg *segs; struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); struct sglist *sgl; from = (void *)(oh + 1); /* Start of WR within mbuf */ wrlen = m->m_len - sizeof(*oh); if (!(oh->flags & F_HDR_SGL)) { write_imm(d, (caddr_t)from, wrlen, gen); /* * mbuf with "real" immediate tx data will be enqueue_wr'd by * t3_push_frames and freed in wr_ack. Others, like those sent * down by close_conn, t3_send_reset, etc., should be freed here. */ if (!(oh->flags & F_HDR_DF)) m_free(m); return; } memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from)); sgl = oh->sgl; flits = wrlen / 8; sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl; nsegs = sgl->sg_nseg; segs = sgl->sg_segs; for (idx = 0, i = 0; i < nsegs; i++) { KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__)); if (i && idx == 0) ++sgp; sgp->len[idx] = htobe32(segs[i].ss_len); sgp->addr[idx] = htobe64(segs[i].ss_paddr); idx ^= 1; } if (idx) { sgp->len[idx] = 0; sgp->addr[idx] = 0; } sgl_flits = sgl_len(nsegs); txqs.gen = gen; txqs.pidx = pidx; txqs.compl = 0; write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits, from->wrh_hi, from->wrh_lo); } /** * ofld_xmit - send a packet through an offload queue * @adap: the adapter * @qs: the queue set containing the Tx offload queue * @m: the packet * * Send an offload packet through an SGE offload queue.
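* (The descriptor count is not computed here: it travels with the packet in its ofld_hdr and is read back via G_HDR_NDESC(oh->flags), so whoever built the work request decided how many descriptors it needs.)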
*/ static int ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) { int ret; unsigned int ndesc; unsigned int pidx, gen; struct sge_txq *q = &qs->txq[TXQ_OFLD]; struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); ndesc = G_HDR_NDESC(oh->flags); TXQ_LOCK(qs); again: reclaim_completed_tx(qs, 16, TXQ_OFLD); ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); if (__predict_false(ret)) { if (ret == 1) { TXQ_UNLOCK(qs); return (EINTR); } goto again; } gen = q->gen; q->in_use += ndesc; pidx = q->pidx; q->pidx += ndesc; if (q->pidx >= q->size) { q->pidx -= q->size; q->gen ^= 1; } write_ofld_wr(adap, m, q, pidx, gen, ndesc); check_ring_tx_db(adap, q, 1); TXQ_UNLOCK(qs); return (0); } /** * restart_offloadq - restart a suspended offload queue * @qs: the queue set containing the offload queue * * Resumes transmission on a suspended Tx offload queue. */ static void restart_offloadq(void *data, int npending) { struct mbuf *m; struct sge_qset *qs = data; struct sge_txq *q = &qs->txq[TXQ_OFLD]; adapter_t *adap = qs->port->adapter; TXQ_LOCK(qs); again: while ((m = mbufq_first(&q->sendq)) != NULL) { unsigned int gen, pidx; struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); unsigned int ndesc = G_HDR_NDESC(oh->flags); if (__predict_false(q->size - q->in_use < ndesc)) { setbit(&qs->txq_stopped, TXQ_OFLD); if (should_restart_tx(q) && test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) goto again; q->stops++; break; } gen = q->gen; q->in_use += ndesc; pidx = q->pidx; q->pidx += ndesc; if (q->pidx >= q->size) { q->pidx -= q->size; q->gen ^= 1; } (void)mbufq_dequeue(&q->sendq); TXQ_UNLOCK(qs); write_ofld_wr(adap, m, q, pidx, gen, ndesc); TXQ_LOCK(qs); } #if USE_GTS set_bit(TXQ_RUNNING, &q->flags); set_bit(TXQ_LAST_PKT_DB, &q->flags); #endif TXQ_UNLOCK(qs); wmb(); t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } /** * t3_offload_tx - send an offload packet * @m: the packet * * Sends an offload packet. We use the packet priority to select the * appropriate Tx queue as follows: bit 0 indicates whether the packet * should be sent as regular or control, bits 1-3 select the queue set. */ int t3_offload_tx(struct adapter *sc, struct mbuf *m) { struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)]; if (oh->flags & F_HDR_CTRL) { m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */ return (ctrl_xmit(sc, qs, m)); } else return (ofld_xmit(sc, qs, m)); } #endif static void restart_tx(struct sge_qset *qs) { struct adapter *sc = qs->port->adapter; if (isset(&qs->txq_stopped, TXQ_OFLD) && should_restart_tx(&qs->txq[TXQ_OFLD]) && test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { qs->txq[TXQ_OFLD].restarts++; taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); } if (isset(&qs->txq_stopped, TXQ_CTRL) && should_restart_tx(&qs->txq[TXQ_CTRL]) && test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { qs->txq[TXQ_CTRL].restarts++; taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); } } /** * t3_sge_alloc_qset - initialize an SGE queue set * @sc: the controller softc * @id: the queue set id * @nports: how many Ethernet ports will be using this queue set * @irq_vec_idx: the IRQ vector index for response queue interrupts * @p: configuration parameters for this queue set * @ntxq: number of Tx queues for the queue set * @pi: port info for queue set * * Allocate resources and initialize an SGE queue set. A queue set * comprises a response queue, two Rx free-buffer queues, and up to 3 * Tx queues.
The Tx queues are assigned roles in the order Ethernet * queue, offload queue, and control queue. */ int t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, const struct qset_params *p, int ntxq, struct port_info *pi) { struct sge_qset *q = &sc->sge.qs[id]; int i, ret = 0; MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); q->port = pi; q->adap = sc; if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { device_printf(sc->dev, "failed to allocate mbuf ring\n"); goto err; } if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) { device_printf(sc->dev, "failed to allocate ifq\n"); goto err; } ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp); callout_init(&q->txq[TXQ_ETH].txq_timer, 1); callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1); q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus; q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus; init_qset_cntxt(q, id); q->idx = id; if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, &q->fl[0].desc, &q->fl[0].sdesc, &q->fl[0].desc_tag, &q->fl[0].desc_map, sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { printf("error %d from alloc ring fl0\n", ret); goto err; } if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, &q->fl[1].desc, &q->fl[1].sdesc, &q->fl[1].desc_tag, &q->fl[1].desc_map, sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { printf("error %d from alloc ring fl1\n", ret); goto err; } if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, &q->rspq.phys_addr, &q->rspq.desc, NULL, &q->rspq.desc_tag, &q->rspq.desc_map, NULL, NULL)) != 0) { printf("error %d from alloc ring rspq\n", ret); goto err; } snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", device_get_unit(sc->dev), irq_vec_idx); MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); for (i = 0; i < ntxq; ++i) { size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); if ((ret = alloc_ring(sc, p->txq_size[i], sizeof(struct tx_desc), sz, &q->txq[i].phys_addr, &q->txq[i].desc, &q->txq[i].sdesc, &q->txq[i].desc_tag, &q->txq[i].desc_map, sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { printf("error %d from alloc ring tx %i\n", ret, i); goto err; } mbufq_init(&q->txq[i].sendq, INT_MAX); q->txq[i].gen = 1; q->txq[i].size = p->txq_size[i]; } #ifdef TCP_OFFLOAD TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); #endif TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); q->fl[0].gen = q->fl[1].gen = 1; q->fl[0].size = p->fl_size; q->fl[1].size = p->jumbo_size; q->rspq.gen = 1; q->rspq.cidx = 0; q->rspq.size = p->rspq_size; q->txq[TXQ_ETH].stop_thres = nports * flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); q->fl[0].buf_size = MCLBYTES; q->fl[0].zone = zone_pack; q->fl[0].type = EXT_PACKET; if (p->jumbo_buf_size == MJUM16BYTES) { q->fl[1].zone = zone_jumbo16; q->fl[1].type = EXT_JUMBO16; } else if (p->jumbo_buf_size == MJUM9BYTES) { q->fl[1].zone = zone_jumbo9; q->fl[1].type = EXT_JUMBO9; } else if (p->jumbo_buf_size == MJUMPAGESIZE) { q->fl[1].zone = zone_jumbop; q->fl[1].type = EXT_JUMBOP; } else { KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size)); ret = EDOOFUS; goto err; } q->fl[1].buf_size = p->jumbo_buf_size; /* Allocate and setup the lro_ctrl structure */ q->lro.enabled = !!(if_getcapenable(pi->ifp) & IFCAP_LRO); #if defined(INET6) || defined(INET) ret = tcp_lro_init(&q->lro.ctrl); if (ret) { printf("error %d from tcp_lro_init\n", ret); goto err; } #endif q->lro.ctrl.ifp = pi->ifp; mtx_lock_spin(&sc->sge.reg_lock); ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, q->rspq.phys_addr, q->rspq.size, q->fl[0].buf_size, 1, 0); if (ret) { printf("error %d from t3_sge_init_rspcntxt\n", ret); goto err_unlock; } for (i = 0; i < SGE_RXQ_PER_SET; ++i) { ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, q->fl[i].phys_addr, q->fl[i].size, q->fl[i].buf_size, p->cong_thres, 1, 0); if (ret) { printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); goto err_unlock; } } ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } if (ntxq > 1) { ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, USE_GTS, SGE_CNTXT_OFLD, id, q->txq[TXQ_OFLD].phys_addr, q->txq[TXQ_OFLD].size, 0, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } } if (ntxq > 2) { ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, SGE_CNTXT_CTRL, id, q->txq[TXQ_CTRL].phys_addr, q->txq[TXQ_CTRL].size, q->txq[TXQ_CTRL].token, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } } mtx_unlock_spin(&sc->sge.reg_lock); t3_update_qset_coalesce(q, p); refill_fl(sc, &q->fl[0], q->fl[0].size); refill_fl(sc, &q->fl[1], q->fl[1].size); refill_rspq(sc, &q->rspq, q->rspq.size - 1); t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | V_NEWTIMER(q->rspq.holdoff_tmr)); return (0); err_unlock: mtx_unlock_spin(&sc->sge.reg_lock); err: TXQ_LOCK(q); t3_free_qset(sc, q); return (ret); } /* * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with * ethernet data. 
Hardware assistance with various checksums and any VLAN tag * will also be taken into account here. */ void t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad) { struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; if_t ifp = pi->ifp; if (cpl->vlan_valid) { m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); m->m_flags |= M_VLANTAG; } m->m_pkthdr.rcvif = ifp; /* * adjust after conversion to mbuf chain */ m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); m->m_len -= (sizeof(*cpl) + ethpad); m->m_data += (sizeof(*cpl) + ethpad); if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) { struct ether_header *eh = mtod(m, void *); uint16_t eh_type; if (eh->ether_type == htons(ETHERTYPE_VLAN)) { struct ether_vlan_header *evh = mtod(m, void *); eh_type = evh->evl_proto; } else eh_type = eh->ether_type; if (if_getcapenable(ifp) & IFCAP_RXCSUM && eh_type == htons(ETHERTYPE_IP)) { m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } else if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6 && eh_type == htons(ETHERTYPE_IPV6)) { m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } } } /** * get_packet - return the next ingress packet buffer from a free list * @adap: the adapter that received the packet * @drop_thres: # of remaining buffers before we start dropping packets * @qs: the qset that the SGE free list holding the packet belongs to * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain * @r: response descriptor * * Get the next packet from a free list and complete setup of the * mbuf. If the packet is small we make a copy and recycle the * original buffer, otherwise we use the original buffer itself. If a * positive drop threshold is supplied packets are dropped and their * buffers recycled if (a) the number of remaining buffers is under the * threshold and the packet is too big to copy, or (b) the packet should * be copied but there is no memory for the copy. */ static int get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, struct t3_mbuf_hdr *mh, struct rsp_desc *r) { unsigned int len_cq = ntohl(r->len_cq); struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ?
&qs->fl[1] : &qs->fl[0]; int mask, cidx = fl->cidx; struct rx_sw_desc *sd = &fl->sdesc[cidx]; uint32_t len = G_RSPD_LEN(len_cq); uint32_t flags = M_EXT; uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); caddr_t cl; struct mbuf *m; int ret = 0; mask = fl->size - 1; prefetch(fl->sdesc[(cidx + 1) & mask].m); prefetch(fl->sdesc[(cidx + 2) & mask].m); prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); fl->credits--; bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) goto skip_recycle; cl = mtod(m, void *); memcpy(cl, sd->rxsd_cl, len); recycle_rx_buf(adap, fl, fl->cidx); m->m_pkthdr.len = m->m_len = len; m->m_flags = 0; mh->mh_head = mh->mh_tail = m; ret = 1; goto done; } else { skip_recycle: bus_dmamap_unload(fl->entry_tag, sd->map); cl = sd->rxsd_cl; m = sd->m; if ((sopeop == RSPQ_SOP_EOP) || (sopeop == RSPQ_SOP)) flags |= M_PKTHDR; m_init(m, M_NOWAIT, MT_DATA, flags); if (fl->zone == zone_pack) { /* * restore clobbered data pointer */ m->m_data = m->m_ext.ext_buf; } else { m_cljset(m, cl, fl->type); } m->m_len = len; } switch(sopeop) { case RSPQ_SOP_EOP: ret = 1; /* FALLTHROUGH */ case RSPQ_SOP: mh->mh_head = mh->mh_tail = m; m->m_pkthdr.len = len; break; case RSPQ_EOP: ret = 1; /* FALLTHROUGH */ case RSPQ_NSOP_NEOP: if (mh->mh_tail == NULL) { log(LOG_ERR, "discarding intermediate descriptor entry\n"); m_freem(m); m = NULL; break; } mh->mh_tail->m_next = m; mh->mh_tail = m; mh->mh_head->m_pkthdr.len += len; break; } if (cxgb_debug && m != NULL) printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); done: if (++fl->cidx == fl->size) fl->cidx = 0; return (ret); } /** * handle_rsp_cntrl_info - handles control information in a response * @qs: the queue set corresponding to the response * @flags: the response control flags * * Handles the control information of an SGE response, such as GTS * indications and completion credits for the queue set's Tx queues. * HW coalesces credits, we don't do any extra SW coalescing. */ static __inline void handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) { unsigned int credits; #if USE_GTS if (flags & F_RSPD_TXQ0_GTS) clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); #endif credits = G_RSPD_TXQ0_CR(flags); if (credits) qs->txq[TXQ_ETH].processed += credits; credits = G_RSPD_TXQ2_CR(flags); if (credits) qs->txq[TXQ_CTRL].processed += credits; # if USE_GTS if (flags & F_RSPD_TXQ1_GTS) clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); # endif credits = G_RSPD_TXQ1_CR(flags); if (credits) qs->txq[TXQ_OFLD].processed += credits; } static void check_ring_db(adapter_t *adap, struct sge_qset *qs, unsigned int sleeping) { ; } /** * process_responses - process responses from an SGE response queue * @adap: the adapter * @qs: the queue set to which the response queue belongs * @budget: how many responses can be processed in this round * * Process responses from an SGE response queue up to the supplied budget. * Responses include received packets as well as credits and other events * for the queues that belong to the response queue's queue set. * A negative budget is effectively unlimited. * * Additionally choose the interrupt holdoff time for the next interrupt * on this queue. If the system is under memory shortage use a fairly * long delay to help recovery. 
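* (Termination sketch: responses are consumed while is_new_response() sees the queue's current generation bit and budget_left is non-zero; the return value is the number of responses processed, i.e. budget - budget_left.)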
*/ static int process_responses(adapter_t *adap, struct sge_qset *qs, int budget) { struct sge_rspq *rspq = &qs->rspq; struct rsp_desc *r = &rspq->desc[rspq->cidx]; int budget_left = budget; unsigned int sleeping = 0; #if defined(INET6) || defined(INET) int lro_enabled = qs->lro.enabled; int skip_lro; struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; #endif struct t3_mbuf_hdr *mh = &rspq->rspq_mh; #ifdef DEBUG static int last_holdoff = 0; if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { printf("next_holdoff=%d\n", rspq->holdoff_tmr); last_holdoff = rspq->holdoff_tmr; } #endif rspq->next_holdoff = rspq->holdoff_tmr; while (__predict_true(budget_left && is_new_response(r, rspq))) { int eth, eop = 0, ethpad = 0; uint32_t flags = ntohl(r->flags); uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); uint8_t opcode = r->rss_hdr.opcode; eth = (opcode == CPL_RX_PKT); if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { struct mbuf *m; if (cxgb_debug) printf("async notification\n"); if (mh->mh_head == NULL) { mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA); m = mh->mh_head; } else { m = m_gethdr(M_NOWAIT, MT_DATA); } if (m == NULL) goto no_mem; memcpy(mtod(m, char *), r, AN_PKT_SIZE); m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF; opcode = CPL_ASYNC_NOTIF; eop = 1; rspq->async_notif++; goto skip; } else if (flags & F_RSPD_IMM_DATA_VALID) { struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { no_mem: rspq->next_holdoff = NOMEM_INTR_DELAY; budget_left--; break; } if (mh->mh_head == NULL) mh->mh_head = m; else mh->mh_tail->m_next = m; mh->mh_tail = m; get_imm_packet(adap, r, m); mh->mh_head->m_pkthdr.len += m->m_len; eop = 1; rspq->imm_data++; } else if (r->len_cq) { int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; eop = get_packet(adap, drop_thresh, qs, mh, r); if (eop) { if (r->rss_hdr.hash_type && !adap->timestamp) { M_HASHTYPE_SET(mh->mh_head, M_HASHTYPE_OPAQUE_HASH); mh->mh_head->m_pkthdr.flowid = rss_hash; } } ethpad = 2; } else { rspq->pure_rsps++; } skip: if (flags & RSPD_CTRL_MASK) { sleeping |= flags & RSPD_GTS_MASK; handle_rsp_cntrl_info(qs, flags); } if (!eth && eop) { rspq->offload_pkts++; #ifdef TCP_OFFLOAD adap->cpl_handler[opcode](qs, r, mh->mh_head); #else m_freem(mh->mh_head); #endif mh->mh_head = NULL; } else if (eth && eop) { struct mbuf *m = mh->mh_head; t3_rx_eth(adap, m, ethpad); /* * The T304 sends incoming packets on any qset. If LRO * is also enabled, we could end up sending packet up * lro_ctrl->ifp's input. That is incorrect. * * The mbuf's rcvif was derived from the cpl header and * is accurate. Skip LRO and just use that. */ #if defined(INET6) || defined(INET) skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro && (tcp_lro_rx(lro_ctrl, m, 0) == 0) ) { /* successfully queue'd for LRO */ } else #endif { /* * LRO not enabled, packet unsuitable for LRO, * or unable to queue. Pass it up right now in * either case. 
*/ if_t ifp = m->m_pkthdr.rcvif; if_input(ifp, m); } mh->mh_head = NULL; } r++; if (__predict_false(++rspq->cidx == rspq->size)) { rspq->cidx = 0; rspq->gen ^= 1; r = rspq->desc; } if (++rspq->credits >= 64) { refill_rspq(adap, rspq, rspq->credits); rspq->credits = 0; } __refill_fl_lt(adap, &qs->fl[0], 32); __refill_fl_lt(adap, &qs->fl[1], 32); --budget_left; } #if defined(INET6) || defined(INET) /* Flush LRO */ tcp_lro_flush_all(lro_ctrl); #endif if (sleeping) check_ring_db(adap, qs, sleeping); mb(); /* commit Tx queue processed updates */ if (__predict_false(qs->txq_stopped > 1)) restart_tx(qs); __refill_fl_lt(adap, &qs->fl[0], 512); __refill_fl_lt(adap, &qs->fl[1], 512); budget -= budget_left; return (budget); } /* * A helper function that processes responses and issues GTS. */ static __inline int process_responses_gts(adapter_t *adap, struct sge_rspq *rq) { int work; static int last_holdoff = 0; work = process_responses(adap, rspq_to_qset(rq), -1); if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { printf("next_holdoff=%d\n", rq->next_holdoff); last_holdoff = rq->next_holdoff; } t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); return (work); } #ifdef DEBUGNET int cxgb_debugnet_poll_rx(adapter_t *adap, struct sge_qset *qs) { return (process_responses_gts(adap, &qs->rspq)); } #endif /* * Interrupt handler for legacy INTx interrupts for T3B-based cards. * Handles data events from SGE response queues as well as error and other * async events as they all use the same interrupt pin. We use one SGE * response queue per port in this mode and protect all response queues with * queue 0's lock. */ void t3b_intr(void *data) { uint32_t i, map; adapter_t *adap = data; struct sge_rspq *q0 = &adap->sge.qs[0].rspq; t3_write_reg(adap, A_PL_CLI, 0); map = t3_read_reg(adap, A_SG_DATA_INTR); if (!map) return; if (__predict_false(map & F_ERRINTR)) { t3_write_reg(adap, A_PL_INT_ENABLE0, 0); (void) t3_read_reg(adap, A_PL_INT_ENABLE0); taskqueue_enqueue(adap->tq, &adap->slow_intr_task); } mtx_lock(&q0->lock); for_each_port(adap, i) if (map & (1 << i)) process_responses_gts(adap, &adap->sge.qs[i].rspq); mtx_unlock(&q0->lock); } /* * The MSI interrupt handler. This needs to handle data events from SGE * response queues as well as error and other async events as they all use * the same MSI vector. We use one SGE response queue per port in this mode * and protect all response queues with queue 0's lock. 
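* (If no queue had any work, the interrupt is treated as a possible error or other async event: INT_ENABLE0 is masked and the slow interrupt task is scheduled to sort it out.)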
*/ void t3_intr_msi(void *data) { adapter_t *adap = data; struct sge_rspq *q0 = &adap->sge.qs[0].rspq; int i, new_packets = 0; mtx_lock(&q0->lock); for_each_port(adap, i) if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) new_packets = 1; mtx_unlock(&q0->lock); if (new_packets == 0) { t3_write_reg(adap, A_PL_INT_ENABLE0, 0); (void) t3_read_reg(adap, A_PL_INT_ENABLE0); taskqueue_enqueue(adap->tq, &adap->slow_intr_task); } } void t3_intr_msix(void *data) { struct sge_qset *qs = data; adapter_t *adap = qs->port->adapter; struct sge_rspq *rspq = &qs->rspq; if (process_responses_gts(adap, rspq) == 0) rspq->unhandled_irqs++; } #define QDUMP_SBUF_SIZE 32 * 400 static int t3_dump_rspq(SYSCTL_HANDLER_ARGS) { struct sge_rspq *rspq; struct sge_qset *qs; int i, err, dump_end, idx; struct sbuf *sb; struct rsp_desc *rspd; uint32_t data[4]; rspq = arg1; qs = rspq_to_qset(rspq); if (rspq->rspq_dump_count == 0) return (0); if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { log(LOG_WARNING, "dump count is too large %d\n", rspq->rspq_dump_count); rspq->rspq_dump_count = 0; return (EINVAL); } if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { log(LOG_WARNING, "dump start of %d is greater than queue size\n", rspq->rspq_dump_start); rspq->rspq_dump_start = 0; return (EINVAL); } err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); if (err) return (err); err = sysctl_wire_old_buffer(req, 0); if (err) return (err); sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; for (i = rspq->rspq_dump_start; i < dump_end; i++) { idx = i & (RSPQ_Q_SIZE-1); rspd = &rspq->desc[idx]; sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), be32toh(rspd->len_cq), rspd->intr_gen); } err = sbuf_finish(sb); sbuf_delete(sb); return (err); } static int t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) { struct sge_txq *txq; struct sge_qset *qs; int i, j, err, dump_end; struct sbuf *sb; struct tx_desc *txd; uint32_t *WR, wr_hi, wr_lo, gen; uint32_t data[4]; txq = arg1; qs = txq_to_qset(txq, TXQ_ETH); if (txq->txq_dump_count == 0) { return (0); } if (txq->txq_dump_count > TX_ETH_Q_SIZE) { log(LOG_WARNING, "dump count is too large %d\n", txq->txq_dump_count); txq->txq_dump_count = 1; return (EINVAL); } if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { log(LOG_WARNING, "dump start of %d is greater than queue size\n", txq->txq_dump_start); txq->txq_dump_start = 0; return (EINVAL); } err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); if (err) return (err); err = sysctl_wire_old_buffer(req, 0); if (err) return (err); sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 
sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, txq->txq_dump_start, (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); dump_end = txq->txq_dump_start + txq->txq_dump_count; for (i = txq->txq_dump_start; i < dump_end; i++) { txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; WR = (uint32_t *)txd->flit; wr_hi = ntohl(WR[0]); wr_lo = ntohl(WR[1]); gen = G_WR_GEN(wr_lo); sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", wr_hi, wr_lo, gen); for (j = 2; j < 30; j += 4) sbuf_printf(sb, "\t%08x %08x %08x %08x \n", WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); } err = sbuf_finish(sb); sbuf_delete(sb); return (err); } static int t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) { struct sge_txq *txq; struct sge_qset *qs; int i, j, err, dump_end; struct sbuf *sb; struct tx_desc *txd; uint32_t *WR, wr_hi, wr_lo, gen; txq = arg1; qs = txq_to_qset(txq, TXQ_CTRL); if (txq->txq_dump_count == 0) { return (0); } if (txq->txq_dump_count > 256) { log(LOG_WARNING, "dump count is too large %d\n", txq->txq_dump_count); txq->txq_dump_count = 1; return (EINVAL); } if (txq->txq_dump_start > 255) { log(LOG_WARNING, "dump start of %d is greater than queue size\n", txq->txq_dump_start); txq->txq_dump_start = 0; return (EINVAL); } err = sysctl_wire_old_buffer(req, 0); if (err != 0) return (err); sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, txq->txq_dump_start, (txq->txq_dump_start + txq->txq_dump_count) & 255); dump_end = txq->txq_dump_start + txq->txq_dump_count; for (i = txq->txq_dump_start; i < dump_end; i++) { txd = &txq->desc[i & (255)]; WR = (uint32_t *)txd->flit; wr_hi = ntohl(WR[0]); wr_lo = ntohl(WR[1]); gen = G_WR_GEN(wr_lo); sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", wr_hi, wr_lo, gen); for (j = 2; j < 30; j += 4) sbuf_printf(sb, "\t%08x %08x %08x %08x \n", WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); } err = sbuf_finish(sb); sbuf_delete(sb); return (err); } static int t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) { adapter_t *sc = arg1; struct qset_params *qsp = &sc->params.sge.qset[0]; int coalesce_usecs; struct sge_qset *qs; int i, j, err, nqsets = 0; struct mtx *lock; if ((sc->flags & FULL_INIT_DONE) == 0) return (ENXIO); coalesce_usecs = qsp->coalesce_usecs; err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); if (err != 0) { return (err); } if (coalesce_usecs == qsp->coalesce_usecs) return (0); for (i = 0; i < sc->params.nports; i++) for (j = 0; j < sc->port[i].nqsets; j++) nqsets++; coalesce_usecs = max(1, coalesce_usecs); for (i = 0; i < nqsets; i++) { qs = &sc->sge.qs[i]; qsp = &sc->params.sge.qset[i]; qsp->coalesce_usecs = coalesce_usecs; lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : &sc->sge.qs[0].rspq.lock; mtx_lock(lock); t3_update_qset_coalesce(qs, qsp); t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | V_NEWTIMER(qs->rspq.holdoff_tmr)); mtx_unlock(lock); } return (0); } static int t3_pkt_timestamp(SYSCTL_HANDLER_ARGS) { adapter_t *sc = arg1; int rc, timestamp; if ((sc->flags & FULL_INIT_DONE) == 0) return (ENXIO); timestamp = sc->timestamp; rc = sysctl_handle_int(oidp, ×tamp, arg2, req); if (rc != 0) return (rc); if (timestamp != sc->timestamp) { t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS, timestamp ? 
F_ENABLERXPKTTMSTPRSS : 0); sc->timestamp = timestamp; } return (0); } void t3_add_attach_sysctls(adapter_t *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); /* random information */ SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", CTLFLAG_RD, sc->fw_version, 0, "firmware version"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD, &sc->params.rev, 0, "chip model"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "port_types", CTLFLAG_RD, sc->port_types, 0, "type of ports"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "enable_debug", CTLFLAG_RW, &cxgb_debug, 0, "enable verbose debugging output"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce", CTLFLAG_RD, &sc->tunq_coalesce, "#tunneled packets freed"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "txq_overrun", CTLFLAG_RD, &txq_fills, 0, "#times txq overrun"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, &sc->params.vpd.cclk, 0, "core clock frequency (in KHz)"); } static const char *rspq_name = "rspq"; static const char *txq_names[] = { "txq_eth", "txq_ofld", "txq_ctrl" }; static int sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) { struct port_info *p = arg1; uint64_t *parg; if (!p) return (EINVAL); cxgb_refresh_stats(p); parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); return (sysctl_handle_64(oidp, parg, 0, req)); } void t3_add_configured_sysctls(adapter_t *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; int i, j; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, t3_set_coalesce_usecs, "I", "interrupt coalescing timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pkt_timestamp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, t3_pkt_timestamp, "I", "provide packet timestamp instead of connection hash"); for (i = 0; i < sc->params.nports; i++) { struct port_info *pi = &sc->port[i]; struct sysctl_oid *poid; struct sysctl_oid_list *poidlist; struct mac_stats *mstats = &pi->mac.stats; snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, pi->namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "port statistics"); poidlist = SYSCTL_CHILDREN(poid); SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO, "nqsets", CTLFLAG_RD, &pi->nqsets, 0, "#queue sets"); for (j = 0; j < pi->nqsets; j++) { struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, *ctrlqpoid, *lropoid; struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist, *ctrlqpoidlist, *lropoidlist; struct sge_txq *txq = &qs->txq[TXQ_ETH]; snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, qs->namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "qset statistics"); qspoidlist = SYSCTL_CHILDREN(qspoid); SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", CTLFLAG_RD, &qs->fl[0].empty, 0, "freelist #0 empty"); SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", CTLFLAG_RD, &qs->fl[1].empty, 0, "freelist #1 empty"); rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, rspq_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rspq statistics"); rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, txq_names[0], CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "txq statistics"); txqpoidlist = 
SYSCTL_CHILDREN(txqpoid); ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, txq_names[2], CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ctrlq statistics"); ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, "lro_stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "LRO statistics"); lropoidlist = SYSCTL_CHILDREN(lropoid); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", CTLFLAG_RD, &qs->rspq.size, 0, "#entries in response queue"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", CTLFLAG_RD, &qs->rspq.cidx, 0, "consumer index"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", CTLFLAG_RD, &qs->rspq.credits, 0, "#credits"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved", CTLFLAG_RD, &qs->rspq.starved, 0, "#times starved"); SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr", CTLFLAG_RD, &qs->rspq.phys_addr, "physical_address_of the queue"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", CTLFLAG_RW, &qs->rspq.rspq_dump_start, 0, "start rspq dump entry"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", CTLFLAG_RW, &qs->rspq.rspq_dump_count, 0, "#rspq entries to dump"); SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &qs->rspq, 0, t3_dump_rspq, "A", "dump of the response queue"); SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped", CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops, "#tunneled packets dropped"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen", CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len, 0, "#tunneled packets waiting to be sent"); #if 0 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 0, "#tunneled packets queue producer index"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 0, "#tunneled packets queue consumer index"); #endif SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed", CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 0, "#tunneled packets processed by the card"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", CTLFLAG_RD, &txq->cleaned, 0, "#tunneled packets cleaned"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", CTLFLAG_RD, &txq->in_use, 0, "#tunneled packet slots in use"); SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees", CTLFLAG_RD, &txq->txq_frees, "#tunneled packets freed"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", CTLFLAG_RD, &txq->txq_skipped, 0, "#tunneled packet descriptors skipped"); SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", CTLFLAG_RD, &txq->txq_coalesced, "#tunneled packets coalesced"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", CTLFLAG_RD, &txq->txq_enqueued, 0, "#tunneled packets enqueued to hardware"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", CTLFLAG_RD, &qs->txq_stopped, 0, "tx queues stopped"); SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr", CTLFLAG_RD, &txq->phys_addr, "physical_address_of the queue"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 0, "txq generation"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", CTLFLAG_RD, &txq->cidx, 0, "hardware queue cidx"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", CTLFLAG_RD, &txq->pidx, 0, "hardware queue pidx"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 0, "txq start idx for dump"); 
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 0, "txq #entries to dump"); SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &qs->txq[TXQ_ETH], 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 0, "ctrlq start idx for dump"); SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 0, "ctrl #entries to dump"); SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &qs->txq[TXQ_CTRL], 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued", CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed", CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); } /* Now add a node for mac stats. */ poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "MAC statistics"); poidlist = SYSCTL_CHILDREN(poid); /* * We (ab)use the length argument (arg2) to pass on the offset * of the data that we are interested in. This is only required * for the quad counters that are updated from the hardware (we * make sure that we return the latest value). * sysctl_handle_macstat first updates *all* the counters from * the hardware, and then returns the latest value of the * requested counter. Best would be to update only the * requested counter from hardware, but t3_mac_update_stats() * hides all the register details and we don't want to dive into * all that here. 
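* (So each quad counter is registered with arg2 = offsetof(struct mac_stats, counter); sysctl_handle_macstat() refreshes the stats and then hands back the 64-bit value at that offset within &p->mac.stats.)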
*/ #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pi, \ offsetof(struct mac_stats, a), sysctl_handle_macstat, "QU", 0) CXGB_SYSCTL_ADD_QUAD(tx_octets); CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); CXGB_SYSCTL_ADD_QUAD(tx_frames); CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); CXGB_SYSCTL_ADD_QUAD(tx_pause); CXGB_SYSCTL_ADD_QUAD(tx_deferred); CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); CXGB_SYSCTL_ADD_QUAD(tx_underrun); CXGB_SYSCTL_ADD_QUAD(tx_len_errs); CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); CXGB_SYSCTL_ADD_QUAD(tx_frames_64); CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); CXGB_SYSCTL_ADD_QUAD(rx_octets); CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); CXGB_SYSCTL_ADD_QUAD(rx_frames); CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); CXGB_SYSCTL_ADD_QUAD(rx_pause); CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); CXGB_SYSCTL_ADD_QUAD(rx_align_errs); CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); CXGB_SYSCTL_ADD_QUAD(rx_data_errs); CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); CXGB_SYSCTL_ADD_QUAD(rx_runt); CXGB_SYSCTL_ADD_QUAD(rx_jabber); CXGB_SYSCTL_ADD_QUAD(rx_short); CXGB_SYSCTL_ADD_QUAD(rx_too_long); CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); CXGB_SYSCTL_ADD_QUAD(rx_frames_64); CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); #undef CXGB_SYSCTL_ADD_QUAD #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ CTLFLAG_RD, &mstats->a, 0) CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); CXGB_SYSCTL_ADD_ULONG(num_toggled); CXGB_SYSCTL_ADD_ULONG(num_resets); CXGB_SYSCTL_ADD_ULONG(link_faults); #undef CXGB_SYSCTL_ADD_ULONG } } /** * t3_get_desc - dump an SGE descriptor for debugging purposes * @qs: the queue set * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) * @idx: the descriptor index in the queue * @data: where to dump the descriptor contents * * Dumps the contents of a HW descriptor of an SGE queue. Returns the * size of the descriptor. 
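* (The caller is expected to supply a buffer large enough for the largest of the descriptor types, since the copy size depends on qnum: a tx_desc for 0..2, an rsp_desc for 3, an rx_desc for 4..5.)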
*/ int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, unsigned char *data) { if (qnum >= 6) return (EINVAL); if (qnum < 3) { if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) return (EINVAL); memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); return sizeof(struct tx_desc); } if (qnum == 3) { if (!qs->rspq.desc || idx >= qs->rspq.size) return (EINVAL); memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); return sizeof(struct rsp_desc); } qnum -= 4; if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) return (EINVAL); memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); return sizeof(struct rx_desc); } diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c index e5b173964451..77b6ba5d4032 100644 --- a/sys/dev/cxgbe/tom/t4_tom.c +++ b/sys/dev/cxgbe/tom/t4_tom.c @@ -1,2075 +1,2075 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
*/ #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_kern_tls.h" #include "opt_ratelimit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #ifdef TCP_OFFLOAD #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "common/t4_tcb.h" #include "t4_clip.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" #include "tom/t4_tls.h" static struct protosw toe_protosw; static struct protosw toe6_protosw; /* Module ops */ static int t4_tom_mod_load(void); static int t4_tom_mod_unload(void); static int t4_tom_modevent(module_t, int, void *); /* ULD ops and helpers */ static int t4_tom_activate(struct adapter *); static int t4_tom_deactivate(struct adapter *); static struct uld_info tom_uld_info = { .uld_id = ULD_TOM, .activate = t4_tom_activate, .deactivate = t4_tom_deactivate, }; static void release_offload_resources(struct toepcb *); static int alloc_tid_tabs(struct tid_info *); static void free_tid_tabs(struct tid_info *); static void free_tom_data(struct adapter *, struct tom_data *); static void reclaim_wr_resources(void *, int); struct toepcb * alloc_toepcb(struct vi_info *vi, int flags) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct toepcb *toep; int tx_credits, txsd_total, len; /* * The firmware counts tx work request credits in units of 16 bytes * each. Reserve room for an ABORT_REQ so the driver never has to worry * about tx credits if it wants to abort a connection. */ tx_credits = sc->params.ofldq_wr_cred; tx_credits -= howmany(sizeof(struct cpl_abort_req), 16); /* * Shortest possible tx work request is a fw_ofld_tx_data_wr + 1 byte * immediate payload, and firmware counts tx work request credits in * units of 16 byte. Calculate the maximum work requests possible. */ txsd_total = tx_credits / howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16); len = offsetof(struct toepcb, txsd) + txsd_total * sizeof(struct ofld_tx_sdesc); toep = malloc(len, M_CXGBE, M_ZERO | flags); if (toep == NULL) return (NULL); refcount_init(&toep->refcount, 1); toep->td = sc->tom_softc; toep->vi = vi; toep->tid = -1; toep->tx_total = tx_credits; toep->tx_credits = tx_credits; mbufq_init(&toep->ulp_pduq, INT_MAX); mbufq_init(&toep->ulp_pdu_reclaimq, INT_MAX); toep->txsd_total = txsd_total; toep->txsd_avail = txsd_total; toep->txsd_pidx = 0; toep->txsd_cidx = 0; aiotx_init_toep(toep); return (toep); } /* * Initialize a toepcb after its params have been filled out. 
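 *
 * Expected call order (a sketch of how the connect/accept paths are meant
 * to use this): alloc_toepcb() first, then fill in toep->params (e.g. via
 * init_conn_params()), then init_toepcb(), which binds the toepcb to its
 * offload tx/rx queues and, optionally, a traffic class.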
*/ int init_toepcb(struct vi_info *vi, struct toepcb *toep) { struct conn_params *cp = &toep->params; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct tx_cl_rl_params *tc; if (cp->tc_idx >= 0 && cp->tc_idx < sc->params.nsched_cls) { tc = &pi->sched_params->cl_rl[cp->tc_idx]; mtx_lock(&sc->tc_lock); if (tc->state != CS_HW_CONFIGURED) { CH_ERR(vi, "tid %d cannot be bound to traffic class %d " "because it is not configured (its state is %d)\n", toep->tid, cp->tc_idx, tc->state); cp->tc_idx = -1; } else { tc->refcount++; } mtx_unlock(&sc->tc_lock); } toep->ofld_txq = &sc->sge.ofld_txq[cp->txq_idx]; toep->ofld_rxq = &sc->sge.ofld_rxq[cp->rxq_idx]; toep->ctrlq = &sc->sge.ctrlq[pi->port_id]; tls_init_toep(toep); if (ulp_mode(toep) == ULP_MODE_TCPDDP) ddp_init_toep(toep); toep->flags |= TPF_INITIALIZED; return (0); } struct toepcb * hold_toepcb(struct toepcb *toep) { refcount_acquire(&toep->refcount); return (toep); } void free_toepcb(struct toepcb *toep) { if (refcount_release(&toep->refcount) == 0) return; KASSERT(!(toep->flags & TPF_ATTACHED), ("%s: attached to an inpcb", __func__)); KASSERT(!(toep->flags & TPF_CPL_PENDING), ("%s: CPL pending", __func__)); if (toep->flags & TPF_INITIALIZED) { if (ulp_mode(toep) == ULP_MODE_TCPDDP) ddp_uninit_toep(toep); tls_uninit_toep(toep); } free(toep, M_CXGBE); } /* * Set up the socket for TCP offload. */ void offload_socket(struct socket *so, struct toepcb *toep) { struct tom_data *td = toep->td; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct sockbuf *sb; INP_WLOCK_ASSERT(inp); /* Update socket */ sb = &so->so_snd; SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOCOALESCE; SOCKBUF_UNLOCK(sb); sb = &so->so_rcv; SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOCOALESCE; if (inp->inp_vflag & INP_IPV6) so->so_proto = &toe6_protosw; else so->so_proto = &toe_protosw; SOCKBUF_UNLOCK(sb); /* Update TCP PCB */ tp->tod = &td->tod; tp->t_toe = toep; tp->t_flags |= TF_TOE; /* Install an extra hold on inp */ toep->inp = inp; toep->flags |= TPF_ATTACHED; in_pcbref(inp); /* Add the TOE PCB to the active list */ mtx_lock(&td->toep_list_lock); TAILQ_INSERT_HEAD(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); } void restore_so_proto(struct socket *so, bool v6) { if (v6) so->so_proto = &tcp6_protosw; else so->so_proto = &tcp_protosw; } /* This is _not_ the normal way to "unoffload" a socket. 
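 * It reverses, step by step, what offload_socket() did, and is only
 * intended for the case where offload setup fails before any CPLs are
 * outstanding; an established connection is torn down via t4_pcb_detach()
 * instead.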
*/ void undo_offload_socket(struct socket *so) { struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep = tp->t_toe; struct tom_data *td = toep->td; struct sockbuf *sb; INP_WLOCK_ASSERT(inp); sb = &so->so_snd; SOCKBUF_LOCK(sb); sb->sb_flags &= ~SB_NOCOALESCE; SOCKBUF_UNLOCK(sb); sb = &so->so_rcv; SOCKBUF_LOCK(sb); sb->sb_flags &= ~SB_NOCOALESCE; restore_so_proto(so, inp->inp_vflag & INP_IPV6); SOCKBUF_UNLOCK(sb); tp->tod = NULL; tp->t_toe = NULL; tp->t_flags &= ~TF_TOE; toep->inp = NULL; toep->flags &= ~TPF_ATTACHED; if (in_pcbrele_wlocked(inp)) panic("%s: inp freed.", __func__); mtx_lock(&td->toep_list_lock); TAILQ_REMOVE(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); } static void release_offload_resources(struct toepcb *toep) { struct tom_data *td = toep->td; struct adapter *sc = td_adapter(td); int tid = toep->tid; KASSERT(!(toep->flags & TPF_CPL_PENDING), ("%s: %p has CPL pending.", __func__, toep)); KASSERT(!(toep->flags & TPF_ATTACHED), ("%s: %p is still attached.", __func__, toep)); CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)", __func__, toep, tid, toep->l2te, toep->ce); /* * These queues should have been emptied at approximately the same time * that a normal connection's socket's so_snd would have been purged or * drained. Do _not_ clean up here. */ - MPASS(mbufq_len(&toep->ulp_pduq) == 0); - MPASS(mbufq_len(&toep->ulp_pdu_reclaimq) == 0); + MPASS(mbufq_empty(&toep->ulp_pduq)); + MPASS(mbufq_empty(&toep->ulp_pdu_reclaimq)); #ifdef INVARIANTS if (ulp_mode(toep) == ULP_MODE_TCPDDP) ddp_assert_empty(toep); #endif MPASS(TAILQ_EMPTY(&toep->aiotx_jobq)); if (toep->l2te) t4_l2t_release(toep->l2te); if (tid >= 0) { remove_tid(sc, tid, toep->ce ? 2 : 1); release_tid(sc, tid, toep->ctrlq); } if (toep->ce) t4_release_clip_entry(sc, toep->ce); if (toep->params.tc_idx != -1) t4_release_cl_rl(sc, toep->vi->pi->port_id, toep->params.tc_idx); mtx_lock(&td->toep_list_lock); TAILQ_REMOVE(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); free_toepcb(toep); } /* * The kernel is done with the TCP PCB and this is our opportunity to unhook the * toepcb hanging off of it. If the TOE driver is also done with the toepcb (no * pending CPL) then it is time to release all resources tied to the toepcb. * * Also gets called when an offloaded active open fails and the TOM wants the * kernel to take the TCP PCB back. */ static void t4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp) { #if defined(KTR) || defined(INVARIANTS) struct inpcb *inp = tptoinpcb(tp); #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); KASSERT(toep->flags & TPF_ATTACHED, ("%s: not attached", __func__)); #ifdef KTR if (tp->t_state == TCPS_SYN_SENT) { CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)", __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags); } else { CTR6(KTR_CXGBE, "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)", toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp, inp->inp_flags); } #endif tp->tod = NULL; tp->t_toe = NULL; tp->t_flags &= ~TF_TOE; toep->flags &= ~TPF_ATTACHED; if (!(toep->flags & TPF_CPL_PENDING)) release_offload_resources(toep); } /* * setsockopt handler. 
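 * Reached (roughly) as setsockopt(s, IPPROTO_TCP, TCP_NODELAY, ...) ->
 * tcp_ctloutput() -> tcp_offload_ctloutput() -> tod_ctloutput, i.e. this
 * function, which then pushes the new Nagle setting into the hardware TCB
 * with t4_set_tcb_field().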
*/ static void t4_ctloutput(struct toedev *tod, struct tcpcb *tp, int dir, int name) { struct adapter *sc = tod->tod_softc; struct toepcb *toep = tp->t_toe; if (dir == SOPT_GET) return; CTR4(KTR_CXGBE, "%s: tp %p, dir %u, name %u", __func__, tp, dir, name); switch (name) { case TCP_NODELAY: if (tp->t_state != TCPS_ESTABLISHED) break; toep->params.nagle = tp->t_flags & TF_NODELAY ? 0 : 1; t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS, V_TF_NAGLE(1), V_TF_NAGLE(toep->params.nagle), 0, 0); break; default: break; } } static inline uint64_t get_tcb_tflags(const uint64_t *tcb) { return ((be64toh(tcb[14]) << 32) | (be64toh(tcb[15]) >> 32)); } static inline uint32_t get_tcb_field(const uint64_t *tcb, u_int word, uint32_t mask, u_int shift) { #define LAST_WORD ((TCB_SIZE / 4) - 1) uint64_t t1, t2; int flit_idx; MPASS(mask != 0); MPASS(word <= LAST_WORD); MPASS(shift < 32); flit_idx = (LAST_WORD - word) / 2; if (word & 0x1) shift += 32; t1 = be64toh(tcb[flit_idx]) >> shift; t2 = 0; if (fls(mask) > 64 - shift) { /* * Will spill over into the next logical flit, which is the flit * before this one. The flit_idx before this one must be valid. */ MPASS(flit_idx > 0); t2 = be64toh(tcb[flit_idx - 1]) << (64 - shift); } return ((t2 | t1) & mask); #undef LAST_WORD } #define GET_TCB_FIELD(tcb, F) \ get_tcb_field(tcb, W_TCB_##F, M_TCB_##F, S_TCB_##F) /* * Issues a CPL_GET_TCB to read the entire TCB for the tid. */ static int send_get_tcb(struct adapter *sc, u_int tid) { struct cpl_get_tcb *cpl; struct wrq_cookie cookie; MPASS(tid >= sc->tids.tid_base); MPASS(tid - sc->tids.tid_base < sc->tids.ntids); cpl = start_wrq_wr(&sc->sge.ctrlq[0], howmany(sizeof(*cpl), 16), &cookie); if (__predict_false(cpl == NULL)) return (ENOMEM); bzero(cpl, sizeof(*cpl)); INIT_TP_WR(cpl, tid); OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_GET_TCB, tid)); cpl->reply_ctrl = htobe16(V_REPLY_CHAN(0) | V_QUEUENO(sc->sge.ofld_rxq[0].iq.cntxt_id)); cpl->cookie = 0xff; commit_wrq_wr(&sc->sge.ctrlq[0], cpl, &cookie); return (0); } static struct tcb_histent * alloc_tcb_histent(struct adapter *sc, u_int tid, int flags) { struct tcb_histent *te; MPASS(flags == M_NOWAIT || flags == M_WAITOK); te = malloc(sizeof(*te), M_CXGBE, M_ZERO | flags); if (te == NULL) return (NULL); mtx_init(&te->te_lock, "TCB entry", NULL, MTX_DEF); callout_init_mtx(&te->te_callout, &te->te_lock, 0); te->te_adapter = sc; te->te_tid = tid; return (te); } static void free_tcb_histent(struct tcb_histent *te) { mtx_destroy(&te->te_lock); free(te, M_CXGBE); } /* * Start tracking the tid in the TCB history. 
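 * Once the first CPL_GET_TCB reply arrives, do_get_tcb_rpl() keeps the
 * entry fresh by re-arming te_callout (at hz / 10 intervals) and removes
 * the entry when the connection reaches TCPS_CLOSED.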
*/ int add_tid_to_history(struct adapter *sc, u_int tid) { struct tcb_histent *te = NULL; struct tom_data *td = sc->tom_softc; int rc; MPASS(tid >= sc->tids.tid_base); MPASS(tid - sc->tids.tid_base < sc->tids.ntids); if (td->tcb_history == NULL) return (ENXIO); rw_wlock(&td->tcb_history_lock); if (td->tcb_history[tid] != NULL) { rc = EEXIST; goto done; } te = alloc_tcb_histent(sc, tid, M_NOWAIT); if (te == NULL) { rc = ENOMEM; goto done; } mtx_lock(&te->te_lock); rc = send_get_tcb(sc, tid); if (rc == 0) { te->te_flags |= TE_RPL_PENDING; td->tcb_history[tid] = te; } else { free(te, M_CXGBE); } mtx_unlock(&te->te_lock); done: rw_wunlock(&td->tcb_history_lock); return (rc); } static void remove_tcb_histent(struct tcb_histent *te) { struct adapter *sc = te->te_adapter; struct tom_data *td = sc->tom_softc; rw_assert(&td->tcb_history_lock, RA_WLOCKED); mtx_assert(&te->te_lock, MA_OWNED); MPASS(td->tcb_history[te->te_tid] == te); td->tcb_history[te->te_tid] = NULL; free_tcb_histent(te); rw_wunlock(&td->tcb_history_lock); } static inline struct tcb_histent * lookup_tcb_histent(struct adapter *sc, u_int tid, bool addrem) { struct tcb_histent *te; struct tom_data *td = sc->tom_softc; MPASS(tid >= sc->tids.tid_base); MPASS(tid - sc->tids.tid_base < sc->tids.ntids); if (td->tcb_history == NULL) return (NULL); if (addrem) rw_wlock(&td->tcb_history_lock); else rw_rlock(&td->tcb_history_lock); te = td->tcb_history[tid]; if (te != NULL) { mtx_lock(&te->te_lock); return (te); /* with both locks held */ } if (addrem) rw_wunlock(&td->tcb_history_lock); else rw_runlock(&td->tcb_history_lock); return (te); } static inline void release_tcb_histent(struct tcb_histent *te) { struct adapter *sc = te->te_adapter; struct tom_data *td = sc->tom_softc; mtx_assert(&te->te_lock, MA_OWNED); mtx_unlock(&te->te_lock); rw_assert(&td->tcb_history_lock, RA_RLOCKED); rw_runlock(&td->tcb_history_lock); } static void request_tcb(void *arg) { struct tcb_histent *te = arg; mtx_assert(&te->te_lock, MA_OWNED); /* No one else is supposed to update the histent. */ MPASS(!(te->te_flags & TE_RPL_PENDING)); if (send_get_tcb(te->te_adapter, te->te_tid) == 0) te->te_flags |= TE_RPL_PENDING; else callout_schedule(&te->te_callout, hz / 100); } static void update_tcb_histent(struct tcb_histent *te, const uint64_t *tcb) { struct tom_data *td = te->te_adapter->tom_softc; uint64_t tflags = get_tcb_tflags(tcb); uint8_t sample = 0; if (GET_TCB_FIELD(tcb, SND_MAX_RAW) != GET_TCB_FIELD(tcb, SND_UNA_RAW)) { if (GET_TCB_FIELD(tcb, T_RXTSHIFT) != 0) sample |= TS_RTO; if (GET_TCB_FIELD(tcb, T_DUPACKS) != 0) sample |= TS_DUPACKS; if (GET_TCB_FIELD(tcb, T_DUPACKS) >= td->dupack_threshold) sample |= TS_FASTREXMT; } if (GET_TCB_FIELD(tcb, SND_MAX_RAW) != 0) { uint32_t snd_wnd; sample |= TS_SND_BACKLOGGED; /* for whatever reason. */ snd_wnd = GET_TCB_FIELD(tcb, RCV_ADV); if (tflags & V_TF_RECV_SCALE(1)) snd_wnd <<= GET_TCB_FIELD(tcb, RCV_SCALE); if (GET_TCB_FIELD(tcb, SND_CWND) < snd_wnd) sample |= TS_CWND_LIMITED; /* maybe due to CWND */ } if (tflags & V_TF_CCTRL_ECN(1)) { /* * CE marker on incoming IP hdr, echoing ECE back in the TCP * hdr. Indicates congestion somewhere on the way from the peer * to this node. */ if (tflags & V_TF_CCTRL_ECE(1)) sample |= TS_ECN_ECE; /* * ECE seen and CWR sent (or about to be sent). Might indicate * congestion on the way to the peer. This node is reducing its * congestion window in response.
*/ if (tflags & (V_TF_CCTRL_CWR(1) | V_TF_CCTRL_RFR(1))) sample |= TS_ECN_CWR; } te->te_sample[te->te_pidx] = sample; if (++te->te_pidx == nitems(te->te_sample)) te->te_pidx = 0; memcpy(te->te_tcb, tcb, TCB_SIZE); te->te_flags |= TE_ACTIVE; } static int do_get_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_get_tcb_rpl *cpl = mtod(m, const void *); const uint64_t *tcb = (const uint64_t *)(const void *)(cpl + 1); struct tcb_histent *te; const u_int tid = GET_TID(cpl); bool remove; remove = GET_TCB_FIELD(tcb, T_STATE) == TCPS_CLOSED; te = lookup_tcb_histent(sc, tid, remove); if (te == NULL) { /* Not in the history. Who issued the GET_TCB for this? */ device_printf(sc->dev, "tcb %u: flags 0x%016jx, state %u, " "srtt %u, sscale %u, rscale %u, cookie 0x%x\n", tid, (uintmax_t)get_tcb_tflags(tcb), GET_TCB_FIELD(tcb, T_STATE), GET_TCB_FIELD(tcb, T_SRTT), GET_TCB_FIELD(tcb, SND_SCALE), GET_TCB_FIELD(tcb, RCV_SCALE), cpl->cookie); goto done; } MPASS(te->te_flags & TE_RPL_PENDING); te->te_flags &= ~TE_RPL_PENDING; if (remove) { remove_tcb_histent(te); } else { update_tcb_histent(te, tcb); callout_reset(&te->te_callout, hz / 10, request_tcb, te); release_tcb_histent(te); } done: m_freem(m); return (0); } static void fill_tcp_info_from_tcb(struct adapter *sc, uint64_t *tcb, struct tcp_info *ti) { uint32_t v; ti->tcpi_state = GET_TCB_FIELD(tcb, T_STATE); v = GET_TCB_FIELD(tcb, T_SRTT); ti->tcpi_rtt = tcp_ticks_to_us(sc, v); v = GET_TCB_FIELD(tcb, T_RTTVAR); ti->tcpi_rttvar = tcp_ticks_to_us(sc, v); ti->tcpi_snd_ssthresh = GET_TCB_FIELD(tcb, SND_SSTHRESH); ti->tcpi_snd_cwnd = GET_TCB_FIELD(tcb, SND_CWND); ti->tcpi_rcv_nxt = GET_TCB_FIELD(tcb, RCV_NXT); ti->tcpi_rcv_adv = GET_TCB_FIELD(tcb, RCV_ADV); ti->tcpi_dupacks = GET_TCB_FIELD(tcb, T_DUPACKS); v = GET_TCB_FIELD(tcb, TX_MAX); ti->tcpi_snd_nxt = v - GET_TCB_FIELD(tcb, SND_NXT_RAW); ti->tcpi_snd_una = v - GET_TCB_FIELD(tcb, SND_UNA_RAW); ti->tcpi_snd_max = v - GET_TCB_FIELD(tcb, SND_MAX_RAW); /* Receive window being advertised by us. */ ti->tcpi_rcv_wscale = GET_TCB_FIELD(tcb, SND_SCALE); /* Yes, SND. */ ti->tcpi_rcv_space = GET_TCB_FIELD(tcb, RCV_WND); /* Send window */ ti->tcpi_snd_wscale = GET_TCB_FIELD(tcb, RCV_SCALE); /* Yes, RCV. */ ti->tcpi_snd_wnd = GET_TCB_FIELD(tcb, RCV_ADV); if (get_tcb_tflags(tcb) & V_TF_RECV_SCALE(1)) ti->tcpi_snd_wnd <<= ti->tcpi_snd_wscale; else ti->tcpi_snd_wscale = 0; } static void fill_tcp_info_from_history(struct adapter *sc, struct tcb_histent *te, struct tcp_info *ti) { fill_tcp_info_from_tcb(sc, te->te_tcb, ti); } /* * Reads the TCB for the given tid using a memory window and copies it to 'buf' * in the same format as CPL_GET_TCB_RPL. 
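 * The byte-shuffling loop at the end converts between the two layouts:
 * the memory window and the CPL reply present the TCB flits in opposite
 * orders, so the loop swaps 16-byte chunk i with chunk (n - 1 - i).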
*/ static void read_tcb_using_memwin(struct adapter *sc, u_int tid, uint64_t *buf) { int i, j, k, rc; uint32_t addr; u_char *tcb, tmp; MPASS(tid >= sc->tids.tid_base); MPASS(tid - sc->tids.tid_base < sc->tids.ntids); addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + tid * TCB_SIZE; rc = read_via_memwin(sc, 2, addr, (uint32_t *)buf, TCB_SIZE); if (rc != 0) return; tcb = (u_char *)buf; for (i = 0, j = TCB_SIZE - 16; i < j; i += 16, j -= 16) { for (k = 0; k < 16; k++) { tmp = tcb[i + k]; tcb[i + k] = tcb[j + k]; tcb[j + k] = tmp; } } } static void fill_tcp_info(struct adapter *sc, u_int tid, struct tcp_info *ti) { uint64_t tcb[TCB_SIZE / sizeof(uint64_t)]; struct tcb_histent *te; ti->tcpi_toe_tid = tid; te = lookup_tcb_histent(sc, tid, false); if (te != NULL) { fill_tcp_info_from_history(sc, te, ti); release_tcb_histent(te); } else { if (!(sc->debug_flags & DF_DISABLE_TCB_CACHE)) { /* XXX: tell firmware to flush TCB cache. */ } read_tcb_using_memwin(sc, tid, tcb); fill_tcp_info_from_tcb(sc, tcb, ti); } } /* * Called by the kernel to allow the TOE driver to "refine" values filled up in * the tcp_info for an offloaded connection. */ static void t4_tcp_info(struct toedev *tod, const struct tcpcb *tp, struct tcp_info *ti) { struct adapter *sc = tod->tod_softc; struct toepcb *toep = tp->t_toe; INP_LOCK_ASSERT(tptoinpcb(tp)); MPASS(ti != NULL); fill_tcp_info(sc, toep->tid, ti); } #ifdef KERN_TLS static int t4_alloc_tls_session(struct toedev *tod, struct tcpcb *tp, struct ktls_session *tls, int direction) { struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(tptoinpcb(tp)); MPASS(tls != NULL); return (tls_alloc_ktls(toep, tls, direction)); } #endif /* SET_TCB_FIELD sent as a ULP command looks like this */ #define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \ sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core)) static void * mk_set_tcb_field_ulp(struct ulp_txpkt *ulpmc, uint64_t word, uint64_t mask, uint64_t val, uint32_t tid) { struct ulptx_idata *ulpsc; struct cpl_set_tcb_field_core *req; ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); ulpmc->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(sizeof(*req)); req = (struct cpl_set_tcb_field_core *)(ulpsc + 1); OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); req->reply_ctrl = htobe16(V_NO_REPLY(1)); req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); req->mask = htobe64(mask); req->val = htobe64(val); ulpsc = (struct ulptx_idata *)(req + 1); if (LEN__SET_TCB_FIELD_ULP % 16) { ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); ulpsc->len = htobe32(0); return (ulpsc + 1); } return (ulpsc); } static void send_mss_flowc_wr(struct adapter *sc, struct toepcb *toep) { struct wrq_cookie cookie; struct fw_flowc_wr *flowc; struct ofld_tx_sdesc *txsd; const int flowclen = sizeof(*flowc) + sizeof(struct fw_flowc_mnemval); const int flowclen16 = howmany(flowclen, 16); if (toep->tx_credits < flowclen16 || toep->txsd_avail == 0) { CH_ERR(sc, "%s: tid %u out of tx credits (%d, %d).\n", __func__, toep->tid, toep->tx_credits, toep->txsd_avail); return; } flowc = start_wrq_wr(&toep->ofld_txq->wrq, flowclen16, &cookie); if (__predict_false(flowc == NULL)) { CH_ERR(sc, "ENOMEM in %s for tid %u.\n", __func__, toep->tid); return; } flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(1)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(flowclen16) | 
V_FW_WR_FLOWID(toep->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[0].val = htobe32(toep->params.emss); txsd = &toep->txsd[toep->txsd_pidx]; txsd->tx_credits = flowclen16; txsd->plen = 0; toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; commit_wrq_wr(&toep->ofld_txq->wrq, flowc, &cookie); } static void t4_pmtu_update(struct toedev *tod, struct tcpcb *tp, tcp_seq seq, int mtu) { struct work_request_hdr *wrh; struct ulp_txpkt *ulpmc; int idx, len; struct wrq_cookie cookie; struct inpcb *inp = tptoinpcb(tp); struct toepcb *toep = tp->t_toe; struct adapter *sc = td_adapter(toep->td); unsigned short *mtus = &sc->params.mtus[0]; INP_WLOCK_ASSERT(inp); MPASS(mtu > 0); /* kernel is supposed to provide something usable. */ /* tp->snd_una and snd_max are in host byte order too. */ seq = be32toh(seq); CTR6(KTR_CXGBE, "%s: tid %d, seq 0x%08x, mtu %u, mtu_idx %u (%d)", __func__, toep->tid, seq, mtu, toep->params.mtu_idx, mtus[toep->params.mtu_idx]); if (ulp_mode(toep) == ULP_MODE_NONE && /* XXX: Read TCB otherwise? */ (SEQ_LT(seq, tp->snd_una) || SEQ_GEQ(seq, tp->snd_max))) { CTR5(KTR_CXGBE, "%s: tid %d, seq 0x%08x not in range [0x%08x, 0x%08x).", __func__, toep->tid, seq, tp->snd_una, tp->snd_max); return; } /* Find the best mtu_idx for the suggested MTU. */ for (idx = 0; idx < NMTUS - 1 && mtus[idx + 1] <= mtu; idx++) continue; if (idx >= toep->params.mtu_idx) return; /* Never increase the PMTU (just like the kernel). */ /* * We'll send a compound work request with 2 SET_TCB_FIELDs -- the first * one updates the mtu_idx and the second one triggers a retransmit. */ len = sizeof(*wrh) + 2 * roundup2(LEN__SET_TCB_FIELD_ULP, 16); wrh = start_wrq_wr(toep->ctrlq, howmany(len, 16), &cookie); if (wrh == NULL) { CH_ERR(sc, "failed to change mtu_idx of tid %d (%u -> %u).\n", toep->tid, toep->params.mtu_idx, idx); return; } INIT_ULPTX_WRH(wrh, len, 1, 0); /* atomic */ ulpmc = (struct ulp_txpkt *)(wrh + 1); ulpmc = mk_set_tcb_field_ulp(ulpmc, W_TCB_T_MAXSEG, V_TCB_T_MAXSEG(M_TCB_T_MAXSEG), V_TCB_T_MAXSEG(idx), toep->tid); ulpmc = mk_set_tcb_field_ulp(ulpmc, W_TCB_TIMESTAMP, V_TCB_TIMESTAMP(0x7FFFFULL << 11), 0, toep->tid); commit_wrq_wr(toep->ctrlq, wrh, &cookie); /* Update the software toepcb and tcpcb. */ toep->params.mtu_idx = idx; tp->t_maxseg = mtus[toep->params.mtu_idx]; if (inp->inp_inc.inc_flags & INC_ISIPV6) tp->t_maxseg -= sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else tp->t_maxseg -= sizeof(struct ip) + sizeof(struct tcphdr); toep->params.emss = tp->t_maxseg; if (tp->t_flags & TF_RCVD_TSTMP) toep->params.emss -= TCPOLEN_TSTAMP_APPA; /* Update the firmware flowc. */ send_mss_flowc_wr(sc, toep); /* Update the MTU in the kernel's hostcache. */ if (sc->tt.update_hc_on_pmtu_change != 0) { struct in_conninfo inc = {0}; inc.inc_fibnum = inp->inp_inc.inc_fibnum; if (inp->inp_inc.inc_flags & INC_ISIPV6) { inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = inp->inp_inc.inc6_faddr; } else { inc.inc_faddr = inp->inp_inc.inc_faddr; } tcp_hc_updatemtu(&inc, mtu); } CTR6(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u), t_maxseg %u, emss %u", __func__, toep->tid, toep->params.mtu_idx, mtus[toep->params.mtu_idx], tp->t_maxseg, toep->params.emss); } /* * The TOE driver will not receive any more CPLs for the tid associated with the * toepcb; release the hold on the inpcb. 
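 * Note the symmetry with t4_pcb_detach(): whichever of the two runs last
 * (tracked via TPF_ATTACHED and TPF_CPL_PENDING) calls
 * release_offload_resources(), so teardown works no matter whether the
 * kernel or the hardware lets go first.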
*/ void final_cpl_received(struct toepcb *toep) { struct inpcb *inp = toep->inp; bool need_wakeup; KASSERT(inp != NULL, ("%s: inp is NULL", __func__)); INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_CPL_PENDING, ("%s: CPL not pending already?", __func__)); CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)", __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags); if (ulp_mode(toep) == ULP_MODE_TCPDDP) release_ddp_resources(toep); toep->inp = NULL; need_wakeup = (toep->flags & TPF_WAITING_FOR_FINAL) != 0; toep->flags &= ~(TPF_CPL_PENDING | TPF_WAITING_FOR_FINAL); mbufq_drain(&toep->ulp_pduq); mbufq_drain(&toep->ulp_pdu_reclaimq); if (!(toep->flags & TPF_ATTACHED)) release_offload_resources(toep); if (!in_pcbrele_wlocked(inp)) INP_WUNLOCK(inp); if (need_wakeup) { struct mtx *lock = mtx_pool_find(mtxpool_sleep, toep); mtx_lock(lock); wakeup(toep); mtx_unlock(lock); } } void insert_tid(struct adapter *sc, int tid, void *ctx, int ntids) { struct tid_info *t = &sc->tids; MPASS(tid >= t->tid_base); MPASS(tid - t->tid_base < t->ntids); t->tid_tab[tid - t->tid_base] = ctx; atomic_add_int(&t->tids_in_use, ntids); } void * lookup_tid(struct adapter *sc, int tid) { struct tid_info *t = &sc->tids; return (t->tid_tab[tid - t->tid_base]); } void update_tid(struct adapter *sc, int tid, void *ctx) { struct tid_info *t = &sc->tids; t->tid_tab[tid - t->tid_base] = ctx; } void remove_tid(struct adapter *sc, int tid, int ntids) { struct tid_info *t = &sc->tids; t->tid_tab[tid - t->tid_base] = NULL; atomic_subtract_int(&t->tids_in_use, ntids); } /* * What mtu_idx to use, given a 4-tuple. Note that both s->mss and tcp_mssopt * have the MSS that we should advertise in our SYN. Advertised MSS doesn't * account for any TCP options so the effective MSS (only payload, no headers or * options) could be different. */ static int find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, struct offload_settings *s) { unsigned short *mtus = &sc->params.mtus[0]; int i, mss, mtu; MPASS(inc != NULL); mss = s->mss > 0 ? s->mss : tcp_mssopt(inc); if (inc->inc_flags & INC_ISIPV6) mtu = mss + sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else mtu = mss + sizeof(struct ip) + sizeof(struct tcphdr); for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mtu; i++) continue; return (i); } /* * Determine the receive window size for a socket. 
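 * The result is sbspace() clamped to [MIN_RCV_WND, MAX_RCV_WND].
 * select_rcv_wscale() below then picks the smallest scale that covers the
 * socket buffer limit; e.g. for a limit of 200000 bytes it returns 2
 * (65535 << 2 = 262140 >= 200000, while 65535 << 1 = 131070 falls short).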
*/ u_long select_rcv_wnd(struct socket *so) { unsigned long wnd; SOCKBUF_LOCK_ASSERT(&so->so_rcv); wnd = sbspace(&so->so_rcv); if (wnd < MIN_RCV_WND) wnd = MIN_RCV_WND; return min(wnd, MAX_RCV_WND); } int select_rcv_wscale(void) { int wscale = 0; unsigned long space = sb_max; if (space > MAX_RCV_WND) space = MAX_RCV_WND; while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space) wscale++; return (wscale); } __be64 calc_options0(struct vi_info *vi, struct conn_params *cp) { uint64_t opt0 = 0; opt0 |= F_TCAM_BYPASS; MPASS(cp->wscale >= 0 && cp->wscale <= M_WND_SCALE); opt0 |= V_WND_SCALE(cp->wscale); MPASS(cp->mtu_idx >= 0 && cp->mtu_idx < NMTUS); opt0 |= V_MSS_IDX(cp->mtu_idx); MPASS(cp->ulp_mode >= 0 && cp->ulp_mode <= M_ULP_MODE); opt0 |= V_ULP_MODE(cp->ulp_mode); MPASS(cp->opt0_bufsize >= 0 && cp->opt0_bufsize <= M_RCV_BUFSIZ); opt0 |= V_RCV_BUFSIZ(cp->opt0_bufsize); MPASS(cp->l2t_idx >= 0 && cp->l2t_idx < vi->adapter->vres.l2t.size); opt0 |= V_L2T_IDX(cp->l2t_idx); opt0 |= V_SMAC_SEL(vi->smt_idx); opt0 |= V_TX_CHAN(vi->pi->tx_chan); MPASS(cp->keepalive == 0 || cp->keepalive == 1); opt0 |= V_KEEP_ALIVE(cp->keepalive); MPASS(cp->nagle == 0 || cp->nagle == 1); opt0 |= V_NAGLE(cp->nagle); return (htobe64(opt0)); } __be32 calc_options2(struct vi_info *vi, struct conn_params *cp) { uint32_t opt2 = 0; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; /* * rx flow control, rx coalesce, congestion control, and tx pace are all * explicitly set by the driver. On T5+ the ISS is also set by the * driver to the value picked by the kernel. */ if (is_t4(sc)) { opt2 |= F_RX_FC_VALID | F_RX_COALESCE_VALID; opt2 |= F_CONG_CNTRL_VALID | F_PACE_VALID; } else { opt2 |= F_T5_OPT_2_VALID; /* all 4 valid */ opt2 |= F_T5_ISS; /* ISS provided in CPL */ } MPASS(cp->sack == 0 || cp->sack == 1); opt2 |= V_SACK_EN(cp->sack); MPASS(cp->tstamp == 0 || cp->tstamp == 1); opt2 |= V_TSTAMPS_EN(cp->tstamp); if (cp->wscale > 0) opt2 |= F_WND_SCALE_EN; MPASS(cp->ecn == 0 || cp->ecn == 1); opt2 |= V_CCTRL_ECN(cp->ecn); /* XXX: F_RX_CHANNEL for multiple rx c-chan support goes here. */ opt2 |= V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]); opt2 |= V_PACE(0); opt2 |= F_RSS_QUEUE_VALID; opt2 |= V_RSS_QUEUE(sc->sge.ofld_rxq[cp->rxq_idx].iq.abs_id); MPASS(cp->cong_algo >= 0 && cp->cong_algo <= M_CONG_CNTRL); opt2 |= V_CONG_CNTRL(cp->cong_algo); MPASS(cp->rx_coalesce == 0 || cp->rx_coalesce == 1); if (cp->rx_coalesce == 1) opt2 |= V_RX_COALESCE(M_RX_COALESCE); opt2 |= V_RX_FC_DDP(0) | V_RX_FC_DISABLE(0); #ifdef USE_DDP_RX_FLOW_CONTROL if (cp->ulp_mode == ULP_MODE_TCPDDP) opt2 |= F_RX_FC_DDP; #endif return (htobe32(opt2)); } uint64_t select_ntuple(struct vi_info *vi, struct l2t_entry *e) { struct adapter *sc = vi->adapter; struct tp_params *tp = &sc->params.tp; uint64_t ntuple = 0; /* * Initialize each of the fields which we care about which are present * in the Compressed Filter Tuple. */ if (tp->vlan_shift >= 0 && EVL_VLANOFTAG(e->vlan) != CPL_L2T_VLAN_NONE) ntuple |= (uint64_t)(F_FT_VLAN_VLD | e->vlan) << tp->vlan_shift; if (tp->port_shift >= 0) ntuple |= (uint64_t)e->lport << tp->port_shift; if (tp->protocol_shift >= 0) ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift; if (tp->vnic_shift >= 0 && tp->vnic_mode == FW_VNIC_MODE_PF_VF) { ntuple |= (uint64_t)(V_FT_VNID_ID_VF(vi->vin) | V_FT_VNID_ID_PF(sc->pf) | V_FT_VNID_ID_VLD(vi->vfvld)) << tp->vnic_shift; } if (is_t4(sc)) return (htobe32((uint32_t)ntuple)); else return (htobe64(V_FILTER_TUPLE(ntuple))); } /* * Initialize various connection parameters. 
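 * Everything chosen here lands in *cp; nothing is sent to the hardware at
 * this point. The parameters are encoded into the actual hardware options
 * later, via calc_options0()/calc_options2() and select_ntuple() above.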
*/ void init_conn_params(struct vi_info *vi, struct offload_settings *s, struct in_conninfo *inc, struct socket *so, const struct tcp_options *tcpopt, int16_t l2t_idx, struct conn_params *cp) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct tom_tunables *tt = &sc->tt; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); u_long wnd; u_int q_idx; MPASS(s->offload != 0); /* Congestion control algorithm */ if (s->cong_algo >= 0) cp->cong_algo = s->cong_algo & M_CONG_CNTRL; else if (sc->tt.cong_algorithm >= 0) cp->cong_algo = tt->cong_algorithm & M_CONG_CNTRL; else { struct cc_algo *cc = CC_ALGO(tp); if (strcasecmp(cc->name, "reno") == 0) cp->cong_algo = CONG_ALG_RENO; else if (strcasecmp(cc->name, "tahoe") == 0) cp->cong_algo = CONG_ALG_TAHOE; else if (strcasecmp(cc->name, "newreno") == 0) cp->cong_algo = CONG_ALG_NEWRENO; else if (strcasecmp(cc->name, "highspeed") == 0) cp->cong_algo = CONG_ALG_HIGHSPEED; else { /* * Use newreno in case the algorithm selected by the * host stack is not supported by the hardware. */ cp->cong_algo = CONG_ALG_NEWRENO; } } /* Tx traffic scheduling class. */ if (s->sched_class >= 0 && s->sched_class < sc->params.nsched_cls) cp->tc_idx = s->sched_class; else cp->tc_idx = -1; /* Nagle's algorithm. */ if (s->nagle >= 0) cp->nagle = s->nagle > 0 ? 1 : 0; else cp->nagle = tp->t_flags & TF_NODELAY ? 0 : 1; /* TCP Keepalive. */ if (V_tcp_always_keepalive || so_options_get(so) & SO_KEEPALIVE) cp->keepalive = 1; else cp->keepalive = 0; /* Optimization that's specific to T5 @ 40G. */ if (tt->tx_align >= 0) cp->tx_align = tt->tx_align > 0 ? 1 : 0; else if (chip_id(sc) == CHELSIO_T5 && (port_top_speed(pi) > 10 || sc->params.nports > 2)) cp->tx_align = 1; else cp->tx_align = 0; /* ULP mode. */ if (s->ddp > 0 || (s->ddp < 0 && sc->tt.ddp && (so_options_get(so) & SO_NO_DDP) == 0)) cp->ulp_mode = ULP_MODE_TCPDDP; else cp->ulp_mode = ULP_MODE_NONE; /* Rx coalescing. */ if (s->rx_coalesce >= 0) cp->rx_coalesce = s->rx_coalesce > 0 ? 1 : 0; else if (tt->rx_coalesce >= 0) cp->rx_coalesce = tt->rx_coalesce > 0 ? 1 : 0; else cp->rx_coalesce = 1; /* default */ /* * Index in the PMTU table. This controls the MSS that we announce in * our SYN initially, but after ESTABLISHED it controls the MSS that we * use to send data. */ cp->mtu_idx = find_best_mtu_idx(sc, inc, s); /* Tx queue for this connection. */ if (s->txq == QUEUE_RANDOM) q_idx = arc4random(); else if (s->txq == QUEUE_ROUNDROBIN) q_idx = atomic_fetchadd_int(&vi->txq_rr, 1); else q_idx = s->txq; cp->txq_idx = vi->first_ofld_txq + q_idx % vi->nofldtxq; /* Rx queue for this connection. */ if (s->rxq == QUEUE_RANDOM) q_idx = arc4random(); else if (s->rxq == QUEUE_ROUNDROBIN) q_idx = atomic_fetchadd_int(&vi->rxq_rr, 1); else q_idx = s->rxq; cp->rxq_idx = vi->first_ofld_rxq + q_idx % vi->nofldrxq; if (SOLISTENING(so)) { /* Passive open */ MPASS(tcpopt != NULL); /* TCP timestamp option */ if (tcpopt->tstamp && (s->tstamp > 0 || (s->tstamp < 0 && V_tcp_do_rfc1323))) cp->tstamp = 1; else cp->tstamp = 0; /* SACK */ if (tcpopt->sack && (s->sack > 0 || (s->sack < 0 && V_tcp_do_sack))) cp->sack = 1; else cp->sack = 0; /* Receive window scaling. */ if (tcpopt->wsf > 0 && tcpopt->wsf < 15 && V_tcp_do_rfc1323) cp->wscale = select_rcv_wscale(); else cp->wscale = 0; /* ECN */ if (tcpopt->ecn && /* XXX: review.
*/ (s->ecn > 0 || (s->ecn < 0 && V_tcp_do_ecn))) cp->ecn = 1; else cp->ecn = 0; wnd = max(so->sol_sbrcv_hiwat, MIN_RCV_WND); cp->opt0_bufsize = min(wnd >> 10, M_RCV_BUFSIZ); if (tt->sndbuf > 0) cp->sndbuf = tt->sndbuf; else if (so->sol_sbsnd_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) cp->sndbuf = 256 * 1024; else cp->sndbuf = so->sol_sbsnd_hiwat; } else { /* Active open */ /* TCP timestamp option */ if (s->tstamp > 0 || (s->tstamp < 0 && (tp->t_flags & TF_REQ_TSTMP))) cp->tstamp = 1; else cp->tstamp = 0; /* SACK */ if (s->sack > 0 || (s->sack < 0 && (tp->t_flags & TF_SACK_PERMIT))) cp->sack = 1; else cp->sack = 0; /* Receive window scaling */ if (tp->t_flags & TF_REQ_SCALE) cp->wscale = select_rcv_wscale(); else cp->wscale = 0; /* ECN */ if (s->ecn > 0 || (s->ecn < 0 && V_tcp_do_ecn == 1)) cp->ecn = 1; else cp->ecn = 0; SOCKBUF_LOCK(&so->so_rcv); wnd = max(select_rcv_wnd(so), MIN_RCV_WND); SOCKBUF_UNLOCK(&so->so_rcv); cp->opt0_bufsize = min(wnd >> 10, M_RCV_BUFSIZ); if (tt->sndbuf > 0) cp->sndbuf = tt->sndbuf; else { SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) cp->sndbuf = 256 * 1024; else cp->sndbuf = so->so_snd.sb_hiwat; SOCKBUF_UNLOCK(&so->so_snd); } } cp->l2t_idx = l2t_idx; /* This will be initialized on ESTABLISHED. */ cp->emss = 0; } int negative_advice(int status) { return (status == CPL_ERR_RTX_NEG_ADVICE || status == CPL_ERR_PERSIST_NEG_ADVICE || status == CPL_ERR_KEEPALV_NEG_ADVICE); } static int alloc_tid_tab(struct tid_info *t, int flags) { MPASS(t->ntids > 0); MPASS(t->tid_tab == NULL); t->tid_tab = malloc(t->ntids * sizeof(*t->tid_tab), M_CXGBE, M_ZERO | flags); if (t->tid_tab == NULL) return (ENOMEM); atomic_store_rel_int(&t->tids_in_use, 0); return (0); } static void free_tid_tab(struct tid_info *t) { KASSERT(t->tids_in_use == 0, ("%s: %d tids still in use.", __func__, t->tids_in_use)); free(t->tid_tab, M_CXGBE); t->tid_tab = NULL; } static int alloc_stid_tab(struct tid_info *t, int flags) { MPASS(t->nstids > 0); MPASS(t->stid_tab == NULL); t->stid_tab = malloc(t->nstids * sizeof(*t->stid_tab), M_CXGBE, M_ZERO | flags); if (t->stid_tab == NULL) return (ENOMEM); mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF); t->stids_in_use = 0; TAILQ_INIT(&t->stids); t->nstids_free_head = t->nstids; return (0); } static void free_stid_tab(struct tid_info *t) { KASSERT(t->stids_in_use == 0, ("%s: %d tids still in use.", __func__, t->stids_in_use)); if (mtx_initialized(&t->stid_lock)) mtx_destroy(&t->stid_lock); free(t->stid_tab, M_CXGBE); t->stid_tab = NULL; } static void free_tid_tabs(struct tid_info *t) { free_tid_tab(t); free_stid_tab(t); } static int alloc_tid_tabs(struct tid_info *t) { int rc; rc = alloc_tid_tab(t, M_NOWAIT); if (rc != 0) goto failed; rc = alloc_stid_tab(t, M_NOWAIT); if (rc != 0) goto failed; return (0); failed: free_tid_tabs(t); return (rc); } static inline void alloc_tcb_history(struct adapter *sc, struct tom_data *td) { if (sc->tids.ntids == 0 || sc->tids.ntids > 1024) return; rw_init(&td->tcb_history_lock, "TCB history"); td->tcb_history = malloc(sc->tids.ntids * sizeof(*td->tcb_history), M_CXGBE, M_ZERO | M_NOWAIT); td->dupack_threshold = G_DUPACKTHRESH(t4_read_reg(sc, A_TP_PARA_REG0)); } static inline void free_tcb_history(struct adapter *sc, struct tom_data *td) { #ifdef INVARIANTS int i; if (td->tcb_history != NULL) { for (i = 0; i < sc->tids.ntids; i++) { MPASS(td->tcb_history[i] == NULL); } } #endif free(td->tcb_history, M_CXGBE); if (rw_initialized(&td->tcb_history_lock)) rw_destroy(&td->tcb_history_lock); 
} static void free_tom_data(struct adapter *sc, struct tom_data *td) { ASSERT_SYNCHRONIZED_OP(sc); KASSERT(TAILQ_EMPTY(&td->toep_list), ("%s: TOE PCB list is not empty.", __func__)); KASSERT(td->lctx_count == 0, ("%s: lctx hash table is not empty.", __func__)); t4_free_ppod_region(&td->pr); if (td->listen_mask != 0) hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask); if (mtx_initialized(&td->unsent_wr_lock)) mtx_destroy(&td->unsent_wr_lock); if (mtx_initialized(&td->lctx_hash_lock)) mtx_destroy(&td->lctx_hash_lock); if (mtx_initialized(&td->toep_list_lock)) mtx_destroy(&td->toep_list_lock); free_tcb_history(sc, td); free_tid_tabs(&sc->tids); free(td, M_CXGBE); } static char * prepare_pkt(int open_type, uint16_t vtag, struct inpcb *inp, int *pktlen, int *buflen) { char *pkt; struct tcphdr *th; int ipv6, len; const int maxlen = max(sizeof(struct ether_header), sizeof(struct ether_vlan_header)) + max(sizeof(struct ip), sizeof(struct ip6_hdr)) + sizeof(struct tcphdr); MPASS(open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN); pkt = malloc(maxlen, M_CXGBE, M_ZERO | M_NOWAIT); if (pkt == NULL) return (NULL); ipv6 = inp->inp_vflag & INP_IPV6; len = 0; if (EVL_VLANOFTAG(vtag) == 0xfff) { struct ether_header *eh = (void *)pkt; if (ipv6) eh->ether_type = htons(ETHERTYPE_IPV6); else eh->ether_type = htons(ETHERTYPE_IP); len += sizeof(*eh); } else { struct ether_vlan_header *evh = (void *)pkt; evh->evl_encap_proto = htons(ETHERTYPE_VLAN); evh->evl_tag = htons(vtag); if (ipv6) evh->evl_proto = htons(ETHERTYPE_IPV6); else evh->evl_proto = htons(ETHERTYPE_IP); len += sizeof(*evh); } if (ipv6) { struct ip6_hdr *ip6 = (void *)&pkt[len]; ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_plen = htons(sizeof(struct tcphdr)); ip6->ip6_nxt = IPPROTO_TCP; if (open_type == OPEN_TYPE_ACTIVE) { ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = inp->in6p_faddr; } else if (open_type == OPEN_TYPE_LISTEN) { ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = ip6->ip6_src; } len += sizeof(*ip6); } else { struct ip *ip = (void *)&pkt[len]; ip->ip_v = IPVERSION; ip->ip_hl = sizeof(*ip) >> 2; ip->ip_tos = inp->inp_ip_tos; ip->ip_len = htons(sizeof(struct ip) + sizeof(struct tcphdr)); ip->ip_ttl = inp->inp_ip_ttl; ip->ip_p = IPPROTO_TCP; if (open_type == OPEN_TYPE_ACTIVE) { ip->ip_src = inp->inp_laddr; ip->ip_dst = inp->inp_faddr; } else if (open_type == OPEN_TYPE_LISTEN) { ip->ip_src = inp->inp_laddr; ip->ip_dst = ip->ip_src; } len += sizeof(*ip); } th = (void *)&pkt[len]; if (open_type == OPEN_TYPE_ACTIVE) { th->th_sport = inp->inp_lport; /* network byte order already */ th->th_dport = inp->inp_fport; /* ditto */ } else if (open_type == OPEN_TYPE_LISTEN) { th->th_sport = inp->inp_lport; /* network byte order already */ th->th_dport = th->th_sport; } len += sizeof(*th); *pktlen = *buflen = len; return (pkt); } const struct offload_settings * lookup_offload_policy(struct adapter *sc, int open_type, struct mbuf *m, uint16_t vtag, struct inpcb *inp) { const struct t4_offload_policy *op; char *pkt; struct offload_rule *r; int i, matched, pktlen, buflen; static const struct offload_settings allow_offloading_settings = { .offload = 1, .rx_coalesce = -1, .cong_algo = -1, .sched_class = -1, .tstamp = -1, .sack = -1, .nagle = -1, .ecn = -1, .ddp = -1, .tls = -1, .txq = QUEUE_RANDOM, .rxq = QUEUE_RANDOM, .mss = -1, }; static const struct offload_settings disallow_offloading_settings = { .offload = 0, /* rest is irrelevant when offload is off.
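 * All other fields are left at zero by the designated initializer and are
 * never consulted while offload is 0.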
*/ }; rw_assert(&sc->policy_lock, RA_LOCKED); /* * If there's no Connection Offloading Policy attached to the device * then we need to return a default static policy. If * "cop_managed_offloading" is true, then we need to disallow * offloading until a COP is attached to the device. Otherwise we * allow offloading ... */ op = sc->policy; if (op == NULL) { if (sc->tt.cop_managed_offloading) return (&disallow_offloading_settings); else return (&allow_offloading_settings); } switch (open_type) { case OPEN_TYPE_ACTIVE: case OPEN_TYPE_LISTEN: pkt = prepare_pkt(open_type, vtag, inp, &pktlen, &buflen); break; case OPEN_TYPE_PASSIVE: MPASS(m != NULL); pkt = mtod(m, char *); MPASS(*pkt == CPL_PASS_ACCEPT_REQ); pkt += sizeof(struct cpl_pass_accept_req); pktlen = m->m_pkthdr.len - sizeof(struct cpl_pass_accept_req); buflen = m->m_len - sizeof(struct cpl_pass_accept_req); break; default: MPASS(0); return (&disallow_offloading_settings); } if (pkt == NULL || pktlen == 0 || buflen == 0) return (&disallow_offloading_settings); matched = 0; r = &op->rule[0]; for (i = 0; i < op->nrules; i++, r++) { if (r->open_type != open_type && r->open_type != OPEN_TYPE_DONTCARE) { continue; } matched = bpf_filter(r->bpf_prog.bf_insns, pkt, pktlen, buflen); if (matched) break; } if (open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN) free(pkt, M_CXGBE); return (matched ? &r->settings : &disallow_offloading_settings); } static void reclaim_wr_resources(void *arg, int count) { struct tom_data *td = arg; STAILQ_HEAD(, wrqe) twr_list = STAILQ_HEAD_INITIALIZER(twr_list); struct cpl_act_open_req *cpl; u_int opcode, atid, tid; struct wrqe *wr; struct adapter *sc = td_adapter(td); mtx_lock(&td->unsent_wr_lock); STAILQ_SWAP(&td->unsent_wr_list, &twr_list, wrqe); mtx_unlock(&td->unsent_wr_lock); while ((wr = STAILQ_FIRST(&twr_list)) != NULL) { STAILQ_REMOVE_HEAD(&twr_list, link); cpl = wrtod(wr); opcode = GET_OPCODE(cpl); switch (opcode) { case CPL_ACT_OPEN_REQ: case CPL_ACT_OPEN_REQ6: atid = G_TID_TID(be32toh(OPCODE_TID(cpl))); CTR2(KTR_CXGBE, "%s: atid %u ", __func__, atid); act_open_failure_cleanup(sc, atid, EHOSTUNREACH); free(wr, M_CXGBE); break; case CPL_PASS_ACCEPT_RPL: tid = GET_TID(cpl); CTR2(KTR_CXGBE, "%s: tid %u ", __func__, tid); synack_failure_cleanup(sc, tid); free(wr, M_CXGBE); break; default: log(LOG_ERR, "%s: leaked work request %p, wr_len %d, " "opcode %x\n", __func__, wr, wr->wr_len, opcode); /* WR not freed here; go look at it with a debugger. 
*/ } } } /* * Ground control to Major TOM * Commencing countdown, engines on */ static int t4_tom_activate(struct adapter *sc) { struct tom_data *td; struct toedev *tod; struct vi_info *vi; int i, rc, v; ASSERT_SYNCHRONIZED_OP(sc); /* per-adapter softc for TOM */ td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT); if (td == NULL) return (ENOMEM); /* List of TOE PCBs and associated lock */ mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF); TAILQ_INIT(&td->toep_list); /* Listen context */ mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF); td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE, &td->listen_mask, HASH_NOWAIT); /* List of WRs for which L2 resolution failed */ mtx_init(&td->unsent_wr_lock, "Unsent WR list lock", NULL, MTX_DEF); STAILQ_INIT(&td->unsent_wr_list); TASK_INIT(&td->reclaim_wr_resources, 0, reclaim_wr_resources, td); /* TID tables */ rc = alloc_tid_tabs(&sc->tids); if (rc != 0) goto done; rc = t4_init_ppod_region(&td->pr, &sc->vres.ddp, t4_read_reg(sc, A_ULP_RX_TDDP_PSZ), "TDDP page pods"); if (rc != 0) goto done; t4_set_reg_field(sc, A_ULP_RX_TDDP_TAGMASK, V_TDDPTAGMASK(M_TDDPTAGMASK), td->pr.pr_tag_mask); alloc_tcb_history(sc, td); /* toedev ops */ tod = &td->tod; init_toedev(tod); tod->tod_softc = sc; tod->tod_connect = t4_connect; tod->tod_listen_start = t4_listen_start; tod->tod_listen_stop = t4_listen_stop; tod->tod_rcvd = t4_rcvd; tod->tod_output = t4_tod_output; tod->tod_send_rst = t4_send_rst; tod->tod_send_fin = t4_send_fin; tod->tod_pcb_detach = t4_pcb_detach; tod->tod_l2_update = t4_l2_update; tod->tod_syncache_added = t4_syncache_added; tod->tod_syncache_removed = t4_syncache_removed; tod->tod_syncache_respond = t4_syncache_respond; tod->tod_offload_socket = t4_offload_socket; tod->tod_ctloutput = t4_ctloutput; tod->tod_tcp_info = t4_tcp_info; #ifdef KERN_TLS tod->tod_alloc_tls_session = t4_alloc_tls_session; #endif tod->tod_pmtu_update = t4_pmtu_update; for_each_port(sc, i) { for_each_vi(sc->port[i], v, vi) { SETTOEDEV(vi->ifp, &td->tod); } } sc->tom_softc = td; register_toedev(sc->tom_softc); done: if (rc != 0) free_tom_data(sc, td); return (rc); } static int t4_tom_deactivate(struct adapter *sc) { int rc = 0; struct tom_data *td = sc->tom_softc; ASSERT_SYNCHRONIZED_OP(sc); if (td == NULL) return (0); /* XXX. KASSERT? */ if (sc->offload_map != 0) return (EBUSY); /* at least one port has IFCAP_TOE enabled */ if (uld_active(sc, ULD_IWARP) || uld_active(sc, ULD_ISCSI)) return (EBUSY); /* both iWARP and iSCSI rely on the TOE. */ mtx_lock(&td->toep_list_lock); if (!TAILQ_EMPTY(&td->toep_list)) rc = EBUSY; mtx_unlock(&td->toep_list_lock); mtx_lock(&td->lctx_hash_lock); if (td->lctx_count > 0) rc = EBUSY; mtx_unlock(&td->lctx_hash_lock); taskqueue_drain(taskqueue_thread, &td->reclaim_wr_resources); mtx_lock(&td->unsent_wr_lock); if (!STAILQ_EMPTY(&td->unsent_wr_list)) rc = EBUSY; mtx_unlock(&td->unsent_wr_lock); if (rc == 0) { unregister_toedev(sc->tom_softc); free_tom_data(sc, td); sc->tom_softc = NULL; } return (rc); } static int t4_aio_queue_tom(struct socket *so, struct kaiocb *job) { struct tcpcb *tp = sototcpcb(so); struct toepcb *toep = tp->t_toe; int error; /* * No lock is needed as TOE sockets never change between * active and passive. 
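 * The dispatch below tries DDP first for connections in ULP_MODE_TCPDDP
 * and falls back to the plain aiotx path for any request that DDP
 * declines with EOPNOTSUPP.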
*/ if (SOLISTENING(so)) return (EINVAL); if (ulp_mode(toep) == ULP_MODE_TCPDDP) { error = t4_aio_queue_ddp(so, job); if (error != EOPNOTSUPP) return (error); } return (t4_aio_queue_aiotx(so, job)); } static int t4_tom_mod_load(void) { /* CPL handlers */ t4_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl); t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl2, CPL_COOKIE_TOM); t4_init_connect_cpl_handlers(); t4_init_listen_cpl_handlers(); t4_init_cpl_io_handlers(); t4_ddp_mod_load(); t4_tls_mod_load(); bcopy(&tcp_protosw, &toe_protosw, sizeof(toe_protosw)); toe_protosw.pr_aio_queue = t4_aio_queue_tom; bcopy(&tcp6_protosw, &toe6_protosw, sizeof(toe6_protosw)); toe6_protosw.pr_aio_queue = t4_aio_queue_tom; return (t4_register_uld(&tom_uld_info)); } static void tom_uninit(struct adapter *sc, void *arg __unused) { if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tomun")) return; /* Try to free resources (works only if no port has IFCAP_TOE) */ if (uld_active(sc, ULD_TOM)) t4_deactivate_uld(sc, ULD_TOM); end_synchronized_op(sc, 0); } static int t4_tom_mod_unload(void) { t4_iterate(tom_uninit, NULL); if (t4_unregister_uld(&tom_uld_info) == EBUSY) return (EBUSY); t4_tls_mod_unload(); t4_ddp_mod_unload(); t4_uninit_connect_cpl_handlers(); t4_uninit_listen_cpl_handlers(); t4_uninit_cpl_io_handlers(); t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, NULL, CPL_COOKIE_TOM); t4_register_cpl_handler(CPL_GET_TCB_RPL, NULL); return (0); } #endif /* TCP_OFFLOAD */ static int t4_tom_modevent(module_t mod, int cmd, void *arg) { int rc = 0; #ifdef TCP_OFFLOAD switch (cmd) { case MOD_LOAD: rc = t4_tom_mod_load(); break; case MOD_UNLOAD: rc = t4_tom_mod_unload(); break; default: rc = EINVAL; } #else printf("t4_tom: compiled without TCP_OFFLOAD support.\n"); rc = EOPNOTSUPP; #endif return (rc); } static moduledata_t t4_tom_moddata= { "t4_tom", t4_tom_modevent, 0 }; MODULE_VERSION(t4_tom, 1); MODULE_DEPEND(t4_tom, toecore, 1, 1, 1); MODULE_DEPEND(t4_tom, t4nex, 1, 1, 1); DECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY); diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c index 2fa016c8f35c..7bc218321f01 100644 --- a/sys/net/if_epair.c +++ b/sys/net/if_epair.c @@ -1,942 +1,942 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2008 The FreeBSD Foundation * Copyright (c) 2009-2021 Bjoern A. Zeeb * * This software was developed by CK Software GmbH under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * A pair of virtual back-to-back connected ethernet like interfaces * (``two interfaces with a virtual cross-over cable''). * * This is mostly intended to be used to provide connectivity between * different virtual network stack instances. */ #include #include "opt_rss.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #ifdef INET #include #endif #ifdef INET6 #include #endif #endif #include static const char epairname[] = "epair"; #define RXRSIZE 4096 /* Probably overkill by 4-8x. */ static MALLOC_DEFINE(M_EPAIR, epairname, "Pair of virtual cross-over connected Ethernet-like interfaces"); VNET_DEFINE_STATIC(struct if_clone *, epair_cloner); #define V_epair_cloner VNET(epair_cloner) static unsigned int next_index = 0; #define EPAIR_LOCK_INIT() mtx_init(&epair_n_index_mtx, "epairidx", \ NULL, MTX_DEF) #define EPAIR_LOCK_DESTROY() mtx_destroy(&epair_n_index_mtx) #define EPAIR_LOCK() mtx_lock(&epair_n_index_mtx) #define EPAIR_UNLOCK() mtx_unlock(&epair_n_index_mtx) struct epair_softc; struct epair_queue { struct mtx mtx; struct mbufq q; int id; enum { EPAIR_QUEUE_IDLE, EPAIR_QUEUE_WAKING, EPAIR_QUEUE_RUNNING, } state; struct task tx_task; struct epair_softc *sc; }; static struct mtx epair_n_index_mtx; struct epair_softc { struct ifnet *ifp; /* This ifp. */ struct ifnet *oifp; /* other ifp of pair. */ int num_queues; struct epair_queue *queues; struct ifmedia media; /* Media config (fake). */ STAILQ_ENTRY(epair_softc) entry; }; struct epair_tasks_t { int tasks; struct taskqueue *tq[MAXCPU]; }; static struct epair_tasks_t epair_tasks; static void epair_clear_mbuf(struct mbuf *m) { M_ASSERTPKTHDR(m); /* Remove any CSUM_SND_TAG as ether_input will barf. */ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { m_snd_tag_rele(m->m_pkthdr.snd_tag); m->m_pkthdr.snd_tag = NULL; m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; } /* Clear vlan information. */ m->m_flags &= ~M_VLANTAG; m->m_pkthdr.ether_vtag = 0; m_tag_delete_nonpersistent(m); } static void epair_tx_start_deferred(void *arg, int pending) { struct epair_queue *q = (struct epair_queue *)arg; if_t ifp; struct mbuf *m, *n; bool resched; ifp = q->sc->ifp; if_ref(ifp); CURVNET_SET(ifp->if_vnet); mtx_lock(&q->mtx); m = mbufq_flush(&q->q); q->state = EPAIR_QUEUE_RUNNING; mtx_unlock(&q->mtx); while (m != NULL) { n = STAILQ_NEXT(m, m_stailqpkt); m->m_nextpkt = NULL; if_input(ifp, m); m = n; } /* * Avoid flushing the queue more than once per task. We can otherwise * end up starving ourselves in a multi-epair routing configuration. 
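 * Hence the mbufq_empty() check below: if new packets arrived while we
 * were draining, reschedule this task once and return, giving the other
 * queues' tasks a turn on the taskqueue.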
*/ mtx_lock(&q->mtx); - if (mbufq_len(&q->q) > 0) { + if (!mbufq_empty(&q->q)) { resched = true; q->state = EPAIR_QUEUE_WAKING; } else { resched = false; q->state = EPAIR_QUEUE_IDLE; } mtx_unlock(&q->mtx); if (resched) taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task); CURVNET_RESTORE(); if_rele(ifp); } static struct epair_queue * epair_select_queue(struct epair_softc *sc, struct mbuf *m) { uint32_t bucket; #ifdef RSS struct ether_header *eh; int ret; ret = rss_m2bucket(m, &bucket); if (ret) { /* Actually hash the packet. */ eh = mtod(m, struct ether_header *); switch (ntohs(eh->ether_type)) { #ifdef INET case ETHERTYPE_IP: rss_soft_m2cpuid_v4(m, 0, &bucket); break; #endif #ifdef INET6 case ETHERTYPE_IPV6: rss_soft_m2cpuid_v6(m, 0, &bucket); break; #endif default: bucket = 0; break; } } bucket %= sc->num_queues; #else bucket = 0; #endif return (&sc->queues[bucket]); } static void epair_prepare_mbuf(struct mbuf *m, struct ifnet *src_ifp) { M_ASSERTPKTHDR(m); epair_clear_mbuf(m); if_setrcvif(m, src_ifp); M_SETFIB(m, src_ifp->if_fib); MPASS(m->m_nextpkt == NULL); MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); } static void epair_menq(struct mbuf *m, struct epair_softc *osc) { struct epair_queue *q; struct ifnet *ifp, *oifp; int error, len; bool mcast; /* * I know this looks weird. We pass the "other sc" as we need that one * and can get both ifps from it as well. */ oifp = osc->ifp; ifp = osc->oifp; epair_prepare_mbuf(m, oifp); /* Save values as once the mbuf is queued, it's not ours anymore. */ len = m->m_pkthdr.len; mcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0; q = epair_select_queue(osc, m); mtx_lock(&q->mtx); if (q->state == EPAIR_QUEUE_IDLE) { q->state = EPAIR_QUEUE_WAKING; taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task); } error = mbufq_enqueue(&q->q, m); mtx_unlock(&q->mtx); if (error != 0) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); } else { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if (mcast) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); } } static void epair_start(struct ifnet *ifp) { struct mbuf *m; struct epair_softc *sc; struct ifnet *oifp; /* * We get packets here from ether_output via if_handoff() * and need to put them into the input queue of the oifp * and will put the packet into the receive-queue (rxq) of the * other interface (oifp) of our pair. */ sc = ifp->if_softc; oifp = sc->oifp; sc = oifp->if_softc; for (;;) { IFQ_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; M_ASSERTPKTHDR(m); BPF_MTAP(ifp, m); /* In case either interface is not usable drop the packet. */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (ifp->if_flags & IFF_UP) == 0 || (oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (oifp->if_flags & IFF_UP) == 0) { m_freem(m); continue; } epair_menq(m, sc); } } static int epair_transmit(struct ifnet *ifp, struct mbuf *m) { struct epair_softc *sc; struct ifnet *oifp; #ifdef ALTQ int len; bool mcast; #endif if (m == NULL) return (0); M_ASSERTPKTHDR(m); /* * We could just transmit this, but it makes testing easier if we're a * little bit more like real hardware. * Allow just that little bit extra for ethernet (and vlan) headers. */ if (m->m_pkthdr.len > (ifp->if_mtu + sizeof(struct ether_vlan_header))) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (E2BIG); } /* * We are not going to use the interface en/dequeue mechanism * on the TX side. 
We are called from ether_output_frame() * and will put the packet into the receive-queue (rxq) of the * other interface (oifp) of our pair. */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENXIO); } if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENETDOWN); } BPF_MTAP(ifp, m); /* * In case the outgoing interface is not usable, * drop the packet. */ sc = ifp->if_softc; oifp = sc->oifp; if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (oifp->if_flags & IFF_UP) == 0) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); return (0); } #ifdef ALTQ len = m->m_pkthdr.len; mcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0; int error = 0; /* Support ALTQ via the classic if_start() path. */ IF_LOCK(&ifp->if_snd); if (ALTQ_IS_ENABLED(&ifp->if_snd)) { ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); if (error) if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); IF_UNLOCK(&ifp->if_snd); if (!error) { if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if (mcast) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); epair_start(ifp); } return (error); } IF_UNLOCK(&ifp->if_snd); #endif epair_menq(m, oifp->if_softc); return (0); } static void epair_qflush(struct ifnet *ifp __unused) { } static int epair_media_change(struct ifnet *ifp __unused) { /* Do nothing. */ return (0); } static void epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) { imr->ifm_status = IFM_AVALID | IFM_ACTIVE; imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; } static int epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct epair_softc *sc; struct ifreq *ifr; int error; ifr = (struct ifreq *)data; switch (cmd) { case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: error = 0; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: sc = ifp->if_softc; error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); break; case SIOCSIFMTU: /* We basically allow all kinds of MTUs. */ ifp->if_mtu = ifr->ifr_mtu; error = 0; break; default: /* Let the common ethernet handler process this. */ error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void epair_init(void *dummy __unused) { } /* * Interface cloning functions. * We use our private ones so that we can create/destroy our secondary * device along with the primary one. */ static int epair_clone_match(struct if_clone *ifc, const char *name) { const char *cp; /* * Our base name is epair. * Our interfaces will be named epair[ab]. * So accept anything of the following list: * - epair * - epair * but not the epair[ab] versions. */ if (strncmp(epairname, name, sizeof(epairname)-1) != 0) return (0); for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { if (*cp < '0' || *cp > '9') return (0); } return (1); } static void epair_clone_add(struct if_clone *ifc, struct epair_softc *scb) { struct ifnet *ifp; uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ ifp = scb->ifp; /* Copy epairNa etheraddr and change the last byte. 
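	 * For example (the hash-derived bytes are hypothetical): if epair0a
	 * came up as 02:12:34:56:78:0a, then epair0b attaches here as
	 * 02:12:34:56:78:0b; the two sides of a pair differ only in the
	 * last byte (0x0a vs. 0x0b, see epair_generate_mac() below).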
	 */
	memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN);
	eaddr[5] = 0x0b;
	ether_ifattach(ifp, eaddr);
	if_clone_addif(ifc, ifp);
}

static struct epair_softc *
epair_alloc_sc(struct if_clone *ifc)
{
	struct epair_softc *sc;

	struct ifnet *ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL)
		return (NULL);

	sc = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
	sc->ifp = ifp;
	sc->num_queues = epair_tasks.tasks;
	sc->queues = mallocarray(sc->num_queues, sizeof(struct epair_queue),
	    M_EPAIR, M_WAITOK);
	for (int i = 0; i < sc->num_queues; i++) {
		struct epair_queue *q = &sc->queues[i];
		q->id = i;
		q->state = EPAIR_QUEUE_IDLE;
		mtx_init(&q->mtx, "epairq", NULL, MTX_DEF | MTX_NEW);
		mbufq_init(&q->q, RXRSIZE);
		q->sc = sc;
		NET_TASK_INIT(&q->tx_task, 0, epair_tx_start_deferred, q);
	}

	/* Initialise pseudo media types. */
	ifmedia_init(&sc->media, 0, epair_media_change, epair_media_status);
	ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_T, 0, NULL);
	ifmedia_set(&sc->media, IFM_ETHER | IFM_10G_T);

	return (sc);
}

static void
epair_setup_ifp(struct epair_softc *sc, char *name, int unit)
{
	struct ifnet *ifp = sc->ifp;

	ifp->if_softc = sc;
	strlcpy(ifp->if_xname, name, IFNAMSIZ);
	ifp->if_dname = epairname;
	ifp->if_dunit = unit;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_capabilities = IFCAP_VLAN_MTU;
	ifp->if_capenable = IFCAP_VLAN_MTU;
	ifp->if_transmit = epair_transmit;
	ifp->if_qflush = epair_qflush;
	ifp->if_start = epair_start;
	ifp->if_ioctl = epair_ioctl;
	ifp->if_init = epair_init;
	if_setsendqlen(ifp, ifqmaxlen);
	if_setsendqready(ifp);

	ifp->if_baudrate = IF_Gbps(10);	/* arbitrary maximum */
}

static void
epair_generate_mac(struct epair_softc *sc, uint8_t *eaddr)
{
	uint32_t key[3];
	uint32_t hash;
	uint64_t hostid;

	EPAIR_LOCK();
#ifdef SMP
	/* Get an approximate distribution. */
	hash = next_index % mp_ncpus;
#else
	hash = 0;
#endif
	EPAIR_UNLOCK();

	/*
	 * Calculate the etheraddr by hashing the hostid and the
	 * interface index.  The result should hopefully be unique.
	 * Note that the "a" component of an epair instance may get moved
	 * to a different VNET after creation.  In that case its index
	 * will be freed and can get reused by a new epair instance.
	 * Make sure we do not create the same etheraddr again.
	 */
	getcredhostid(curthread->td_ucred, (unsigned long *)&hostid);
	if (hostid == 0)
		arc4rand(&hostid, sizeof(hostid), 0);

	struct ifnet *ifp = sc->ifp;
	EPAIR_LOCK();
	if (ifp->if_index > next_index)
		next_index = ifp->if_index;
	else
		next_index++;

	key[0] = (uint32_t)next_index;
	EPAIR_UNLOCK();
	key[1] = (uint32_t)(hostid & 0xffffffff);
	key[2] = (uint32_t)((hostid >> 32) & 0xffffffff);
	hash = jenkins_hash32(key, 3, 0);

	eaddr[0] = 0x02;
	memcpy(&eaddr[1], &hash, 4);
	eaddr[5] = 0x0a;
}

static void
epair_free_sc(struct epair_softc *sc)
{
	if (sc == NULL)
		return;

	if_free(sc->ifp);
	ifmedia_removeall(&sc->media);
	for (int i = 0; i < sc->num_queues; i++) {
		struct epair_queue *q = &sc->queues[i];
		mtx_destroy(&q->mtx);
	}
	free(sc->queues, M_EPAIR);
	free(sc, M_EPAIR);
}

static void
epair_set_state(struct ifnet *ifp, bool running)
{
	if (running) {
		ifp->if_drv_flags |= IFF_DRV_RUNNING;
		if_link_state_change(ifp, LINK_STATE_UP);
	} else {
		if_link_state_change(ifp, LINK_STATE_DOWN);
		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	}
}

static int
epair_handle_unit(struct if_clone *ifc, char *name, size_t len, int *punit)
{
	int error = 0, unit, wildcard;
	char *dp;

	/* Try to see if a special unit was requested.
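	 * For example (illustrative; ifc_name2unit() is authoritative):
	 *   "epair"  -> unit < 0, a wildcard; ifc_alloc_unit() picks the
	 *               next free unit number.
	 *   "epair3" -> unit == 3, an explicit request for that unit.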
	 */
	error = ifc_name2unit(name, &unit);
	if (error != 0)
		return (error);
	wildcard = (unit < 0);

	error = ifc_alloc_unit(ifc, &unit);
	if (error != 0)
		return (error);

	/*
	 * If no unit had been given, we need to adjust the ifName.
	 * Also make sure there is space for our extra [ab] suffix.
	 */
	for (dp = name; *dp != '\0'; dp++);
	if (wildcard) {
		int slen = snprintf(dp, len - (dp - name), "%d", unit);
		if (slen > len - (dp - name) - 1) {
			/* ifName too long. */
			error = ENOSPC;
			goto done;
		}
		dp += slen;
	}
	if (len - (dp - name) - 1 < 1) {
		/* No space left for our [ab] suffix. */
		error = ENOSPC;
		goto done;
	}
	*dp = 'b';
	/* Must not change dp so we can replace 'a' by 'b' later. */
	*(dp+1) = '\0';

	/* Check if 'a' and 'b' interfaces already exist. */
	if (ifunit(name) != NULL) {
		error = EEXIST;
		goto done;
	}

	*dp = 'a';
	if (ifunit(name) != NULL) {
		error = EEXIST;
		goto done;
	}
	*punit = unit;
done:
	if (error != 0)
		ifc_free_unit(ifc, unit);

	return (error);
}

static int
epair_clone_create(struct if_clone *ifc, char *name, size_t len,
    struct ifc_data *ifd, struct ifnet **ifpp)
{
	struct epair_softc *sca, *scb;
	struct ifnet *ifp;
	char *dp;
	int error, unit;
	uint8_t eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */

	error = epair_handle_unit(ifc, name, len, &unit);
	if (error != 0)
		return (error);

	/* Allocate memory for both [ab] interfaces. */
	sca = epair_alloc_sc(ifc);
	scb = epair_alloc_sc(ifc);
	if (sca == NULL || scb == NULL) {
		epair_free_sc(sca);
		epair_free_sc(scb);
		ifc_free_unit(ifc, unit);
		return (ENOSPC);
	}

	/*
	 * Cross-reference the interfaces so we will be able to free both.
	 */
	sca->oifp = scb->ifp;
	scb->oifp = sca->ifp;

	/* Finish initialization of interface a. */
	ifp = sca->ifp;
	epair_setup_ifp(sca, name, unit);
	epair_generate_mac(sca, eaddr);

	ether_ifattach(ifp, eaddr);

	/* Swap the name and finish initialization of interface b. */
	dp = name + strlen(name) - 1;
	*dp = 'b';

	epair_setup_ifp(scb, name, unit);

	ifp = scb->ifp;
	/* We need to play some tricks here for the second interface. */
	strlcpy(name, epairname, len);
	/* Correctly set the name for the cloner list. */
	strlcpy(name, scb->ifp->if_xname, len);

	epair_clone_add(ifc, scb);

	/*
	 * Restore name to a as the ifp for this will go into the
	 * cloner list for the initial call.
	 */
	strlcpy(name, sca->ifp->if_xname, len);

	/* Tell the world that we are ready to rock. */
	epair_set_state(sca->ifp, true);
	epair_set_state(scb->ifp, true);

	*ifpp = sca->ifp;

	return (0);
}

static void
epair_drain_rings(struct epair_softc *sc)
{
	for (int i = 0; i < sc->num_queues; i++) {
		struct epair_queue *q;
		struct mbuf *m, *n;

		q = &sc->queues[i];
		mtx_lock(&q->mtx);
		m = mbufq_flush(&q->q);
		mtx_unlock(&q->mtx);

		for (; m != NULL; m = n) {
			n = m->m_nextpkt;
			m_freem(m);
		}
	}
}

static int
epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
{
	struct ifnet *oifp;
	struct epair_softc *sca, *scb;
	int unit, error;

	/*
	 * In case we called into if_clone_destroyif() ourselves
	 * again to remove the second interface, the softc will be
	 * NULL.  In that case do not do anything but return success.
	 */
	if (ifp->if_softc == NULL)
		return (0);

	unit = ifp->if_dunit;
	sca = ifp->if_softc;
	oifp = sca->oifp;
	scb = oifp->if_softc;

	/* First get the interfaces down and detached. */
	epair_set_state(ifp, false);
	epair_set_state(oifp, false);

	ether_ifdetach(ifp);
	ether_ifdetach(oifp);

	/* Then free any queued packets and all the resources.
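	 * The overall teardown order (a sketch of what follows, for
	 * reference):
	 *   1. switch to the vnet of the b side (it may differ from ours),
	 *   2. drain scb's rings and recurse via if_clone_destroyif() with
	 *      oifp->if_softc cleared so the recursion returns early,
	 *   3. drain sca's rings and free its softc,
	 *   4. release the unit number back to the cloner.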
*/ CURVNET_SET_QUIET(oifp->if_vnet); epair_drain_rings(scb); oifp->if_softc = NULL; error = if_clone_destroyif(ifc, oifp); if (error) panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", __func__, error); epair_free_sc(scb); CURVNET_RESTORE(); epair_drain_rings(sca); epair_free_sc(sca); /* Last free the cloner unit. */ ifc_free_unit(ifc, unit); return (0); } static void vnet_epair_init(const void *unused __unused) { struct if_clone_addreq req = { .match_f = epair_clone_match, .create_f = epair_clone_create, .destroy_f = epair_clone_destroy, }; V_epair_cloner = ifc_attach_cloner(epairname, &req); } VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_epair_init, NULL); static void vnet_epair_uninit(const void *unused __unused) { ifc_detach_cloner(V_epair_cloner); } VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_epair_uninit, NULL); static int epair_mod_init(void) { char name[32]; epair_tasks.tasks = 0; #ifdef RSS int cpu; CPU_FOREACH(cpu) { cpuset_t cpu_mask; /* Pin to this CPU so we get appropriate NUMA allocations. */ thread_lock(curthread); sched_bind(curthread, cpu); thread_unlock(curthread); snprintf(name, sizeof(name), "epair_task_%d", cpu); epair_tasks.tq[cpu] = taskqueue_create(name, M_WAITOK, taskqueue_thread_enqueue, &epair_tasks.tq[cpu]); CPU_SETOF(cpu, &cpu_mask); taskqueue_start_threads_cpuset(&epair_tasks.tq[cpu], 1, PI_NET, &cpu_mask, "%s", name); epair_tasks.tasks++; } thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); #else snprintf(name, sizeof(name), "epair_task"); epair_tasks.tq[0] = taskqueue_create(name, M_WAITOK, taskqueue_thread_enqueue, &epair_tasks.tq[0]); taskqueue_start_threads(&epair_tasks.tq[0], 1, PI_NET, "%s", name); epair_tasks.tasks = 1; #endif return (0); } static void epair_mod_cleanup(void) { for (int i = 0; i < epair_tasks.tasks; i++) { taskqueue_drain_all(epair_tasks.tq[i]); taskqueue_free(epair_tasks.tq[i]); } } static int epair_modevent(module_t mod, int type, void *data) { int ret; switch (type) { case MOD_LOAD: EPAIR_LOCK_INIT(); ret = epair_mod_init(); if (ret != 0) return (ret); if (bootverbose) printf("%s: %s initialized.\n", __func__, epairname); break; case MOD_UNLOAD: epair_mod_cleanup(); EPAIR_LOCK_DESTROY(); if (bootverbose) printf("%s: %s unloaded.\n", __func__, epairname); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t epair_mod = { "if_epair", epair_modevent, 0 }; DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); MODULE_VERSION(if_epair, 3); diff --git a/sys/net80211/ieee80211_ddb.c b/sys/net80211/ieee80211_ddb.c index cd4a4f191745..0042d5d4aeb6 100644 --- a/sys/net80211/ieee80211_ddb.c +++ b/sys/net80211/ieee80211_ddb.c @@ -1,1040 +1,1040 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2007-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include
#include "opt_ddb.h"
#include "opt_wlan.h"

#ifdef DDB
/*
 * IEEE 802.11 DDB support
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef IEEE80211_SUPPORT_TDMA
#include
#endif
#ifdef IEEE80211_SUPPORT_MESH
#include
#endif
#include
#include

#define	DB_PRINTSYM(prefix, name, addr) do { \
	db_printf("%s%-25s : ", prefix, name); \
	db_printsym((db_addr_t) addr, DB_STGY_ANY); \
	db_printf("\n"); \
} while (0)

static void _db_show_sta(const struct ieee80211_node *);
static void _db_show_vap(const struct ieee80211vap *, int, int);
static void _db_show_com(const struct ieee80211com *,
	int showvaps, int showsta, int showmesh, int showprocs, int);
static void _db_show_all_vaps(void *, struct ieee80211com *);

static void _db_show_node_table(const char *tag,
	const struct ieee80211_node_table *);
static void _db_show_channel(const char *tag, const struct ieee80211_channel *);
static void _db_show_ssid(const char *tag, int ix, int len, const uint8_t *);
static void _db_show_appie(const char *tag, const struct ieee80211_appie *);
static void _db_show_key(const char *tag, int ix, const struct ieee80211_key *);
static void _db_show_roamparams(const char *tag, const void *arg,
	const struct ieee80211_roamparam *rp);
static void _db_show_txparams(const char *tag, const void *arg,
	const struct ieee80211_txparam *tp);
static void _db_show_ageq(const char *tag, const struct ieee80211_ageq *q);
static void _db_show_stats(const struct ieee80211_stats *);
#ifdef IEEE80211_SUPPORT_MESH
static void _db_show_mesh(const struct ieee80211_mesh_state *);
#endif

DB_SHOW_COMMAND(sta, db_show_sta)
{
	if (!have_addr) {
		db_printf("usage: show sta <addr>\n");
		return;
	}
	_db_show_sta((const struct ieee80211_node *) addr);
}

DB_SHOW_COMMAND(statab, db_show_statab)
{
	if (!have_addr) {
		db_printf("usage: show statab <addr>\n");
		return;
	}
	_db_show_node_table("", (const struct ieee80211_node_table *) addr);
}

DB_SHOW_COMMAND(vap, db_show_vap)
{
	int i, showmesh = 0, showprocs = 0;

	if (!have_addr) {
		db_printf("usage: show vap <addr>\n");
		return;
	}
	for (i = 0; modif[i] != '\0'; i++)
		switch (modif[i]) {
		case 'a':
			showprocs = 1;
			showmesh = 1;
			break;
		case 'm':
			showmesh = 1;
			break;
		case 'p':
			showprocs = 1;
			break;
		}
	_db_show_vap((const struct ieee80211vap *) addr, showmesh, showprocs);
}

DB_SHOW_COMMAND(com, db_show_com)
{
	const struct ieee80211com *ic;
	int i, showprocs = 0, showvaps = 0, showsta = 0, showmesh = 0,
	    showscan = 0;

	if (!have_addr) {
		db_printf("usage: show com <addr>\n");
		return;
	}
	for (i = 0; modif[i] != '\0'; i++)
		switch (modif[i]) {
		case 'a':
			showsta = showmesh = showvaps = showprocs =
			    showscan = 1;
			break;
		case 'S':
			showscan = 1;
			break;
		case 's':
			showsta = 1;
			break;
		case 'm':
			showmesh = 1;
			break;
		case 'v':
			showvaps = 1;
			break;
		case 'p':
			showprocs = 1;
break; } ic = (const struct ieee80211com *) addr; _db_show_com(ic, showvaps, showsta, showmesh, showprocs, showscan); } DB_SHOW_ALL_COMMAND(vaps, db_show_all_vaps) { int i, showall = 0; for (i = 0; modif[i] != '\0'; i++) switch (modif[i]) { case 'a': showall = 1; break; } ieee80211_iterate_coms(_db_show_all_vaps, &showall); } #ifdef IEEE80211_SUPPORT_MESH DB_SHOW_ALL_COMMAND(mesh, db_show_mesh) { const struct ieee80211_mesh_state *ms; if (!have_addr) { db_printf("usage: show mesh \n"); return; } ms = (const struct ieee80211_mesh_state *) addr; _db_show_mesh(ms); } #endif /* IEEE80211_SUPPORT_MESH */ static void _db_show_txampdu(const char *sep, int ix, const struct ieee80211_tx_ampdu *tap) { db_printf("%stxampdu[%d]: %p flags %b %s\n", sep, ix, tap, tap->txa_flags, IEEE80211_AGGR_BITS, ieee80211_wme_acnames[TID_TO_WME_AC(tap->txa_tid)]); db_printf("%s token %u lastsample %d pkts %d avgpps %d qbytes %d qframes %d\n", sep, tap->txa_token, tap->txa_lastsample, tap->txa_pkts, tap->txa_avgpps, tap->txa_qbytes, tap->txa_qframes); db_printf("%s start %u seqpending %u wnd %u attempts %d nextrequest %d\n", sep, tap->txa_start, tap->txa_seqpending, tap->txa_wnd, tap->txa_attempts, tap->txa_nextrequest); /* XXX timer */ } static void _db_show_rxampdu(const char *sep, int ix, const struct ieee80211_rx_ampdu *rap) { struct mbuf *m; int i; db_printf("%srxampdu[%d]: %p flags 0x%x tid %u\n", sep, ix, rap, rap->rxa_flags, ix /*XXX */); db_printf("%s qbytes %d qframes %d seqstart %u start %u wnd %u\n", sep, rap->rxa_qbytes, rap->rxa_qframes, rap->rxa_seqstart, rap->rxa_start, rap->rxa_wnd); db_printf("%s age %d nframes %d\n", sep, rap->rxa_age, rap->rxa_nframes); for (i = 0; i < IEEE80211_AGGR_BAWMAX; i++) - if (mbufq_len(&rap->rxa_mq[i]) > 0) { + if (!mbufq_empty(&rap->rxa_mq[i])) { db_printf("%s m[%2u:%4u] ", sep, i, IEEE80211_SEQ_ADD(rap->rxa_start, i)); STAILQ_FOREACH(m, &rap->rxa_mq[i].mq_head, m_stailqpkt) { db_printf(" %p", m); } db_printf("\n"); } } static void _db_show_sta(const struct ieee80211_node *ni) { int i; db_printf("STA: %p: mac %s refcnt %d\n", ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)); db_printf("\tvap %p wdsvap %p ic %p table %p\n", ni->ni_vap, ni->ni_wdsvap, ni->ni_ic, ni->ni_table); db_printf("\tflags=%b\n", ni->ni_flags, IEEE80211_NODE_BITS); db_printf("\tauthmode %u ath_flags 0x%x ath_defkeyix %u\n", ni->ni_authmode, ni->ni_ath_flags, ni->ni_ath_defkeyix); db_printf("\tassocid 0x%x txpower %u vlan %u\n", ni->ni_associd, ni->ni_txpower, ni->ni_vlan); db_printf("\tjointime %d (%lu secs) challenge %p\n", ni->ni_jointime, (unsigned long)(time_uptime - ni->ni_jointime), ni->ni_challenge); db_printf("\ties: data %p len %d\n", ni->ni_ies.data, ni->ni_ies.len); db_printf("\t[wpa_ie %p rsn_ie %p wme_ie %p ath_ie %p\n", ni->ni_ies.wpa_ie, ni->ni_ies.rsn_ie, ni->ni_ies.wme_ie, ni->ni_ies.ath_ie); db_printf("\t htcap_ie %p htinfo_ie %p]\n", ni->ni_ies.htcap_ie, ni->ni_ies.htinfo_ie); db_printf("\t vhtcap_ie %p vhtopmode_ie %p vhtpwrenv_ie %p]\n", ni->ni_ies.vhtcap_ie, ni->ni_ies.vhtopmode_ie, ni->ni_ies.vhtpwrenv_ie); if (ni->ni_flags & IEEE80211_NODE_QOS) { for (i = 0; i < WME_NUM_TID; i++) { if (ni->ni_txseqs[i] || ni->ni_rxseqs[i]) db_printf("\t[%u] txseq %u rxseq %u fragno %u\n", i, ni->ni_txseqs[i], ni->ni_rxseqs[i] >> IEEE80211_SEQ_SEQ_SHIFT, ni->ni_rxseqs[i] & IEEE80211_SEQ_FRAG_MASK); } } db_printf("\ttxseq %u rxseq %u fragno %u rxfragstamp %u\n", ni->ni_txseqs[IEEE80211_NONQOS_TID], ni->ni_rxseqs[IEEE80211_NONQOS_TID] >> IEEE80211_SEQ_SEQ_SHIFT, 
ni->ni_rxseqs[IEEE80211_NONQOS_TID] & IEEE80211_SEQ_FRAG_MASK, ni->ni_rxfragstamp); db_printf("\trxfrag[0] %p rxfrag[1] %p rxfrag[2] %p\n", ni->ni_rxfrag[0], ni->ni_rxfrag[1], ni->ni_rxfrag[2]); _db_show_key("\tucastkey", 0, &ni->ni_ucastkey); db_printf("\tavgrssi 0x%x (rssi %d) noise %d\n", ni->ni_avgrssi, IEEE80211_RSSI_GET(ni->ni_avgrssi), ni->ni_noise); db_printf("\tintval %u capinfo %b\n", ni->ni_intval, ni->ni_capinfo, IEEE80211_CAPINFO_BITS); db_printf("\tbssid %s", ether_sprintf(ni->ni_bssid)); _db_show_ssid(" essid ", 0, ni->ni_esslen, ni->ni_essid); db_printf("\n"); _db_show_channel("\tchannel", ni->ni_chan); db_printf("\n"); db_printf("\terp %b dtim_period %u dtim_count %u\n", ni->ni_erp, IEEE80211_ERP_BITS, ni->ni_dtim_period, ni->ni_dtim_count); db_printf("\thtcap %b htparam 0x%x htctlchan %u ht2ndchan %u\n", ni->ni_htcap, IEEE80211_HTCAP_BITS, ni->ni_htparam, ni->ni_htctlchan, ni->ni_ht2ndchan); db_printf("\thtopmode 0x%x htstbc 0x%x chw %u\n", ni->ni_htopmode, ni->ni_htstbc, ni->ni_chw); /* XXX ampdu state */ for (i = 0; i < WME_NUM_TID; i++) if (ni->ni_tx_ampdu[i].txa_flags & IEEE80211_AGGR_SETUP) _db_show_txampdu("\t", i, &ni->ni_tx_ampdu[i]); for (i = 0; i < WME_NUM_TID; i++) if (ni->ni_rx_ampdu[i].rxa_flags) _db_show_rxampdu("\t", i, &ni->ni_rx_ampdu[i]); db_printf("\tinact %u inact_reload %u txrate %u\n", ni->ni_inact, ni->ni_inact_reload, ni->ni_txrate); #ifdef IEEE80211_SUPPORT_MESH _db_show_ssid("\tmeshid ", 0, ni->ni_meshidlen, ni->ni_meshid); db_printf(" mlstate %b mllid 0x%x mlpid 0x%x mlrcnt %u mltval %u\n", ni->ni_mlstate, IEEE80211_MESH_MLSTATE_BITS, ni->ni_mllid, ni->ni_mlpid, ni->ni_mlrcnt, ni->ni_mltval); #endif /* VHT state */ db_printf("\tvhtcap %b vht_basicmcs %#06x vht_pad2 %#06x\n", ni->ni_vhtcap, IEEE80211_VHTCAP_BITS, ni->ni_vht_basicmcs, ni->ni_vht_pad2); db_printf("\tvht_mcsinfo: { rx_mcs_map %#06x rx_highest %#06x " "tx_mcs_map %#06x tx_highest %#06x }\n", ni->ni_vht_mcsinfo.rx_mcs_map, ni->ni_vht_mcsinfo.rx_highest, ni->ni_vht_mcsinfo.tx_mcs_map, ni->ni_vht_mcsinfo.tx_highest); db_printf("\tvht_chan1/chan2 %u/%u vht_chanwidth %#04x\n", ni->ni_vht_chan1, ni->ni_vht_chan2, ni->ni_vht_chanwidth); db_printf("\tvht_pad1 %#04x vht_spare { %#x %#x %#x %#x %#x %#x %#x %#x }\n", ni->ni_vht_pad1, ni->ni_vht_spare[0], ni->ni_vht_spare[1], ni->ni_vht_spare[2], ni->ni_vht_spare[3], ni->ni_vht_spare[4], ni->ni_vht_spare[5], ni->ni_vht_spare[6], ni->ni_vht_spare[7]); db_printf("\tni_tx_superg[] = {"); for (i = 0; i < WME_NUM_TID; i++) db_printf(" %p%s", ni->ni_tx_superg[i], (i == 0) ? 
"" : ","); db_printf(" }\n"); db_printf("\tni_rctls = %p", ni->ni_rctls); db_printf("\tni_drv_data = %p", ni->ni_drv_data); db_printf("\n"); db_printf("\tni_spare[3] = { %#jx %#jx %#jx }", ni->ni_spare[0], ni->ni_spare[1], ni->ni_spare[2]); db_printf("\n"); #ifdef __notyet__ struct ieee80211_psq ni_psq; /* power save queue */ struct ieee80211_nodestats ni_stats; /* per-node statistics */ /* quiet time IE state for the given node */ uint32_t ni_quiet_ie_set; /* Quiet time IE was seen */ struct ieee80211_quiet_ie ni_quiet_ie; /* last seen quiet IE */ /* U-APSD */ uint8_t ni_uapsd; /* U-APSD per-node flags matching WMM STA QoS Info field */ #endif } #ifdef IEEE80211_SUPPORT_TDMA static void _db_show_tdma(const char *sep, const struct ieee80211_tdma_state *ts, int showprocs) { db_printf("%stdma %p:\n", sep, ts); db_printf("%s version %u slot %u bintval %u peer %p\n", sep, ts->tdma_version, ts->tdma_slot, ts->tdma_bintval, ts->tdma_peer); db_printf("%s slotlen %u slotcnt %u", sep, ts->tdma_slotlen, ts->tdma_slotcnt); db_printf(" inuse 0x%x active 0x%x count %d\n", ts->tdma_inuse[0], ts->tdma_active[0], ts->tdma_count); if (showprocs) { DB_PRINTSYM(sep, " tdma_newstate", ts->tdma_newstate); DB_PRINTSYM(sep, " tdma_recv_mgmt", ts->tdma_recv_mgmt); DB_PRINTSYM(sep, " tdma_opdetach", ts->tdma_opdetach); } } #endif /* IEEE80211_SUPPORT_TDMA */ static void _db_show_scan(const struct ieee80211_scan_state *ss, int showprocs) { int i; const struct ieee80211_scanner *ss_ops; db_printf("SCAN %p:", ss); db_printf(" vap %p ic %p", ss->ss_vap, ss->ss_ic); db_printf("\n"); db_printf("\tss_ops %p (%s) ss_priv %p", ss->ss_ops, ss->ss_ops->scan_name, ss->ss_priv); db_printf("\n"); if (showprocs) { ss_ops = ss->ss_ops; DB_PRINTSYM("\t", "scan_attach", ss_ops->scan_attach); DB_PRINTSYM("\t", "scan_detach", ss_ops->scan_detach); DB_PRINTSYM("\t", "scan_start", ss_ops->scan_start); DB_PRINTSYM("\t", "scan_restart", ss_ops->scan_restart); DB_PRINTSYM("\t", "scan_cancel", ss_ops->scan_cancel); DB_PRINTSYM("\t", "scan_end", ss_ops->scan_end); DB_PRINTSYM("\t", "scan_flush", ss_ops->scan_flush); DB_PRINTSYM("\t", "scan_pickchan", ss_ops->scan_pickchan); DB_PRINTSYM("\t", "scan_add", ss_ops->scan_add); DB_PRINTSYM("\t", "scan_age", ss_ops->scan_age); DB_PRINTSYM("\t", "scan_assoc_fail", ss_ops->scan_assoc_fail); DB_PRINTSYM("\t", "scan_assoc_success", ss_ops->scan_assoc_success); DB_PRINTSYM("\t", "scan_iterate", ss_ops->scan_iterate); DB_PRINTSYM("\t", "scan_spare0", ss_ops->scan_spare0); DB_PRINTSYM("\t", "scan_spare1", ss_ops->scan_spare1); DB_PRINTSYM("\t", "scan_spare2", ss_ops->scan_spare2); DB_PRINTSYM("\t", "scan_spare3", ss_ops->scan_spare3); } db_printf("\tss_flags %b", ss->ss_flags, IEEE80211_SS_FLAGS_BITS); db_printf("\n"); db_printf("\tss_nssid %u", ss->ss_nssid); for (i = 0; i < ss->ss_nssid && i < IEEE80211_SCAN_MAX_SSID; i++) _db_show_ssid(" ss_nssid[%d]", i, ss->ss_ssid[i].len, ss->ss_ssid[i].ssid); db_printf("\n"); db_printf("\tss_chans:\n"); for (i = 0; i < ss->ss_last && i < IEEE80211_SCAN_MAX; i++) { db_printf("\t%-3d", i); _db_show_channel(" ", ss->ss_chans[i]); db_printf("\n"); } db_printf("\tss_next %u ss_last %u ss_mindwell %lu ss_maxdwell %lu", ss->ss_next, ss->ss_last, ss->ss_mindwell, ss->ss_maxdwell); db_printf("\n"); } static void _db_show_rate(const struct ieee80211_ratectl *rate, const void *rs, const int showprocs) { db_printf("\tiv_rate %p", rate); db_printf(" iv_rs %p", rs); db_printf("\n"); if (showprocs) { db_printf("\t ir_name %s", rate->ir_name); db_printf("\n"); DB_PRINTSYM("\t ", 
"ir_attach", rate->ir_attach); DB_PRINTSYM("\t ", "ir_detach", rate->ir_detach); DB_PRINTSYM("\t ", "ir_init", rate->ir_init); DB_PRINTSYM("\t ", "ir_deinit", rate->ir_deinit); DB_PRINTSYM("\t ", "ir_node_init", rate->ir_node_init); DB_PRINTSYM("\t ", "ir_node_deinit", rate->ir_node_deinit); DB_PRINTSYM("\t ", "ir_rate", rate->ir_rate); DB_PRINTSYM("\t ", "ir_tx_complete", rate->ir_tx_complete); DB_PRINTSYM("\t ", "ir_tx_update", rate->ir_tx_update); DB_PRINTSYM("\t ", "ir_setinterval", rate->ir_setinterval); DB_PRINTSYM("\t ", "ir_node_stats", rate->ir_node_stats); } } static void _db_show_vap(const struct ieee80211vap *vap, int showmesh, int showprocs) { const struct ieee80211com *ic = vap->iv_ic; int i; db_printf("VAP %p:", vap); db_printf(" bss %p", vap->iv_bss); db_printf(" myaddr %s", ether_sprintf(vap->iv_myaddr)); db_printf("\n"); db_printf("\topmode %s", ieee80211_opmode_name[vap->iv_opmode]); #ifdef IEEE80211_SUPPORT_MESH if (vap->iv_opmode == IEEE80211_M_MBSS) db_printf("(%p)", vap->iv_mesh); #endif db_printf(" state %s", ieee80211_state_name[vap->iv_state]); db_printf(" ifp %p(%s)", vap->iv_ifp, if_name(vap->iv_ifp)); db_printf("\n"); db_printf("\tic %p", vap->iv_ic); db_printf(" media %p", &vap->iv_media); db_printf(" bpf_if %p", vap->iv_rawbpf); db_printf(" mgtsend %p", &vap->iv_mgtsend); #if 0 struct sysctllog *iv_sysctl; /* dynamic sysctl context */ #endif db_printf("\n"); db_printf("\tdebug=%b\n", vap->iv_debug, IEEE80211_MSG_BITS); db_printf("\tflags=%b\n", vap->iv_flags, IEEE80211_F_BITS); db_printf("\tflags_ext=%b\n", vap->iv_flags_ext, IEEE80211_FEXT_BITS); db_printf("\tflags_ht=%b\n", vap->iv_flags_ht, IEEE80211_FHT_BITS); db_printf("\tflags_ven=%b\n", vap->iv_flags_ven, IEEE80211_FVEN_BITS); db_printf("\tcaps=%b\n", vap->iv_caps, IEEE80211_C_BITS); db_printf("\thtcaps=%b\n", vap->iv_htcaps, IEEE80211_C_HTCAP_BITS); db_printf("\tvhtcap=%b\n", vap->iv_vht_cap.vht_cap_info, IEEE80211_VHTCAP_BITS); _db_show_stats(&vap->iv_stats); db_printf("\tinact_init %d", vap->iv_inact_init); db_printf(" inact_auth %d", vap->iv_inact_auth); db_printf(" inact_run %d", vap->iv_inact_run); db_printf(" inact_probe %d", vap->iv_inact_probe); db_printf("\n"); db_printf("\tdes_nssid %d", vap->iv_des_nssid); if (vap->iv_des_nssid) _db_show_ssid(" des_ssid[%u] ", 0, vap->iv_des_ssid[0].len, vap->iv_des_ssid[0].ssid); db_printf(" des_bssid %s", ether_sprintf(vap->iv_des_bssid)); db_printf("\n"); db_printf("\tdes_mode %d", vap->iv_des_mode); _db_show_channel(" des_chan", vap->iv_des_chan); db_printf("\n"); #if 0 int iv_nicknamelen; /* XXX junk */ uint8_t iv_nickname[IEEE80211_NWID_LEN]; #endif db_printf("\tbgscanidle %u", vap->iv_bgscanidle); db_printf(" bgscanintvl %u", vap->iv_bgscanintvl); db_printf(" scanvalid %u", vap->iv_scanvalid); db_printf("\n"); db_printf("\tscanreq_duration %u", vap->iv_scanreq_duration); db_printf(" scanreq_mindwell %u", vap->iv_scanreq_mindwell); db_printf(" scanreq_maxdwell %u", vap->iv_scanreq_maxdwell); db_printf("\n"); db_printf("\tscanreq_flags 0x%x", vap->iv_scanreq_flags); db_printf(" scanreq_nssid %d", vap->iv_scanreq_nssid); for (i = 0; i < vap->iv_scanreq_nssid; i++) _db_show_ssid(" scanreq_ssid[%u]", i, vap->iv_scanreq_ssid[i].len, vap->iv_scanreq_ssid[i].ssid); db_printf(" roaming %d", vap->iv_roaming); db_printf("\n"); for (i = IEEE80211_MODE_11A; i < IEEE80211_MODE_MAX; i++) if (isset(ic->ic_modecaps, i)) { _db_show_roamparams("\troamparms[%s]", ieee80211_phymode_name[i], &vap->iv_roamparms[i]); db_printf("\n"); } db_printf("\tbmissthreshold %u", 
	    vap->iv_bmissthreshold);
	db_printf(" bmiss_count %u", vap->iv_bmiss_count);
	db_printf(" bmiss_max %d", vap->iv_bmiss_max);
	db_printf("\n");
	db_printf("\tswbmiss_count %u", vap->iv_swbmiss_count);
	db_printf(" swbmiss_period %u", vap->iv_swbmiss_period);
	db_printf(" swbmiss %p", &vap->iv_swbmiss);
	db_printf("\n");
	db_printf("\tampdu_rxmax %d", vap->iv_ampdu_rxmax);
	db_printf(" ampdu_density %d", vap->iv_ampdu_density);
	db_printf(" ampdu_limit %d", vap->iv_ampdu_limit);
	db_printf(" amsdu_limit %d", vap->iv_amsdu_limit);
	db_printf("\n");
	db_printf("\tmax_aid %u", vap->iv_max_aid);
	db_printf(" aid_bitmap %p", vap->iv_aid_bitmap);
	db_printf("\n");
	db_printf("\tsta_assoc %u", vap->iv_sta_assoc);
	db_printf(" ps_sta %u", vap->iv_ps_sta);
	db_printf(" ps_pending %u", vap->iv_ps_pending);
	db_printf(" tim_len %u", vap->iv_tim_len);
	db_printf(" tim_bitmap %p", vap->iv_tim_bitmap);
	db_printf("\n");
	db_printf("\tdtim_period %u", vap->iv_dtim_period);
	db_printf(" dtim_count %u", vap->iv_dtim_count);
	db_printf(" set_tim %p", vap->iv_set_tim);
	db_printf(" csa_count %d", vap->iv_csa_count);
	db_printf("\n");
	db_printf("\trtsthreshold %u", vap->iv_rtsthreshold);
	db_printf(" fragthreshold %u", vap->iv_fragthreshold);
	db_printf(" inact_timer %d", vap->iv_inact_timer);
	db_printf("\n");
	for (i = IEEE80211_MODE_11A; i < IEEE80211_MODE_MAX; i++)
		if (isset(ic->ic_modecaps, i)) {
			_db_show_txparams("\ttxparms[%s]",
			    ieee80211_phymode_name[i], &vap->iv_txparms[i]);
			db_printf("\n");
		}

	/* application-specified IE's to attach to mgt frames */
	_db_show_appie("\tappie_beacon", vap->iv_appie_beacon);
	_db_show_appie("\tappie_probereq", vap->iv_appie_probereq);
	_db_show_appie("\tappie_proberesp", vap->iv_appie_proberesp);
	_db_show_appie("\tappie_assocreq", vap->iv_appie_assocreq);
	_db_show_appie("\tappie_assocresp", vap->iv_appie_assocresp);
	_db_show_appie("\tappie_wpa", vap->iv_appie_wpa);
	if (vap->iv_wpa_ie != NULL || vap->iv_rsn_ie != NULL) {
		if (vap->iv_wpa_ie != NULL)
			db_printf("\twpa_ie %p", vap->iv_wpa_ie);
		if (vap->iv_rsn_ie != NULL)
			db_printf("\trsn_ie %p", vap->iv_rsn_ie);
		db_printf("\n");
	}
	db_printf("\tmax_keyix %u", vap->iv_max_keyix);
	db_printf(" def_txkey %d", vap->iv_def_txkey);
	db_printf("\n");
	for (i = 0; i < IEEE80211_WEP_NKID; i++)
		_db_show_key("\tnw_keys[%u]", i, &vap->iv_nw_keys[i]);

	db_printf("\tauth %p(%s)", vap->iv_auth, vap->iv_auth->ia_name);
	db_printf(" ec %p", vap->iv_ec);
	db_printf(" acl %p", vap->iv_acl);
	db_printf(" as %p", vap->iv_as);
	db_printf("\n");
#ifdef IEEE80211_SUPPORT_MESH
	if (showmesh && vap->iv_mesh != NULL)
		_db_show_mesh(vap->iv_mesh);
#endif
#ifdef IEEE80211_SUPPORT_TDMA
	if (vap->iv_tdma != NULL)
		_db_show_tdma("\t", vap->iv_tdma, showprocs);
#endif /* IEEE80211_SUPPORT_TDMA */
	db_printf("\tsta_assoc %u", vap->iv_sta_assoc);
	db_printf(" ht_sta_assoc %u", vap->iv_ht_sta_assoc);
	db_printf(" ht40_sta_assoc %u", vap->iv_ht40_sta_assoc);
	db_printf("\n");
	db_printf("\tnonerpsta %u", vap->iv_nonerpsta);
	db_printf(" longslotsta %u", vap->iv_longslotsta);
	db_printf(" lastnonerp %d", vap->iv_lastnonerp);
	db_printf(" lastnonht %d", vap->iv_lastnonht);
	db_printf("\n");
	if (vap->iv_rate != NULL)
		_db_show_rate(vap->iv_rate, vap->iv_rs, showprocs);

	if (showprocs) {
		DB_PRINTSYM("\t", "iv_key_alloc", vap->iv_key_alloc);
		DB_PRINTSYM("\t", "iv_key_delete", vap->iv_key_delete);
		DB_PRINTSYM("\t", "iv_key_set", vap->iv_key_set);
		DB_PRINTSYM("\t", "iv_key_update_begin",
		    vap->iv_key_update_begin);
		DB_PRINTSYM("\t", "iv_key_update_end", vap->iv_key_update_end);
		DB_PRINTSYM("\t", "iv_opdetach", vap->iv_opdetach);
DB_PRINTSYM("\t", "iv_input", vap->iv_input); DB_PRINTSYM("\t", "iv_recv_mgmt", vap->iv_recv_mgmt); DB_PRINTSYM("\t", "iv_deliver_data", vap->iv_deliver_data); DB_PRINTSYM("\t", "iv_bmiss", vap->iv_bmiss); DB_PRINTSYM("\t", "iv_reset", vap->iv_reset); DB_PRINTSYM("\t", "iv_update_beacon", vap->iv_update_beacon); DB_PRINTSYM("\t", "iv_newstate", vap->iv_newstate); DB_PRINTSYM("\t", "iv_output", vap->iv_output); } } static void _db_show_com(const struct ieee80211com *ic, int showvaps, int showsta, int showmesh, int showprocs, int showscan) { struct ieee80211vap *vap; db_printf("COM: %p:", ic); TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) db_printf(" %s(%p)", if_name(vap->iv_ifp), vap); db_printf("\n"); db_printf("\tsoftc %p", ic->ic_softc); db_printf("\tname %s", ic->ic_name); db_printf(" comlock %p", &ic->ic_comlock); db_printf(" txlock %p", &ic->ic_txlock); db_printf(" fflock %p", &ic->ic_fflock); db_printf("\n"); db_printf("\theadroom %d", ic->ic_headroom); db_printf(" phytype %d", ic->ic_phytype); db_printf(" opmode %s", ieee80211_opmode_name[ic->ic_opmode]); db_printf("\n"); db_printf("\tinact %p", &ic->ic_inact); db_printf("\n"); db_printf("\tflags=%b\n", ic->ic_flags, IEEE80211_F_BITS); db_printf("\tflags_ext=%b\n", ic->ic_flags_ext, IEEE80211_FEXT_BITS); db_printf("\tflags_ht=%b\n", ic->ic_flags_ht, IEEE80211_FHT_BITS); db_printf("\tflags_ven=%b\n", ic->ic_flags_ven, IEEE80211_FVEN_BITS); db_printf("\tcaps=%b\n", ic->ic_caps, IEEE80211_C_BITS); db_printf("\tcryptocaps=%b\n", ic->ic_cryptocaps, IEEE80211_CRYPTO_BITS); db_printf("\thtcaps=%b\n", ic->ic_htcaps, IEEE80211_HTCAP_BITS); db_printf("\tvhtcaps=%b\n", ic->ic_vht_cap.vht_cap_info, IEEE80211_VHTCAP_BITS); #if 0 uint8_t ic_modecaps[2]; /* set of mode capabilities */ #endif db_printf("\tcurmode %u", ic->ic_curmode); db_printf(" promisc %u", ic->ic_promisc); db_printf(" allmulti %u", ic->ic_allmulti); db_printf(" nrunning %u", ic->ic_nrunning); db_printf("\n"); db_printf("\tbintval %u", ic->ic_bintval); db_printf(" lintval %u", ic->ic_lintval); db_printf(" holdover %u", ic->ic_holdover); db_printf(" txpowlimit %u", ic->ic_txpowlimit); db_printf("\n"); #if 0 struct ieee80211_rateset ic_sup_rates[IEEE80211_MODE_MAX]; #endif /* * Channel state: * * ic_channels is the set of available channels for the device; * it is setup by the driver * ic_nchans is the number of valid entries in ic_channels * ic_chan_avail is a bit vector of these channels used to check * whether a channel is available w/o searching the channel table. * ic_chan_active is a (potentially) constrained subset of * ic_chan_avail that reflects any mode setting or user-specified * limit on the set of channels to use/scan * ic_curchan is the current channel the device is set to; it may * be different from ic_bsschan when we are off-channel scanning * or otherwise doing background work * ic_bsschan is the channel selected for operation; it may * be undefined (IEEE80211_CHAN_ANYC) * ic_prevchan is a cached ``previous channel'' used to optimize * lookups when switching back+forth between two channels * (e.g. 
for dynamic turbo) */ db_printf("\tnchans %d", ic->ic_nchans); #if 0 struct ieee80211_channel ic_channels[IEEE80211_CHAN_MAX]; uint8_t ic_chan_avail[IEEE80211_CHAN_BYTES]; uint8_t ic_chan_active[IEEE80211_CHAN_BYTES]; uint8_t ic_chan_scan[IEEE80211_CHAN_BYTES]; #endif db_printf("\n"); _db_show_channel("\tcurchan", ic->ic_curchan); db_printf("\n"); _db_show_channel("\tbsschan", ic->ic_bsschan); db_printf("\n"); _db_show_channel("\tprevchan", ic->ic_prevchan); db_printf("\n"); db_printf("\tregdomain %p", &ic->ic_regdomain); db_printf("\n"); _db_show_channel("\tcsa_newchan", ic->ic_csa_newchan); db_printf(" csa_count %d", ic->ic_csa_count); db_printf( "dfs %p", &ic->ic_dfs); db_printf("\n"); db_printf("\tscan %p", ic->ic_scan); db_printf(" lastdata %d", ic->ic_lastdata); db_printf(" lastscan %d", ic->ic_lastscan); db_printf("\n"); db_printf("\tmax_keyix %d", ic->ic_max_keyix); db_printf(" hash_key 0x%x", ic->ic_hash_key); db_printf(" wme %p", &ic->ic_wme); if (!showsta) db_printf(" sta %p", &ic->ic_sta); db_printf("\n"); db_printf("\tstageq@%p:\n", &ic->ic_stageq); _db_show_ageq("\t", &ic->ic_stageq); if (showsta) _db_show_node_table("\t", &ic->ic_sta); db_printf("\tprotmode %d", ic->ic_protmode); db_printf("\tcurhtprotmode 0x%x", ic->ic_curhtprotmode); db_printf(" htprotmode %d", ic->ic_htprotmode); db_printf("\n"); db_printf("\tsuperg %p\n", ic->ic_superg); db_printf("\tmontaps %d th %p txchan %p rh %p rxchan %p\n", ic->ic_montaps, ic->ic_th, ic->ic_txchan, ic->ic_rh, ic->ic_rxchan); if (showprocs) { DB_PRINTSYM("\t", "ic_vap_create", ic->ic_vap_create); DB_PRINTSYM("\t", "ic_vap_delete", ic->ic_vap_delete); #if 0 /* operating mode attachment */ ieee80211vap_attach ic_vattach[IEEE80211_OPMODE_MAX]; #endif DB_PRINTSYM("\t", "ic_newassoc", ic->ic_newassoc); DB_PRINTSYM("\t", "ic_getradiocaps", ic->ic_getradiocaps); DB_PRINTSYM("\t", "ic_setregdomain", ic->ic_setregdomain); DB_PRINTSYM("\t", "ic_send_mgmt", ic->ic_send_mgmt); DB_PRINTSYM("\t", "ic_raw_xmit", ic->ic_raw_xmit); DB_PRINTSYM("\t", "ic_updateslot", ic->ic_updateslot); DB_PRINTSYM("\t", "ic_update_mcast", ic->ic_update_mcast); DB_PRINTSYM("\t", "ic_update_promisc", ic->ic_update_promisc); DB_PRINTSYM("\t", "ic_node_alloc", ic->ic_node_alloc); DB_PRINTSYM("\t", "ic_node_free", ic->ic_node_free); DB_PRINTSYM("\t", "ic_node_cleanup", ic->ic_node_cleanup); DB_PRINTSYM("\t", "ic_node_getrssi", ic->ic_node_getrssi); DB_PRINTSYM("\t", "ic_node_getsignal", ic->ic_node_getsignal); DB_PRINTSYM("\t", "ic_node_getmimoinfo", ic->ic_node_getmimoinfo); DB_PRINTSYM("\t", "ic_scan_start", ic->ic_scan_start); DB_PRINTSYM("\t", "ic_scan_end", ic->ic_scan_end); DB_PRINTSYM("\t", "ic_set_channel", ic->ic_set_channel); DB_PRINTSYM("\t", "ic_scan_curchan", ic->ic_scan_curchan); DB_PRINTSYM("\t", "ic_scan_mindwell", ic->ic_scan_mindwell); DB_PRINTSYM("\t", "ic_recv_action", ic->ic_recv_action); DB_PRINTSYM("\t", "ic_send_action", ic->ic_send_action); DB_PRINTSYM("\t", "ic_addba_request", ic->ic_addba_request); DB_PRINTSYM("\t", "ic_addba_response", ic->ic_addba_response); DB_PRINTSYM("\t", "ic_addba_stop", ic->ic_addba_stop); } if (showscan) { db_printf("\n"); _db_show_scan(ic->ic_scan, showprocs); } if (showvaps && !TAILQ_EMPTY(&ic->ic_vaps)) { db_printf("\n"); TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) _db_show_vap(vap, showmesh, showprocs); } if (showsta && !TAILQ_EMPTY(&ic->ic_sta.nt_node)) { const struct ieee80211_node_table *nt = &ic->ic_sta; const struct ieee80211_node *ni; TAILQ_FOREACH(ni, &nt->nt_node, ni_list) { db_printf("\n"); _db_show_sta(ni); 
		}
	}
}

static void
_db_show_all_vaps(void *arg, struct ieee80211com *ic)
{
	int showall = *(int *)arg;

	if (!showall) {
		const struct ieee80211vap *vap;
		db_printf("%s: com %p vaps:", ic->ic_name, ic);
		TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next)
			db_printf(" %s(%p)", if_name(vap->iv_ifp), vap);
		db_printf("\n");
	} else
		_db_show_com(ic, 1, 1, 1, 1, 1);
}

static void
_db_show_node_table(const char *tag, const struct ieee80211_node_table *nt)
{
	int i;

	db_printf("%s%s@%p:\n", tag, nt->nt_name, nt);
	db_printf("%s nodelock %p", tag, &nt->nt_nodelock);
	db_printf(" inact_init %d", nt->nt_inact_init);
	db_printf("%s keyixmax %d keyixmap %p\n",
	    tag, nt->nt_keyixmax, nt->nt_keyixmap);
	for (i = 0; i < nt->nt_keyixmax; i++) {
		const struct ieee80211_node *ni = nt->nt_keyixmap[i];
		if (ni != NULL)
			db_printf("%s [%3u] %p %s\n", tag, i, ni,
			    ether_sprintf(ni->ni_macaddr));
	}
}

static void
_db_show_channel(const char *tag, const struct ieee80211_channel *c)
{
	db_printf("%s ", tag);
	if (c == NULL)
		db_printf("<NULL>");
	else if (c == IEEE80211_CHAN_ANYC)
		db_printf("<ANY>");
	else
		db_printf("[%u (%u) flags=%b maxreg %d maxpow %d minpow %d state 0x%x extieee %u]",
		    c->ic_freq, c->ic_ieee,
		    c->ic_flags, IEEE80211_CHAN_BITS,
		    c->ic_maxregpower, c->ic_maxpower, c->ic_minpower,
		    c->ic_state, c->ic_extieee);
}

static void
_db_show_ssid(const char *tag, int ix, int len, const uint8_t *ssid)
{
	const uint8_t *p;
	int i;

	db_printf(tag, ix);

	if (len > IEEE80211_NWID_LEN)
		len = IEEE80211_NWID_LEN;
	/* determine printable or not */
	for (i = 0, p = ssid; i < len; i++, p++) {
		if (*p < ' ' || *p > 0x7e)
			break;
	}
	if (i == len) {
		db_printf("\"");
		for (i = 0, p = ssid; i < len; i++, p++)
			db_printf("%c", *p);
		db_printf("\"");
	} else {
		db_printf("0x");
		for (i = 0, p = ssid; i < len; i++, p++)
			db_printf("%02x", *p);
	}
}

static void
_db_show_appie(const char *tag, const struct ieee80211_appie *ie)
{
	const uint8_t *p;
	int i;

	if (ie == NULL)
		return;
	db_printf("%s [0x", tag);
	for (i = 0, p = ie->ie_data; i < ie->ie_len; i++, p++)
		db_printf("%02x", *p);
	db_printf("]\n");
}

static void
_db_show_key(const char *tag, int ix, const struct ieee80211_key *wk)
{
	static const uint8_t zerodata[IEEE80211_KEYBUF_SIZE];
	const struct ieee80211_cipher *cip = wk->wk_cipher;
	int keylen = wk->wk_keylen;

	db_printf(tag, ix);
	switch (cip->ic_cipher) {
	case IEEE80211_CIPHER_WEP:
		/* compatibility */
		db_printf(" wepkey %u:%s", wk->wk_keyix,
		    keylen <= 5 ? "40-bit" : keylen <= 13 ?
"104-bit" : "128-bit"); break; case IEEE80211_CIPHER_TKIP: if (keylen > 128/8) keylen -= 128/8; /* ignore MIC for now */ db_printf(" TKIP %u:%u-bit", wk->wk_keyix, 8*keylen); break; case IEEE80211_CIPHER_AES_OCB: db_printf(" AES-OCB %u:%u-bit", wk->wk_keyix, 8*keylen); break; case IEEE80211_CIPHER_AES_CCM: db_printf(" AES-CCM %u:%u-bit", wk->wk_keyix, 8*keylen); break; case IEEE80211_CIPHER_CKIP: db_printf(" CKIP %u:%u-bit", wk->wk_keyix, 8*keylen); break; case IEEE80211_CIPHER_NONE: db_printf(" NULL %u:%u-bit", wk->wk_keyix, 8*keylen); break; default: db_printf(" UNKNOWN (0x%x) %u:%u-bit", cip->ic_cipher, wk->wk_keyix, 8*keylen); break; } if (wk->wk_rxkeyix != wk->wk_keyix) db_printf(" rxkeyix %u", wk->wk_rxkeyix); if (memcmp(wk->wk_key, zerodata, keylen) != 0) { int i; db_printf(" <"); for (i = 0; i < keylen; i++) db_printf("%02x", wk->wk_key[i]); db_printf(">"); if (cip->ic_cipher != IEEE80211_CIPHER_WEP && wk->wk_keyrsc[IEEE80211_NONQOS_TID] != 0) db_printf(" rsc %ju", (uintmax_t)wk->wk_keyrsc[IEEE80211_NONQOS_TID]); if (cip->ic_cipher != IEEE80211_CIPHER_WEP && wk->wk_keytsc != 0) db_printf(" tsc %ju", (uintmax_t)wk->wk_keytsc); db_printf(" flags=%b", wk->wk_flags, IEEE80211_KEY_BITS); } db_printf("\n"); } static void printrate(const char *tag, int v) { if (v == IEEE80211_FIXED_RATE_NONE) db_printf(" %s ", tag); else if (v == 11) db_printf(" %s 5.5", tag); else if (v & IEEE80211_RATE_MCS) db_printf(" %s MCS%d", tag, v &~ IEEE80211_RATE_MCS); else db_printf(" %s %d", tag, v/2); } static void _db_show_roamparams(const char *tag, const void *arg, const struct ieee80211_roamparam *rp) { db_printf(tag, arg); if (rp->rssi & 1) db_printf(" rssi %u.5", rp->rssi/2); else db_printf(" rssi %u", rp->rssi/2); printrate("rate", rp->rate); } static void _db_show_txparams(const char *tag, const void *arg, const struct ieee80211_txparam *tp) { db_printf(tag, arg); printrate("ucastrate", tp->ucastrate); printrate("mcastrate", tp->mcastrate); printrate("mgmtrate", tp->mgmtrate); db_printf(" maxretry %d", tp->maxretry); } static void _db_show_ageq(const char *tag, const struct ieee80211_ageq *q) { const struct mbuf *m; db_printf("%s lock %p len %d maxlen %d drops %d head %p tail %p\n", tag, &q->aq_lock, q->aq_len, q->aq_maxlen, q->aq_drops, q->aq_head, q->aq_tail); for (m = q->aq_head; m != NULL; m = m->m_nextpkt) db_printf("%s %p (len %d, %b)\n", tag, m, m->m_len, /* XXX could be either TX or RX but is mostly TX */ m->m_flags, IEEE80211_MBUF_TX_FLAG_BITS); } static void _db_show_stats(const struct ieee80211_stats *is) { } #ifdef IEEE80211_SUPPORT_MESH static void _db_show_mesh(const struct ieee80211_mesh_state *ms) { struct ieee80211_mesh_route *rt; int i; _db_show_ssid(" meshid ", 0, ms->ms_idlen, ms->ms_id); db_printf("nextseq %u ttl %u flags 0x%x\n", ms->ms_seq, ms->ms_ttl, ms->ms_flags); db_printf("routing table:\n"); i = 0; TAILQ_FOREACH(rt, &ms->ms_routes, rt_next) { db_printf("entry %d:\tdest: %6D nexthop: %6D metric: %u", i, rt->rt_dest, ":", rt->rt_nexthop, ":", rt->rt_metric); db_printf("\tlifetime: %u lastseq: %u priv: %p\n", ieee80211_mesh_rt_update(rt, 0), rt->rt_lastmseq, rt->rt_priv); i++; } } #endif /* IEEE80211_SUPPORT_MESH */ #endif /* DDB */ diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c index a322c21b4673..cb981014fdb9 100644 --- a/sys/net80211/ieee80211_ht.c +++ b/sys/net80211/ieee80211_ht.c @@ -1,3610 +1,3610 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2007-2008 Sam Leffler, Errno Consulting * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #ifdef __FreeBSD__ #endif /* * IEEE 802.11n protocol support. */ #include "opt_inet.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include const struct ieee80211_mcs_rates ieee80211_htrates[IEEE80211_HTRATE_MAXSIZE] = { { 13, 14, 27, 30 }, /* MCS 0 */ { 26, 29, 54, 60 }, /* MCS 1 */ { 39, 43, 81, 90 }, /* MCS 2 */ { 52, 58, 108, 120 }, /* MCS 3 */ { 78, 87, 162, 180 }, /* MCS 4 */ { 104, 116, 216, 240 }, /* MCS 5 */ { 117, 130, 243, 270 }, /* MCS 6 */ { 130, 144, 270, 300 }, /* MCS 7 */ { 26, 29, 54, 60 }, /* MCS 8 */ { 52, 58, 108, 120 }, /* MCS 9 */ { 78, 87, 162, 180 }, /* MCS 10 */ { 104, 116, 216, 240 }, /* MCS 11 */ { 156, 173, 324, 360 }, /* MCS 12 */ { 208, 231, 432, 480 }, /* MCS 13 */ { 234, 260, 486, 540 }, /* MCS 14 */ { 260, 289, 540, 600 }, /* MCS 15 */ { 39, 43, 81, 90 }, /* MCS 16 */ { 78, 87, 162, 180 }, /* MCS 17 */ { 117, 130, 243, 270 }, /* MCS 18 */ { 156, 173, 324, 360 }, /* MCS 19 */ { 234, 260, 486, 540 }, /* MCS 20 */ { 312, 347, 648, 720 }, /* MCS 21 */ { 351, 390, 729, 810 }, /* MCS 22 */ { 390, 433, 810, 900 }, /* MCS 23 */ { 52, 58, 108, 120 }, /* MCS 24 */ { 104, 116, 216, 240 }, /* MCS 25 */ { 156, 173, 324, 360 }, /* MCS 26 */ { 208, 231, 432, 480 }, /* MCS 27 */ { 312, 347, 648, 720 }, /* MCS 28 */ { 416, 462, 864, 960 }, /* MCS 29 */ { 468, 520, 972, 1080 }, /* MCS 30 */ { 520, 578, 1080, 1200 }, /* MCS 31 */ { 0, 0, 12, 13 }, /* MCS 32 */ { 78, 87, 162, 180 }, /* MCS 33 */ { 104, 116, 216, 240 }, /* MCS 34 */ { 130, 144, 270, 300 }, /* MCS 35 */ { 117, 130, 243, 270 }, /* MCS 36 */ { 156, 173, 324, 360 }, /* MCS 37 */ { 195, 217, 405, 450 }, /* MCS 38 */ { 104, 116, 216, 240 }, /* MCS 39 */ { 130, 144, 270, 300 }, /* MCS 40 */ { 130, 144, 270, 300 }, /* MCS 41 */ { 156, 173, 324, 360 }, /* MCS 42 */ { 182, 202, 378, 420 }, /* MCS 43 */ { 182, 202, 378, 420 }, /* MCS 44 */ { 208, 231, 432, 480 }, /* MCS 45 */ { 156, 173, 324, 360 }, /* MCS 46 */ { 195, 217, 405, 450 }, /* MCS 47 */ { 195, 217, 405, 450 }, /* MCS 48 */ { 234, 260, 486, 540 }, /* MCS 49 */ { 273, 303, 567, 630 }, /* MCS 50 */ { 273, 303, 567, 630 }, /* MCS 51 */ { 312, 347, 648, 720 }, /* MCS 52 */ { 130, 144, 270, 300 }, /* MCS 53 */ { 156, 173, 324, 360 }, /* MCS 54 */ { 182, 202, 378, 420 }, /* 
								   MCS 55 */
	{ 156, 173, 324, 360 },	/* MCS 56 */
	{ 182, 202, 378, 420 },	/* MCS 57 */
	{ 208, 231, 432, 480 },	/* MCS 58 */
	{ 234, 260, 486, 540 },	/* MCS 59 */
	{ 208, 231, 432, 480 },	/* MCS 60 */
	{ 234, 260, 486, 540 },	/* MCS 61 */
	{ 260, 289, 540, 600 },	/* MCS 62 */
	{ 260, 289, 540, 600 },	/* MCS 63 */
	{ 286, 318, 594, 660 },	/* MCS 64 */
	{ 195, 217, 405, 450 },	/* MCS 65 */
	{ 234, 260, 486, 540 },	/* MCS 66 */
	{ 273, 303, 567, 630 },	/* MCS 67 */
	{ 234, 260, 486, 540 },	/* MCS 68 */
	{ 273, 303, 567, 630 },	/* MCS 69 */
	{ 312, 347, 648, 720 },	/* MCS 70 */
	{ 351, 390, 729, 810 },	/* MCS 71 */
	{ 312, 347, 648, 720 },	/* MCS 72 */
	{ 351, 390, 729, 810 },	/* MCS 73 */
	{ 390, 433, 810, 900 },	/* MCS 74 */
	{ 390, 433, 810, 900 },	/* MCS 75 */
	{ 429, 477, 891, 990 },	/* MCS 76 */
};

static int ieee80211_ampdu_age = -1;	/* threshold for ampdu reorder q (ms) */
SYSCTL_PROC(_net_wlan, OID_AUTO, ampdu_age,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &ieee80211_ampdu_age, 0,
    ieee80211_sysctl_msecs_ticks, "I", "AMPDU max reorder age (ms)");

static int ieee80211_recv_bar_ena = 1;
SYSCTL_INT(_net_wlan, OID_AUTO, recv_bar, CTLFLAG_RW, &ieee80211_recv_bar_ena,
    0, "BAR frame processing (ena/dis)");

static int ieee80211_addba_timeout = -1;/* timeout for ADDBA response */
SYSCTL_PROC(_net_wlan, OID_AUTO, addba_timeout,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &ieee80211_addba_timeout, 0,
    ieee80211_sysctl_msecs_ticks, "I", "ADDBA request timeout (ms)");

static int ieee80211_addba_backoff = -1;/* backoff after max ADDBA requests */
SYSCTL_PROC(_net_wlan, OID_AUTO, addba_backoff,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &ieee80211_addba_backoff, 0,
    ieee80211_sysctl_msecs_ticks, "I", "ADDBA request backoff (ms)");

static int ieee80211_addba_maxtries = 3;/* max ADDBA requests before backoff */
SYSCTL_INT(_net_wlan, OID_AUTO, addba_maxtries, CTLFLAG_RW,
    &ieee80211_addba_maxtries, 0, "max ADDBA requests sent before backoff");

static int ieee80211_bar_timeout = -1;	/* timeout waiting for BAR response */
static int ieee80211_bar_maxtries = 50;/* max BAR requests before DELBA */

static ieee80211_recv_action_func ht_recv_action_ba_addba_request;
static ieee80211_recv_action_func ht_recv_action_ba_addba_response;
static ieee80211_recv_action_func ht_recv_action_ba_delba;
static ieee80211_recv_action_func ht_recv_action_ht_mimopwrsave;
static ieee80211_recv_action_func ht_recv_action_ht_txchwidth;

static ieee80211_send_action_func ht_send_action_ba_addba;
static ieee80211_send_action_func ht_send_action_ba_delba;
static ieee80211_send_action_func ht_send_action_ht_txchwidth;

static void
ieee80211_ht_init(void)
{
	/*
	 * Set up HT parameters that depend on the clock frequency.
	 */
	ieee80211_ampdu_age = msecs_to_ticks(500);
	ieee80211_addba_timeout = msecs_to_ticks(250);
	ieee80211_addba_backoff = msecs_to_ticks(10*1000);
	ieee80211_bar_timeout = msecs_to_ticks(250);
	/*
	 * Register action frame handlers.
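	 *
	 * A driver can hook the same dispatch table with its own handler;
	 * sketch (mydrv_recv_addba is hypothetical, its prototype must
	 * match ieee80211_recv_action_func):
	 *
	 *   static ieee80211_recv_action_func mydrv_recv_addba;
	 *
	 *   ieee80211_recv_action_register(IEEE80211_ACTION_CAT_BA,
	 *       IEEE80211_ACTION_BA_ADDBA_REQUEST, mydrv_recv_addba);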
*/ ieee80211_recv_action_register(IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_ADDBA_REQUEST, ht_recv_action_ba_addba_request); ieee80211_recv_action_register(IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_ADDBA_RESPONSE, ht_recv_action_ba_addba_response); ieee80211_recv_action_register(IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_DELBA, ht_recv_action_ba_delba); ieee80211_recv_action_register(IEEE80211_ACTION_CAT_HT, IEEE80211_ACTION_HT_MIMOPWRSAVE, ht_recv_action_ht_mimopwrsave); ieee80211_recv_action_register(IEEE80211_ACTION_CAT_HT, IEEE80211_ACTION_HT_TXCHWIDTH, ht_recv_action_ht_txchwidth); ieee80211_send_action_register(IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_ADDBA_REQUEST, ht_send_action_ba_addba); ieee80211_send_action_register(IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_ADDBA_RESPONSE, ht_send_action_ba_addba); ieee80211_send_action_register(IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_DELBA, ht_send_action_ba_delba); ieee80211_send_action_register(IEEE80211_ACTION_CAT_HT, IEEE80211_ACTION_HT_TXCHWIDTH, ht_send_action_ht_txchwidth); } SYSINIT(wlan_ht, SI_SUB_DRIVERS, SI_ORDER_FIRST, ieee80211_ht_init, NULL); static int ieee80211_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap); static int ieee80211_addba_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int dialogtoken, int baparamset, int batimeout); static int ieee80211_addba_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int code, int baparamset, int batimeout); static void ieee80211_addba_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap); static void null_addba_response_timeout(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap); static void ieee80211_bar_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int status); static void ampdu_tx_stop(struct ieee80211_tx_ampdu *tap); static void bar_stop_timer(struct ieee80211_tx_ampdu *tap); static int ampdu_rx_start(struct ieee80211_node *, struct ieee80211_rx_ampdu *, int baparamset, int batimeout, int baseqctl); static void ampdu_rx_stop(struct ieee80211_node *, struct ieee80211_rx_ampdu *); void ieee80211_ht_attach(struct ieee80211com *ic) { /* setup default aggregation policy */ ic->ic_recv_action = ieee80211_recv_action; ic->ic_send_action = ieee80211_send_action; ic->ic_ampdu_enable = ieee80211_ampdu_enable; ic->ic_addba_request = ieee80211_addba_request; ic->ic_addba_response = ieee80211_addba_response; ic->ic_addba_response_timeout = null_addba_response_timeout; ic->ic_addba_stop = ieee80211_addba_stop; ic->ic_bar_response = ieee80211_bar_response; ic->ic_ampdu_rx_start = ampdu_rx_start; ic->ic_ampdu_rx_stop = ampdu_rx_stop; ic->ic_htprotmode = IEEE80211_PROT_RTSCTS; ic->ic_curhtprotmode = IEEE80211_HTINFO_OPMODE_PURE; } void ieee80211_ht_detach(struct ieee80211com *ic) { } void ieee80211_ht_vattach(struct ieee80211vap *vap) { /* driver can override defaults */ vap->iv_ampdu_rxmax = IEEE80211_HTCAP_MAXRXAMPDU_8K; vap->iv_ampdu_density = IEEE80211_HTCAP_MPDUDENSITY_NA; vap->iv_ampdu_limit = vap->iv_ampdu_rxmax; vap->iv_amsdu_limit = vap->iv_htcaps & IEEE80211_HTCAP_MAXAMSDU; /* tx aggregation traffic thresholds */ vap->iv_ampdu_mintraffic[WME_AC_BK] = 128; vap->iv_ampdu_mintraffic[WME_AC_BE] = 64; vap->iv_ampdu_mintraffic[WME_AC_VO] = 32; vap->iv_ampdu_mintraffic[WME_AC_VI] = 32; vap->iv_htprotmode = IEEE80211_PROT_RTSCTS; vap->iv_curhtprotmode = IEEE80211_HTINFO_OPMODE_PURE; if (vap->iv_htcaps & IEEE80211_HTC_HT) { /* * Device is HT capable; enable all HT-related * 
facilities by default. * XXX these choices may be too aggressive. */ vap->iv_flags_ht |= IEEE80211_FHT_HT | IEEE80211_FHT_HTCOMPAT ; if (vap->iv_htcaps & IEEE80211_HTCAP_SHORTGI20) vap->iv_flags_ht |= IEEE80211_FHT_SHORTGI20; /* XXX infer from channel list? */ if (vap->iv_htcaps & IEEE80211_HTCAP_CHWIDTH40) { vap->iv_flags_ht |= IEEE80211_FHT_USEHT40; if (vap->iv_htcaps & IEEE80211_HTCAP_SHORTGI40) vap->iv_flags_ht |= IEEE80211_FHT_SHORTGI40; } /* enable RIFS if capable */ if (vap->iv_htcaps & IEEE80211_HTC_RIFS) vap->iv_flags_ht |= IEEE80211_FHT_RIFS; /* NB: A-MPDU and A-MSDU rx are mandated, these are tx only */ vap->iv_flags_ht |= IEEE80211_FHT_AMPDU_RX; if (vap->iv_htcaps & IEEE80211_HTC_AMPDU) vap->iv_flags_ht |= IEEE80211_FHT_AMPDU_TX; vap->iv_flags_ht |= IEEE80211_FHT_AMSDU_RX; if (vap->iv_htcaps & IEEE80211_HTC_AMSDU) vap->iv_flags_ht |= IEEE80211_FHT_AMSDU_TX; if (vap->iv_htcaps & IEEE80211_HTCAP_TXSTBC) vap->iv_flags_ht |= IEEE80211_FHT_STBC_TX; if (vap->iv_htcaps & IEEE80211_HTCAP_RXSTBC) vap->iv_flags_ht |= IEEE80211_FHT_STBC_RX; if (vap->iv_htcaps & IEEE80211_HTCAP_LDPC) vap->iv_flags_ht |= IEEE80211_FHT_LDPC_RX; if (vap->iv_htcaps & IEEE80211_HTC_TXLDPC) vap->iv_flags_ht |= IEEE80211_FHT_LDPC_TX; } /* NB: disable default legacy WDS, too many issues right now */ if (vap->iv_flags_ext & IEEE80211_FEXT_WDSLEGACY) vap->iv_flags_ht &= ~IEEE80211_FHT_HT; } void ieee80211_ht_vdetach(struct ieee80211vap *vap) { } static int ht_getrate(struct ieee80211com *ic, int index, enum ieee80211_phymode mode, int ratetype) { int mword, rate; mword = ieee80211_rate2media(ic, index | IEEE80211_RATE_MCS, mode); if (IFM_SUBTYPE(mword) != IFM_IEEE80211_MCS) return (0); switch (ratetype) { case 0: rate = ieee80211_htrates[index].ht20_rate_800ns; break; case 1: rate = ieee80211_htrates[index].ht20_rate_400ns; break; case 2: rate = ieee80211_htrates[index].ht40_rate_800ns; break; default: rate = ieee80211_htrates[index].ht40_rate_400ns; break; } return (rate); } static struct printranges { int minmcs; int maxmcs; int txstream; int ratetype; int htcapflags; } ranges[] = { { 0, 7, 1, 0, 0 }, { 8, 15, 2, 0, 0 }, { 16, 23, 3, 0, 0 }, { 24, 31, 4, 0, 0 }, { 32, 0, 1, 2, IEEE80211_HTC_TXMCS32 }, { 33, 38, 2, 0, IEEE80211_HTC_TXUNEQUAL }, { 39, 52, 3, 0, IEEE80211_HTC_TXUNEQUAL }, { 53, 76, 4, 0, IEEE80211_HTC_TXUNEQUAL }, { 0, 0, 0, 0, 0 }, }; static void ht_rateprint(struct ieee80211com *ic, enum ieee80211_phymode mode, int ratetype) { int minrate, maxrate; struct printranges *range; for (range = ranges; range->txstream != 0; range++) { if (ic->ic_txstream < range->txstream) continue; if (range->htcapflags && (ic->ic_htcaps & range->htcapflags) == 0) continue; if (ratetype < range->ratetype) continue; minrate = ht_getrate(ic, range->minmcs, mode, ratetype); maxrate = ht_getrate(ic, range->maxmcs, mode, ratetype); if (range->maxmcs) { ic_printf(ic, "MCS %d-%d: %d%sMbps - %d%sMbps\n", range->minmcs, range->maxmcs, minrate/2, ((minrate & 0x1) != 0 ? ".5" : ""), maxrate/2, ((maxrate & 0x1) != 0 ? ".5" : "")); } else { ic_printf(ic, "MCS %d: %d%sMbps\n", range->minmcs, minrate/2, ((minrate & 0x1) != 0 ? 
".5" : "")); } } } static void ht_announce(struct ieee80211com *ic, enum ieee80211_phymode mode) { const char *modestr = ieee80211_phymode_name[mode]; ic_printf(ic, "%s MCS 20MHz\n", modestr); ht_rateprint(ic, mode, 0); if (ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI20) { ic_printf(ic, "%s MCS 20MHz SGI\n", modestr); ht_rateprint(ic, mode, 1); } if (ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) { ic_printf(ic, "%s MCS 40MHz:\n", modestr); ht_rateprint(ic, mode, 2); } if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) && (ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI40)) { ic_printf(ic, "%s MCS 40MHz SGI:\n", modestr); ht_rateprint(ic, mode, 3); } } void ieee80211_ht_announce(struct ieee80211com *ic) { if (isset(ic->ic_modecaps, IEEE80211_MODE_11NA) || isset(ic->ic_modecaps, IEEE80211_MODE_11NG)) ic_printf(ic, "%dT%dR\n", ic->ic_txstream, ic->ic_rxstream); if (isset(ic->ic_modecaps, IEEE80211_MODE_11NA)) ht_announce(ic, IEEE80211_MODE_11NA); if (isset(ic->ic_modecaps, IEEE80211_MODE_11NG)) ht_announce(ic, IEEE80211_MODE_11NG); } void ieee80211_init_suphtrates(struct ieee80211com *ic) { #define ADDRATE(x) do { \ htrateset->rs_rates[htrateset->rs_nrates] = x; \ htrateset->rs_nrates++; \ } while (0) struct ieee80211_htrateset *htrateset = &ic->ic_sup_htrates; int i; memset(htrateset, 0, sizeof(struct ieee80211_htrateset)); for (i = 0; i < ic->ic_txstream * 8; i++) ADDRATE(i); if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) && (ic->ic_htcaps & IEEE80211_HTC_TXMCS32)) ADDRATE(32); if (ic->ic_htcaps & IEEE80211_HTC_TXUNEQUAL) { if (ic->ic_txstream >= 2) { for (i = 33; i <= 38; i++) ADDRATE(i); } if (ic->ic_txstream >= 3) { for (i = 39; i <= 52; i++) ADDRATE(i); } if (ic->ic_txstream == 4) { for (i = 53; i <= 76; i++) ADDRATE(i); } } #undef ADDRATE } /* * Receive processing. */ /* * Decap the encapsulated A-MSDU frames and dispatch all but * the last for delivery. The last frame is returned for * delivery via the normal path. */ struct mbuf * ieee80211_decap_amsdu(struct ieee80211_node *ni, struct mbuf *m) { struct ieee80211vap *vap = ni->ni_vap; int framelen; struct mbuf *n; /* discard 802.3 header inserted by ieee80211_decap */ m_adj(m, sizeof(struct ether_header)); vap->iv_stats.is_amsdu_decap++; for (;;) { /* * Decap the first frame, bust it apart from the * remainder and deliver. We leave the last frame * delivery to the caller (for consistency with other * code paths, could also do it here). */ m = ieee80211_decap1(m, &framelen); if (m == NULL) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY, ni->ni_macaddr, "a-msdu", "%s", "decap failed"); vap->iv_stats.is_amsdu_tooshort++; return NULL; } if (m->m_pkthdr.len == framelen) break; n = m_split(m, framelen, IEEE80211_M_NOWAIT); if (n == NULL) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY, ni->ni_macaddr, "a-msdu", "%s", "unable to split encapsulated frames"); vap->iv_stats.is_amsdu_split++; m_freem(m); /* NB: must reclaim */ return NULL; } vap->iv_deliver_data(vap, ni, m); /* * Remove frame contents; each intermediate frame * is required to be aligned to a 4-byte boundary. */ m = n; m_adj(m, roundup2(framelen, 4) - framelen); /* padding */ } return m; /* last delivered by caller */ } static void ampdu_rx_purge_slot(struct ieee80211_rx_ampdu *rap, int i) { struct mbuf *m; /* Walk the queue, removing frames as appropriate */ for (;;) { m = mbufq_dequeue(&rap->rxa_mq[i]); if (m == NULL) break; rap->rxa_qbytes -= m->m_pkthdr.len; rap->rxa_qframes--; m_freem(m); } } /* * Add the given frame to the current RX reorder slot. 
* * For future offloaded A-MSDU handling where multiple frames with * the same sequence number show up here, this routine will append * those frames as long as they're appropriately tagged. */ static int ampdu_rx_add_slot(struct ieee80211_rx_ampdu *rap, int off, int tid, ieee80211_seq rxseq, struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_rx_stats *rxs) { const struct ieee80211_rx_stats *rxs_final = NULL; struct ieee80211vap *vap = ni->ni_vap; int toss_dup; #define PROCESS 0 /* caller should process frame */ #define CONSUMED 1 /* frame consumed, caller does nothing */ /* * Figure out if this is a duplicate frame for the given slot. * * We're assuming that the driver will hand us all the frames * for a given AMSDU decap pass and if we get /a/ frame * for an AMSDU decap then we'll get all of them. * * The tricksy bit is that we don't know when the /end/ of * the decap pass is, because we aren't tracking state here * per-slot to know that we've finished receiving the frame list. * * The driver sets RX_F_AMSDU and RX_F_AMSDU_MORE to tell us * what's going on; so ideally we'd just check the frame at the * end of the reassembly slot to see if it's F_AMSDU w/ F_AMSDU_MORE set - * that means we've received the whole AMSDU decap pass. */ /* * Get the rxs of the final mbuf in the slot, if one exists. */ - if (mbufq_len(&rap->rxa_mq[off]) != 0) { + if (!mbufq_empty(&rap->rxa_mq[off])) { rxs_final = ieee80211_get_rx_params_ptr(mbufq_last(&rap->rxa_mq[off])); } /* Default to tossing the duplicate frame */ toss_dup = 1; /* * Check to see if the final frame has F_AMSDU set and F_AMSDU_MORE clear, AND * this frame has F_AMSDU set (MORE or otherwise.) That's a sign * that more can come. */ if ((rxs != NULL) && (rxs_final != NULL) && ieee80211_check_rxseq_amsdu(rxs) && ieee80211_check_rxseq_amsdu(rxs_final)) { if (! ieee80211_check_rxseq_amsdu_more(rxs_final)) { /* * amsdu_more() returning 0 means "it's not the * final frame" so we can append more * frames here. */ toss_dup = 0; } } /* * If the list is empty OR we have determined we can put more * driver decap'ed AMSDU frames in here, then insert. */ - if ((mbufq_len(&rap->rxa_mq[off]) == 0) || (toss_dup == 0)) { + if (mbufq_empty(&rap->rxa_mq[off]) || (toss_dup == 0)) { if (mbufq_enqueue(&rap->rxa_mq[off], m) != 0) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr, "a-mpdu queue fail", "seqno %u tid %u BA win <%u:%u> off=%d, qlen=%d, maxqlen=%d", rxseq, tid, rap->rxa_start, IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1), off, mbufq_len(&rap->rxa_mq[off]), rap->rxa_mq[off].mq_maxlen); /* XXX error count */ m_freem(m); return CONSUMED; } rap->rxa_qframes++; rap->rxa_qbytes += m->m_pkthdr.len; vap->iv_stats.is_ampdu_rx_reorder++; /* * Statistics for AMSDU decap.
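 *
 * (Decision summary, as a sketch: toss_dup is cleared, and the new
 * frame appended, only when ieee80211_check_rxseq_amsdu() is true
 * for both this frame and the frame already at the tail of the
 * slot, and ieee80211_check_rxseq_amsdu_more() is false for that
 * tail frame, i.e. the driver has not yet marked the decap batch
 * as complete.)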
*/ if (rxs != NULL && ieee80211_check_rxseq_amsdu(rxs)) { if (ieee80211_check_rxseq_amsdu_more(rxs)) { /* more=1, AMSDU, end of batch */ IEEE80211_NODE_STAT(ni, rx_amsdu_more_end); } else { IEEE80211_NODE_STAT(ni, rx_amsdu_more); } } } else { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr, "a-mpdu duplicate", "seqno %u tid %u BA win <%u:%u>", rxseq, tid, rap->rxa_start, IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1)); if (rxs != NULL) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr, "a-mpdu duplicate", "seqno %d tid %u pktflags 0x%08x\n", rxseq, tid, rxs->c_pktflags); } if (rxs_final != NULL) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr, "a-mpdu duplicate", "final: pktflags 0x%08x\n", rxs_final->c_pktflags); } vap->iv_stats.is_rx_dup++; IEEE80211_NODE_STAT(ni, rx_dup); m_freem(m); } return CONSUMED; #undef CONSUMED #undef PROCESS } /* * Purge all frames in the A-MPDU re-order queue. */ static void ampdu_rx_purge(struct ieee80211_rx_ampdu *rap) { int i; for (i = 0; i < rap->rxa_wnd; i++) { ampdu_rx_purge_slot(rap, i); if (rap->rxa_qframes == 0) break; } KASSERT(rap->rxa_qbytes == 0 && rap->rxa_qframes == 0, ("lost %u data, %u frames on ampdu rx q", rap->rxa_qbytes, rap->rxa_qframes)); } static void ieee80211_ampdu_rx_init_rap(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap) { int i; /* XXX TODO: ensure the queues are empty */ memset(rap, 0, sizeof(*rap)); for (i = 0; i < IEEE80211_AGGR_BAWMAX; i++) mbufq_init(&rap->rxa_mq[i], 256); } /* * Start A-MPDU rx/re-order processing for the specified TID. */ static int ampdu_rx_start(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap, int baparamset, int batimeout, int baseqctl) { struct ieee80211vap *vap = ni->ni_vap; int bufsiz = _IEEE80211_MASKSHIFT(baparamset, IEEE80211_BAPS_BUFSIZ); if (rap->rxa_flags & IEEE80211_AGGR_RUNNING) { /* * AMPDU previously setup and not terminated with a DELBA, * flush the reorder q's in case anything remains. */ ampdu_rx_purge(rap); } ieee80211_ampdu_rx_init_rap(ni, rap); rap->rxa_wnd = (bufsiz == 0) ? IEEE80211_AGGR_BAWMAX : min(bufsiz, IEEE80211_AGGR_BAWMAX); rap->rxa_start = _IEEE80211_MASKSHIFT(baseqctl, IEEE80211_BASEQ_START); rap->rxa_flags |= IEEE80211_AGGR_RUNNING | IEEE80211_AGGR_XCHGPEND; /* XXX this should be a configuration flag */ if ((vap->iv_htcaps & IEEE80211_HTC_RX_AMSDU_AMPDU) && (_IEEE80211_MASKSHIFT(baparamset, IEEE80211_BAPS_AMSDU))) rap->rxa_flags |= IEEE80211_AGGR_AMSDU; else rap->rxa_flags &= ~IEEE80211_AGGR_AMSDU; return 0; } /* * Public function; manually set up the RX ampdu state. */ int ieee80211_ampdu_rx_start_ext(struct ieee80211_node *ni, int tid, int seq, int baw) { struct ieee80211_rx_ampdu *rap; /* XXX TODO: sanity check tid, seq, baw */ rap = &ni->ni_rx_ampdu[tid]; if (rap->rxa_flags & IEEE80211_AGGR_RUNNING) { /* * AMPDU previously setup and not terminated with a DELBA, * flush the reorder q's in case anything remains. */ ampdu_rx_purge(rap); } ieee80211_ampdu_rx_init_rap(ni, rap); rap->rxa_wnd = (baw == 0) ?
IEEE80211_AGGR_BAWMAX : min(baw, IEEE80211_AGGR_BAWMAX); if (seq == -1) { /* Wait for the first RX frame, use that as BAW */ rap->rxa_start = 0; rap->rxa_flags |= IEEE80211_AGGR_WAITRX; } else { rap->rxa_start = seq; } rap->rxa_flags |= IEEE80211_AGGR_RUNNING | IEEE80211_AGGR_XCHGPEND; /* XXX TODO: no amsdu flag */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: tid=%d, start=%d, wnd=%d, flags=0x%08x", __func__, tid, seq, rap->rxa_wnd, rap->rxa_flags); return 0; } /* * Public function; manually stop the RX AMPDU state. */ void ieee80211_ampdu_rx_stop_ext(struct ieee80211_node *ni, int tid) { struct ieee80211_rx_ampdu *rap; /* XXX TODO: sanity check tid, seq, baw */ rap = &ni->ni_rx_ampdu[tid]; ampdu_rx_stop(ni, rap); } /* * Stop A-MPDU rx processing for the specified TID. */ static void ampdu_rx_stop(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap) { ampdu_rx_purge(rap); rap->rxa_flags &= ~(IEEE80211_AGGR_RUNNING | IEEE80211_AGGR_XCHGPEND | IEEE80211_AGGR_WAITRX); } /* * Dispatch a frame from the A-MPDU reorder queue. The * frame is fed back into ieee80211_input marked with an * M_AMPDU_MPDU flag so it doesn't come back to us (it also * permits ieee80211_input to optimize re-processing). */ static __inline void ampdu_dispatch(struct ieee80211_node *ni, struct mbuf *m) { m->m_flags |= M_AMPDU_MPDU; /* bypass normal processing */ /* NB: rssi and noise are ignored w/ M_AMPDU_MPDU set */ (void) ieee80211_input(ni, m, 0, 0); } static int ampdu_dispatch_slot(struct ieee80211_rx_ampdu *rap, struct ieee80211_node *ni, int i) { struct mbuf *m; int n = 0; for (;;) { m = mbufq_dequeue(&rap->rxa_mq[i]); if (m == NULL) break; n++; rap->rxa_qbytes -= m->m_pkthdr.len; rap->rxa_qframes--; ampdu_dispatch(ni, m); } return (n); } static void ampdu_rx_moveup(struct ieee80211_rx_ampdu *rap, struct ieee80211_node *ni, int i, int winstart) { struct ieee80211vap *vap = ni->ni_vap; /* * If frames remain, copy the mbuf pointers down so * they correspond to the offsets in the new window. */ if (rap->rxa_qframes != 0) { int n = rap->rxa_qframes, j; for (j = i+1; j < rap->rxa_wnd; j++) { /* * Concat the list contents over, which will * blank the source list for us. */ if (mbufq_len(&rap->rxa_mq[j]) != 0) { n = n - mbufq_len(&rap->rxa_mq[j]); mbufq_concat(&rap->rxa_mq[j-i], &rap->rxa_mq[j]); KASSERT(n >= 0, ("%s: n < 0 (%d)", __func__, n)); if (n == 0) break; } } KASSERT(n == 0, ("%s: lost %d frames, qframes %d off %d " "BA win <%d:%d> winstart %d", __func__, n, rap->rxa_qframes, i, rap->rxa_start, IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1), winstart)); vap->iv_stats.is_ampdu_rx_copy += rap->rxa_qframes; } } /* * Dispatch as many frames as possible from the re-order queue. * Frames will always be "at the front"; we process all frames * up to the first empty slot in the window. On completion we * cleanup state if there are still pending frames in the current * BA window. We assume the frame at slot 0 is already handled * by the caller; we always start at slot 1. */ static void ampdu_rx_dispatch(struct ieee80211_rx_ampdu *rap, struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; int i, r, r2; /* flush run of frames */ r2 = 0; for (i = 1; i < rap->rxa_wnd; i++) { r = ampdu_dispatch_slot(rap, ni, i); if (r == 0) break; r2 += r; } /* move up frames */ ampdu_rx_moveup(rap, ni, i, -1); /* * Adjust the start of the BA window to * reflect the frames just dispatched. 
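 *
 * Worked example (illustrative numbers only): with rxa_start at
 * seq 100, the caller has already dispatched slot 0 (seq 100); if
 * slots 1 and 2 (seqs 101, 102) hold frames and slot 3 is empty,
 * the loop above stops at i == 3 and rxa_start below becomes
 * IEEE80211_SEQ_ADD(100, 3) == 103.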
*/ rap->rxa_start = IEEE80211_SEQ_ADD(rap->rxa_start, i); vap->iv_stats.is_ampdu_rx_oor += r2; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: moved slot up %d slots to start at %d (%d frames)", __func__, i, rap->rxa_start, r2); } /* * Dispatch all frames in the A-MPDU re-order queue. */ static void ampdu_rx_flush(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap) { int i, r; for (i = 0; i < rap->rxa_wnd; i++) { r = ampdu_dispatch_slot(rap, ni, i); if (r == 0) continue; ni->ni_vap->iv_stats.is_ampdu_rx_oor += r; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: moved slot up %d slots to start at %d (%d frames)", __func__, 1, rap->rxa_start, r); if (rap->rxa_qframes == 0) break; } } /* * Dispatch all frames in the A-MPDU re-order queue * preceding the specified sequence number. This logic * handles window moves due to a received MSDU or BAR. */ static void ampdu_rx_flush_upto(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap, ieee80211_seq winstart) { struct ieee80211vap *vap = ni->ni_vap; ieee80211_seq seqno; int i, r; /* * Flush any complete MSDU's with a sequence number lower * than winstart. Gaps may exist. Note that we may actually * dispatch frames past winstart if a run continues; this is * an optimization that avoids having to do a separate pass * to dispatch frames after moving the BA window start. */ seqno = rap->rxa_start; for (i = 0; i < rap->rxa_wnd; i++) { if ((r = mbufq_len(&rap->rxa_mq[i])) != 0) { (void) ampdu_dispatch_slot(rap, ni, i); } else { if (!IEEE80211_SEQ_BA_BEFORE(seqno, winstart)) break; } vap->iv_stats.is_ampdu_rx_oor += r; seqno = IEEE80211_SEQ_INC(seqno); IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: moved slot up %d slots to start at %d (%d frames)", __func__, 1, seqno, r); } /* * If frames remain, copy the mbuf pointers down so * they correspond to the offsets in the new window. */ ampdu_rx_moveup(rap, ni, i, winstart); /* * Move the start of the BA window; we use the * sequence number of the last MSDU that was * passed up the stack, plus one, or winstart if we * stopped on a gap in the reorder buffer. */ rap->rxa_start = seqno; } /* * Process a received QoS data frame for an HT station. Handle * A-MPDU reordering: if this frame is received out of order * and falls within the BA window hold onto it. Otherwise if * this frame completes a run, flush any pending frames. We * return 1 if the frame is consumed. A 0 is returned if * the frame should be processed normally by the caller. * * A-MSDU: handle hardware decap'ed A-MSDU frames that are * pretending to be MPDU's. They're dispatched directly if * possible; otherwise we attempt to place them into the receive * reordering slot. */ int ieee80211_ampdu_reorder(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_rx_stats *rxs) { #define PROCESS 0 /* caller should process frame */ #define CONSUMED 1 /* frame consumed, caller does nothing */ struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_qosframe *wh; struct ieee80211_rx_ampdu *rap; ieee80211_seq rxseq; uint8_t tid; int off; int amsdu = ieee80211_check_rxseq_amsdu(rxs); int amsdu_end = ieee80211_check_rxseq_amsdu_more(rxs); KASSERT((m->m_flags & (M_AMPDU | M_AMPDU_MPDU)) == M_AMPDU, ("!a-mpdu or already re-ordered, flags 0x%x", m->m_flags)); KASSERT(ni->ni_flags & IEEE80211_NODE_HT, ("not an HT sta")); /* NB: m_len known to be sufficient */ wh = mtod(m, struct ieee80211_qosframe *); if (wh->i_fc[0] != IEEE80211_FC0_QOSDATA) { /* * Not QoS data, shouldn't get here but just * return it to the caller for processing.
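 * (PROCESS tells the caller to continue down the normal input
 * path; CONSUMED means this routine has queued or freed the mbuf
 * and the caller must not touch it again.)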
*/ return PROCESS; } /* * 802.11-2012 9.3.2.10 - Duplicate detection and recovery. * * Multicast QoS data frames are checked against a different * counter, not the per-TID counter. */ if (IEEE80211_IS_MULTICAST(wh->i_addr1)) return PROCESS; tid = ieee80211_getqos(wh)[0]; tid &= IEEE80211_QOS_TID; rap = &ni->ni_rx_ampdu[tid]; if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0) { /* * No ADDBA request yet, don't touch. */ return PROCESS; } rxseq = le16toh(*(uint16_t *)wh->i_seq); if ((rxseq & IEEE80211_SEQ_FRAG_MASK) != 0) { /* * Fragments are not allowed; toss. */ IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr, "A-MPDU", "fragment, rxseq 0x%x tid %u%s", rxseq, tid, wh->i_fc[1] & IEEE80211_FC1_RETRY ? " (retransmit)" : ""); vap->iv_stats.is_ampdu_rx_drop++; IEEE80211_NODE_STAT(ni, rx_drop); m_freem(m); return CONSUMED; } rxseq >>= IEEE80211_SEQ_SEQ_SHIFT; rap->rxa_nframes++; /* * Handle waiting for the first frame to define the BAW. * Some firmware doesn't provide the RX of the starting point * of the BAW and we have to cope. */ if (rap->rxa_flags & IEEE80211_AGGR_WAITRX) { rap->rxa_flags &= ~IEEE80211_AGGR_WAITRX; rap->rxa_start = rxseq; } again: if (rxseq == rap->rxa_start) { /* * First frame in window. */ if (rap->rxa_qframes != 0) { /* * Dispatch as many packets as we can. */ - KASSERT((mbufq_len(&rap->rxa_mq[0]) == 0), ("unexpected dup")); + KASSERT(mbufq_empty(&rap->rxa_mq[0]), ("unexpected dup")); ampdu_dispatch(ni, m); ampdu_rx_dispatch(rap, ni); return CONSUMED; } else { /* * In order; advance window if needed and notify * caller to dispatch directly. */ if (amsdu) { if (amsdu_end) { rap->rxa_start = IEEE80211_SEQ_INC(rxseq); IEEE80211_NODE_STAT(ni, rx_amsdu_more_end); } else { IEEE80211_NODE_STAT(ni, rx_amsdu_more); } } else { rap->rxa_start = IEEE80211_SEQ_INC(rxseq); } return PROCESS; } } /* * Frame is out of order; store if in the BA window. */ /* calculate offset in BA window */ off = IEEE80211_SEQ_SUB(rxseq, rap->rxa_start); if (off < rap->rxa_wnd) { /* * Common case (hopefully): in the BA window. * Sec 9.10.7.6.2 a) (p.137) */ /* * Check for frames sitting too long in the reorder queue. * This should only ever happen if frames go undelivered * without the sender otherwise notifying us (e.g. with a * BAR to move the window). Typically this happens because * of vendor bugs that cause the sequence number to jump. * When this happens we get a gap in the reorder queue that * leaves frames sitting on the queue until they get pushed * out due to window moves. When the vendor does not send * BAR this move only happens due to explicit packet sends. * * NB: we only track the time of the oldest frame in the * reorder q; this means that if we flush we might push * frames that are still "new"; if this happens then subsequent * frames will result in BA window moves which cost something * but is still better than a big throughput dip. */ if (rap->rxa_qframes != 0) { /* XXX honor batimeout? */ if (ticks - rap->rxa_age > ieee80211_ampdu_age) { /* * Too long since we received the first * frame; flush the reorder buffer. */ if (rap->rxa_qframes != 0) { vap->iv_stats.is_ampdu_rx_age += rap->rxa_qframes; ampdu_rx_flush(ni, rap); } /* * Advance the window if needed and notify * the caller to dispatch directly.
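 * (rxa_age was set when the first frame went into the empty
 * reorder window, so the check above bounds how long a hole in
 * the sequence space can hold up completed frames: roughly
 * ieee80211_ampdu_age ticks.)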
*/ if (amsdu) { if (amsdu_end) { rap->rxa_start = IEEE80211_SEQ_INC(rxseq); IEEE80211_NODE_STAT(ni, rx_amsdu_more_end); } else { IEEE80211_NODE_STAT(ni, rx_amsdu_more); } } else { rap->rxa_start = IEEE80211_SEQ_INC(rxseq); } return PROCESS; } } else { /* * First frame, start aging timer. */ rap->rxa_age = ticks; } /* save packet - this consumes, no matter what */ ampdu_rx_add_slot(rap, off, tid, rxseq, ni, m, rxs); return CONSUMED; } if (off < IEEE80211_SEQ_BA_RANGE) { /* * Outside the BA window, but within range; * flush the reorder q and move the window. * Sec 9.10.7.6.2 b) (p.138) */ IEEE80211_NOTE(vap, IEEE80211_MSG_11N, ni, "move BA win <%u:%u> (%u frames) rxseq %u tid %u", rap->rxa_start, IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1), rap->rxa_qframes, rxseq, tid); vap->iv_stats.is_ampdu_rx_move++; /* * The spec says to flush frames up to but not including: * WinStart_B = rxseq - rap->rxa_wnd + 1 * Then insert the frame or notify the caller to process * it immediately. We can safely do this by just starting * over again because we know the frame will now be within * the BA window. */ /* NB: rxa_wnd known to be >0 */ ampdu_rx_flush_upto(ni, rap, IEEE80211_SEQ_SUB(rxseq, rap->rxa_wnd-1)); goto again; } else { /* * Outside the BA window and out of range; toss. * Sec 9.10.7.6.2 c) (p.138) */ IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr, "MPDU", "BA win <%u:%u> (%u frames) rxseq %u tid %u%s", rap->rxa_start, IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1), rap->rxa_qframes, rxseq, tid, wh->i_fc[1] & IEEE80211_FC1_RETRY ? " (retransmit)" : ""); vap->iv_stats.is_ampdu_rx_drop++; IEEE80211_NODE_STAT(ni, rx_drop); m_freem(m); return CONSUMED; } #undef CONSUMED #undef PROCESS } /* * Process a BAR ctl frame. Dispatch all frames up to * the sequence number of the frame. If this frame is * out of range it's discarded. */ void ieee80211_recv_bar(struct ieee80211_node *ni, struct mbuf *m0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_frame_bar *wh; struct ieee80211_rx_ampdu *rap; ieee80211_seq rxseq; int tid, off; if (!ieee80211_recv_bar_ena) { #if 0 IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_11N, ni->ni_macaddr, "BAR", "%s", "processing disabled"); #endif vap->iv_stats.is_ampdu_bar_bad++; return; } wh = mtod(m0, struct ieee80211_frame_bar *); /* XXX check basic BAR */ tid = _IEEE80211_MASKSHIFT(le16toh(wh->i_ctl), IEEE80211_BAR_TID); rap = &ni->ni_rx_ampdu[tid]; if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0) { /* * No ADDBA request yet, don't touch. */ IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr, "BAR", "no BA stream, tid %u", tid); vap->iv_stats.is_ampdu_bar_bad++; return; } vap->iv_stats.is_ampdu_bar_rx++; rxseq = le16toh(wh->i_seq) >> IEEE80211_SEQ_SEQ_SHIFT; if (rxseq == rap->rxa_start) return; /* calculate offset in BA window */ off = IEEE80211_SEQ_SUB(rxseq, rap->rxa_start); if (off < IEEE80211_SEQ_BA_RANGE) { /* * Flush the reorder q up to rxseq and move the window. * Sec 9.10.7.6.3 a) (p.138) */ IEEE80211_NOTE(vap, IEEE80211_MSG_11N, ni, "BAR moves BA win <%u:%u> (%u frames) rxseq %u tid %u", rap->rxa_start, IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1), rap->rxa_qframes, rxseq, tid); vap->iv_stats.is_ampdu_bar_move++; ampdu_rx_flush_upto(ni, rap, rxseq); if (off >= rap->rxa_wnd) { /* * BAR specifies a window start to the right of BA * window; we must move it explicitly since * ampdu_rx_flush_upto will not. */ rap->rxa_start = rxseq; } } else { /* * Out of range; toss. 
* Sec 9.10.7.6.3 b) (p.138) */ IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr, "BAR", "BA win <%u:%u> (%u frames) rxseq %u tid %u%s", rap->rxa_start, IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1), rap->rxa_qframes, rxseq, tid, wh->i_fc[1] & IEEE80211_FC1_RETRY ? " (retransmit)" : ""); vap->iv_stats.is_ampdu_bar_oow++; IEEE80211_NODE_STAT(ni, rx_drop); } } /* * Setup HT-specific state in a node. Called only * when HT use is negotiated so we don't do extra * work for temporary and/or legacy sta's. */ void ieee80211_ht_node_init(struct ieee80211_node *ni) { struct ieee80211_tx_ampdu *tap; int tid; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: called (%p)", __func__, ni); if (ni->ni_flags & IEEE80211_NODE_HT) { /* * Clean AMPDU state on re-associate. This handles the case * where a station leaves w/o notifying us and then returns * before node is reaped for inactivity. */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: calling cleanup (%p)", __func__, ni); ieee80211_ht_node_cleanup(ni); } for (tid = 0; tid < WME_NUM_TID; tid++) { tap = &ni->ni_tx_ampdu[tid]; tap->txa_tid = tid; tap->txa_ni = ni; ieee80211_txampdu_init_pps(tap); /* NB: further initialization deferred */ ieee80211_ampdu_rx_init_rap(ni, &ni->ni_rx_ampdu[tid]); } ni->ni_flags |= IEEE80211_NODE_HT | IEEE80211_NODE_AMPDU | IEEE80211_NODE_AMSDU; } /* * Cleanup HT-specific state in a node. Called only * when HT use has been marked. */ void ieee80211_ht_node_cleanup(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; int i; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: called (%p)", __func__, ni); KASSERT(ni->ni_flags & IEEE80211_NODE_HT, ("not an HT node")); /* XXX optimize this */ for (i = 0; i < WME_NUM_TID; i++) { struct ieee80211_tx_ampdu *tap = &ni->ni_tx_ampdu[i]; if (tap->txa_flags & IEEE80211_AGGR_SETUP) ampdu_tx_stop(tap); } for (i = 0; i < WME_NUM_TID; i++) ic->ic_ampdu_rx_stop(ni, &ni->ni_rx_ampdu[i]); ni->ni_htcap = 0; ni->ni_flags &= ~IEEE80211_NODE_HT_ALL; } /* * Age out HT resources for a station. */ void ieee80211_ht_node_age(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; uint8_t tid; KASSERT(ni->ni_flags & IEEE80211_NODE_HT, ("not an HT sta")); for (tid = 0; tid < WME_NUM_TID; tid++) { struct ieee80211_rx_ampdu *rap; rap = &ni->ni_rx_ampdu[tid]; if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0) continue; if (rap->rxa_qframes == 0) continue; /* * Check for frames sitting too long in the reorder queue. * See above for more details on what's happening here. */ /* XXX honor batimeout? */ if (ticks - rap->rxa_age > ieee80211_ampdu_age) { /* * Too long since we received the first * frame; flush the reorder buffer. */ vap->iv_stats.is_ampdu_rx_age += rap->rxa_qframes; ampdu_rx_flush(ni, rap); } } } static struct ieee80211_channel * findhtchan(struct ieee80211com *ic, struct ieee80211_channel *c, int htflags) { return ieee80211_find_channel(ic, c->ic_freq, (c->ic_flags &~ IEEE80211_CHAN_HT) | htflags); } /* * Adjust a channel to be HT/non-HT according to the vap's configuration. 
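 *
 * A minimal usage sketch (illustrative only; "ic", "vap" and "chan"
 * are assumed to come from the caller's context):
 */
#if 0
	struct ieee80211_channel *c;

	/* Promote (or demote) the candidate channel per the vap HT config. */
	c = ieee80211_ht_adjust_channel(ic, chan, vap->iv_flags_ht);
	if (c != chan)
		chan = c;	/* an HT (or legacy) variant was found */
#endif
/*
 * The function itself: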
*/ struct ieee80211_channel * ieee80211_ht_adjust_channel(struct ieee80211com *ic, struct ieee80211_channel *chan, int flags) { struct ieee80211_channel *c; if (flags & IEEE80211_FHT_HT) { /* promote to HT if possible */ if (flags & IEEE80211_FHT_USEHT40) { if (!IEEE80211_IS_CHAN_HT40(chan)) { /* NB: arbitrarily pick ht40+ over ht40- */ c = findhtchan(ic, chan, IEEE80211_CHAN_HT40U); if (c == NULL) c = findhtchan(ic, chan, IEEE80211_CHAN_HT40D); if (c == NULL) c = findhtchan(ic, chan, IEEE80211_CHAN_HT20); if (c != NULL) chan = c; } } else if (!IEEE80211_IS_CHAN_HT20(chan)) { c = findhtchan(ic, chan, IEEE80211_CHAN_HT20); if (c != NULL) chan = c; } } else if (IEEE80211_IS_CHAN_HT(chan)) { /* demote to legacy, HT use is disabled */ c = ieee80211_find_channel(ic, chan->ic_freq, chan->ic_flags &~ IEEE80211_CHAN_HT); if (c != NULL) chan = c; } return chan; } /* * Setup HT-specific state for a legacy WDS peer. */ void ieee80211_ht_wds_init(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_tx_ampdu *tap; int tid; KASSERT(vap->iv_flags_ht & IEEE80211_FHT_HT, ("no HT requested")); /* XXX check scan cache in case peer has an ap and we have info */ /* * If set up with a legacy channel, locate an HT channel. * Otherwise, if the inherited channel (from a companion * AP) is suitable, use it so we use the same location * for the extension channel. */ ni->ni_chan = ieee80211_ht_adjust_channel(ni->ni_ic, ni->ni_chan, ieee80211_htchanflags(ni->ni_chan)); ni->ni_htcap = 0; if (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) ni->ni_htcap |= IEEE80211_HTCAP_SHORTGI20; if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) { ni->ni_htcap |= IEEE80211_HTCAP_CHWIDTH40; ni->ni_chw = 40; if (IEEE80211_IS_CHAN_HT40U(ni->ni_chan)) ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_ABOVE; else if (IEEE80211_IS_CHAN_HT40D(ni->ni_chan)) ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_BELOW; if (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40) ni->ni_htcap |= IEEE80211_HTCAP_SHORTGI40; } else { ni->ni_chw = 20; ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_NONE; } ni->ni_htctlchan = ni->ni_chan->ic_ieee; if (vap->iv_flags_ht & IEEE80211_FHT_RIFS) ni->ni_flags |= IEEE80211_NODE_RIFS; /* XXX does it make sense to enable SMPS? */ ni->ni_htopmode = 0; /* XXX need protection state */ ni->ni_htstbc = 0; /* XXX need info */ for (tid = 0; tid < WME_NUM_TID; tid++) { tap = &ni->ni_tx_ampdu[tid]; tap->txa_tid = tid; ieee80211_txampdu_init_pps(tap); } /* NB: AMPDU tx/rx governed by IEEE80211_FHT_AMPDU_{TX,RX} */ ni->ni_flags |= IEEE80211_NODE_HT | IEEE80211_NODE_AMPDU | IEEE80211_NODE_AMSDU; } /* * Notify a VAP of a change in the HTINFO ie if it's a hostap VAP. * * This is to be called from the deferred HT protection update * task once the flags are updated. */ void ieee80211_htinfo_notify(struct ieee80211vap *vap) { IEEE80211_LOCK_ASSERT(vap->iv_ic); if (vap->iv_opmode != IEEE80211_M_HOSTAP) return; if (vap->iv_state != IEEE80211_S_RUN || !IEEE80211_IS_CHAN_HT(vap->iv_bss->ni_chan)) return; IEEE80211_NOTE(vap, IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, vap->iv_bss, "HT bss occupancy change: %d sta, %d ht, " "%d ht40%s, HT protmode now 0x%x" , vap->iv_sta_assoc , vap->iv_ht_sta_assoc , vap->iv_ht40_sta_assoc , (vap->iv_flags_ht & IEEE80211_FHT_NONHT_PR) ? ", non-HT sta present" : "" , vap->iv_curhtprotmode); ieee80211_beacon_notify(vap, IEEE80211_BEACON_HTINFO); } /* * Calculate HT protection mode from current * state and handle updates.
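 *
 * The cascade below reduces to (sketch):
 *
 *	any non-HT sta associated              -> MIXED + NONHT_PRESENT
 *	non-HT traffic from an overlapping bss -> PROTOPT + NONHT_PRESENT
 *	HT40 bss channel with an HT20-only sta -> HT20PR
 *	otherwise                              -> PURE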
*/ static void htinfo_update(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; uint8_t protmode; if (vap->iv_sta_assoc != vap->iv_ht_sta_assoc) { protmode = IEEE80211_HTINFO_OPMODE_MIXED | IEEE80211_HTINFO_NONHT_PRESENT; } else if (vap->iv_flags_ht & IEEE80211_FHT_NONHT_PR) { protmode = IEEE80211_HTINFO_OPMODE_PROTOPT | IEEE80211_HTINFO_NONHT_PRESENT; } else if (ic->ic_bsschan != IEEE80211_CHAN_ANYC && IEEE80211_IS_CHAN_HT40(ic->ic_bsschan) && vap->iv_sta_assoc != vap->iv_ht40_sta_assoc) { protmode = IEEE80211_HTINFO_OPMODE_HT20PR; } else { protmode = IEEE80211_HTINFO_OPMODE_PURE; } if (protmode != vap->iv_curhtprotmode) { vap->iv_curhtprotmode = protmode; /* Update VAP with new protection mode */ ieee80211_vap_update_ht_protmode(vap); } } /* * Handle an HT station joining a BSS. */ void ieee80211_ht_node_join(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; IEEE80211_LOCK_ASSERT(vap->iv_ic); if (ni->ni_flags & IEEE80211_NODE_HT) { vap->iv_ht_sta_assoc++; if (ni->ni_chw == 40) vap->iv_ht40_sta_assoc++; } htinfo_update(vap); } /* * Handle an HT station leaving a BSS. */ void ieee80211_ht_node_leave(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; IEEE80211_LOCK_ASSERT(vap->iv_ic); if (ni->ni_flags & IEEE80211_NODE_HT) { vap->iv_ht_sta_assoc--; if (ni->ni_chw == 40) vap->iv_ht40_sta_assoc--; } htinfo_update(vap); } /* * Public version of htinfo_update; used for processing * beacon frames from overlapping bss. * * Caller can specify either IEEE80211_HTINFO_OPMODE_MIXED * (on receipt of a beacon that advertises MIXED) or * IEEE80211_HTINFO_OPMODE_PROTOPT (on receipt of a beacon * from an overlapping legacy bss). We treat MIXED with * a higher precedence than PROTOPT (i.e. we will not change * PROTOPT -> MIXED; only MIXED -> PROTOPT). This * corresponds to how we handle things in htinfo_update. * */ void ieee80211_htprot_update(struct ieee80211vap *vap, int protmode) { struct ieee80211com *ic = vap->iv_ic; #define OPMODE(x) _IEEE80211_SHIFTMASK(x, IEEE80211_HTINFO_OPMODE) IEEE80211_LOCK(ic); /* track non-HT station presence */ KASSERT(protmode & IEEE80211_HTINFO_NONHT_PRESENT, ("protmode 0x%x", protmode)); vap->iv_flags_ht |= IEEE80211_FHT_NONHT_PR; vap->iv_lastnonht = ticks; if (protmode != vap->iv_curhtprotmode && (OPMODE(vap->iv_curhtprotmode) != IEEE80211_HTINFO_OPMODE_MIXED || OPMODE(protmode) == IEEE80211_HTINFO_OPMODE_PROTOPT)) { vap->iv_curhtprotmode = protmode; /* Update VAP with new protection mode */ ieee80211_vap_update_ht_protmode(vap); } IEEE80211_UNLOCK(ic); #undef OPMODE } /* * Time out presence of an overlapping bss with non-HT * stations. When operating in hostap mode we listen for * beacons from other stations and if we identify a non-HT * station is present we update the opmode field of the * HTINFO ie. To identify when all non-HT stations are * gone we time out this condition. */ void ieee80211_ht_timeout(struct ieee80211vap *vap) { IEEE80211_LOCK_ASSERT(vap->iv_ic); if ((vap->iv_flags_ht & IEEE80211_FHT_NONHT_PR) && ieee80211_time_after(ticks, vap->iv_lastnonht + IEEE80211_NONHT_PRESENT_AGE)) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_11N, "%s", "time out non-HT STA present on channel"); vap->iv_flags_ht &= ~IEEE80211_FHT_NONHT_PR; htinfo_update(vap); } } /* * Process an 802.11n HT capabilities ie.
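 *
 * A sketch of the field access (illustrative only; it mirrors the
 * parser below, including the 4-byte vendor OUI skip used by the
 * pre-standard HTCOMPAT form):
 */
#if 0
static uint16_t
example_htcap_info(const uint8_t *ie)
{
	if (ie[0] == IEEE80211_ELEMID_VENDOR)
		ie += 4;	/* pre-standard (vendor OUI) encapsulation */
	return (le16dec(ie + __offsetof(struct ieee80211_ie_htcap, hc_cap)));
}
#endif
/*
 * The parser: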
*/ void ieee80211_parse_htcap(struct ieee80211_node *ni, const uint8_t *ie) { if (ie[0] == IEEE80211_ELEMID_VENDOR) { /* * Station used Vendor OUI ie to associate; * mark the node so when we respond we'll use * the Vendor OUI's and not the standard ie's. */ ni->ni_flags |= IEEE80211_NODE_HTCOMPAT; ie += 4; } else ni->ni_flags &= ~IEEE80211_NODE_HTCOMPAT; ni->ni_htcap = le16dec(ie + __offsetof(struct ieee80211_ie_htcap, hc_cap)); ni->ni_htparam = ie[__offsetof(struct ieee80211_ie_htcap, hc_param)]; } static void htinfo_parse(struct ieee80211_node *ni, const struct ieee80211_ie_htinfo *htinfo) { uint16_t w; ni->ni_htctlchan = htinfo->hi_ctrlchannel; ni->ni_ht2ndchan = _IEEE80211_SHIFTMASK(htinfo->hi_byte1, IEEE80211_HTINFO_2NDCHAN); w = le16dec(&htinfo->hi_byte2); ni->ni_htopmode = _IEEE80211_SHIFTMASK(w, IEEE80211_HTINFO_OPMODE); w = le16dec(&htinfo->hi_byte45); ni->ni_htstbc = _IEEE80211_SHIFTMASK(w, IEEE80211_HTINFO_BASIC_STBCMCS); } /* * Parse an 802.11n HT info ie and save useful information * to the node state. Note this does not effect any state * changes such as a channel width change. */ void ieee80211_parse_htinfo(struct ieee80211_node *ni, const uint8_t *ie) { if (ie[0] == IEEE80211_ELEMID_VENDOR) ie += 4; htinfo_parse(ni, (const struct ieee80211_ie_htinfo *) ie); } /* * Handle 11n/11ac channel switch. * * Use the received HT/VHT ie's to identify the right channel to use. * If we cannot locate it in the channel table then fall back to * legacy operation. * * Note that we use this information to identify the node's * channel only; the caller is responsible for ensuring any * required channel change is done (e.g. in sta mode when * parsing the contents of a beacon frame). */ static int htinfo_update_chw(struct ieee80211_node *ni, int htflags, int vhtflags) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211_channel *c; int chanflags; int ret = 0; /* * First step - do HT/VHT only channel lookup based on operating mode * flags. This involves masking out the VHT flags as well. * Otherwise we end up doing the full channel walk each time * we trigger this, which is expensive. */ chanflags = (ni->ni_chan->ic_flags &~ (IEEE80211_CHAN_HT | IEEE80211_CHAN_VHT)) | htflags | vhtflags; if (chanflags == ni->ni_chan->ic_flags) goto done; /* * If HT /or/ VHT flags have changed then check both. * We need to start by picking a HT channel anyway. */ c = NULL; chanflags = (ni->ni_chan->ic_flags &~ (IEEE80211_CHAN_HT | IEEE80211_CHAN_VHT)) | htflags; /* XXX not right for ht40- */ c = ieee80211_find_channel(ic, ni->ni_chan->ic_freq, chanflags); if (c == NULL && (htflags & IEEE80211_CHAN_HT40)) { /* * No HT40 channel entry in our table; fall back * to HT20 operation. This should not happen. */ c = findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT20); #if 0 IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, ni, "no HT40 channel (freq %u), falling back to HT20", ni->ni_chan->ic_freq); #endif /* XXX stat */ } /* Nothing found - leave it alone; move on to VHT */ if (c == NULL) c = ni->ni_chan; /* * If it's non-HT, then bail out now. */ if (! IEEE80211_IS_CHAN_HT(c)) { IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, ni, "not HT; skipping VHT check (%u/0x%x)", c->ic_freq, c->ic_flags); goto done; } /* * Next step - look at the current VHT flags and determine * if we need to upgrade. Mask out the VHT and HT flags since * the vhtflags field will already have the correct HT * flags to use.
if (IEEE80211_CONF_VHT(ic) && ni->ni_vhtcap != 0 && vhtflags != 0) { chanflags = (c->ic_flags &~ (IEEE80211_CHAN_HT | IEEE80211_CHAN_VHT)) | vhtflags; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, ni, "%s: VHT; chanwidth=0x%02x; vhtflags=0x%08x", __func__, ni->ni_vht_chanwidth, vhtflags); IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, ni, "%s: VHT; trying lookup for %d/0x%08x", __func__, c->ic_freq, chanflags); c = ieee80211_find_channel(ic, c->ic_freq, chanflags); } /* Finally, if it's changed */ if (c != NULL && c != ni->ni_chan) { IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, ni, "switch station to %s%d channel %u/0x%x", IEEE80211_IS_CHAN_VHT(c) ? "VHT" : "HT", IEEE80211_IS_CHAN_VHT80(c) ? 80 : (IEEE80211_IS_CHAN_HT40(c) ? 40 : 20), c->ic_freq, c->ic_flags); ni->ni_chan = c; ret = 1; } /* NB: caller responsible for forcing any channel change */ done: /* update node's (11n) tx channel width */ ni->ni_chw = IEEE80211_IS_CHAN_HT40(ni->ni_chan) ? 40 : 20; return (ret); } /* * Update 11n MIMO PS state according to received htcap. */ static __inline int htcap_update_mimo_ps(struct ieee80211_node *ni) { uint16_t oflags = ni->ni_flags; switch (ni->ni_htcap & IEEE80211_HTCAP_SMPS) { case IEEE80211_HTCAP_SMPS_DYNAMIC: ni->ni_flags |= IEEE80211_NODE_MIMO_PS; ni->ni_flags |= IEEE80211_NODE_MIMO_RTS; break; case IEEE80211_HTCAP_SMPS_ENA: ni->ni_flags |= IEEE80211_NODE_MIMO_PS; ni->ni_flags &= ~IEEE80211_NODE_MIMO_RTS; break; case IEEE80211_HTCAP_SMPS_OFF: default: /* disable on rx of reserved value */ ni->ni_flags &= ~IEEE80211_NODE_MIMO_PS; ni->ni_flags &= ~IEEE80211_NODE_MIMO_RTS; break; } return (oflags ^ ni->ni_flags); } /* * Update short GI state according to received htcap * and local settings. */ static __inline void htcap_update_shortgi(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; ni->ni_flags &= ~(IEEE80211_NODE_SGI20|IEEE80211_NODE_SGI40); if ((ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20) && (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20)) ni->ni_flags |= IEEE80211_NODE_SGI20; if ((ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40) && (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40)) ni->ni_flags |= IEEE80211_NODE_SGI40; } /* * Update LDPC state according to received htcap * and local settings. */ static __inline void htcap_update_ldpc(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; if ((ni->ni_htcap & IEEE80211_HTCAP_LDPC) && (vap->iv_flags_ht & IEEE80211_FHT_LDPC_TX)) ni->ni_flags |= IEEE80211_NODE_LDPC; } /* * Parse and update HT-related state extracted from * the HT cap and info ie's. * * This is called from the STA management path and * the ieee80211_node_join() path. It will take into * account the IEs discovered during scanning and * adjust things accordingly. */ void ieee80211_ht_updateparams(struct ieee80211_node *ni, const uint8_t *htcapie, const uint8_t *htinfoie) { struct ieee80211vap *vap = ni->ni_vap; const struct ieee80211_ie_htinfo *htinfo; ieee80211_parse_htcap(ni, htcapie); if (vap->iv_htcaps & IEEE80211_HTC_SMPS) htcap_update_mimo_ps(ni); htcap_update_shortgi(ni); htcap_update_ldpc(ni); if (htinfoie[0] == IEEE80211_ELEMID_VENDOR) htinfoie += 4; htinfo = (const struct ieee80211_ie_htinfo *) htinfoie; htinfo_parse(ni, htinfo); /* * Defer the node channel change; we need to * update the VHT parameters before we do it.
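 * (In practice callers run ieee80211_ht_updateparams(), then any
 * VHT IE parsing, and only then ieee80211_ht_updateparams_final(),
 * which performs the deferred htinfo_update_chw() channel switch.)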
*/ if ((htinfo->hi_byte1 & IEEE80211_HTINFO_RIFSMODE_PERM) && (vap->iv_flags_ht & IEEE80211_FHT_RIFS)) ni->ni_flags |= IEEE80211_NODE_RIFS; else ni->ni_flags &= ~IEEE80211_NODE_RIFS; } static uint32_t ieee80211_vht_get_vhtflags(struct ieee80211_node *ni, uint32_t htflags) { struct ieee80211vap *vap = ni->ni_vap; uint32_t vhtflags = 0; vhtflags = 0; if (ni->ni_flags & IEEE80211_NODE_VHT && vap->iv_vht_flags & IEEE80211_FVHT_VHT) { if ((ni->ni_vht_chanwidth == IEEE80211_VHT_CHANWIDTH_160MHZ) && /* XXX 2 means "160MHz and 80+80MHz", 1 means "160MHz" */ (_IEEE80211_MASKSHIFT(vap->iv_vht_cap.vht_cap_info, IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_MASK) >= 1) && (vap->iv_vht_flags & IEEE80211_FVHT_USEVHT160)) { vhtflags = IEEE80211_CHAN_VHT160; /* Mirror the HT40 flags */ if (htflags == IEEE80211_CHAN_HT40U) { vhtflags |= IEEE80211_CHAN_HT40U; } else if (htflags == IEEE80211_CHAN_HT40D) { vhtflags |= IEEE80211_CHAN_HT40D; } } else if ((ni->ni_vht_chanwidth == IEEE80211_VHT_CHANWIDTH_80P80MHZ) && /* XXX 2 means "160MHz and 80+80MHz" */ (_IEEE80211_MASKSHIFT(vap->iv_vht_cap.vht_cap_info, IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_MASK) == 2) && (vap->iv_vht_flags & IEEE80211_FVHT_USEVHT80P80)) { vhtflags = IEEE80211_CHAN_VHT80P80; /* Mirror the HT40 flags */ if (htflags == IEEE80211_CHAN_HT40U) { vhtflags |= IEEE80211_CHAN_HT40U; } else if (htflags == IEEE80211_CHAN_HT40D) { vhtflags |= IEEE80211_CHAN_HT40D; } } else if ((ni->ni_vht_chanwidth == IEEE80211_VHT_CHANWIDTH_80MHZ) && (vap->iv_vht_flags & IEEE80211_FVHT_USEVHT80)) { vhtflags = IEEE80211_CHAN_VHT80; /* Mirror the HT40 flags */ if (htflags == IEEE80211_CHAN_HT40U) { vhtflags |= IEEE80211_CHAN_HT40U; } else if (htflags == IEEE80211_CHAN_HT40D) { vhtflags |= IEEE80211_CHAN_HT40D; } } else if (ni->ni_vht_chanwidth == IEEE80211_VHT_CHANWIDTH_USE_HT) { /* Mirror the HT40 flags */ /* * XXX TODO: if ht40 is disabled, but vht40 isn't * disabled then this logic will get very, very sad. * It's quite possible the only sane thing to do is * to not have vht40 as an option, and just obey * 'ht40' as that flag. */ if ((htflags == IEEE80211_CHAN_HT40U) && (vap->iv_vht_flags & IEEE80211_FVHT_USEVHT40)) { vhtflags = IEEE80211_CHAN_VHT40U | IEEE80211_CHAN_HT40U; } else if (htflags == IEEE80211_CHAN_HT40D && (vap->iv_vht_flags & IEEE80211_FVHT_USEVHT40)) { vhtflags = IEEE80211_CHAN_VHT40D | IEEE80211_CHAN_HT40D; } else if (htflags == IEEE80211_CHAN_HT20) { vhtflags = IEEE80211_CHAN_VHT20 | IEEE80211_CHAN_HT20; } } else { vhtflags = IEEE80211_CHAN_VHT20; } } return (vhtflags); } /* * Final part of updating the HT parameters. * * This is called from the STA management path and * the ieee80211_node_join() path. It will take into * account the IEs discovered during scanning and * adjust things accordingly. * * This is done after a call to ieee80211_ht_updateparams() * because it (and the upcoming VHT version of updateparams) * needs to ensure everything is parsed before htinfo_update_chw() * is called - which will change the channel config for the * node for us. */ int ieee80211_ht_updateparams_final(struct ieee80211_node *ni, const uint8_t *htcapie, const uint8_t *htinfoie) { struct ieee80211vap *vap = ni->ni_vap; const struct ieee80211_ie_htinfo *htinfo; int htflags, vhtflags; int ret = 0; htinfo = (const struct ieee80211_ie_htinfo *) htinfoie; htflags = (vap->iv_flags_ht & IEEE80211_FHT_HT) ? 
IEEE80211_CHAN_HT20 : 0; /* NB: honor operating mode constraint */ if ((htinfo->hi_byte1 & IEEE80211_HTINFO_TXWIDTH_2040) && (vap->iv_flags_ht & IEEE80211_FHT_USEHT40)) { if (ni->ni_ht2ndchan == IEEE80211_HTINFO_2NDCHAN_ABOVE) htflags = IEEE80211_CHAN_HT40U; else if (ni->ni_ht2ndchan == IEEE80211_HTINFO_2NDCHAN_BELOW) htflags = IEEE80211_CHAN_HT40D; } /* * VHT flags - do much the same; check whether VHT is available * and if so, what our ideal channel use would be based on our * capabilities and the (pre-parsed) VHT info IE. */ vhtflags = ieee80211_vht_get_vhtflags(ni, htflags); if (htinfo_update_chw(ni, htflags, vhtflags)) ret = 1; return (ret); } /* * Parse and update HT-related state extracted from the HT cap ie * for a station joining an HT BSS. * * This is called from the hostap path for each station. */ void ieee80211_ht_updatehtcap(struct ieee80211_node *ni, const uint8_t *htcapie) { struct ieee80211vap *vap = ni->ni_vap; ieee80211_parse_htcap(ni, htcapie); if (vap->iv_htcaps & IEEE80211_HTC_SMPS) htcap_update_mimo_ps(ni); htcap_update_shortgi(ni); htcap_update_ldpc(ni); } /* * Called once HT and VHT capabilities are parsed in hostap mode - * this will adjust the channel configuration of the given node * based on the configuration and capabilities. */ void ieee80211_ht_updatehtcap_final(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; int htflags; int vhtflags; /* NB: honor operating mode constraint */ /* XXX 40 MHz intolerant */ htflags = (vap->iv_flags_ht & IEEE80211_FHT_HT) ? IEEE80211_CHAN_HT20 : 0; if ((ni->ni_htcap & IEEE80211_HTCAP_CHWIDTH40) && (vap->iv_flags_ht & IEEE80211_FHT_USEHT40)) { if (IEEE80211_IS_CHAN_HT40U(vap->iv_bss->ni_chan)) htflags = IEEE80211_CHAN_HT40U; else if (IEEE80211_IS_CHAN_HT40D(vap->iv_bss->ni_chan)) htflags = IEEE80211_CHAN_HT40D; } /* * VHT flags - do much the same; check whether VHT is available * and if so, what our ideal channel use would be based on our * capabilities and the (pre-parsed) VHT info IE. */ vhtflags = ieee80211_vht_get_vhtflags(ni, htflags); (void) htinfo_update_chw(ni, htflags, vhtflags); } /* * Install received HT rate set by parsing the HT cap ie. */ int ieee80211_setup_htrates(struct ieee80211_node *ni, const uint8_t *ie, int flags) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; const struct ieee80211_ie_htcap *htcap; struct ieee80211_htrateset *rs; int i, maxequalmcs, maxunequalmcs; maxequalmcs = ic->ic_txstream * 8 - 1; maxunequalmcs = 0; if (ic->ic_htcaps & IEEE80211_HTC_TXUNEQUAL) { if (ic->ic_txstream >= 2) maxunequalmcs = 38; if (ic->ic_txstream >= 3) maxunequalmcs = 52; if (ic->ic_txstream >= 4) maxunequalmcs = 76; } rs = &ni->ni_htrates; memset(rs, 0, sizeof(*rs)); if (ie != NULL) { if (ie[0] == IEEE80211_ELEMID_VENDOR) ie += 4; htcap = (const struct ieee80211_ie_htcap *) ie; for (i = 0; i < IEEE80211_HTRATE_MAXSIZE; i++) { if (isclr(htcap->hc_mcsset, i)) continue; if (rs->rs_nrates == IEEE80211_HTRATE_MAXSIZE) { IEEE80211_NOTE(vap, IEEE80211_MSG_XRATE | IEEE80211_MSG_11N, ni, "WARNING, HT rate set too large; only " "using %u rates", IEEE80211_HTRATE_MAXSIZE); vap->iv_stats.is_rx_rstoobig++; break; } if (i <= 31 && i > maxequalmcs) continue; if (i == 32 && (ic->ic_htcaps & IEEE80211_HTC_TXMCS32) == 0) continue; if (i > 32 && i > maxunequalmcs) continue; rs->rs_rates[rs->rs_nrates++] = i; } } return ieee80211_fix_rate(ni, (struct ieee80211_rateset *) rs, flags); } /* * Mark rates in a node's HT rate set as basic according * to the information in the supplied HT info ie. 
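 *
 * A consumer can later recover the MCS index and the basic flag
 * from an entry in ni_htrates (sketch only; "rs" and "j" assumed):
 */
#if 0
	uint8_t mcs = rs->rs_rates[j] & IEEE80211_RATE_VAL;
	int basic = (rs->rs_rates[j] & IEEE80211_RATE_BASIC) != 0;
#endif
/*
 * The marking routine: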
*/ void ieee80211_setup_basic_htrates(struct ieee80211_node *ni, const uint8_t *ie) { const struct ieee80211_ie_htinfo *htinfo; struct ieee80211_htrateset *rs; int i, j; if (ie[0] == IEEE80211_ELEMID_VENDOR) ie += 4; htinfo = (const struct ieee80211_ie_htinfo *) ie; rs = &ni->ni_htrates; if (rs->rs_nrates == 0) { IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_XRATE | IEEE80211_MSG_11N, ni, "%s", "WARNING, empty HT rate set"); return; } for (i = 0; i < IEEE80211_HTRATE_MAXSIZE; i++) { if (isclr(htinfo->hi_basicmcsset, i)) continue; for (j = 0; j < rs->rs_nrates; j++) if ((rs->rs_rates[j] & IEEE80211_RATE_VAL) == i) rs->rs_rates[j] |= IEEE80211_RATE_BASIC; } } static void ampdu_tx_setup(struct ieee80211_tx_ampdu *tap) { callout_init(&tap->txa_timer, 1); tap->txa_flags |= IEEE80211_AGGR_SETUP; tap->txa_lastsample = ticks; } static void ampdu_tx_stop(struct ieee80211_tx_ampdu *tap) { struct ieee80211_node *ni = tap->txa_ni; struct ieee80211com *ic = ni->ni_ic; IEEE80211_NOTE(tap->txa_ni->ni_vap, IEEE80211_MSG_11N, tap->txa_ni, "%s: called", __func__); KASSERT(tap->txa_flags & IEEE80211_AGGR_SETUP, ("txa_flags 0x%x tid %d ac %d", tap->txa_flags, tap->txa_tid, TID_TO_WME_AC(tap->txa_tid))); /* * Stop BA stream if setup so driver has a chance * to reclaim any resources it might have allocated. */ ic->ic_addba_stop(ni, tap); /* * Stop any pending BAR transmit. */ bar_stop_timer(tap); /* * Reset packet estimate. */ ieee80211_txampdu_init_pps(tap); /* NB: clearing NAK means we may re-send ADDBA */ tap->txa_flags &= ~(IEEE80211_AGGR_SETUP | IEEE80211_AGGR_NAK); } /* * ADDBA response timeout. * * If software aggregation and per-TID queue management were done here, * that queue would be unpaused after the ADDBA timeout occurred. */ static void addba_timeout(void *arg) { struct ieee80211_tx_ampdu *tap = arg; struct ieee80211_node *ni = tap->txa_ni; struct ieee80211com *ic = ni->ni_ic; /* XXX ? */ tap->txa_flags &= ~IEEE80211_AGGR_XCHGPEND; tap->txa_attempts++; ic->ic_addba_response_timeout(ni, tap); } static void addba_start_timeout(struct ieee80211_tx_ampdu *tap) { /* XXX use CALLOUT_PENDING instead? */ callout_reset(&tap->txa_timer, ieee80211_addba_timeout, addba_timeout, tap); tap->txa_flags |= IEEE80211_AGGR_XCHGPEND; tap->txa_nextrequest = ticks + ieee80211_addba_timeout; } static void addba_stop_timeout(struct ieee80211_tx_ampdu *tap) { /* XXX use CALLOUT_PENDING instead? */ if (tap->txa_flags & IEEE80211_AGGR_XCHGPEND) { callout_stop(&tap->txa_timer); tap->txa_flags &= ~IEEE80211_AGGR_XCHGPEND; } } static void null_addba_response_timeout(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { } /* * Default method for requesting A-MPDU tx aggregation. * We set up the specified state block and start a timer * to wait for an ADDBA response frame. */ static int ieee80211_addba_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int dialogtoken, int baparamset, int batimeout) { int bufsiz; /* XXX locking */ tap->txa_token = dialogtoken; tap->txa_flags |= IEEE80211_AGGR_IMMEDIATE; bufsiz = _IEEE80211_MASKSHIFT(baparamset, IEEE80211_BAPS_BUFSIZ); tap->txa_wnd = (bufsiz == 0) ? IEEE80211_AGGR_BAWMAX : min(bufsiz, IEEE80211_AGGR_BAWMAX); addba_start_timeout(tap); return 1; } /* * Called by drivers that wish to request an ADDBA session be * set up. This brings it up and starts the request timer.
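 *
 * Sketch of a typical driver flow for firmware that negotiates the
 * ADDBA exchange itself (illustrative only; "ni" and "tid" come
 * from the driver's context):
 */
#if 0
	/* Firmware indicated it wants to aggregate on this TID. */
	ieee80211_ampdu_tx_request_ext(ni, tid);

	/* Later: firmware reports the BA session came up (status 1). */
	ieee80211_ampdu_tx_request_active_ext(ni, tid, 1);
#endif
/*
 * The entry points: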
*/ int ieee80211_ampdu_tx_request_ext(struct ieee80211_node *ni, int tid) { struct ieee80211_tx_ampdu *tap; if (tid < 0 || tid > 15) return (0); tap = &ni->ni_tx_ampdu[tid]; /* XXX locking */ if ((tap->txa_flags & IEEE80211_AGGR_SETUP) == 0) { /* do deferred setup of state */ ampdu_tx_setup(tap); } /* XXX hack for not doing proper locking */ tap->txa_flags &= ~IEEE80211_AGGR_NAK; addba_start_timeout(tap); return (1); } /* * Called by drivers that have marked a session as active. */ int ieee80211_ampdu_tx_request_active_ext(struct ieee80211_node *ni, int tid, int status) { struct ieee80211_tx_ampdu *tap; if (tid < 0 || tid > 15) return (0); tap = &ni->ni_tx_ampdu[tid]; /* XXX locking */ addba_stop_timeout(tap); if (status == 1) { tap->txa_flags |= IEEE80211_AGGR_RUNNING; tap->txa_attempts = 0; } else { /* mark tid so we don't try again */ tap->txa_flags |= IEEE80211_AGGR_NAK; } return (1); } /* * Default method for processing an A-MPDU tx aggregation * response. We shutdown any pending timer and update the * state block according to the reply. */ static int ieee80211_addba_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int status, int baparamset, int batimeout) { struct ieee80211vap *vap = ni->ni_vap; int bufsiz; /* XXX locking */ addba_stop_timeout(tap); if (status == IEEE80211_STATUS_SUCCESS) { bufsiz = _IEEE80211_MASKSHIFT(baparamset, IEEE80211_BAPS_BUFSIZ); /* XXX override our request? */ tap->txa_wnd = (bufsiz == 0) ? IEEE80211_AGGR_BAWMAX : min(bufsiz, IEEE80211_AGGR_BAWMAX); #ifdef __notyet__ tid = _IEEE80211_MASKSHIFT(baparamset, IEEE80211_BAPS_TID); #endif tap->txa_flags |= IEEE80211_AGGR_RUNNING; tap->txa_attempts = 0; /* TODO: this should be a vap flag */ if ((vap->iv_htcaps & IEEE80211_HTC_TX_AMSDU_AMPDU) && (ni->ni_flags & IEEE80211_NODE_AMSDU_TX) && (_IEEE80211_MASKSHIFT(baparamset, IEEE80211_BAPS_AMSDU))) tap->txa_flags |= IEEE80211_AGGR_AMSDU; else tap->txa_flags &= ~IEEE80211_AGGR_AMSDU; } else { /* mark tid so we don't try again */ tap->txa_flags |= IEEE80211_AGGR_NAK; } return 1; } /* * Default method for stopping A-MPDU tx aggregation. * Any timer is cleared and we drain any pending frames. */ static void ieee80211_addba_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { /* XXX locking */ addba_stop_timeout(tap); if (tap->txa_flags & IEEE80211_AGGR_RUNNING) { /* XXX clear aggregation queue */ tap->txa_flags &= ~(IEEE80211_AGGR_RUNNING | IEEE80211_AGGR_AMSDU); } tap->txa_attempts = 0; } /* * Process a received action frame using the default aggregation * policy. We intercept ADDBA-related frames and use them to * update our aggregation state. All other frames are passed up * for processing by ieee80211_recv_action. 
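 *
 * For reference, the ADDBA request body as decoded below (offsets
 * into the action frame payload, as used by this handler):
 *
 *	frm[2]		dialog token
 *	frm[3..4]	BA parameter set (le16: AMSDU/policy/TID/bufsiz)
 *	frm[5..6]	BA timeout (le16)
 *	frm[7..8]	BA starting sequence control (le16)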
*/ static int ht_recv_action_ba_addba_request(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_rx_ampdu *rap; uint8_t dialogtoken; uint16_t baparamset, batimeout, baseqctl; uint16_t args[5]; int tid; dialogtoken = frm[2]; baparamset = le16dec(frm+3); batimeout = le16dec(frm+5); baseqctl = le16dec(frm+7); tid = _IEEE80211_MASKSHIFT(baparamset, IEEE80211_BAPS_TID); IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "recv ADDBA request: dialogtoken %u baparamset 0x%x " "(tid %d bufsiz %d) batimeout %d baseqctl %d:%d amsdu %d", dialogtoken, baparamset, tid, _IEEE80211_MASKSHIFT(baparamset, IEEE80211_BAPS_BUFSIZ), batimeout, _IEEE80211_MASKSHIFT(baseqctl, IEEE80211_BASEQ_START), _IEEE80211_MASKSHIFT(baseqctl, IEEE80211_BASEQ_FRAG), _IEEE80211_MASKSHIFT(baparamset, IEEE80211_BAPS_AMSDU)); rap = &ni->ni_rx_ampdu[tid]; /* Send ADDBA response */ args[0] = dialogtoken; /* * NB: We ack only if the sta associated with HT and * the ap is configured to do AMPDU rx (the latter * violates the 11n spec and is mostly for testing). */ if ((ni->ni_flags & IEEE80211_NODE_AMPDU_RX) && (vap->iv_flags_ht & IEEE80211_FHT_AMPDU_RX)) { /* XXX TODO: handle ampdu_rx_start failure */ ic->ic_ampdu_rx_start(ni, rap, baparamset, batimeout, baseqctl); args[1] = IEEE80211_STATUS_SUCCESS; } else { IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "reject ADDBA request: %s", ni->ni_flags & IEEE80211_NODE_AMPDU_RX ? "administratively disabled" : "not negotiated for station"); vap->iv_stats.is_addba_reject++; args[1] = IEEE80211_STATUS_UNSPECIFIED; } /* XXX honor rap flags? */ args[2] = IEEE80211_BAPS_POLICY_IMMEDIATE | _IEEE80211_SHIFTMASK(tid, IEEE80211_BAPS_TID) | _IEEE80211_SHIFTMASK(rap->rxa_wnd, IEEE80211_BAPS_BUFSIZ) ; /* * TODO: we're out of iv_flags_ht fields; once * this is extended we should make this configurable. */ if ((baparamset & IEEE80211_BAPS_AMSDU) && (ni->ni_flags & IEEE80211_NODE_AMSDU_RX) && (vap->iv_htcaps & IEEE80211_HTC_RX_AMSDU_AMPDU)) args[2] |= IEEE80211_BAPS_AMSDU; args[3] = 0; args[4] = 0; ic->ic_send_action(ni, IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_ADDBA_RESPONSE, args); return 0; } static int ht_recv_action_ba_addba_response(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_tx_ampdu *tap; uint8_t dialogtoken, policy; uint16_t baparamset, batimeout, code; int tid; #ifdef IEEE80211_DEBUG int amsdu, bufsiz; #endif dialogtoken = frm[2]; code = le16dec(frm+3); baparamset = le16dec(frm+5); tid = _IEEE80211_MASKSHIFT(baparamset, IEEE80211_BAPS_TID); #ifdef IEEE80211_DEBUG bufsiz = _IEEE80211_MASKSHIFT(baparamset, IEEE80211_BAPS_BUFSIZ); amsdu = !! 
_IEEE80211_MASKSHIFT(baparamset, IEEE80211_BAPS_AMSDU); #endif policy = _IEEE80211_MASKSHIFT(baparamset, IEEE80211_BAPS_POLICY); batimeout = le16dec(frm+7); tap = &ni->ni_tx_ampdu[tid]; if ((tap->txa_flags & IEEE80211_AGGR_XCHGPEND) == 0) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni->ni_macaddr, "ADDBA response", "no pending ADDBA, tid %d dialogtoken %u " "code %d", tid, dialogtoken, code); vap->iv_stats.is_addba_norequest++; return 0; } if (dialogtoken != tap->txa_token) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni->ni_macaddr, "ADDBA response", "dialogtoken mismatch: waiting for %d, " "received %d, tid %d code %d", tap->txa_token, dialogtoken, tid, code); vap->iv_stats.is_addba_badtoken++; return 0; } /* NB: assumes IEEE80211_AGGR_IMMEDIATE is 1 */ if (policy != (tap->txa_flags & IEEE80211_AGGR_IMMEDIATE)) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni->ni_macaddr, "ADDBA response", "policy mismatch: expecting %d, " "received %d, tid %d code %d", tap->txa_flags & IEEE80211_AGGR_IMMEDIATE, policy, tid, code); vap->iv_stats.is_addba_badpolicy++; return 0; } #if 0 /* XXX we take MIN in ieee80211_addba_response */ if (bufsiz > IEEE80211_AGGR_BAWMAX) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni->ni_macaddr, "ADDBA response", "BA window too large: max %d, " "received %d, tid %d code %d", bufsiz, IEEE80211_AGGR_BAWMAX, tid, code); vap->iv_stats.is_addba_badbawinsize++; return 0; } #endif IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "recv ADDBA response: dialogtoken %u code %d " "baparamset 0x%x (tid %d bufsiz %d amsdu %d) batimeout %d", dialogtoken, code, baparamset, tid, bufsiz, amsdu, batimeout); ic->ic_addba_response(ni, tap, code, baparamset, batimeout); return 0; } static int ht_recv_action_ba_delba(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211_rx_ampdu *rap; struct ieee80211_tx_ampdu *tap; uint16_t baparamset; #ifdef IEEE80211_DEBUG uint16_t code; #endif int tid; baparamset = le16dec(frm+2); #ifdef IEEE80211_DEBUG code = le16dec(frm+4); #endif tid = _IEEE80211_MASKSHIFT(baparamset, IEEE80211_DELBAPS_TID); IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "recv DELBA: baparamset 0x%x (tid %d initiator %d) " "code %d", baparamset, tid, _IEEE80211_MASKSHIFT(baparamset, IEEE80211_DELBAPS_INIT), code); if ((baparamset & IEEE80211_DELBAPS_INIT) == 0) { tap = &ni->ni_tx_ampdu[tid]; ic->ic_addba_stop(ni, tap); } else { rap = &ni->ni_rx_ampdu[tid]; ic->ic_ampdu_rx_stop(ni, rap); } return 0; } static int ht_recv_action_ht_txchwidth(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { int chw; chw = (frm[2] == IEEE80211_A_HT_TXCHWIDTH_2040) ? 40 : 20; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "%s: HT txchwidth, width %d%s", __func__, chw, ni->ni_chw != chw ? "*" : ""); if (chw != ni->ni_chw) { /* XXX does this need to change the ht40 station count?
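 * (ieee80211_ht_node_join()/_leave() only adjust iv_ht40_sta_assoc
 * from ni_chw at association time, so a later 20 <-> 40 switch
 * here is not reflected in that count.)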
*/ ni->ni_chw = chw; /* XXX notify on change */ } return 0; } static int ht_recv_action_ht_mimopwrsave(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { const struct ieee80211_action_ht_mimopowersave *mps = (const struct ieee80211_action_ht_mimopowersave *) frm; /* XXX check iv_htcaps */ if (mps->am_control & IEEE80211_A_HT_MIMOPWRSAVE_ENA) ni->ni_flags |= IEEE80211_NODE_MIMO_PS; else ni->ni_flags &= ~IEEE80211_NODE_MIMO_PS; if (mps->am_control & IEEE80211_A_HT_MIMOPWRSAVE_MODE) ni->ni_flags |= IEEE80211_NODE_MIMO_RTS; else ni->ni_flags &= ~IEEE80211_NODE_MIMO_RTS; /* XXX notify on change */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "%s: HT MIMO PS (%s%s)", __func__, (ni->ni_flags & IEEE80211_NODE_MIMO_PS) ? "on" : "off", (ni->ni_flags & IEEE80211_NODE_MIMO_RTS) ? "+rts" : "" ); return 0; } /* * Transmit processing. */ /* * Check if A-MPDU should be requested/enabled for a stream. * We require a traffic rate above a per-AC threshold and we * also handle backoff from previous failed attempts. * * Drivers may override this method to bring in information * such as link state conditions in making the decision. */ static int ieee80211_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { struct ieee80211vap *vap = ni->ni_vap; if (tap->txa_avgpps < vap->iv_ampdu_mintraffic[TID_TO_WME_AC(tap->txa_tid)]) return 0; /* XXX check rssi? */ if (tap->txa_attempts >= ieee80211_addba_maxtries && ieee80211_time_after(ticks, tap->txa_nextrequest)) { /* * Don't retry too often; txa_nextrequest is set * to the minimum interval we'll retry after * ieee80211_addba_maxtries failed attempts are made. */ return 0; } IEEE80211_NOTE(vap, IEEE80211_MSG_11N, ni, "enable AMPDU on tid %d (%s), avgpps %d pkts %d attempt %d", tap->txa_tid, ieee80211_wme_acnames[TID_TO_WME_AC(tap->txa_tid)], tap->txa_avgpps, tap->txa_pkts, tap->txa_attempts); return 1; } /* * Request A-MPDU tx aggregation. Setup local state and * issue an ADDBA request. BA use will only happen after * the other end replies with ADDBA response. */ int ieee80211_ampdu_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { struct ieee80211com *ic = ni->ni_ic; uint16_t args[5]; int tid, dialogtoken; static int tokens = 0; /* XXX */ /* XXX locking */ if ((tap->txa_flags & IEEE80211_AGGR_SETUP) == 0) { /* do deferred setup of state */ ampdu_tx_setup(tap); } /* XXX hack for not doing proper locking */ tap->txa_flags &= ~IEEE80211_AGGR_NAK; dialogtoken = (tokens+1) % 63; /* XXX */ tid = tap->txa_tid; /* * XXX TODO: This is racy with any other parallel TX going on. 
:( */ tap->txa_start = ni->ni_txseqs[tid]; args[0] = dialogtoken; args[1] = 0; /* NB: status code not used */ args[2] = IEEE80211_BAPS_POLICY_IMMEDIATE | _IEEE80211_SHIFTMASK(tid, IEEE80211_BAPS_TID) | _IEEE80211_SHIFTMASK(IEEE80211_AGGR_BAWMAX, IEEE80211_BAPS_BUFSIZ) ; /* XXX TODO: this should be a flag, not iv_htcaps */ if ((ni->ni_flags & IEEE80211_NODE_AMSDU_TX) && (ni->ni_vap->iv_htcaps & IEEE80211_HTC_TX_AMSDU_AMPDU)) args[2] |= IEEE80211_BAPS_AMSDU; args[3] = 0; /* batimeout */ /* NB: do first so there's no race against reply */ if (!ic->ic_addba_request(ni, tap, dialogtoken, args[2], args[3])) { /* unable to setup state, don't make request */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: could not setup BA stream for TID %d AC %d", __func__, tap->txa_tid, TID_TO_WME_AC(tap->txa_tid)); /* defer next try so we don't slam the driver with requests */ tap->txa_attempts = ieee80211_addba_maxtries; /* NB: check in case driver wants to override */ if (tap->txa_nextrequest <= ticks) tap->txa_nextrequest = ticks + ieee80211_addba_backoff; return 0; } tokens = dialogtoken; /* allocate token */ /* NB: after calling ic_addba_request so driver can set txa_start */ args[4] = _IEEE80211_SHIFTMASK(tap->txa_start, IEEE80211_BASEQ_START) | _IEEE80211_SHIFTMASK(0, IEEE80211_BASEQ_FRAG) ; return ic->ic_send_action(ni, IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_ADDBA_REQUEST, args); } /* * Terminate an AMPDU tx stream. State is reclaimed * and the peer notified with a DelBA Action frame. */ void ieee80211_ampdu_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int reason) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; uint16_t args[4]; /* XXX locking */ tap->txa_flags &= ~IEEE80211_AGGR_BARPEND; if (IEEE80211_AMPDU_RUNNING(tap)) { IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "%s: stop BA stream for TID %d (reason: %d (%s))", __func__, tap->txa_tid, reason, ieee80211_reason_to_string(reason)); vap->iv_stats.is_ampdu_stop++; ic->ic_addba_stop(ni, tap); args[0] = tap->txa_tid; args[1] = IEEE80211_DELBAPS_INIT; args[2] = reason; /* XXX reason code */ ic->ic_send_action(ni, IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_DELBA, args); } else { IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "%s: BA stream for TID %d not running " "(reason: %d (%s))", __func__, tap->txa_tid, reason, ieee80211_reason_to_string(reason)); vap->iv_stats.is_ampdu_stop_failed++; } } /* XXX */ static void bar_start_timer(struct ieee80211_tx_ampdu *tap); static void bar_timeout(void *arg) { struct ieee80211_tx_ampdu *tap = arg; struct ieee80211_node *ni = tap->txa_ni; KASSERT((tap->txa_flags & IEEE80211_AGGR_XCHGPEND) == 0, ("bar/addba collision, flags 0x%x", tap->txa_flags)); IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: tid %u flags 0x%x attempts %d", __func__, tap->txa_tid, tap->txa_flags, tap->txa_attempts); /* guard against race with bar_tx_complete */ if ((tap->txa_flags & IEEE80211_AGGR_BARPEND) == 0) return; /* XXX ? */ if (tap->txa_attempts >= ieee80211_bar_maxtries) { struct ieee80211com *ic = ni->ni_ic; ni->ni_vap->iv_stats.is_ampdu_bar_tx_fail++; /* * If (at least) the last BAR TX timeout was due to * an ieee80211_send_bar() failures, then we need * to make sure we notify the driver that a BAR * TX did occur and fail. This gives the driver * a chance to undo any queue pause that may * have occurred. 
*/ ic->ic_bar_response(ni, tap, 1); ieee80211_ampdu_stop(ni, tap, IEEE80211_REASON_TIMEOUT); } else { ni->ni_vap->iv_stats.is_ampdu_bar_tx_retry++; if (ieee80211_send_bar(ni, tap, tap->txa_seqpending) != 0) { IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: failed to TX, starting timer\n", __func__); /* * If ieee80211_send_bar() fails here, the * timer may have stopped and/or the pending * flag may be clear. Because of this, * fake the BARPEND and reset the timer. * A retransmission attempt will then occur * during the next timeout. */ /* XXX locking */ tap->txa_flags |= IEEE80211_AGGR_BARPEND; bar_start_timer(tap); } } } static void bar_start_timer(struct ieee80211_tx_ampdu *tap) { IEEE80211_NOTE(tap->txa_ni->ni_vap, IEEE80211_MSG_11N, tap->txa_ni, "%s: called", __func__); callout_reset(&tap->txa_timer, ieee80211_bar_timeout, bar_timeout, tap); } static void bar_stop_timer(struct ieee80211_tx_ampdu *tap) { IEEE80211_NOTE(tap->txa_ni->ni_vap, IEEE80211_MSG_11N, tap->txa_ni, "%s: called", __func__); callout_stop(&tap->txa_timer); } static void bar_tx_complete(struct ieee80211_node *ni, void *arg, int status) { struct ieee80211_tx_ampdu *tap = arg; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: tid %u flags 0x%x pending %d status %d", __func__, tap->txa_tid, tap->txa_flags, callout_pending(&tap->txa_timer), status); ni->ni_vap->iv_stats.is_ampdu_bar_tx++; /* XXX locking */ if ((tap->txa_flags & IEEE80211_AGGR_BARPEND) && callout_pending(&tap->txa_timer)) { struct ieee80211com *ic = ni->ni_ic; if (status == 0) /* ACK'd */ bar_stop_timer(tap); ic->ic_bar_response(ni, tap, status); /* NB: just let timer expire so we pace requests */ } } static void ieee80211_bar_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int status) { IEEE80211_NOTE(tap->txa_ni->ni_vap, IEEE80211_MSG_11N, tap->txa_ni, "%s: called", __func__); if (status == 0) { /* got ACK */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "BAR moves BA win <%u:%u> (%u frames) txseq %u tid %u", tap->txa_start, IEEE80211_SEQ_ADD(tap->txa_start, tap->txa_wnd-1), tap->txa_qframes, tap->txa_seqpending, tap->txa_tid); /* NB: timer already stopped in bar_tx_complete */ tap->txa_start = tap->txa_seqpending; tap->txa_flags &= ~IEEE80211_AGGR_BARPEND; } } /* * Transmit a BAR frame to the specified node. The * BAR contents are drawn from the supplied aggregation * state associated with the node. * * NB: we only handle immediate ACK w/ compressed bitmap. 
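 *
 * As a worked example of the two fields packed below, the following
 * standalone sketch builds the BAR Control and Starting Sequence
 * Control values by hand. The mask and shift constants are written
 * out from the 802.11 BlockAckReq field layout; they are illustrative
 * stand-ins for the IEEE80211_BAR_* definitions, not copies of them.
 */
#if 0	/* Illustrative sketch only; not compiled as part of this file. */
#include <stdint.h>
#include <stdio.h>

#define BAR_NOACK	0x0001	/* B0: BAR ack policy (1 = no ack) */
#define BAR_COMP	0x0004	/* B2: compressed bitmap */
#define BAR_TID		0xf000	/* B12-15: TID */
#define BAR_TID_S	12
#define BAR_SEQ_START	0xfff0	/* seq control B4-15: starting seq# */
#define BAR_SEQ_START_S	4

int
main(void)
{
	int tid = 3, immediate = 1;	/* immediate BA was negotiated */
	uint16_t seq = 100;		/* first unacknowledged seq# */
	uint16_t barctl, barseqctl;

	barctl = (immediate ? 0 : BAR_NOACK) | BAR_COMP |
	    ((tid << BAR_TID_S) & BAR_TID);
	barseqctl = (seq << BAR_SEQ_START_S) & BAR_SEQ_START;
	/* Both fields go on the air little-endian, as below. */
	printf("barctl 0x%04x barseqctl 0x%04x\n", barctl, barseqctl);
	return (0);
}
#endif
/*
 * The transmit path itself follows.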
*/ int ieee80211_send_bar(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, ieee80211_seq seq) { #define senderr(_x, _v) do { vap->iv_stats._v++; ret = _x; goto bad; } while (0) struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct ieee80211_frame_bar *bar; struct mbuf *m; uint16_t barctl, barseqctl; uint8_t *frm; int tid, ret; IEEE80211_NOTE(tap->txa_ni->ni_vap, IEEE80211_MSG_11N, tap->txa_ni, "%s: called", __func__); if ((tap->txa_flags & IEEE80211_AGGR_RUNNING) == 0) { /* no ADDBA response, should not happen */ /* XXX stat+msg */ return EINVAL; } /* XXX locking */ bar_stop_timer(tap); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom, sizeof(*bar)); if (m == NULL) senderr(ENOMEM, is_tx_nobuf); if (!ieee80211_add_callback(m, bar_tx_complete, tap)) { m_freem(m); senderr(ENOMEM, is_tx_nobuf); /* XXX */ /* NOTREACHED */ } bar = mtod(m, struct ieee80211_frame_bar *); bar->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_CTL | IEEE80211_FC0_SUBTYPE_BAR; bar->i_fc[1] = 0; IEEE80211_ADDR_COPY(bar->i_ra, ni->ni_macaddr); IEEE80211_ADDR_COPY(bar->i_ta, vap->iv_myaddr); tid = tap->txa_tid; barctl = (tap->txa_flags & IEEE80211_AGGR_IMMEDIATE ? 0 : IEEE80211_BAR_NOACK) | IEEE80211_BAR_COMP | _IEEE80211_SHIFTMASK(tid, IEEE80211_BAR_TID) ; barseqctl = _IEEE80211_SHIFTMASK(seq, IEEE80211_BAR_SEQ_START); /* NB: known to have proper alignment */ bar->i_ctl = htole16(barctl); bar->i_seq = htole16(barseqctl); m->m_pkthdr.len = m->m_len = sizeof(struct ieee80211_frame_bar); M_WME_SETAC(m, WME_AC_VO); IEEE80211_NODE_STAT(ni, tx_mgmt); /* XXX tx_ctl? */ /* XXX locking */ /* init/bump attempts counter */ if ((tap->txa_flags & IEEE80211_AGGR_BARPEND) == 0) tap->txa_attempts = 1; else tap->txa_attempts++; tap->txa_seqpending = seq; tap->txa_flags |= IEEE80211_AGGR_BARPEND; IEEE80211_NOTE(vap, IEEE80211_MSG_DEBUG | IEEE80211_MSG_11N, ni, "send BAR: tid %u ctl 0x%x start %u (attempt %d)", tid, barctl, seq, tap->txa_attempts); /* * ic_raw_xmit will free the node reference * regardless of queue/TX success or failure. */ IEEE80211_TX_LOCK(ic); ret = ieee80211_raw_output(vap, ni, m, NULL); IEEE80211_TX_UNLOCK(ic); if (ret != 0) { IEEE80211_NOTE(vap, IEEE80211_MSG_DEBUG | IEEE80211_MSG_11N, ni, "send BAR: failed: (ret = %d)\n", ret); /* xmit failed, clear state flag */ tap->txa_flags &= ~IEEE80211_AGGR_BARPEND; vap->iv_stats.is_ampdu_bar_tx_fail++; return ret; } /* XXX hack against tx complete happening before timer is started */ if (tap->txa_flags & IEEE80211_AGGR_BARPEND) bar_start_timer(tap); return 0; bad: IEEE80211_NOTE(tap->txa_ni->ni_vap, IEEE80211_MSG_11N, tap->txa_ni, "%s: bad! ret=%d", __func__, ret); vap->iv_stats.is_ampdu_bar_tx_fail++; ieee80211_free_node(ni); return ret; #undef senderr } static int ht_action_output(struct ieee80211_node *ni, struct mbuf *m) { struct ieee80211_bpf_params params; memset(&params, 0, sizeof(params)); params.ibp_pri = WME_AC_VO; params.ibp_rate0 = ni->ni_txparms->mgmtrate; /* NB: we know all frames are unicast */ params.ibp_try0 = ni->ni_txparms->maxretry; params.ibp_power = ni->ni_txpower; return ieee80211_mgmt_output(ni, m, IEEE80211_FC0_SUBTYPE_ACTION, &params); } #define ADDSHORT(frm, v) do { \ frm[0] = (v) & 0xff; \ frm[1] = (v) >> 8; \ frm += 2; \ } while (0) /* * Send an action management frame. The arguments are stuffed * into a frame without inspection; the caller is assumed to * prepare them carefully (e.g. based on the aggregation state).
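 *
 * As a sketch of what that preparation amounts to for an ADDBA
 * request, the standalone example below packs a BA Parameter Set and
 * serializes the frame body with little-endian 16-bit stores, as
 * ADDSHORT does above. The BAPS_* constants spell out the assumed
 * 802.11 bit layout (B0 A-MSDU, B1 policy, B2-5 TID, B6-15 bufsiz)
 * and stand in for the IEEE80211_BAPS_* macros; the category and
 * action codes are likewise assumptions, not copied definitions.
 */
#if 0	/* Illustrative sketch only; not compiled as part of this file. */
#include <stdint.h>
#include <stdio.h>

#define BAPS_POLICY_IMMEDIATE	0x0002
#define BAPS_TID	0x003c
#define BAPS_TID_S	2
#define BAPS_BUFSIZ	0xffc0
#define BAPS_BUFSIZ_S	6

static uint8_t *
addshort(uint8_t *frm, uint16_t v)
{
	frm[0] = v & 0xff;		/* little-endian, like ADDSHORT */
	frm[1] = v >> 8;
	return (frm + 2);
}

int
main(void)
{
	uint8_t buf[16], *frm = buf;
	int tid = 5, bufsiz = 64;
	uint16_t baparamset;

	baparamset = BAPS_POLICY_IMMEDIATE |
	    ((tid << BAPS_TID_S) & BAPS_TID) |
	    ((bufsiz << BAPS_BUFSIZ_S) & BAPS_BUFSIZ);
	*frm++ = 3;			/* category: Block Ack (assumed) */
	*frm++ = 0;			/* action: ADDBA request (assumed) */
	*frm++ = 1;			/* dialog token */
	frm = addshort(frm, baparamset);
	frm = addshort(frm, 0);		/* BA timeout */
	frm = addshort(frm, 100 << 4);	/* starting sequence control */
	printf("%d byte ADDBA request body\n", (int)(frm - buf));
	return (0);
}
#endif
/*
 * The real encoder follows.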
*/ static int ht_send_action_ba_addba(struct ieee80211_node *ni, int category, int action, void *arg0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; uint16_t *args = arg0; struct mbuf *m; uint8_t *frm; IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "send ADDBA %s: dialogtoken %d status %d " "baparamset 0x%x (tid %d amsdu %d) batimeout 0x%x baseqctl 0x%x", (action == IEEE80211_ACTION_BA_ADDBA_REQUEST) ? "request" : "response", args[0], args[1], args[2], _IEEE80211_MASKSHIFT(args[2], IEEE80211_BAPS_TID), _IEEE80211_MASKSHIFT(args[2], IEEE80211_BAPS_AMSDU), args[3], args[4]); IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) /* action+category */ /* XXX may action payload */ + sizeof(struct ieee80211_action_ba_addbaresponse) ); if (m != NULL) { *frm++ = category; *frm++ = action; *frm++ = args[0]; /* dialog token */ if (action == IEEE80211_ACTION_BA_ADDBA_RESPONSE) ADDSHORT(frm, args[1]); /* status code */ ADDSHORT(frm, args[2]); /* baparamset */ ADDSHORT(frm, args[3]); /* batimeout */ if (action == IEEE80211_ACTION_BA_ADDBA_REQUEST) ADDSHORT(frm, args[4]); /* baseqctl */ m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); return ht_action_output(ni, m); } else { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); return ENOMEM; } } static int ht_send_action_ba_delba(struct ieee80211_node *ni, int category, int action, void *arg0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; uint16_t *args = arg0; struct mbuf *m; uint16_t baparamset; uint8_t *frm; baparamset = _IEEE80211_SHIFTMASK(args[0], IEEE80211_DELBAPS_TID) | args[1] ; IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "send DELBA action: tid %d, initiator %d reason %d (%s)", args[0], args[1], args[2], ieee80211_reason_to_string(args[2])); IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) /* action+category */ /* XXX may action payload */ + sizeof(struct ieee80211_action_ba_addbaresponse) ); if (m != NULL) { *frm++ = category; *frm++ = action; ADDSHORT(frm, baparamset); ADDSHORT(frm, args[2]); /* reason code */ m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); return ht_action_output(ni, m); } else { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); return ENOMEM; } } static int ht_send_action_ht_txchwidth(struct ieee80211_node *ni, int category, int action, void *arg0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct mbuf *m; uint8_t *frm; IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "send HT txchwidth: width %d", IEEE80211_IS_CHAN_HT40(ni->ni_chan) ? 
40 : 20); IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) /* action+category */ /* XXX may action payload */ + sizeof(struct ieee80211_action_ba_addbaresponse) ); if (m != NULL) { *frm++ = category; *frm++ = action; *frm++ = IEEE80211_IS_CHAN_HT40(ni->ni_chan) ? IEEE80211_A_HT_TXCHWIDTH_2040 : IEEE80211_A_HT_TXCHWIDTH_20; m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); return ht_action_output(ni, m); } else { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); return ENOMEM; } } #undef ADDSHORT /* * Construct the MCS bit mask for inclusion in an HT capabilities * information element. */ static void ieee80211_set_mcsset(struct ieee80211com *ic, uint8_t *frm) { int i; uint8_t txparams; KASSERT((ic->ic_rxstream > 0 && ic->ic_rxstream <= 4), ("ic_rxstream %d out of range", ic->ic_rxstream)); KASSERT((ic->ic_txstream > 0 && ic->ic_txstream <= 4), ("ic_txstream %d out of range", ic->ic_txstream)); for (i = 0; i < ic->ic_rxstream * 8; i++) setbit(frm, i); if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) && (ic->ic_htcaps & IEEE80211_HTC_RXMCS32)) setbit(frm, 32); if (ic->ic_htcaps & IEEE80211_HTC_RXUNEQUAL) { if (ic->ic_rxstream >= 2) { for (i = 33; i <= 38; i++) setbit(frm, i); } if (ic->ic_rxstream >= 3) { for (i = 39; i <= 52; i++) setbit(frm, i); } if (ic->ic_rxstream >= 4) { for (i = 53; i <= 76; i++) setbit(frm, i); } } txparams = 0x1; /* TX MCS set defined */ if (ic->ic_rxstream != ic->ic_txstream) { txparams |= 0x2; /* TX RX MCS not equal */ txparams |= (ic->ic_txstream - 1) << 2; /* num TX streams */ if (ic->ic_htcaps & IEEE80211_HTC_TXUNEQUAL) txparams |= 0x10; /* TX unequal modulation sup */ } frm[12] = txparams; } /* * Add body of an HTCAP information element. */ static uint8_t * ieee80211_add_htcap_body(uint8_t *frm, struct ieee80211_node *ni) { #define ADDSHORT(frm, v) do { \ frm[0] = (v) & 0xff; \ frm[1] = (v) >> 8; \ frm += 2; \ } while (0) struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; uint16_t caps, extcaps; int rxmax, density; /* HT capabilities */ caps = vap->iv_htcaps & 0xffff; /* * Note channel width depends on whether we are operating as * a sta or not. When operating as a sta we are generating * a request based on our desired configuration. Otherwise * we are operational and the channel attributes identify * how we've been setup (which might be different if a fixed * channel is specified). */ if (vap->iv_opmode == IEEE80211_M_STA) { /* override 20/40 use based on config */ if (vap->iv_flags_ht & IEEE80211_FHT_USEHT40) caps |= IEEE80211_HTCAP_CHWIDTH40; else caps &= ~IEEE80211_HTCAP_CHWIDTH40; /* Start by using the advertised settings */ rxmax = _IEEE80211_MASKSHIFT(ni->ni_htparam, IEEE80211_HTCAP_MAXRXAMPDU); density = _IEEE80211_MASKSHIFT(ni->ni_htparam, IEEE80211_HTCAP_MPDUDENSITY); IEEE80211_DPRINTF(vap, IEEE80211_MSG_11N, "%s: advertised rxmax=%d, density=%d, vap rxmax=%d, density=%d\n", __func__, rxmax, density, vap->iv_ampdu_rxmax, vap->iv_ampdu_density); /* Cap at VAP rxmax */ if (rxmax > vap->iv_ampdu_rxmax) rxmax = vap->iv_ampdu_rxmax; /* * If the VAP ampdu density value is greater, use that. * * (Larger density value == larger minimum gap between A-MPDU * subframes.)
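 *
 * Worked example of the merge rules used here, assuming the usual
 * 802.11n interpretation of the codes (rxmax 0..3 selects 8191,
 * 16383, 32767 or 65535 octets; density 0..7 selects no restriction,
 * 1/4, 1/2, 1, 2, 4, 8 or 16 us of start spacing):
 *
 *	rxmax   = min(peer advertised rxmax,   local iv_ampdu_rxmax)
 *	density = max(peer advertised density, local iv_ampdu_density)
 *
 * e.g. a peer advertising rxmax 3 and density 4 (2 us) against a
 * local configuration of rxmax 2 and density 5 (4 us) yields
 * rxmax 2, density 5: never claim more buffer, or tighter spacing,
 * than both sides can handle.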
*/ if (vap->iv_ampdu_density > density) density = vap->iv_ampdu_density; /* * NB: Hardware might support HT40 on some but not all * channels. We can't determine this earlier because only * after association the channel is upgraded to HT based * on the negotiated capabilities. */ if (ni->ni_chan != IEEE80211_CHAN_ANYC && findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT40U) == NULL && findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT40D) == NULL) caps &= ~IEEE80211_HTCAP_CHWIDTH40; } else { /* override 20/40 use based on current channel */ if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) caps |= IEEE80211_HTCAP_CHWIDTH40; else caps &= ~IEEE80211_HTCAP_CHWIDTH40; /* XXX TODO should it start by using advertised settings? */ rxmax = vap->iv_ampdu_rxmax; density = vap->iv_ampdu_density; } /* adjust short GI based on channel and config */ if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) == 0) caps &= ~IEEE80211_HTCAP_SHORTGI20; if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40) == 0 || (caps & IEEE80211_HTCAP_CHWIDTH40) == 0) caps &= ~IEEE80211_HTCAP_SHORTGI40; /* adjust STBC based on receive capabilities */ if ((vap->iv_flags_ht & IEEE80211_FHT_STBC_RX) == 0) caps &= ~IEEE80211_HTCAP_RXSTBC; /* adjust LDPC based on receive capabilities */ if ((vap->iv_flags_ht & IEEE80211_FHT_LDPC_RX) == 0) caps &= ~IEEE80211_HTCAP_LDPC; ADDSHORT(frm, caps); /* HT parameters */ *frm = _IEEE80211_SHIFTMASK(rxmax, IEEE80211_HTCAP_MAXRXAMPDU) | _IEEE80211_SHIFTMASK(density, IEEE80211_HTCAP_MPDUDENSITY) ; frm++; /* pre-zero remainder of ie */ memset(frm, 0, sizeof(struct ieee80211_ie_htcap) - __offsetof(struct ieee80211_ie_htcap, hc_mcsset)); /* supported MCS set */ /* * XXX: For sta mode the rate set should be restricted based * on the AP's capabilities, but ni_htrates isn't setup when * we're called to form an AssocReq frame so for now we're * restricted to the device capabilities. */ ieee80211_set_mcsset(ni->ni_ic, frm); frm += __offsetof(struct ieee80211_ie_htcap, hc_extcap) - __offsetof(struct ieee80211_ie_htcap, hc_mcsset); /* HT extended capabilities */ extcaps = vap->iv_htextcaps & 0xffff; ADDSHORT(frm, extcaps); frm += sizeof(struct ieee80211_ie_htcap) - __offsetof(struct ieee80211_ie_htcap, hc_txbf); return frm; #undef ADDSHORT } /* * Add 802.11n HT capabilities information element */ uint8_t * ieee80211_add_htcap(uint8_t *frm, struct ieee80211_node *ni) { frm[0] = IEEE80211_ELEMID_HTCAP; frm[1] = sizeof(struct ieee80211_ie_htcap) - 2; return ieee80211_add_htcap_body(frm + 2, ni); } /* * Non-associated probe request - add HT capabilities based on * the current channel configuration. */ static uint8_t * ieee80211_add_htcap_body_ch(uint8_t *frm, struct ieee80211vap *vap, struct ieee80211_channel *c) { #define ADDSHORT(frm, v) do { \ frm[0] = (v) & 0xff; \ frm[1] = (v) >> 8; \ frm += 2; \ } while (0) struct ieee80211com *ic = vap->iv_ic; uint16_t caps, extcaps; int rxmax, density; /* HT capabilities */ caps = vap->iv_htcaps & 0xffff; /* * We don't use this in STA mode; only in IBSS mode. * So in IBSS mode we base our HTCAP flags on the * given channel.
*/ /* override 20/40 use based on current channel */ if (IEEE80211_IS_CHAN_HT40(c)) caps |= IEEE80211_HTCAP_CHWIDTH40; else caps &= ~IEEE80211_HTCAP_CHWIDTH40; /* Use the currently configured values */ rxmax = vap->iv_ampdu_rxmax; density = vap->iv_ampdu_density; /* adjust short GI based on channel and config */ if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) == 0) caps &= ~IEEE80211_HTCAP_SHORTGI20; if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40) == 0 || (caps & IEEE80211_HTCAP_CHWIDTH40) == 0) caps &= ~IEEE80211_HTCAP_SHORTGI40; ADDSHORT(frm, caps); /* HT parameters */ *frm = _IEEE80211_SHIFTMASK(rxmax, IEEE80211_HTCAP_MAXRXAMPDU) | _IEEE80211_SHIFTMASK(density, IEEE80211_HTCAP_MPDUDENSITY) ; frm++; /* pre-zero remainder of ie */ memset(frm, 0, sizeof(struct ieee80211_ie_htcap) - __offsetof(struct ieee80211_ie_htcap, hc_mcsset)); /* supported MCS set */ /* * XXX: For sta mode the rate set should be restricted based * on the AP's capabilities, but ni_htrates isn't setup when * we're called to form an AssocReq frame so for now we're * restricted to the device capabilities. */ ieee80211_set_mcsset(ic, frm); frm += __offsetof(struct ieee80211_ie_htcap, hc_extcap) - __offsetof(struct ieee80211_ie_htcap, hc_mcsset); /* HT extended capabilities */ extcaps = vap->iv_htextcaps & 0xffff; ADDSHORT(frm, extcaps); frm += sizeof(struct ieee80211_ie_htcap) - __offsetof(struct ieee80211_ie_htcap, hc_txbf); return frm; #undef ADDSHORT } /* * Add 802.11n HT capabilities information element */ uint8_t * ieee80211_add_htcap_ch(uint8_t *frm, struct ieee80211vap *vap, struct ieee80211_channel *c) { frm[0] = IEEE80211_ELEMID_HTCAP; frm[1] = sizeof(struct ieee80211_ie_htcap) - 2; return ieee80211_add_htcap_body_ch(frm + 2, vap, c); } /* * Add Broadcom OUI wrapped standard HTCAP ie; this is * used for compatibility w/ pre-draft implementations. */ uint8_t * ieee80211_add_htcap_vendor(uint8_t *frm, struct ieee80211_node *ni) { frm[0] = IEEE80211_ELEMID_VENDOR; frm[1] = 4 + sizeof(struct ieee80211_ie_htcap) - 2; frm[2] = (BCM_OUI >> 0) & 0xff; frm[3] = (BCM_OUI >> 8) & 0xff; frm[4] = (BCM_OUI >> 16) & 0xff; frm[5] = BCM_OUI_HTCAP; return ieee80211_add_htcap_body(frm + 6, ni); } /* * Construct the MCS bit mask of basic rates * for inclusion in an HT information element. */ static void ieee80211_set_basic_htrates(uint8_t *frm, const struct ieee80211_htrateset *rs) { int i; for (i = 0; i < rs->rs_nrates; i++) { int r = rs->rs_rates[i] & IEEE80211_RATE_VAL; if ((rs->rs_rates[i] & IEEE80211_RATE_BASIC) && r < IEEE80211_HTRATE_MAXSIZE) { /* NB: this assumes a particular implementation */ setbit(frm, r); } } } /* * Update the HTINFO ie for a beacon frame. 
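 *
 * The byte updated below packs the secondary channel offset, the
 * allowed channel width and the RIFS mode into small bit fields.
 * The sketch shows that packing with literal values assumed from the
 * 802.11 HT Operation layout (bits 0-1 offset, bit 2 width, bit 3
 * RIFS); the IEEE80211_HTINFO_* flags express the same positions.
 */
#if 0	/* Illustrative sketch only; not compiled as part of this file. */
#include <stdint.h>
#include <stdio.h>

enum chan { HT20, HT40U, HT40D };

static uint8_t
htinfo_byte1(enum chan c, int rifs)
{
	uint8_t b = 0;

	if (c == HT40U)
		b |= 0x01;	/* secondary channel above primary */
	else if (c == HT40D)
		b |= 0x03;	/* secondary channel below primary */
	if (c != HT20)
		b |= 0x04;	/* any supported width may be used */
	if (rifs)
		b |= 0x08;	/* RIFS permitted */
	return (b);
}

int
main(void)
{
	printf("HT40U+RIFS -> 0x%02x\n", htinfo_byte1(HT40U, 1)); /* 0x0d */
	return (0);
}
#endif
/*
 * The beacon update itself follows.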
*/ void ieee80211_ht_update_beacon(struct ieee80211vap *vap, struct ieee80211_beacon_offsets *bo) { #define PROTMODE (IEEE80211_HTINFO_OPMODE|IEEE80211_HTINFO_NONHT_PRESENT) struct ieee80211_node *ni; const struct ieee80211_channel *bsschan; struct ieee80211com *ic = vap->iv_ic; struct ieee80211_ie_htinfo *ht = (struct ieee80211_ie_htinfo *) bo->bo_htinfo; ni = ieee80211_ref_node(vap->iv_bss); bsschan = ni->ni_chan; /* XXX only update on channel change */ ht->hi_ctrlchannel = ieee80211_chan2ieee(ic, bsschan); if (vap->iv_flags_ht & IEEE80211_FHT_RIFS) ht->hi_byte1 = IEEE80211_HTINFO_RIFSMODE_PERM; else ht->hi_byte1 = IEEE80211_HTINFO_RIFSMODE_PROH; if (IEEE80211_IS_CHAN_HT40U(bsschan)) ht->hi_byte1 |= IEEE80211_HTINFO_2NDCHAN_ABOVE; else if (IEEE80211_IS_CHAN_HT40D(bsschan)) ht->hi_byte1 |= IEEE80211_HTINFO_2NDCHAN_BELOW; else ht->hi_byte1 |= IEEE80211_HTINFO_2NDCHAN_NONE; if (IEEE80211_IS_CHAN_HT40(bsschan)) ht->hi_byte1 |= IEEE80211_HTINFO_TXWIDTH_2040; /* protection mode */ /* * XXX TODO: this uses the global flag, not the per-VAP flag. * Eventually (once the protection modes are done per-channel * rather than per-VAP) we can flip this over to be per-VAP but * using the channel protection mode. */ ht->hi_byte2 = (ht->hi_byte2 &~ PROTMODE) | ic->ic_curhtprotmode; ieee80211_free_node(ni); /* XXX propagate to vendor ie's */ #undef PROTMODE } /* * Add body of an HTINFO information element. * * NB: We don't use struct ieee80211_ie_htinfo because we can * be called to fillin both a standard ie and a compat ie that * has a vendor OUI at the front. */ static uint8_t * ieee80211_add_htinfo_body(uint8_t *frm, struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; /* pre-zero remainder of ie */ memset(frm, 0, sizeof(struct ieee80211_ie_htinfo) - 2); /* primary/control channel center */ *frm++ = ieee80211_chan2ieee(ic, ni->ni_chan); if (vap->iv_flags_ht & IEEE80211_FHT_RIFS) frm[0] = IEEE80211_HTINFO_RIFSMODE_PERM; else frm[0] = IEEE80211_HTINFO_RIFSMODE_PROH; if (IEEE80211_IS_CHAN_HT40U(ni->ni_chan)) frm[0] |= IEEE80211_HTINFO_2NDCHAN_ABOVE; else if (IEEE80211_IS_CHAN_HT40D(ni->ni_chan)) frm[0] |= IEEE80211_HTINFO_2NDCHAN_BELOW; else frm[0] |= IEEE80211_HTINFO_2NDCHAN_NONE; if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) frm[0] |= IEEE80211_HTINFO_TXWIDTH_2040; /* * Add current protection mode. Unlike for beacons, * this will respect the per-VAP flags. */ frm[1] = vap->iv_curhtprotmode; frm += 5; /* basic MCS set */ ieee80211_set_basic_htrates(frm, &ni->ni_htrates); frm += sizeof(struct ieee80211_ie_htinfo) - __offsetof(struct ieee80211_ie_htinfo, hi_basicmcsset); return frm; } /* * Add 802.11n HT information element. */ uint8_t * ieee80211_add_htinfo(uint8_t *frm, struct ieee80211_node *ni) { frm[0] = IEEE80211_ELEMID_HTINFO; frm[1] = sizeof(struct ieee80211_ie_htinfo) - 2; return ieee80211_add_htinfo_body(frm + 2, ni); } /* * Add Broadcom OUI wrapped standard HTINFO ie; this is * used for compatibility w/ pre-draft implementations. 
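 *
 * The vendor wrapping below is just a standard IE with a 3-byte OUI
 * and a subtype prepended to the body. A generic sketch, using a
 * placeholder OUI and subtype rather than Broadcom's values:
 */
#if 0	/* Illustrative sketch only; not compiled as part of this file. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ELEMID_VENDOR	221	/* 0xdd */

static int
add_vendor_ie(uint8_t *frm, uint32_t oui, uint8_t subtype,
    const uint8_t *body, uint8_t bodylen)
{
	frm[0] = ELEMID_VENDOR;
	frm[1] = 4 + bodylen;		/* OUI + subtype + body */
	frm[2] = (oui >> 0) & 0xff;	/* OUI emitted low byte first */
	frm[3] = (oui >> 8) & 0xff;
	frm[4] = (oui >> 16) & 0xff;
	frm[5] = subtype;
	memcpy(frm + 6, body, bodylen);
	return (6 + bodylen);
}

int
main(void)
{
	uint8_t ie[64];
	const uint8_t body[2] = { 0x01, 0x02 };

	/* 0x112233 and subtype 51 are placeholders, not real values. */
	printf("wrote %d bytes\n", add_vendor_ie(ie, 0x112233, 51, body, 2));
	return (0);
}
#endif
/*
 * The Broadcom variant follows.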
*/ uint8_t * ieee80211_add_htinfo_vendor(uint8_t *frm, struct ieee80211_node *ni) { frm[0] = IEEE80211_ELEMID_VENDOR; frm[1] = 4 + sizeof(struct ieee80211_ie_htinfo) - 2; frm[2] = (BCM_OUI >> 0) & 0xff; frm[3] = (BCM_OUI >> 8) & 0xff; frm[4] = (BCM_OUI >> 16) & 0xff; frm[5] = BCM_OUI_HTINFO; return ieee80211_add_htinfo_body(frm + 6, ni); } diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c index c3dd0302fb39..3a60feadcdab 100644 --- a/sys/netinet/igmp.c +++ b/sys/netinet/igmp.c @@ -1,3718 +1,3718 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Bruce Simpson. * Copyright (c) 1988 Stephen Deering. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Stephen Deering of Stanford University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)igmp.c 8.1 (Berkeley) 7/19/93 */ /* * Internet Group Management Protocol (IGMP) routines. * [RFC1112, RFC2236, RFC3376] * * Written by Steve Deering, Stanford, May 1988. * Modified by Rosen Sharma, Stanford, Aug 1994. * Modified by Bill Fenner, Xerox PARC, Feb 1995. * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995. * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson. 
* * MULTICAST Revision: 3.5.1.4 */ #include #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef KTR_IGMPV3 #define KTR_IGMPV3 KTR_INET #endif #define IGMP_SLOWHZ 2 /* 2 slow timeouts per second */ #define IGMP_FASTHZ 5 /* 5 fast timeouts per second */ #define IGMP_RESPONSE_BURST_INTERVAL (IGMP_FASTHZ / 2) static struct igmp_ifsoftc * igi_alloc_locked(struct ifnet *); static void igi_delete_locked(const struct ifnet *); static void igmp_dispatch_queue(struct mbufq *, int, const int); static void igmp_fasttimo_vnet(void); static void igmp_final_leave(struct in_multi *, struct igmp_ifsoftc *); static int igmp_handle_state_change(struct in_multi *, struct igmp_ifsoftc *); static int igmp_initial_join(struct in_multi *, struct igmp_ifsoftc *); static int igmp_input_v1_query(struct ifnet *, const struct ip *, const struct igmp *); static int igmp_input_v2_query(struct ifnet *, const struct ip *, const struct igmp *); static int igmp_input_v3_query(struct ifnet *, const struct ip *, /*const*/ struct igmpv3 *); static int igmp_input_v3_group_query(struct in_multi *, struct igmp_ifsoftc *, int, /*const*/ struct igmpv3 *); static int igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *, /*const*/ struct igmp *); static int igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *, /*const*/ struct igmp *); static void igmp_intr(struct mbuf *); static int igmp_isgroupreported(const struct in_addr); static struct mbuf * igmp_ra_alloc(void); #ifdef KTR static char * igmp_rec_type_to_str(const int); #endif static void igmp_set_version(struct igmp_ifsoftc *, const int); static void igmp_slowtimo_vnet(void); static int igmp_v1v2_queue_report(struct in_multi *, const int); static void igmp_v1v2_process_group_timer(struct in_multi *, const int); static void igmp_v1v2_process_querier_timers(struct igmp_ifsoftc *); static void igmp_v2_update_group(struct in_multi *, const int); static void igmp_v3_cancel_link_timers(struct igmp_ifsoftc *); static void igmp_v3_dispatch_general_query(struct igmp_ifsoftc *); static struct mbuf * igmp_v3_encap_report(struct ifnet *, struct mbuf *); static int igmp_v3_enqueue_group_record(struct mbufq *, struct in_multi *, const int, const int, const int); static int igmp_v3_enqueue_filter_change(struct mbufq *, struct in_multi *); static void igmp_v3_process_group_timers(struct in_multi_head *, struct mbufq *, struct mbufq *, struct in_multi *, const int); static int igmp_v3_merge_state_changes(struct in_multi *, struct mbufq *); static void igmp_v3_suppress_group_record(struct in_multi *); static int sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS); static int sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS); static int sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS); static int sysctl_igmp_stat(SYSCTL_HANDLER_ARGS); static const struct netisr_handler igmp_nh = { .nh_name = "igmp", .nh_handler = igmp_intr, .nh_proto = NETISR_IGMP, .nh_policy = NETISR_POLICY_SOURCE, }; /* * System-wide globals. * * Unlocked access to these is OK, except for the global IGMP output * queue. The IGMP subsystem lock ends up being system-wide for the moment, * because all VIMAGEs have to share a global output queue, as netisrs * themselves are not virtualized. * * Locking: * * The permitted lock order is: IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 
* Any may be taken independently; if any are held at the same * time, the above lock order must be followed. * * All output is delegated to the netisr. * Now that Giant has been eliminated, the netisr may be inlined. * * IN_MULTI_LIST_LOCK covers in_multi. * * IGMP_LOCK covers igmp_ifsoftc and any global variables in this file, * including the output queue. * * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of * per-link state iterators. * * igmp_ifsoftc is valid as long as PF_INET is attached to the interface, * therefore it is not refcounted. * We allow unlocked reads of igmp_ifsoftc when accessed via in_multi. * * Reference counting * * IGMP acquires its own reference every time an in_multi is passed to * it and the group is being joined for the first time. * * IGMP releases its reference(s) on in_multi in a deferred way, * because the operations which process the release run as part of * a loop whose control variables are directly affected by the release * (that, and not recursing on the IF_ADDR_LOCK). * * VIMAGE: Each in_multi corresponds to an ifp, and each ifp corresponds * to a vnet in ifp->if_vnet. * * SMPng: XXX We may potentially race operations on ifma_protospec. * The problem is that we currently lack a clean way of taking the * IF_ADDR_LOCK() between the ifnet and in layers w/o recursing, * as anything which modifies ifma needs to be covered by that lock. * So check for ifma_protospec being NULL before proceeding. */ struct mtx igmp_mtx; struct mbuf *m_raopt; /* Router Alert option */ static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state"); /* * VIMAGE-wide globals. * * The IGMPv3 timers themselves need to run per-image, however, for * historical reasons, timers run globally. This needs to be improved. * An ifnet can only be in one vimage at a time, and the loopback * ifnet, loif, is itself virtualized. * It would otherwise be possible to seriously hose IGMP state, * and create inconsistencies in upstream multicast routing, if you have * multiple VIMAGEs running on the same link joining different multicast * groups, UNLESS the "primary IP address" is different. This is because * IGMP for IPv4 does not force link-local addresses to be used for each * node, unlike MLD for IPv6. * Obviously the IGMPv3 per-interface state has per-vimage granularity * also as a result. * * FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection * policy to control the address used by IGMP on the link. 
*/ VNET_DEFINE_STATIC(int, interface_timers_running); /* IGMPv3 general * query response */ VNET_DEFINE_STATIC(int, state_change_timers_running); /* IGMPv3 state-change * retransmit */ VNET_DEFINE_STATIC(int, current_state_timers_running); /* IGMPv1/v2 host * report; IGMPv3 g/sg * query response */ #define V_interface_timers_running VNET(interface_timers_running) #define V_state_change_timers_running VNET(state_change_timers_running) #define V_current_state_timers_running VNET(current_state_timers_running) VNET_PCPUSTAT_DEFINE(struct igmpstat, igmpstat); VNET_PCPUSTAT_SYSINIT(igmpstat); VNET_PCPUSTAT_SYSUNINIT(igmpstat); VNET_DEFINE_STATIC(LIST_HEAD(, igmp_ifsoftc), igi_head) = LIST_HEAD_INITIALIZER(igi_head); VNET_DEFINE_STATIC(struct timeval, igmp_gsrdelay) = {10, 0}; #define V_igi_head VNET(igi_head) #define V_igmp_gsrdelay VNET(igmp_gsrdelay) VNET_DEFINE_STATIC(int, igmp_recvifkludge) = 1; VNET_DEFINE_STATIC(int, igmp_sendra) = 1; VNET_DEFINE_STATIC(int, igmp_sendlocal) = 1; VNET_DEFINE_STATIC(int, igmp_v1enable) = 1; VNET_DEFINE_STATIC(int, igmp_v2enable) = 1; VNET_DEFINE_STATIC(int, igmp_legacysupp); VNET_DEFINE_STATIC(int, igmp_default_version) = IGMP_VERSION_3; #define V_igmp_recvifkludge VNET(igmp_recvifkludge) #define V_igmp_sendra VNET(igmp_sendra) #define V_igmp_sendlocal VNET(igmp_sendlocal) #define V_igmp_v1enable VNET(igmp_v1enable) #define V_igmp_v2enable VNET(igmp_v2enable) #define V_igmp_legacysupp VNET(igmp_legacysupp) #define V_igmp_default_version VNET(igmp_default_version) /* * Virtualized sysctls. */ SYSCTL_PROC(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_MPSAFE, &VNET_NAME(igmpstat), 0, sysctl_igmp_stat, "S,igmpstat", "IGMP statistics (struct igmpstat, netinet/igmp_var.h)"); SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(igmp_recvifkludge), 0, "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address"); SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(igmp_sendra), 0, "Send IP Router Alert option in IGMPv2/v3 messages"); SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(igmp_sendlocal), 0, "Send IGMP membership reports for 224.0.0.0/24 groups"); SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(igmp_v1enable), 0, "Enable backwards compatibility with IGMPv1"); SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(igmp_v2enable), 0, "Enable backwards compatibility with IGMPv2"); SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(igmp_legacysupp), 0, "Allow v1/v2 reports to suppress v3 group responses"); SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &VNET_NAME(igmp_default_version), 0, sysctl_igmp_default_version, "I", "Default version of IGMP to run on each interface"); SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &VNET_NAME(igmp_gsrdelay.tv_sec), 0, sysctl_igmp_gsr, "I", "Rate limit for IGMPv3 Group-and-Source queries in seconds"); /* * Non-virtualized sysctls. 
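 *
 * Before those, an aside on the gsrdelay limit defined above: it is
 * enforced with a ratecheck-style test when group-and-source queries
 * arrive. A minimal single-threaded analog of that test, using
 * wall-clock seconds instead of the kernel's ratecheck(9)/timeval
 * machinery:
 */
#if 0	/* Illustrative sketch only; not compiled as part of this file. */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static bool
gsr_ratecheck(time_t *lastquery, time_t min_interval)
{
	time_t now = time(NULL);

	if (now - *lastquery < min_interval)
		return (false);		/* throttled */
	*lastquery = now;
	return (true);
}

int
main(void)
{
	time_t last = 0;
	bool first, second;

	first = gsr_ratecheck(&last, 10);
	second = gsr_ratecheck(&last, 10);
	printf("%d %d\n", first, second);	/* 1 0: second is throttled */
	return (0);
}
#endif
/*
 * The non-virtualized nodes: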
*/ static SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_igmp_ifinfo, "Per-interface IGMPv3 state"); static __inline void igmp_save_context(struct mbuf *m, struct ifnet *ifp) { #ifdef VIMAGE m->m_pkthdr.PH_loc.ptr = ifp->if_vnet; #endif /* VIMAGE */ m->m_pkthdr.rcvif = ifp; m->m_pkthdr.flowid = ifp->if_index; } static __inline void igmp_scrub_context(struct mbuf *m) { m->m_pkthdr.PH_loc.ptr = NULL; m->m_pkthdr.flowid = 0; } /* * Restore context from a queued IGMP output chain. * Return saved ifindex. * * VIMAGE: The assertion is there to make sure that we * actually called CURVNET_SET() with what's in the mbuf chain. */ static __inline uint32_t igmp_restore_context(struct mbuf *m) { #ifdef notyet #if defined(VIMAGE) && defined(INVARIANTS) KASSERT(curvnet == (m->m_pkthdr.PH_loc.ptr), ("%s: called when curvnet was not restored", __func__)); #endif #endif return (m->m_pkthdr.flowid); } /* * IGMP statistics. */ static int sysctl_igmp_stat(SYSCTL_HANDLER_ARGS) { struct igmpstat igps0; int error; char *p; error = sysctl_wire_old_buffer(req, sizeof(struct igmpstat)); if (error) return (error); if (req->oldptr != NULL) { if (req->oldlen < sizeof(struct igmpstat)) error = ENOMEM; else { /* * Copy the counters, and explicitly set the struct's * version and length fields. */ COUNTER_ARRAY_COPY(VNET(igmpstat), &igps0, sizeof(struct igmpstat) / sizeof(uint64_t)); igps0.igps_version = IGPS_VERSION_3; igps0.igps_len = IGPS_VERSION3_LEN; error = SYSCTL_OUT(req, &igps0, sizeof(struct igmpstat)); } } else req->validlen = sizeof(struct igmpstat); if (error) goto out; if (req->newptr != NULL) { if (req->newlen < sizeof(struct igmpstat)) error = ENOMEM; else error = SYSCTL_IN(req, &igps0, sizeof(igps0)); if (error) goto out; /* * igps0 must be "all zero". */ p = (char *)&igps0; while (p < (char *)&igps0 + sizeof(igps0) && *p == '\0') p++; if (p != (char *)&igps0 + sizeof(igps0)) { error = EINVAL; goto out; } COUNTER_ARRAY_ZERO(VNET(igmpstat), sizeof(struct igmpstat) / sizeof(uint64_t)); } out: return (error); } /* * Retrieve or set default IGMP version. * * VIMAGE: Assume curvnet set by caller. * SMPng: NOTE: Serialized by IGMP lock. */ static int sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS) { int error; int new; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error) return (error); IGMP_LOCK(); new = V_igmp_default_version; error = sysctl_handle_int(oidp, &new, 0, req); if (error || !req->newptr) goto out_locked; if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) { error = EINVAL; goto out_locked; } CTR2(KTR_IGMPV3, "change igmp_default_version from %d to %d", V_igmp_default_version, new); V_igmp_default_version = new; out_locked: IGMP_UNLOCK(); return (error); } /* * Retrieve or set threshold between group-source queries in seconds. * * VIMAGE: Assume curvnet set by caller. * SMPng: NOTE: Serialized by IGMP lock. */ static int sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS) { int error; int i; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error) return (error); IGMP_LOCK(); i = V_igmp_gsrdelay.tv_sec; error = sysctl_handle_int(oidp, &i, 0, req); if (error || !req->newptr) goto out_locked; if (i < -1 || i >= 60) { error = EINVAL; goto out_locked; } CTR2(KTR_IGMPV3, "change igmp_gsrdelay from %d to %d", V_igmp_gsrdelay.tv_sec, i); V_igmp_gsrdelay.tv_sec = i; out_locked: IGMP_UNLOCK(); return (error); } /* * Expose struct igmp_ifsoftc to userland, keyed by ifindex. * For use by ifmcstat(8). * * SMPng: NOTE: Does an unlocked ifindex space read. 
* VIMAGE: Assume curvnet set by caller. The node handler itself * is not directly virtualized. */ static int sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS) { struct epoch_tracker et; int *name; int error; u_int namelen; struct ifnet *ifp; struct igmp_ifsoftc *igi; name = (int *)arg1; namelen = arg2; if (req->newptr != NULL) return (EPERM); if (namelen != 1) return (EINVAL); error = sysctl_wire_old_buffer(req, sizeof(struct igmp_ifinfo)); if (error) return (error); IN_MULTI_LIST_LOCK(); IGMP_LOCK(); error = ENOENT; NET_EPOCH_ENTER(et); ifp = ifnet_byindex(name[0]); NET_EPOCH_EXIT(et); if (ifp == NULL) goto out_locked; LIST_FOREACH(igi, &V_igi_head, igi_link) { if (ifp == igi->igi_ifp) { struct igmp_ifinfo info; info.igi_version = igi->igi_version; info.igi_v1_timer = igi->igi_v1_timer; info.igi_v2_timer = igi->igi_v2_timer; info.igi_v3_timer = igi->igi_v3_timer; info.igi_flags = igi->igi_flags; info.igi_rv = igi->igi_rv; info.igi_qi = igi->igi_qi; info.igi_qri = igi->igi_qri; info.igi_uri = igi->igi_uri; error = SYSCTL_OUT(req, &info, sizeof(info)); break; } } out_locked: IGMP_UNLOCK(); IN_MULTI_LIST_UNLOCK(); return (error); } /* * Dispatch an entire queue of pending packet chains * using the netisr. * VIMAGE: Assumes the vnet pointer has been set. */ static void igmp_dispatch_queue(struct mbufq *mq, int limit, const int loop) { struct epoch_tracker et; struct mbuf *m; NET_EPOCH_ENTER(et); while ((m = mbufq_dequeue(mq)) != NULL) { CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, mq, m); if (loop) m->m_flags |= M_IGMP_LOOP; netisr_dispatch(NETISR_IGMP, m); if (--limit == 0) break; } NET_EPOCH_EXIT(et); } /* * Filter outgoing IGMP report state by group. * * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1). * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are * disabled for all groups in the 224.0.0.0/24 link-local scope. However, * this may break certain IGMP snooping switches which rely on the old * report behaviour. * * Return zero if the given group is one for which IGMP reports * should be suppressed, or non-zero if reports should be issued. */ static __inline int igmp_isgroupreported(const struct in_addr addr) { if (in_allhosts(addr) || ((!V_igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) return (0); return (1); } /* * Construct a Router Alert option to use in outgoing packets. */ static struct mbuf * igmp_ra_alloc(void) { struct mbuf *m; struct ipoption *p; m = m_get(M_WAITOK, MT_DATA); p = mtod(m, struct ipoption *); p->ipopt_dst.s_addr = INADDR_ANY; p->ipopt_list[0] = (char)IPOPT_RA; /* Router Alert Option */ p->ipopt_list[1] = 0x04; /* 4 bytes long */ p->ipopt_list[2] = IPOPT_EOL; /* End of IP option list */ p->ipopt_list[3] = 0x00; /* pad byte */ m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1]; return (m); } /* * Attach IGMP when PF_INET is attached to an interface. */ struct igmp_ifsoftc * igmp_domifattach(struct ifnet *ifp) { struct igmp_ifsoftc *igi; CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp, ifp->if_xname); IGMP_LOCK(); igi = igi_alloc_locked(ifp); if (!(ifp->if_flags & IFF_MULTICAST)) igi->igi_flags |= IGIF_SILENT; IGMP_UNLOCK(); return (igi); } /* * VIMAGE: assume curvnet set by caller. 
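 *
 * (An aside on igmp_ra_alloc() above: the option it builds is the
 * 4-byte IPv4 Router Alert of RFC 2113, an option type byte, a
 * length of 4 and a 2-byte zero value meaning "routers shall
 * examine this packet". Laid out by hand:)
 */
#if 0	/* Illustrative sketch only; not compiled as part of this file. */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint8_t opt[4];

	opt[0] = 148;	/* IPOPT_RA: copied|class|number = 0x94 */
	opt[1] = 4;	/* total option length, including type/len */
	opt[2] = 0;	/* 2-byte option value: 0 = examine packet */
	opt[3] = 0;
	printf("%02x %02x %02x %02x\n", opt[0], opt[1], opt[2], opt[3]);
	return (0);
}
#endif
/*
 * Allocation of the per-interface IGMP state follows.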
*/ static struct igmp_ifsoftc * igi_alloc_locked(/*const*/ struct ifnet *ifp) { struct igmp_ifsoftc *igi; IGMP_LOCK_ASSERT(); igi = malloc(sizeof(struct igmp_ifsoftc), M_IGMP, M_NOWAIT|M_ZERO); if (igi == NULL) goto out; igi->igi_ifp = ifp; igi->igi_version = V_igmp_default_version; igi->igi_flags = 0; igi->igi_rv = IGMP_RV_INIT; igi->igi_qi = IGMP_QI_INIT; igi->igi_qri = IGMP_QRI_INIT; igi->igi_uri = IGMP_URI_INIT; mbufq_init(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS); LIST_INSERT_HEAD(&V_igi_head, igi, igi_link); CTR2(KTR_IGMPV3, "allocate igmp_ifsoftc for ifp %p(%s)", ifp, ifp->if_xname); out: return (igi); } /* * Hook for ifdetach. * * NOTE: Some finalization tasks need to run before the protocol domain * is detached, but also before the link layer does its cleanup. * * SMPNG: igmp_ifdetach() needs to take IF_ADDR_LOCK(). * XXX This is also bitten by unlocked ifma_protospec access. */ void igmp_ifdetach(struct ifnet *ifp) { struct epoch_tracker et; struct igmp_ifsoftc *igi; struct ifmultiaddr *ifma; struct in_multi *inm; struct in_multi_head inm_free_tmp; CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp, ifp->if_xname); SLIST_INIT(&inm_free_tmp); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; if (igi->igi_version == IGMP_VERSION_3) { IF_ADDR_WLOCK(ifp); NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { inm = inm_ifmultiaddr_get_inm(ifma); if (inm == NULL) continue; if (inm->inm_state == IGMP_LEAVING_MEMBER) inm_rele_locked(&inm_free_tmp, inm); inm_clear_recorded(inm); } NET_EPOCH_EXIT(et); IF_ADDR_WUNLOCK(ifp); inm_release_list_deferred(&inm_free_tmp); } IGMP_UNLOCK(); } /* * Hook for domifdetach. */ void igmp_domifdetach(struct ifnet *ifp) { CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp, ifp->if_xname); IGMP_LOCK(); igi_delete_locked(ifp); IGMP_UNLOCK(); } static void igi_delete_locked(const struct ifnet *ifp) { struct igmp_ifsoftc *igi, *tigi; CTR3(KTR_IGMPV3, "%s: freeing igmp_ifsoftc for ifp %p(%s)", __func__, ifp, ifp->if_xname); IGMP_LOCK_ASSERT(); LIST_FOREACH_SAFE(igi, &V_igi_head, igi_link, tigi) { if (igi->igi_ifp == ifp) { /* * Free deferred General Query responses. */ mbufq_drain(&igi->igi_gq); LIST_REMOVE(igi, igi_link); free(igi, M_IGMP); return; } } } /* * Process a received IGMPv1 query. * Return non-zero if the message should be dropped. * * VIMAGE: The curvnet pointer is derived from the input ifp. */ static int igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip, const struct igmp *igmp) { struct ifmultiaddr *ifma; struct igmp_ifsoftc *igi; struct in_multi *inm; NET_EPOCH_ASSERT(); /* * IGMPv1 Host Membership Queries SHOULD always be addressed to * 224.0.0.1. They are always treated as General Queries. * igmp_group is always ignored. Do not drop it as a userland * daemon may wish to see it. * XXX SMPng: unlocked increments in igmpstat assumed atomic. */ if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) { IGMPSTAT_INC(igps_rcv_badqueries); return (0); } IGMPSTAT_INC(igps_rcv_gen_queries); IN_MULTI_LIST_LOCK(); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp)); if (igi->igi_flags & IGIF_LOOPBACK) { CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)", ifp, ifp->if_xname); goto out_locked; } /* * Switch to IGMPv1 host compatibility mode.
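 *
 * Worked numbers for the timer started below, assuming the usual
 * igmp_var.h values (IGMP_V1V2_MAX_RI of 10 seconds, IGMP_FASTHZ of
 * 5 fast ticks per second as defined earlier in this file, and
 * IGMP_RANDOM_DELAY(X) drawing uniformly from [1, X]):
 *
 *	IGMP_RANDOM_DELAY(10 * 5) -> 1..50 fast ticks
 *
 * i.e. each group's report is jittered across the full 10 second
 * v1 maximum response interval.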
*/ igmp_set_version(igi, IGMP_VERSION_1); CTR2(KTR_IGMPV3, "process v1 query on ifp %p(%s)", ifp, ifp->if_xname); /* * Start the timers in all of our group records * for the interface on which the query arrived, * except those which are already running. */ CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { inm = inm_ifmultiaddr_get_inm(ifma); if (inm == NULL) continue; if (inm->inm_timer != 0) continue; switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: break; case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: case IGMP_REPORTING_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_SLEEPING_MEMBER: case IGMP_AWAKENING_MEMBER: inm->inm_state = IGMP_REPORTING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY( IGMP_V1V2_MAX_RI * IGMP_FASTHZ); V_current_state_timers_running = 1; break; case IGMP_LEAVING_MEMBER: break; } } out_locked: IGMP_UNLOCK(); IN_MULTI_LIST_UNLOCK(); return (0); } /* * Process a received IGMPv2 general or group-specific query. */ static int igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, const struct igmp *igmp) { struct ifmultiaddr *ifma; struct igmp_ifsoftc *igi; struct in_multi *inm; int is_general_query; uint16_t timer; NET_EPOCH_ASSERT(); is_general_query = 0; /* * Validate address fields upfront. * XXX SMPng: unlocked increments in igmpstat assumed atomic. */ if (in_nullhost(igmp->igmp_group)) { /* * IGMPv2 General Query. * If this was not sent to the all-hosts group, ignore it. */ if (!in_allhosts(ip->ip_dst)) return (0); IGMPSTAT_INC(igps_rcv_gen_queries); is_general_query = 1; } else { /* IGMPv2 Group-Specific Query. */ IGMPSTAT_INC(igps_rcv_group_queries); } IN_MULTI_LIST_LOCK(); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp)); if (igi->igi_flags & IGIF_LOOPBACK) { CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)", ifp, ifp->if_xname); goto out_locked; } /* * Ignore v2 query if in v1 Compatibility Mode. */ if (igi->igi_version == IGMP_VERSION_1) goto out_locked; igmp_set_version(igi, IGMP_VERSION_2); timer = igmp->igmp_code * IGMP_FASTHZ / IGMP_TIMER_SCALE; if (timer == 0) timer = 1; if (is_general_query) { /* * For each reporting group joined on this * interface, kick the report timer. */ CTR2(KTR_IGMPV3, "process v2 general query on ifp %p(%s)", ifp, ifp->if_xname); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { inm = inm_ifmultiaddr_get_inm(ifma); if (inm == NULL) continue; igmp_v2_update_group(inm, timer); } } else { /* * Group-specific IGMPv2 query, we need only * look up the single group to process it. */ inm = inm_lookup(ifp, igmp->igmp_group); if (inm != NULL) { CTR3(KTR_IGMPV3, "process v2 query 0x%08x on ifp %p(%s)", ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname); igmp_v2_update_group(inm, timer); } } out_locked: IGMP_UNLOCK(); IN_MULTI_LIST_UNLOCK(); return (0); } /* * Update the report timer on a group in response to an IGMPv2 query. * * If we are becoming the reporting member for this group, start the timer. * If we already are the reporting member for this group, and timer is * below the threshold, reset it. * * We may be updating the group for the first time since we switched * to IGMPv3. If we are, then we must clear any recorded source lists, * and transition to REPORTING state; the group timer is overloaded * for group and group-source query responses. * * Unlike IGMPv3, the delay per group should be jittered * to avoid bursts of IGMPv2 reports. 
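 *
 * A sketch of that rule, under the assumption that
 * IGMP_RANDOM_DELAY(X) means "uniform in [1, X]": reschedule only if
 * no earlier report is already pending, and jitter the delay so
 * hosts sharing a link do not answer a query in one burst.
 */
#if 0	/* Illustrative sketch only; not compiled as part of this file. */
#include <stdio.h>
#include <stdlib.h>

static unsigned
random_delay(unsigned max_ticks)
{
	return (arc4random() % max_ticks + 1);
}

static void
update_group_timer(unsigned *timer, unsigned query_max)
{
	if (*timer != 0 && *timer <= query_max)
		return;			/* already due soon enough */
	*timer = random_delay(query_max);
}

int
main(void)
{
	unsigned t = 0;

	update_group_timer(&t, 50);	/* schedules 1..50 fast ticks */
	printf("timer %u\n", t);
	update_group_timer(&t, 50);	/* no-op: a report is pending */
	printf("timer %u\n", t);
	return (0);
}
#endif
/*
 * The in-kernel version follows.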
*/ static void igmp_v2_update_group(struct in_multi *inm, const int timer) { CTR4(KTR_IGMPV3, "%s: 0x%08x/%s timer=%d", __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname, timer); IN_MULTI_LIST_LOCK_ASSERT(); switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: break; case IGMP_REPORTING_MEMBER: if (inm->inm_timer != 0 && inm->inm_timer <= timer) { CTR1(KTR_IGMPV3, "%s: REPORTING and timer running, " "skipping.", __func__); break; } /* FALLTHROUGH */ case IGMP_SG_QUERY_PENDING_MEMBER: case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_AWAKENING_MEMBER: CTR1(KTR_IGMPV3, "%s: ->REPORTING", __func__); inm->inm_state = IGMP_REPORTING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY(timer); V_current_state_timers_running = 1; break; case IGMP_SLEEPING_MEMBER: CTR1(KTR_IGMPV3, "%s: ->AWAKENING", __func__); inm->inm_state = IGMP_AWAKENING_MEMBER; break; case IGMP_LEAVING_MEMBER: break; } } /* * Process a received IGMPv3 general, group-specific or * group-and-source-specific query. * Assumes m has already been pulled up to the full IGMP message length. * Return 0 if successful, otherwise an appropriate error code is returned. */ static int igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, /*const*/ struct igmpv3 *igmpv3) { struct igmp_ifsoftc *igi; struct in_multi *inm; int is_general_query; uint32_t maxresp, nsrc, qqi; uint16_t timer; uint8_t qrv; is_general_query = 0; CTR2(KTR_IGMPV3, "process v3 query on ifp %p(%s)", ifp, ifp->if_xname); maxresp = igmpv3->igmp_code; /* in 1/10ths of a second */ if (maxresp >= 128) { maxresp = IGMP_MANT(igmpv3->igmp_code) << (IGMP_EXP(igmpv3->igmp_code) + 3); } /* * Robustness must never be less than 2 for on-wire IGMPv3. * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make * an exception for interfaces whose IGMPv3 state changes * are redirected to loopback (e.g. MANET). */ qrv = IGMP_QRV(igmpv3->igmp_misc); if (qrv < 2) { CTR3(KTR_IGMPV3, "%s: clamping qrv %d to %d", __func__, qrv, IGMP_RV_INIT); qrv = IGMP_RV_INIT; } qqi = igmpv3->igmp_qqi; if (qqi >= 128) { qqi = IGMP_MANT(igmpv3->igmp_qqi) << (IGMP_EXP(igmpv3->igmp_qqi) + 3); } timer = maxresp * IGMP_FASTHZ / IGMP_TIMER_SCALE; if (timer == 0) timer = 1; nsrc = ntohs(igmpv3->igmp_numsrc); /* * Validate address fields and versions upfront before * accepting v3 query. * XXX SMPng: Unlocked access to igmpstat counters here. */ if (in_nullhost(igmpv3->igmp_group)) { /* * IGMPv3 General Query. * * General Queries SHOULD be directed to 224.0.0.1. * A general query with a source list has undefined * behaviour; discard it. */ IGMPSTAT_INC(igps_rcv_gen_queries); if (!in_allhosts(ip->ip_dst) || nsrc > 0) { IGMPSTAT_INC(igps_rcv_badqueries); return (0); } is_general_query = 1; } else { /* Group or group-source specific query. */ if (nsrc == 0) IGMPSTAT_INC(igps_rcv_group_queries); else IGMPSTAT_INC(igps_rcv_gsr_queries); } IN_MULTI_LIST_LOCK(); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp)); if (igi->igi_flags & IGIF_LOOPBACK) { CTR2(KTR_IGMPV3, "ignore v3 query on IGIF_LOOPBACK ifp %p(%s)", ifp, ifp->if_xname); goto out_locked; } /* * Discard the v3 query if we're in Compatibility Mode. * The RFC is not clearly worded as to whether hosts must stay in * compatibility mode until the Old Version Querier Present * timer expires.
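 *
 * (For reference, the exponential fields handled at the top of this
 * function, Max Resp Code and QQIC, decode per RFC 3376 section 4.1
 * as follows, assuming IGMP_MANT()/IGMP_EXP() fold in the implicit
 * leading 1:
 *
 *	if (code < 128)
 *		value = code;
 *	else
 *		value = (mant | 0x10) << (exp + 3);
 *	with mant = code & 0x0f, exp = (code >> 4) & 0x07
 *
 * e.g. code 0x8f gives mant 15, exp 0, so (15 | 16) << 3 = 248
 * tenths of a second.)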
*/ if (igi->igi_version != IGMP_VERSION_3) { CTR3(KTR_IGMPV3, "ignore v3 query in v%d mode on ifp %p(%s)", igi->igi_version, ifp, ifp->if_xname); goto out_locked; } igmp_set_version(igi, IGMP_VERSION_3); igi->igi_rv = qrv; igi->igi_qi = qqi; igi->igi_qri = maxresp; CTR4(KTR_IGMPV3, "%s: qrv %d qi %d qri %d", __func__, qrv, qqi, maxresp); if (is_general_query) { /* * Schedule a current-state report on this ifp for * all groups, possibly containing source lists. * If there is a pending General Query response * scheduled earlier than the selected delay, do * not schedule any other reports. * Otherwise, reset the interface timer. */ CTR2(KTR_IGMPV3, "process v3 general query on ifp %p(%s)", ifp, ifp->if_xname); if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) { igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer); V_interface_timers_running = 1; } } else { /* * Group-source-specific queries are throttled on * a per-group basis to defeat denial-of-service attempts. * Queries for groups we are not a member of on this * link are simply ignored. */ inm = inm_lookup(ifp, igmpv3->igmp_group); if (inm == NULL) goto out_locked; if (nsrc > 0) { if (!ratecheck(&inm->inm_lastgsrtv, &V_igmp_gsrdelay)) { CTR1(KTR_IGMPV3, "%s: GS query throttled.", __func__); IGMPSTAT_INC(igps_drop_gsr_queries); goto out_locked; } } CTR3(KTR_IGMPV3, "process v3 0x%08x query on ifp %p(%s)", ntohl(igmpv3->igmp_group.s_addr), ifp, ifp->if_xname); /* * If there is a pending General Query response * scheduled sooner than the selected delay, no * further report need be scheduled. * Otherwise, prepare to respond to the * group-specific or group-and-source query. */ if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) igmp_input_v3_group_query(inm, igi, timer, igmpv3); } out_locked: IGMP_UNLOCK(); IN_MULTI_LIST_UNLOCK(); return (0); } /* * Process a received IGMPv3 group-specific or group-and-source-specific * query. * Return <0 if any error occurred. Currently this is ignored. */ static int igmp_input_v3_group_query(struct in_multi *inm, struct igmp_ifsoftc *igi, int timer, /*const*/ struct igmpv3 *igmpv3) { int retval; uint16_t nsrc; IN_MULTI_LIST_LOCK_ASSERT(); IGMP_LOCK_ASSERT(); retval = 0; switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: case IGMP_SLEEPING_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_AWAKENING_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_LEAVING_MEMBER: return (retval); break; case IGMP_REPORTING_MEMBER: case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: break; } nsrc = ntohs(igmpv3->igmp_numsrc); /* * Deal with group-specific queries upfront. * If any group query is already pending, purge any recorded * source-list state if it exists, and schedule a query response * for this group-specific query. */ if (nsrc == 0) { if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER || inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) { inm_clear_recorded(inm); timer = min(inm->inm_timer, timer); } inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY(timer); V_current_state_timers_running = 1; return (retval); } /* * Deal with the case where a group-and-source-specific query has * been received but a group-specific query is already pending. 
*/ if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) { timer = min(inm->inm_timer, timer); inm->inm_timer = IGMP_RANDOM_DELAY(timer); V_current_state_timers_running = 1; return (retval); } /* * Finally, deal with the case where a group-and-source-specific * query has been received, where a response to a previous g-s-r * query exists, or none exists. * In this case, we need to parse the source-list which the Querier * has provided us with and check if we have any source list filter * entries at T1 for these sources. If we do not, there is no need to * schedule a report and the query may be dropped. * If we do, we must record them and schedule a current-state * report for those sources. * FIXME: Handling source lists larger than 1 mbuf requires that * we pass the mbuf chain pointer down to this function, and use * m_getptr() to walk the chain. */ if (inm->inm_nsrc > 0) { const struct in_addr *ap; int i, nrecorded; ap = (const struct in_addr *)(igmpv3 + 1); nrecorded = 0; for (i = 0; i < nsrc; i++, ap++) { retval = inm_record_source(inm, ap->s_addr); if (retval < 0) break; nrecorded += retval; } if (nrecorded > 0) { CTR1(KTR_IGMPV3, "%s: schedule response to SG query", __func__); inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY(timer); V_current_state_timers_running = 1; } } return (retval); } /* * Process a received IGMPv1 host membership report. * * NOTE: 0.0.0.0 workaround breaks const correctness. */ static int igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip, /*const*/ struct igmp *igmp) { struct in_ifaddr *ia; struct in_multi *inm; IGMPSTAT_INC(igps_rcv_reports); if (ifp->if_flags & IFF_LOOPBACK) return (0); if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) || !in_hosteq(igmp->igmp_group, ip->ip_dst)) { IGMPSTAT_INC(igps_rcv_badreports); return (EINVAL); } /* * RFC 3376, Section 4.2.13, 9.2, 9.3: * Booting clients may use the source address 0.0.0.0. Some * IGMP daemons may not know how to use IP_RECVIF to determine * the interface upon which this message was received. * Replace 0.0.0.0 with the subnet address if told to do so. */ if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) { IFP_TO_IA(ifp, ia); if (ia != NULL) ip->ip_src.s_addr = htonl(ia->ia_subnet); } CTR3(KTR_IGMPV3, "process v1 report 0x%08x on ifp %p(%s)", ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname); /* * IGMPv1 report suppression. * If we are a member of this group, and our membership should be * reported, stop our group timer and transition to the 'lazy' state. */ IN_MULTI_LIST_LOCK(); inm = inm_lookup(ifp, igmp->igmp_group); if (inm != NULL) { struct igmp_ifsoftc *igi; igi = inm->inm_igi; if (igi == NULL) { KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp)); goto out_locked; } IGMPSTAT_INC(igps_rcv_ourreports); /* * If we are in IGMPv3 host mode, do not allow the * other host's IGMPv1 report to suppress our reports * unless explicitly configured to do so.
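 *
 * ("Explicitly configured" refers to the V_igmp_legacysupp knob tested
 * below; on FreeBSD this should be the net.inet.igmp.legacysupp sysctl.)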
*/ if (igi->igi_version == IGMP_VERSION_3) { if (V_igmp_legacysupp) igmp_v3_suppress_group_record(inm); goto out_locked; } inm->inm_timer = 0; switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: break; case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_AWAKENING_MEMBER: CTR3(KTR_IGMPV3, "report suppressed for 0x%08x on ifp %p(%s)", ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname); case IGMP_SLEEPING_MEMBER: inm->inm_state = IGMP_SLEEPING_MEMBER; break; case IGMP_REPORTING_MEMBER: CTR3(KTR_IGMPV3, "report suppressed for 0x%08x on ifp %p(%s)", ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname); if (igi->igi_version == IGMP_VERSION_1) inm->inm_state = IGMP_LAZY_MEMBER; else if (igi->igi_version == IGMP_VERSION_2) inm->inm_state = IGMP_SLEEPING_MEMBER; break; case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: case IGMP_LEAVING_MEMBER: break; } } out_locked: IN_MULTI_LIST_UNLOCK(); return (0); } /* * Process a received IGMPv2 host membership report. * * NOTE: 0.0.0.0 workaround breaks const correctness. */ static int igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip, /*const*/ struct igmp *igmp) { struct in_ifaddr *ia; struct in_multi *inm; /* * Make sure we don't hear our own membership report. Fast * leave requires knowing that we are the only member of a * group. */ IFP_TO_IA(ifp, ia); if (ia != NULL && in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) { return (0); } IGMPSTAT_INC(igps_rcv_reports); if (ifp->if_flags & IFF_LOOPBACK) { return (0); } if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) || !in_hosteq(igmp->igmp_group, ip->ip_dst)) { IGMPSTAT_INC(igps_rcv_badreports); return (EINVAL); } /* * RFC 3376, Section 4.2.13, 9.2, 9.3: * Booting clients may use the source address 0.0.0.0. Some * IGMP daemons may not know how to use IP_RECVIF to determine * the interface upon which this message was received. * Replace 0.0.0.0 with the subnet address if told to do so. */ if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) { if (ia != NULL) ip->ip_src.s_addr = htonl(ia->ia_subnet); } CTR3(KTR_IGMPV3, "process v2 report 0x%08x on ifp %p(%s)", ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname); /* * IGMPv2 report suppression. * If we are a member of this group, and our membership should be * reported, and our group timer is pending or about to be reset, * stop our group timer by transitioning to the 'lazy' state. */ IN_MULTI_LIST_LOCK(); inm = inm_lookup(ifp, igmp->igmp_group); if (inm != NULL) { struct igmp_ifsoftc *igi; igi = inm->inm_igi; KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp)); IGMPSTAT_INC(igps_rcv_ourreports); /* * If we are in IGMPv3 host mode, do not allow the * other host's IGMPv1 report to suppress our reports * unless explicitly configured to do so. 
*/ if (igi->igi_version == IGMP_VERSION_3) { if (V_igmp_legacysupp) igmp_v3_suppress_group_record(inm); goto out_locked; } inm->inm_timer = 0; switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: case IGMP_SLEEPING_MEMBER: break; case IGMP_REPORTING_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_AWAKENING_MEMBER: CTR3(KTR_IGMPV3, "report suppressed for 0x%08x on ifp %p(%s)", ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname); case IGMP_LAZY_MEMBER: inm->inm_state = IGMP_LAZY_MEMBER; break; case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: case IGMP_LEAVING_MEMBER: break; } } out_locked: IN_MULTI_LIST_UNLOCK(); return (0); } int igmp_input(struct mbuf **mp, int *offp, int proto) { int iphlen; struct ifnet *ifp; struct igmp *igmp; struct ip *ip; struct mbuf *m; int igmplen; int minlen; int queryver; CTR3(KTR_IGMPV3, "%s: called w/mbuf (%p,%d)", __func__, *mp, *offp); m = *mp; ifp = m->m_pkthdr.rcvif; *mp = NULL; IGMPSTAT_INC(igps_rcv_total); ip = mtod(m, struct ip *); iphlen = *offp; igmplen = ntohs(ip->ip_len) - iphlen; /* * Validate lengths. */ if (igmplen < IGMP_MINLEN) { IGMPSTAT_INC(igps_rcv_tooshort); m_freem(m); return (IPPROTO_DONE); } /* * Always pullup to the minimum size for v1/v2 or v3 * to amortize calls to m_pullup(). */ minlen = iphlen; if (igmplen >= IGMP_V3_QUERY_MINLEN) minlen += IGMP_V3_QUERY_MINLEN; else minlen += IGMP_MINLEN; if ((!M_WRITABLE(m) || m->m_len < minlen) && (m = m_pullup(m, minlen)) == NULL) { IGMPSTAT_INC(igps_rcv_tooshort); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); /* * Validate checksum. */ m->m_data += iphlen; m->m_len -= iphlen; igmp = mtod(m, struct igmp *); if (in_cksum(m, igmplen)) { IGMPSTAT_INC(igps_rcv_badsum); m_freem(m); return (IPPROTO_DONE); } m->m_data -= iphlen; m->m_len += iphlen; /* * IGMP control traffic is link-scope, and must have a TTL of 1. * DVMRP traffic (e.g. mrinfo, mtrace) is an exception; * probe packets may come from beyond the LAN. */ if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) { IGMPSTAT_INC(igps_rcv_badttl); m_freem(m); return (IPPROTO_DONE); } switch (igmp->igmp_type) { case IGMP_HOST_MEMBERSHIP_QUERY: if (igmplen == IGMP_MINLEN) { if (igmp->igmp_code == 0) queryver = IGMP_VERSION_1; else queryver = IGMP_VERSION_2; } else if (igmplen >= IGMP_V3_QUERY_MINLEN) { queryver = IGMP_VERSION_3; } else { IGMPSTAT_INC(igps_rcv_tooshort); m_freem(m); return (IPPROTO_DONE); } switch (queryver) { case IGMP_VERSION_1: IGMPSTAT_INC(igps_rcv_v1v2_queries); if (!V_igmp_v1enable) break; if (igmp_input_v1_query(ifp, ip, igmp) != 0) { m_freem(m); return (IPPROTO_DONE); } break; case IGMP_VERSION_2: IGMPSTAT_INC(igps_rcv_v1v2_queries); if (!V_igmp_v2enable) break; if (igmp_input_v2_query(ifp, ip, igmp) != 0) { m_freem(m); return (IPPROTO_DONE); } break; case IGMP_VERSION_3: { struct igmpv3 *igmpv3; uint16_t igmpv3len; uint16_t nsrc; IGMPSTAT_INC(igps_rcv_v3_queries); igmpv3 = (struct igmpv3 *)igmp; /* * Validate length based on source count. */ nsrc = ntohs(igmpv3->igmp_numsrc); if (nsrc * sizeof(in_addr_t) > UINT16_MAX - iphlen - IGMP_V3_QUERY_MINLEN) { IGMPSTAT_INC(igps_rcv_tooshort); m_freem(m); return (IPPROTO_DONE); } /* * m_pullup() may modify m, so pullup in * this scope. 
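 *
 * A standalone sketch of the pullup length computed below; the local
 * name is illustrative only:
 *
 *	len = iphlen + IGMP_V3_QUERY_MINLEN + nsrc * sizeof(struct in_addr);
 *
 * The bounds check on nsrc above guarantees this sum still fits in the
 * 16-bit IP total length, so m_pullup() is never asked for an
 * impossible amount of contiguous data.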
*/ igmpv3len = iphlen + IGMP_V3_QUERY_MINLEN + sizeof(struct in_addr) * nsrc; if ((!M_WRITABLE(m) || m->m_len < igmpv3len) && (m = m_pullup(m, igmpv3len)) == NULL) { IGMPSTAT_INC(igps_rcv_tooshort); return (IPPROTO_DONE); } igmpv3 = (struct igmpv3 *)(mtod(m, uint8_t *) + iphlen); if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) { m_freem(m); return (IPPROTO_DONE); } } break; } break; case IGMP_v1_HOST_MEMBERSHIP_REPORT: if (!V_igmp_v1enable) break; if (igmp_input_v1_report(ifp, ip, igmp) != 0) { m_freem(m); return (IPPROTO_DONE); } break; case IGMP_v2_HOST_MEMBERSHIP_REPORT: if (!V_igmp_v2enable) break; if (!ip_checkrouteralert(m)) IGMPSTAT_INC(igps_rcv_nora); if (igmp_input_v2_report(ifp, ip, igmp) != 0) { m_freem(m); return (IPPROTO_DONE); } break; case IGMP_v3_HOST_MEMBERSHIP_REPORT: /* * Hosts do not need to process IGMPv3 membership reports, * as report suppression is no longer required. */ if (!ip_checkrouteralert(m)) IGMPSTAT_INC(igps_rcv_nora); break; default: break; } /* * Pass all valid IGMP packets up to any process(es) listening on a * raw IGMP socket. */ *mp = m; return (rip_input(mp, offp, proto)); } /* * Fast timeout handler (global). * VIMAGE: Timeout handlers are expected to service all vimages. */ static struct callout igmpfast_callout; static void igmp_fasttimo(void *arg __unused) { struct epoch_tracker et; VNET_ITERATOR_DECL(vnet_iter); NET_EPOCH_ENTER(et); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); igmp_fasttimo_vnet(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); NET_EPOCH_EXIT(et); callout_reset(&igmpfast_callout, hz / IGMP_FASTHZ, igmp_fasttimo, NULL); } /* * Fast timeout handler (per-vnet). * Sends are shuffled off to a netisr to deal with Giant. * * VIMAGE: Assume caller has set up our curvnet. */ static void igmp_fasttimo_vnet(void) { struct mbufq scq; /* State-change packets */ struct mbufq qrq; /* Query response packets */ struct ifnet *ifp; struct igmp_ifsoftc *igi; struct ifmultiaddr *ifma; struct in_multi *inm; struct in_multi_head inm_free_tmp; int loop, uri_fasthz; loop = 0; uri_fasthz = 0; /* * Quick check to see if any work needs to be done, in order to * minimize the overhead of fasttimo processing. * SMPng: XXX Unlocked reads. */ if (!V_current_state_timers_running && !V_interface_timers_running && !V_state_change_timers_running) return; SLIST_INIT(&inm_free_tmp); IN_MULTI_LIST_LOCK(); IGMP_LOCK(); /* * IGMPv3 General Query response timer processing. */ if (V_interface_timers_running) { CTR1(KTR_IGMPV3, "%s: interface timers running", __func__); V_interface_timers_running = 0; LIST_FOREACH(igi, &V_igi_head, igi_link) { if (igi->igi_v3_timer == 0) { /* Do nothing. */ } else if (--igi->igi_v3_timer == 0) { igmp_v3_dispatch_general_query(igi); } else { V_interface_timers_running = 1; } } } if (!V_current_state_timers_running && !V_state_change_timers_running) goto out_locked; V_current_state_timers_running = 0; V_state_change_timers_running = 0; CTR1(KTR_IGMPV3, "%s: state change timers running", __func__); /* * IGMPv1/v2/v3 host report and state-change timer processing. * Note: Processing a v3 group timer may remove a node. */ LIST_FOREACH(igi, &V_igi_head, igi_link) { ifp = igi->igi_ifp; if (igi->igi_version == IGMP_VERSION_3) { loop = (igi->igi_flags & IGIF_LOOPBACK) ? 
1 : 0; uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri * IGMP_FASTHZ); mbufq_init(&qrq, IGMP_MAX_G_GS_PACKETS); mbufq_init(&scq, IGMP_MAX_STATE_CHANGE_PACKETS); } IF_ADDR_WLOCK(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { inm = inm_ifmultiaddr_get_inm(ifma); if (inm == NULL) continue; switch (igi->igi_version) { case IGMP_VERSION_1: case IGMP_VERSION_2: igmp_v1v2_process_group_timer(inm, igi->igi_version); break; case IGMP_VERSION_3: igmp_v3_process_group_timers(&inm_free_tmp, &qrq, &scq, inm, uri_fasthz); break; } } IF_ADDR_WUNLOCK(ifp); if (igi->igi_version == IGMP_VERSION_3) { igmp_dispatch_queue(&qrq, 0, loop); igmp_dispatch_queue(&scq, 0, loop); /* * Free the in_multi reference(s) for this * IGMP lifecycle. */ inm_release_list_deferred(&inm_free_tmp); } } out_locked: IGMP_UNLOCK(); IN_MULTI_LIST_UNLOCK(); } /* * Update host report group timer for IGMPv1/v2. * Will update the global pending timer flags. */ static void igmp_v1v2_process_group_timer(struct in_multi *inm, const int version) { int report_timer_expired; IN_MULTI_LIST_LOCK_ASSERT(); IGMP_LOCK_ASSERT(); if (inm->inm_timer == 0) { report_timer_expired = 0; } else if (--inm->inm_timer == 0) { report_timer_expired = 1; } else { V_current_state_timers_running = 1; return; } switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_SLEEPING_MEMBER: case IGMP_AWAKENING_MEMBER: break; case IGMP_REPORTING_MEMBER: if (report_timer_expired) { inm->inm_state = IGMP_IDLE_MEMBER; (void)igmp_v1v2_queue_report(inm, (version == IGMP_VERSION_2) ? IGMP_v2_HOST_MEMBERSHIP_REPORT : IGMP_v1_HOST_MEMBERSHIP_REPORT); } break; case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: case IGMP_LEAVING_MEMBER: break; } } /* * Update a group's timers for IGMPv3. * Will update the global pending timer flags. * Note: Unlocked read from igi. */ static void igmp_v3_process_group_timers(struct in_multi_head *inmh, struct mbufq *qrq, struct mbufq *scq, struct in_multi *inm, const int uri_fasthz) { int query_response_timer_expired; int state_change_retransmit_timer_expired; IN_MULTI_LIST_LOCK_ASSERT(); IGMP_LOCK_ASSERT(); query_response_timer_expired = 0; state_change_retransmit_timer_expired = 0; /* * During a transition from v1/v2 compatibility mode back to v3, * a group record in REPORTING state may still have its group * timer active. This is a no-op in this function; it is easier * to deal with it here than to complicate the slow-timeout path. */ if (inm->inm_timer == 0) { query_response_timer_expired = 0; } else if (--inm->inm_timer == 0) { query_response_timer_expired = 1; } else { V_current_state_timers_running = 1; } if (inm->inm_sctimer == 0) { state_change_retransmit_timer_expired = 0; } else if (--inm->inm_sctimer == 0) { state_change_retransmit_timer_expired = 1; } else { V_state_change_timers_running = 1; } /* We are in fasttimo, so be quick about it. */ if (!state_change_retransmit_timer_expired && !query_response_timer_expired) return; switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: case IGMP_SLEEPING_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_AWAKENING_MEMBER: case IGMP_IDLE_MEMBER: break; case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: /* * Respond to a previously pending Group-Specific * or Group-and-Source-Specific query by enqueueing * the appropriate Current-State report for * immediate transmission. 
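 *
 * (The call below passes is_state_change=0 and is_group_query=1, and
 * sets is_source_query only when answering a group-and-source query,
 * so a MODE_IS_INCLUDE/MODE_IS_EXCLUDE current-state record is built,
 * limited to the recorded sources in the group-and-source case.)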
*/ if (query_response_timer_expired) { int retval __unused; retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1, (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)); CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, retval); inm->inm_state = IGMP_REPORTING_MEMBER; /* XXX Clear recorded sources for next time. */ inm_clear_recorded(inm); } /* FALLTHROUGH */ case IGMP_REPORTING_MEMBER: case IGMP_LEAVING_MEMBER: if (state_change_retransmit_timer_expired) { /* * State-change retransmission timer fired. * If there are any further pending retransmissions, * set the global pending state-change flag, and * reset the timer. */ if (--inm->inm_scrv > 0) { inm->inm_sctimer = uri_fasthz; V_state_change_timers_running = 1; } /* * Retransmit the previously computed state-change * report. If there are no further pending * retransmissions, the mbuf queue will be consumed. * Update T0 state to T1 as we have now sent * a state-change. */ (void)igmp_v3_merge_state_changes(inm, scq); inm_commit(inm); CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname); /* * If we are leaving the group for good, make sure * we release IGMP's reference to it. * This release must be deferred using a SLIST, * as we are called from a loop which traverses * the in_ifmultiaddr TAILQ. */ if (inm->inm_state == IGMP_LEAVING_MEMBER && inm->inm_scrv == 0) { inm->inm_state = IGMP_NOT_MEMBER; inm_rele_locked(inmh, inm); } } break; } } /* * Suppress a group's pending response to a group or source/group query. * * Do NOT suppress state changes, as doing so leads to IGMPv3 inconsistency. * Do NOT update ST1/ST0, as this operation merely suppresses * the currently pending group record. * Do NOT suppress the response to a general query. It is possible but * it would require adding another state or flag. */ static void igmp_v3_suppress_group_record(struct in_multi *inm) { IN_MULTI_LIST_LOCK_ASSERT(); KASSERT(inm->inm_igi->igi_version == IGMP_VERSION_3, ("%s: not IGMPv3 mode on link", __func__)); if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER && inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER) return; if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) inm_clear_recorded(inm); inm->inm_timer = 0; inm->inm_state = IGMP_REPORTING_MEMBER; } /* * Switch to a different IGMP version on the given interface, * as per Section 7.2.1. */ static void igmp_set_version(struct igmp_ifsoftc *igi, const int version) { int old_version_timer; IGMP_LOCK_ASSERT(); CTR4(KTR_IGMPV3, "%s: switching to v%d on ifp %p(%s)", __func__, version, igi->igi_ifp, igi->igi_ifp->if_xname); if (version == IGMP_VERSION_1 || version == IGMP_VERSION_2) { /* * Compute the "Older Version Querier Present" timer as per * Section 8.12. */ old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri; old_version_timer *= IGMP_SLOWHZ; if (version == IGMP_VERSION_1) { igi->igi_v1_timer = old_version_timer; igi->igi_v2_timer = 0; } else if (version == IGMP_VERSION_2) { igi->igi_v1_timer = 0; igi->igi_v2_timer = old_version_timer; } } if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) { if (igi->igi_version != IGMP_VERSION_2) { igi->igi_version = IGMP_VERSION_2; igmp_v3_cancel_link_timers(igi); } } else if (igi->igi_v1_timer > 0) { if (igi->igi_version != IGMP_VERSION_1) { igi->igi_version = IGMP_VERSION_1; igmp_v3_cancel_link_timers(igi); } } } /* * Cancel pending IGMPv3 timers for the given link and all groups * joined on it; state-change, general-query, and group-query timers.
* * Only ever called on a transition from v3 to Compatibility mode. Kill * the timers stone dead (this may be expensive for large N groups), they * will be restarted if Compatibility Mode deems that they must be due to * query processing. */ static void igmp_v3_cancel_link_timers(struct igmp_ifsoftc *igi) { struct ifmultiaddr *ifma; struct ifnet *ifp; struct in_multi *inm; struct in_multi_head inm_free_tmp; CTR3(KTR_IGMPV3, "%s: cancel v3 timers on ifp %p(%s)", __func__, igi->igi_ifp, igi->igi_ifp->if_xname); IN_MULTI_LIST_LOCK_ASSERT(); IGMP_LOCK_ASSERT(); NET_EPOCH_ASSERT(); SLIST_INIT(&inm_free_tmp); /* * Stop the v3 General Query Response on this link stone dead. * If fasttimo is woken up due to V_interface_timers_running, * the flag will be cleared if there are no pending link timers. */ igi->igi_v3_timer = 0; /* * Now clear the current-state and state-change report timers * for all memberships scoped to this link. */ ifp = igi->igi_ifp; IF_ADDR_WLOCK(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { inm = inm_ifmultiaddr_get_inm(ifma); if (inm == NULL) continue; switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_SLEEPING_MEMBER: case IGMP_AWAKENING_MEMBER: /* * These states are either not relevant in v3 mode, * or are unreported. Do nothing. */ break; case IGMP_LEAVING_MEMBER: /* * If we are leaving the group and switching to * compatibility mode, we need to release the final * reference held for issuing the INCLUDE {}, and * transition to REPORTING to ensure the host leave * message is sent upstream to the old querier -- * transition to NOT would lose the leave and race. */ inm_rele_locked(&inm_free_tmp, inm); /* FALLTHROUGH */ case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: inm_clear_recorded(inm); /* FALLTHROUGH */ case IGMP_REPORTING_MEMBER: inm->inm_state = IGMP_REPORTING_MEMBER; break; } /* * Always clear state-change and group report timers. * Free any pending IGMPv3 state-change records. */ inm->inm_sctimer = 0; inm->inm_timer = 0; mbufq_drain(&inm->inm_scq); } IF_ADDR_WUNLOCK(ifp); inm_release_list_deferred(&inm_free_tmp); } /* * Update the Older Version Querier Present timers for a link. * See Section 7.2.1 of RFC 3376. */ static void igmp_v1v2_process_querier_timers(struct igmp_ifsoftc *igi) { IGMP_LOCK_ASSERT(); if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) { /* * IGMPv1 and IGMPv2 Querier Present timers expired. * * Revert to IGMPv3. */ if (igi->igi_version != IGMP_VERSION_3) { CTR5(KTR_IGMPV3, "%s: transition from v%d -> v%d on %p(%s)", __func__, igi->igi_version, IGMP_VERSION_3, igi->igi_ifp, igi->igi_ifp->if_xname); igi->igi_version = IGMP_VERSION_3; } } else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) { /* * IGMPv1 Querier Present timer expired, * IGMPv2 Querier Present timer running. * If IGMPv2 was disabled since last timeout, * revert to IGMPv3. * If IGMPv2 is enabled, revert to IGMPv2. 
*/ if (!V_igmp_v2enable) { CTR5(KTR_IGMPV3, "%s: transition from v%d -> v%d on %p(%s)", __func__, igi->igi_version, IGMP_VERSION_3, igi->igi_ifp, igi->igi_ifp->if_xname); igi->igi_v2_timer = 0; igi->igi_version = IGMP_VERSION_3; } else { --igi->igi_v2_timer; if (igi->igi_version != IGMP_VERSION_2) { CTR5(KTR_IGMPV3, "%s: transition from v%d -> v%d on %p(%s)", __func__, igi->igi_version, IGMP_VERSION_2, igi->igi_ifp, igi->igi_ifp->if_xname); igi->igi_version = IGMP_VERSION_2; igmp_v3_cancel_link_timers(igi); } } } else if (igi->igi_v1_timer > 0) { /* * IGMPv1 Querier Present timer running. * Stop IGMPv2 timer if running. * * If IGMPv1 was disabled since last timeout, * revert to IGMPv3. * If IGMPv1 is enabled, reset IGMPv2 timer if running. */ if (!V_igmp_v1enable) { CTR5(KTR_IGMPV3, "%s: transition from v%d -> v%d on %p(%s)", __func__, igi->igi_version, IGMP_VERSION_3, igi->igi_ifp, igi->igi_ifp->if_xname); igi->igi_v1_timer = 0; igi->igi_version = IGMP_VERSION_3; } else { --igi->igi_v1_timer; } if (igi->igi_v2_timer > 0) { CTR3(KTR_IGMPV3, "%s: cancel v2 timer on %p(%s)", __func__, igi->igi_ifp, igi->igi_ifp->if_xname); igi->igi_v2_timer = 0; } } } /* * Global slowtimo handler. * VIMAGE: Timeout handlers are expected to service all vimages. */ static struct callout igmpslow_callout; static void igmp_slowtimo(void *arg __unused) { struct epoch_tracker et; VNET_ITERATOR_DECL(vnet_iter); NET_EPOCH_ENTER(et); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); igmp_slowtimo_vnet(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); NET_EPOCH_EXIT(et); callout_reset(&igmpslow_callout, hz / IGMP_SLOWHZ, igmp_slowtimo, NULL); } /* * Per-vnet slowtimo handler. */ static void igmp_slowtimo_vnet(void) { struct igmp_ifsoftc *igi; IGMP_LOCK(); LIST_FOREACH(igi, &V_igi_head, igi_link) { igmp_v1v2_process_querier_timers(igi); } IGMP_UNLOCK(); } /* * Dispatch an IGMPv1/v2 host report or leave message. * These are always small enough to fit inside a single mbuf. */ static int igmp_v1v2_queue_report(struct in_multi *inm, const int type) { struct epoch_tracker et; struct ifnet *ifp; struct igmp *igmp; struct ip *ip; struct mbuf *m; IN_MULTI_LIST_LOCK_ASSERT(); IGMP_LOCK_ASSERT(); ifp = inm->inm_ifp; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (ENOMEM); M_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp)); m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp); m->m_data += sizeof(struct ip); m->m_len = sizeof(struct igmp); igmp = mtod(m, struct igmp *); igmp->igmp_type = type; igmp->igmp_code = 0; igmp->igmp_group = inm->inm_addr; igmp->igmp_cksum = 0; igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp)); m->m_data -= sizeof(struct ip); m->m_len += sizeof(struct ip); ip = mtod(m, struct ip *); ip->ip_tos = 0; ip->ip_len = htons(sizeof(struct ip) + sizeof(struct igmp)); ip->ip_off = 0; ip->ip_p = IPPROTO_IGMP; ip->ip_src.s_addr = INADDR_ANY; if (type == IGMP_HOST_LEAVE_MESSAGE) ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP); else ip->ip_dst = inm->inm_addr; igmp_save_context(m, ifp); m->m_flags |= M_IGMPV2; if (inm->inm_igi->igi_flags & IGIF_LOOPBACK) m->m_flags |= M_IGMP_LOOP; CTR2(KTR_IGMPV3, "%s: netisr_dispatch(NETISR_IGMP, %p)", __func__, m); NET_EPOCH_ENTER(et); netisr_dispatch(NETISR_IGMP, m); NET_EPOCH_EXIT(et); return (0); } /* * Process a state change from the upper layer for the given IPv4 group. * * Each socket holds a reference on the in_multi in its own ip_moptions. 
* The socket layer will have made the necessary updates to the group * state; it is now up to IGMP to issue a state change report if there * has been any change between T0 (when the last state-change was issued) * and T1 (now). * * We use the IGMPv3 state machine at group level. The IGMP module * however makes the decision as to which IGMP protocol version to speak. * A state change *from* INCLUDE {} always means an initial join. * A state change *to* INCLUDE {} always means a final leave. * * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can * save ourselves a bunch of work; any exclusive mode groups need not * compute source filter lists. * * VIMAGE: curvnet should have been set by the caller, as this routine * is called from the socket option handlers. */ int igmp_change_state(struct in_multi *inm) { struct igmp_ifsoftc *igi; struct ifnet *ifp; int error; error = 0; IN_MULTI_LOCK_ASSERT(); /* * Try to detect if the upper layer just asked us to change state * for an interface which has now gone away. */ KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); ifp = inm->inm_ifma->ifma_ifp; if (ifp == NULL) return (0); /* * Sanity check that netinet's notion of ifp is the * same as net's. */ KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp)); /* * If we detect a state transition to or from MCAST_UNDEFINED * for this group, then we are starting or finishing an IGMP * life cycle for this group. */ if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) { CTR3(KTR_IGMPV3, "%s: inm transition %d -> %d", __func__, inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode); if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) { CTR1(KTR_IGMPV3, "%s: initial join", __func__); error = igmp_initial_join(inm, igi); goto out_locked; } else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) { CTR1(KTR_IGMPV3, "%s: final leave", __func__); igmp_final_leave(inm, igi); goto out_locked; } } else { CTR1(KTR_IGMPV3, "%s: filter set change", __func__); } error = igmp_handle_state_change(inm, igi); out_locked: IGMP_UNLOCK(); return (error); } /* * Perform the initial join for an IGMP group. * * When joining a group: * If the group should have its IGMP traffic suppressed, do nothing. * IGMPv1 starts sending IGMPv1 host membership reports. * IGMPv2 starts sending IGMPv2 host membership reports. * IGMPv3 will schedule an IGMPv3 state-change report containing the * initial state of the membership. */ static int igmp_initial_join(struct in_multi *inm, struct igmp_ifsoftc *igi) { struct ifnet *ifp; struct mbufq *mq; int error, retval, syncstates; CTR4(KTR_IGMPV3, "%s: initial join 0x%08x on ifp %p(%s)", __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp, inm->inm_ifp->if_xname); error = 0; syncstates = 1; ifp = inm->inm_ifp; IN_MULTI_LOCK_ASSERT(); IGMP_LOCK_ASSERT(); KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__)); /* * Groups joined on loopback or marked as 'not reported', * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and * are never reported in any IGMP protocol exchanges. * All other groups enter the appropriate IGMP state machine * for the version in use on this link. * A link marked as IGIF_SILENT causes IGMP to be completely * disabled for the link.
*/ if ((ifp->if_flags & IFF_LOOPBACK) || (igi->igi_flags & IGIF_SILENT) || !igmp_isgroupreported(inm->inm_addr)) { CTR1(KTR_IGMPV3, "%s: not kicking state machine for silent group", __func__); inm->inm_state = IGMP_SILENT_MEMBER; inm->inm_timer = 0; } else { /* * Deal with overlapping in_multi lifecycle. * If this group was LEAVING, then make sure * we drop the reference we picked up to keep the * group around for the final INCLUDE {} enqueue. */ if (igi->igi_version == IGMP_VERSION_3 && inm->inm_state == IGMP_LEAVING_MEMBER) { MPASS(inm->inm_refcount > 1); inm_rele_locked(NULL, inm); } inm->inm_state = IGMP_REPORTING_MEMBER; switch (igi->igi_version) { case IGMP_VERSION_1: case IGMP_VERSION_2: inm->inm_state = IGMP_IDLE_MEMBER; error = igmp_v1v2_queue_report(inm, (igi->igi_version == IGMP_VERSION_2) ? IGMP_v2_HOST_MEMBERSHIP_REPORT : IGMP_v1_HOST_MEMBERSHIP_REPORT); if (error == 0) { inm->inm_timer = IGMP_RANDOM_DELAY( IGMP_V1V2_MAX_RI * IGMP_FASTHZ); V_current_state_timers_running = 1; } break; case IGMP_VERSION_3: /* * Defer update of T0 to T1, until the first copy * of the state change has been transmitted. */ syncstates = 0; /* * Immediately enqueue a State-Change Report for * this interface, freeing any previous reports. * Don't kick the timers if there is nothing to do, * or if an error occurred. */ mq = &inm->inm_scq; mbufq_drain(mq); retval = igmp_v3_enqueue_group_record(mq, inm, 1, 0, 0); CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, retval); if (retval <= 0) { error = retval * -1; break; } /* * Schedule transmission of pending state-change * report up to RV times for this link. The timer * will fire at the next igmp_fasttimo (~200ms), * giving us an opportunity to merge the reports. */ if (igi->igi_flags & IGIF_LOOPBACK) { inm->inm_scrv = 1; } else { KASSERT(igi->igi_rv > 1, ("%s: invalid robustness %d", __func__, igi->igi_rv)); inm->inm_scrv = igi->igi_rv; } inm->inm_sctimer = 1; V_state_change_timers_running = 1; error = 0; break; } } /* * Only update the T0 state if state change is atomic, * i.e. we don't need to wait for a timer to fire before we * can consider the state change to have been communicated. */ if (syncstates) { inm_commit(inm); CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname); } return (error); } /* * Issue an intermediate state change during the IGMP life-cycle. */ static int igmp_handle_state_change(struct in_multi *inm, struct igmp_ifsoftc *igi) { struct ifnet *ifp; int retval; CTR4(KTR_IGMPV3, "%s: state change for 0x%08x on ifp %p(%s)", __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp, inm->inm_ifp->if_xname); ifp = inm->inm_ifp; IN_MULTI_LIST_LOCK_ASSERT(); IGMP_LOCK_ASSERT(); KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__)); if ((ifp->if_flags & IFF_LOOPBACK) || (igi->igi_flags & IGIF_SILENT) || !igmp_isgroupreported(inm->inm_addr) || (igi->igi_version != IGMP_VERSION_3)) { if (!igmp_isgroupreported(inm->inm_addr)) { CTR1(KTR_IGMPV3, "%s: not kicking state machine for silent group", __func__); } CTR1(KTR_IGMPV3, "%s: nothing to do", __func__); inm_commit(inm); CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname); return (0); } mbufq_drain(&inm->inm_scq); retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0); CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, retval); if (retval <= 0) return (-retval); /* * If record(s) were enqueued, start the state-change * report timer for this group. 
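 *
 * (Illustration, assuming the default robustness variable of 2: the
 * assignments below schedule up to two copies of the state-change
 * report, the first at the next fast tick (~200ms), any retransmission
 * re-timed from the unsolicited report interval, igi_uri.)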
*/ inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : igi->igi_rv); inm->inm_sctimer = 1; V_state_change_timers_running = 1; return (0); } /* * Perform the final leave for an IGMP group. * * When leaving a group: * IGMPv1 does nothing. * IGMPv2 sends a host leave message, if and only if we are the reporter. * IGMPv3 enqueues a state-change report containing a transition * to INCLUDE {} for immediate transmission. */ static void igmp_final_leave(struct in_multi *inm, struct igmp_ifsoftc *igi) { int syncstates; syncstates = 1; CTR4(KTR_IGMPV3, "%s: final leave 0x%08x on ifp %p(%s)", __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp, inm->inm_ifp->if_xname); IN_MULTI_LIST_LOCK_ASSERT(); IGMP_LOCK_ASSERT(); switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: case IGMP_LEAVING_MEMBER: /* Already leaving or left; do nothing. */ CTR1(KTR_IGMPV3, "%s: not kicking state machine for silent group", __func__); break; case IGMP_REPORTING_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: if (igi->igi_version == IGMP_VERSION_2) { #ifdef INVARIANTS if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER || inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) panic("%s: IGMPv3 state reached, not IGMPv3 mode", __func__); #endif igmp_v1v2_queue_report(inm, IGMP_HOST_LEAVE_MESSAGE); inm->inm_state = IGMP_NOT_MEMBER; } else if (igi->igi_version == IGMP_VERSION_3) { /* * Stop group timer and all pending reports. * Immediately enqueue a state-change report * TO_IN {} to be sent on the next fast timeout, * giving us an opportunity to merge reports. */ mbufq_drain(&inm->inm_scq); inm->inm_timer = 0; if (igi->igi_flags & IGIF_LOOPBACK) { inm->inm_scrv = 1; } else { inm->inm_scrv = igi->igi_rv; } CTR4(KTR_IGMPV3, "%s: Leaving 0x%08x/%s with %d " "pending retransmissions.", __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname, inm->inm_scrv); if (inm->inm_scrv == 0) { inm->inm_state = IGMP_NOT_MEMBER; inm->inm_sctimer = 0; } else { int retval __unused; inm_acquire_locked(inm); retval = igmp_v3_enqueue_group_record( &inm->inm_scq, inm, 1, 0, 0); KASSERT(retval != 0, ("%s: enqueue record = %d", __func__, retval)); inm->inm_state = IGMP_LEAVING_MEMBER; inm->inm_sctimer = 1; V_state_change_timers_running = 1; syncstates = 0; } break; } break; case IGMP_LAZY_MEMBER: case IGMP_SLEEPING_MEMBER: case IGMP_AWAKENING_MEMBER: /* Our reports are suppressed; do nothing. */ break; } if (syncstates) { inm_commit(inm); CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname); inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; CTR3(KTR_IGMPV3, "%s: T1 now MCAST_UNDEFINED for 0x%08x/%s", __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname); } } /* * Enqueue an IGMPv3 group record to the given output queue. * * XXX This function could do with having the allocation code * split out, and the multiple-tree-walks coalesced into a single * routine as has been done in igmp_v3_enqueue_filter_change(). * * If is_state_change is zero, a current-state record is appended. * If is_state_change is non-zero, a state-change report is appended. * * If is_group_query is non-zero, an mbuf packet chain is allocated. * If is_group_query is zero, and if there is a packet with free space * at the tail of the queue, the record will be appended to it, provided * there is enough free space. * Otherwise a new mbuf packet chain is allocated.
* * If is_source_query is non-zero, each source is checked to see if * it was recorded for a Group-Source query, and will be omitted if * it is not both in-mode and recorded. * * The function will attempt to allocate leading space in the packet * for the IP/IGMP header to be prepended without fragmenting the chain. * * If successful the size of all data appended to the queue is returned, * otherwise an error code less than zero is returned, or zero if * no record(s) were appended. */ static int igmp_v3_enqueue_group_record(struct mbufq *mq, struct in_multi *inm, const int is_state_change, const int is_group_query, const int is_source_query) { struct igmp_grouprec ig; struct igmp_grouprec *pig; struct ifnet *ifp; struct ip_msource *ims, *nims; struct mbuf *m0, *m, *md; int is_filter_list_change; int minrec0len, m0srcs, msrcs, nbytes, off; int record_has_sources; int now; int type; in_addr_t naddr; uint8_t mode; IN_MULTI_LIST_LOCK_ASSERT(); ifp = inm->inm_ifp; is_filter_list_change = 0; m = NULL; m0 = NULL; m0srcs = 0; msrcs = 0; nbytes = 0; nims = NULL; record_has_sources = 1; pig = NULL; type = IGMP_DO_NOTHING; mode = inm->inm_st[1].iss_fmode; /* * If we did not transition out of ASM mode during t0->t1, * and there are no source nodes to process, we can skip * the generation of source records. */ if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 && inm->inm_nsrc == 0) record_has_sources = 0; if (is_state_change) { /* * Queue a state change record. * If the mode did not change, and there are non-ASM * listeners or source filters present, * we potentially need to issue two records for the group. * If we are transitioning to MCAST_UNDEFINED, we need * not send any sources. * If there are ASM listeners, and there was no filter * mode transition of any kind, do nothing. */ if (mode != inm->inm_st[0].iss_fmode) { if (mode == MCAST_EXCLUDE) { CTR1(KTR_IGMPV3, "%s: change to EXCLUDE", __func__); type = IGMP_CHANGE_TO_EXCLUDE_MODE; } else { CTR1(KTR_IGMPV3, "%s: change to INCLUDE", __func__); type = IGMP_CHANGE_TO_INCLUDE_MODE; if (mode == MCAST_UNDEFINED) record_has_sources = 0; } } else { if (record_has_sources) { is_filter_list_change = 1; } else { type = IGMP_DO_NOTHING; } } } else { /* * Queue a current state record. */ if (mode == MCAST_EXCLUDE) { type = IGMP_MODE_IS_EXCLUDE; } else if (mode == MCAST_INCLUDE) { type = IGMP_MODE_IS_INCLUDE; KASSERT(inm->inm_st[1].iss_asm == 0, ("%s: inm %p is INCLUDE but ASM count is %d", __func__, inm, inm->inm_st[1].iss_asm)); } } /* * Generate the filter list changes using a separate function. */ if (is_filter_list_change) return (igmp_v3_enqueue_filter_change(mq, inm)); if (type == IGMP_DO_NOTHING) { CTR3(KTR_IGMPV3, "%s: nothing to do for 0x%08x/%s", __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname); return (0); } /* * If any sources are present, we must be able to fit at least * one in the trailing space of the tail packet's mbuf, * ideally more. */ minrec0len = sizeof(struct igmp_grouprec); if (record_has_sources) minrec0len += sizeof(in_addr_t); CTR4(KTR_IGMPV3, "%s: queueing %s for 0x%08x/%s", __func__, igmp_rec_type_to_str(type), ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname); /* * Check if we have a packet in the tail of the queue for this * group into which the first group record for this group will fit. * Otherwise allocate a new packet. * Always allocate leading space for IP+RA_OPT+IGMP+REPORT. * Note: Group records for G/GSR query responses MUST be sent * in their own packet. 
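 *
 * Worked example, assuming a 1500-byte MTU and the 32-byte
 * IGMP_LEADINGSPACE (IP header + router-alert option + report header):
 * a freshly allocated packet has room for (1500 - 32 - 8) / 4 = 365
 * sources in its first 8-byte group record.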
*/ m0 = mbufq_last(mq); if (!is_group_query && m0 != NULL && (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) && (m0->m_pkthdr.len + minrec0len) < (ifp->if_mtu - IGMP_LEADINGSPACE)) { m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - sizeof(struct igmp_grouprec)) / sizeof(in_addr_t); m = m0; CTR1(KTR_IGMPV3, "%s: use existing packet", __func__); } else { if (mbufq_full(mq)) { CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__); return (-ENOMEM); } m = NULL; m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE - sizeof(struct igmp_grouprec)) / sizeof(in_addr_t); if (!is_state_change && !is_group_query) { m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m) m->m_data += IGMP_LEADINGSPACE; } if (m == NULL) { m = m_gethdr(M_NOWAIT, MT_DATA); if (m) M_ALIGN(m, IGMP_LEADINGSPACE); } if (m == NULL) return (-ENOMEM); igmp_save_context(m, ifp); CTR1(KTR_IGMPV3, "%s: allocated first packet", __func__); } /* * Append group record. * If we have sources, we don't know how many yet. */ ig.ig_type = type; ig.ig_datalen = 0; ig.ig_numsrc = 0; ig.ig_group = inm->inm_addr; if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) { if (m != m0) m_freem(m); CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__); return (-ENOMEM); } nbytes += sizeof(struct igmp_grouprec); /* * Append as many sources as will fit in the first packet. * If we are appending to a new packet, the chain allocation * may potentially use clusters; use m_getptr() in this case. * If we are appending to an existing packet, we need to obtain * a pointer to the group record after m_append(), in case a new * mbuf was allocated. * Only append sources which are in-mode at t1. If we are * transitioning to MCAST_UNDEFINED state on the group, do not * include source entries. * Only report recorded sources in our filter set when responding * to a group-source query. */ if (record_has_sources) { if (m == m0) { md = m_last(m); pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + md->m_len - nbytes); } else { md = m_getptr(m, 0, &off); pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off); } msrcs = 0; RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) { CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, ims->ims_haddr); now = ims_get_mode(inm, ims, 1); CTR2(KTR_IGMPV3, "%s: node is %d", __func__, now); if ((now != mode) || (now == mode && mode == MCAST_UNDEFINED)) { CTR1(KTR_IGMPV3, "%s: skip node", __func__); continue; } if (is_source_query && ims->ims_stp == 0) { CTR1(KTR_IGMPV3, "%s: skip unrecorded node", __func__); continue; } CTR1(KTR_IGMPV3, "%s: append node", __func__); naddr = htonl(ims->ims_haddr); if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) { if (m != m0) m_freem(m); CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__); return (-ENOMEM); } nbytes += sizeof(in_addr_t); ++msrcs; if (msrcs == m0srcs) break; } CTR2(KTR_IGMPV3, "%s: msrcs is %d this packet", __func__, msrcs); pig->ig_numsrc = htons(msrcs); nbytes += (msrcs * sizeof(in_addr_t)); } if (is_source_query && msrcs == 0) { CTR1(KTR_IGMPV3, "%s: no recorded sources to report", __func__); if (m != m0) m_freem(m); return (0); } /* * We are good to go with first packet. */ if (m != m0) { CTR1(KTR_IGMPV3, "%s: enqueueing first packet", __func__); m->m_pkthdr.vt_nrecs = 1; mbufq_enqueue(mq, m); } else m->m_pkthdr.vt_nrecs++; /* * No further work needed if no source list in packet(s). */ if (!record_has_sources) return (nbytes); /* * Whilst sources remain to be announced, we need to allocate * a new packet and fill out as many sources as will fit. * Always try for a cluster first. 
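 *
 * (A 2KB mbuf cluster already exceeds a 1500-byte Ethernet MTU, so in
 * the common case each continuation packet is a single cluster.)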
*/ while (nims != NULL) { if (mbufq_full(mq)) { CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__); return (-ENOMEM); } m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m) m->m_data += IGMP_LEADINGSPACE; if (m == NULL) { m = m_gethdr(M_NOWAIT, MT_DATA); if (m) M_ALIGN(m, IGMP_LEADINGSPACE); } if (m == NULL) return (-ENOMEM); igmp_save_context(m, ifp); md = m_getptr(m, 0, &off); pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off); CTR1(KTR_IGMPV3, "%s: allocated next packet", __func__); if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) { if (m != m0) m_freem(m); CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__); return (-ENOMEM); } m->m_pkthdr.vt_nrecs = 1; nbytes += sizeof(struct igmp_grouprec); m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE - sizeof(struct igmp_grouprec)) / sizeof(in_addr_t); msrcs = 0; RB_FOREACH_FROM(ims, ip_msource_tree, nims) { CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, ims->ims_haddr); now = ims_get_mode(inm, ims, 1); if ((now != mode) || (now == mode && mode == MCAST_UNDEFINED)) { CTR1(KTR_IGMPV3, "%s: skip node", __func__); continue; } if (is_source_query && ims->ims_stp == 0) { CTR1(KTR_IGMPV3, "%s: skip unrecorded node", __func__); continue; } CTR1(KTR_IGMPV3, "%s: append node", __func__); naddr = htonl(ims->ims_haddr); if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) { if (m != m0) m_freem(m); CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__); return (-ENOMEM); } ++msrcs; if (msrcs == m0srcs) break; } pig->ig_numsrc = htons(msrcs); nbytes += (msrcs * sizeof(in_addr_t)); CTR1(KTR_IGMPV3, "%s: enqueueing next packet", __func__); mbufq_enqueue(mq, m); } return (nbytes); } /* * Type used to mark record pass completion. * We exploit the fact we can cast to this easily from the * current filter modes on each ip_msource node. */ typedef enum { REC_NONE = 0x00, /* MCAST_UNDEFINED */ REC_ALLOW = 0x01, /* MCAST_INCLUDE */ REC_BLOCK = 0x02, /* MCAST_EXCLUDE */ REC_FULL = REC_ALLOW | REC_BLOCK } rectype_t; /* * Enqueue an IGMPv3 filter list change to the given output queue. * * Source list filter state is held in an RB-tree. When the filter list * for a group is changed without changing its mode, we need to compute * the deltas between T0 and T1 for each source in the filter set, * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records. * * As we may potentially queue two record types, and the entire R-B tree * needs to be walked at once, we break this out into its own function * so we can generate a tightly packed queue of packets. * * XXX This could be written to only use one tree walk, although that makes * serializing into the mbuf chains a bit harder. For now we do two walks * which makes things easier on us, and it may or may not be harder on * the L2 cache. * * If successful the size of all data appended to the queue is returned, * otherwise an error code less than zero is returned, or zero if * no record(s) were appended. 
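 *
 * Worked example of the deltas this function emits: if an EXCLUDE-mode
 * group's source filter changes from {A, B} at t0 to {B, C} at t1, then
 * A (no longer blocked) goes into an ALLOW_NEW_SOURCES record, C (newly
 * blocked) goes into a BLOCK_OLD_SOURCES record, and B, being unchanged,
 * is not reported at all.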
*/ static int igmp_v3_enqueue_filter_change(struct mbufq *mq, struct in_multi *inm) { static const int MINRECLEN = sizeof(struct igmp_grouprec) + sizeof(in_addr_t); struct ifnet *ifp; struct igmp_grouprec ig; struct igmp_grouprec *pig; struct ip_msource *ims, *nims; struct mbuf *m, *m0, *md; in_addr_t naddr; int m0srcs, nbytes, npbytes, off, rsrcs, schanged; #ifdef KTR int nallow, nblock; #endif uint8_t mode, now, then; rectype_t crt, drt, nrt; IN_MULTI_LIST_LOCK_ASSERT(); if (inm->inm_nsrc == 0 || (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0)) return (0); ifp = inm->inm_ifp; /* interface */ mode = inm->inm_st[1].iss_fmode; /* filter mode at t1 */ crt = REC_NONE; /* current group record type */ drt = REC_NONE; /* mask of completed group record types */ nrt = REC_NONE; /* record type for current node */ m0srcs = 0; /* # sources that will fit in current mbuf chain */ nbytes = 0; /* # of bytes appended to group's state-change queue */ npbytes = 0; /* # of bytes appended this packet */ rsrcs = 0; /* # sources encoded in current record */ schanged = 0; /* # nodes encoded in overall filter change */ #ifdef KTR nallow = 0; /* # of source entries in ALLOW_NEW */ nblock = 0; /* # of source entries in BLOCK_OLD */ #endif nims = NULL; /* next tree node pointer */ /* * For each possible filter record mode. * The first kind of source we encounter tells us which * is the first kind of record we start appending. * If a node transitioned to UNDEFINED at t1, its mode is treated * as the inverse of the group's filter mode. */ while (drt != REC_FULL) { do { m0 = mbufq_last(mq); if (m0 != NULL && (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) && (m0->m_pkthdr.len + MINRECLEN) < (ifp->if_mtu - IGMP_LEADINGSPACE)) { m = m0; m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - sizeof(struct igmp_grouprec)) / sizeof(in_addr_t); CTR1(KTR_IGMPV3, "%s: use previous packet", __func__); } else { m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m) m->m_data += IGMP_LEADINGSPACE; if (m == NULL) { m = m_gethdr(M_NOWAIT, MT_DATA); if (m) M_ALIGN(m, IGMP_LEADINGSPACE); } if (m == NULL) { CTR1(KTR_IGMPV3, "%s: m_get*() failed", __func__); return (-ENOMEM); } m->m_pkthdr.vt_nrecs = 0; igmp_save_context(m, ifp); m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE - sizeof(struct igmp_grouprec)) / sizeof(in_addr_t); npbytes = 0; CTR1(KTR_IGMPV3, "%s: allocated new packet", __func__); } /* * Append the IGMP group record header to the * current packet's data area. * Recalculate pointer to free space for next * group record, in case m_append() allocated * a new mbuf or cluster. */ memset(&ig, 0, sizeof(ig)); ig.ig_group = inm->inm_addr; if (!m_append(m, sizeof(ig), (void *)&ig)) { if (m != m0) m_freem(m); CTR1(KTR_IGMPV3, "%s: m_append() failed", __func__); return (-ENOMEM); } npbytes += sizeof(struct igmp_grouprec); if (m != m0) { /* new packet; offset in chain */ md = m_getptr(m, npbytes - sizeof(struct igmp_grouprec), &off); pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off); } else { /* current packet; offset from last append */ md = m_last(m); pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + md->m_len - sizeof(struct igmp_grouprec)); } /* * Begin walking the tree for this record type * pass, or continue from where we left off * previously if we had to allocate a new packet. * Only report deltas in-mode at t1. * We need not report included sources as allowed * if we are in inclusive mode on the group, * however the converse is not true.
*/ rsrcs = 0; if (nims == NULL) nims = RB_MIN(ip_msource_tree, &inm->inm_srcs); RB_FOREACH_FROM(ims, ip_msource_tree, nims) { CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, ims->ims_haddr); now = ims_get_mode(inm, ims, 1); then = ims_get_mode(inm, ims, 0); CTR3(KTR_IGMPV3, "%s: mode: t0 %d, t1 %d", __func__, then, now); if (now == then) { CTR1(KTR_IGMPV3, "%s: skip unchanged", __func__); continue; } if (mode == MCAST_EXCLUDE && now == MCAST_INCLUDE) { CTR1(KTR_IGMPV3, "%s: skip IN src on EX group", __func__); continue; } nrt = (rectype_t)now; if (nrt == REC_NONE) nrt = (rectype_t)(~mode & REC_FULL); if (schanged++ == 0) { crt = nrt; } else if (crt != nrt) continue; naddr = htonl(ims->ims_haddr); if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) { if (m != m0) m_freem(m); CTR1(KTR_IGMPV3, "%s: m_append() failed", __func__); return (-ENOMEM); } #ifdef KTR nallow += !!(crt == REC_ALLOW); nblock += !!(crt == REC_BLOCK); #endif if (++rsrcs == m0srcs) break; } /* * If we did not append any tree nodes on this * pass, back out of allocations. */ if (rsrcs == 0) { npbytes -= sizeof(struct igmp_grouprec); if (m != m0) { CTR1(KTR_IGMPV3, "%s: m_free(m)", __func__); m_freem(m); } else { CTR1(KTR_IGMPV3, "%s: m_adj(m, -ig)", __func__); m_adj(m, -((int)sizeof( struct igmp_grouprec))); } continue; } npbytes += (rsrcs * sizeof(in_addr_t)); if (crt == REC_ALLOW) pig->ig_type = IGMP_ALLOW_NEW_SOURCES; else if (crt == REC_BLOCK) pig->ig_type = IGMP_BLOCK_OLD_SOURCES; pig->ig_numsrc = htons(rsrcs); /* * Count the new group record, and enqueue this * packet if it wasn't already queued. */ m->m_pkthdr.vt_nrecs++; if (m != m0) mbufq_enqueue(mq, m); nbytes += npbytes; } while (nims != NULL); drt |= crt; crt = (~crt & REC_FULL); } CTR3(KTR_IGMPV3, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__, nallow, nblock); return (nbytes); } static int igmp_v3_merge_state_changes(struct in_multi *inm, struct mbufq *scq) { struct mbufq *gq; struct mbuf *m; /* pending state-change */ struct mbuf *m0; /* copy of pending state-change */ struct mbuf *mt; /* last state-change in packet */ int docopy, domerge; u_int recslen; docopy = 0; domerge = 0; recslen = 0; IN_MULTI_LIST_LOCK_ASSERT(); IGMP_LOCK_ASSERT(); /* * If there are further pending retransmissions, make a writable * copy of each queued state-change message before merging. */ if (inm->inm_scrv > 0) docopy = 1; gq = &inm->inm_scq; #ifdef KTR if (mbufq_first(gq) == NULL) { CTR2(KTR_IGMPV3, "%s: WARNING: queue for inm %p is empty", __func__, inm); } #endif m = mbufq_first(gq); while (m != NULL) { /* * Only merge the report into the current packet if * there is sufficient space to do so; an IGMPv3 report * packet may only contain 65,535 group records. * Always use a simple mbuf chain concatenation to do this, * as large state changes for single groups may have * allocated clusters.
*/ domerge = 0; mt = mbufq_last(scq); if (mt != NULL) { recslen = m_length(m, NULL); if ((mt->m_pkthdr.vt_nrecs + m->m_pkthdr.vt_nrecs <= IGMP_V3_REPORT_MAXRECS) && (mt->m_pkthdr.len + recslen <= (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE))) domerge = 1; } if (!domerge && mbufq_full(gq)) { CTR2(KTR_IGMPV3, "%s: outbound queue full, skipping whole packet %p", __func__, m); mt = m->m_nextpkt; if (!docopy) m_freem(m); m = mt; continue; } if (!docopy) { CTR2(KTR_IGMPV3, "%s: dequeueing %p", __func__, m); m0 = mbufq_dequeue(gq); m = m0->m_nextpkt; } else { CTR2(KTR_IGMPV3, "%s: copying %p", __func__, m); m0 = m_dup(m, M_NOWAIT); if (m0 == NULL) return (ENOMEM); m0->m_nextpkt = NULL; m = m->m_nextpkt; } if (!domerge) { CTR3(KTR_IGMPV3, "%s: queueing %p to scq %p", __func__, m0, scq); mbufq_enqueue(scq, m0); } else { struct mbuf *mtl; /* last mbuf of packet mt */ CTR3(KTR_IGMPV3, "%s: merging %p with scq tail %p", __func__, m0, mt); mtl = m_last(mt); m0->m_flags &= ~M_PKTHDR; mt->m_pkthdr.len += recslen; mt->m_pkthdr.vt_nrecs += m0->m_pkthdr.vt_nrecs; mtl->m_next = m0; } } return (0); } /* * Respond to a pending IGMPv3 General Query. */ static void igmp_v3_dispatch_general_query(struct igmp_ifsoftc *igi) { struct ifmultiaddr *ifma; struct ifnet *ifp; struct in_multi *inm; int retval __unused, loop; IN_MULTI_LIST_LOCK_ASSERT(); IGMP_LOCK_ASSERT(); NET_EPOCH_ASSERT(); KASSERT(igi->igi_version == IGMP_VERSION_3, ("%s: called when version %d", __func__, igi->igi_version)); /* * Check that there are some packets queued. If so, send them first. * For a large number of groups the reply to a general query can take * many packets, so we should finish sending them before starting to * queue the new reply. */ - if (mbufq_len(&igi->igi_gq) != 0) + if (!mbufq_empty(&igi->igi_gq)) goto send; ifp = igi->igi_ifp; CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { inm = inm_ifmultiaddr_get_inm(ifma); if (inm == NULL) continue; KASSERT(ifp == inm->inm_ifp, ("%s: inconsistent ifp", __func__)); switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: break; case IGMP_REPORTING_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_SLEEPING_MEMBER: case IGMP_AWAKENING_MEMBER: inm->inm_state = IGMP_REPORTING_MEMBER; retval = igmp_v3_enqueue_group_record(&igi->igi_gq, inm, 0, 0, 0); CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, retval); break; case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: case IGMP_LEAVING_MEMBER: break; } } send: loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0; igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST, loop); /* * Slew transmission of bursts over 500ms intervals. */ if (mbufq_first(&igi->igi_gq) != NULL) { igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY( IGMP_RESPONSE_BURST_INTERVAL); V_interface_timers_running = 1; } } /* * Transmit the next pending IGMP message in the output queue. * * We get called from netisr_processqueue(). A mutex private to igmpoq * will be acquired and released around this routine. * * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis. * MRT: Nothing needs to be done, as IGMP traffic is always local to * a link and uses a link-scope multicast address. */ static void igmp_intr(struct mbuf *m) { struct ip_moptions imo; struct ifnet *ifp; struct mbuf *ipopts, *m0; int error; uint32_t ifindex; CTR2(KTR_IGMPV3, "%s: transmit %p", __func__, m); /* * Set VNET image pointer from enqueued mbuf chain * before doing anything else.
Whilst we use interface * indexes to guard against interface detach, they are * unique to each VIMAGE and must be retrieved. */ CURVNET_SET((struct vnet *)(m->m_pkthdr.PH_loc.ptr)); ifindex = igmp_restore_context(m); /* * Check if the ifnet still exists. This limits the scope of * any race in the absence of a global ifp lock for low cost * (an array lookup). */ ifp = ifnet_byindex(ifindex); if (ifp == NULL) { CTR3(KTR_IGMPV3, "%s: dropped %p as ifindex %u went away.", __func__, m, ifindex); m_freem(m); IPSTAT_INC(ips_noroute); goto out; } ipopts = V_igmp_sendra ? m_raopt : NULL; imo.imo_multicast_ttl = 1; imo.imo_multicast_vif = -1; imo.imo_multicast_loop = (V_ip_mrouter != NULL); /* * If the user requested that IGMP traffic be explicitly * redirected to the loopback interface (e.g. they are running a * MANET interface and the routing protocol needs to see the * updates), handle this now. */ if (m->m_flags & M_IGMP_LOOP) imo.imo_multicast_ifp = V_loif; else imo.imo_multicast_ifp = ifp; if (m->m_flags & M_IGMPV2) { m0 = m; } else { m0 = igmp_v3_encap_report(ifp, m); if (m0 == NULL) { CTR2(KTR_IGMPV3, "%s: dropped %p", __func__, m); m_freem(m); IPSTAT_INC(ips_odropped); goto out; } } igmp_scrub_context(m0); m_clrprotoflags(m); m0->m_pkthdr.rcvif = V_loif; #ifdef MAC mac_netinet_igmp_send(ifp, m0); #endif error = ip_output(m0, ipopts, NULL, 0, &imo, NULL); if (error) { CTR3(KTR_IGMPV3, "%s: ip_output(%p) = %d", __func__, m0, error); goto out; } IGMPSTAT_INC(igps_snd_reports); out: /* * We must restore the existing vnet pointer before * continuing as we are run from netisr context. */ CURVNET_RESTORE(); } /* * Encapsulate an IGMPv3 report. * * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf * chain has already had its IP/IGMPv3 header prepended. In this case * the function will not attempt to prepend; the lengths and checksums * will however be re-computed. * * Returns a pointer to the new mbuf chain head, or NULL if the * allocation failed. 
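 *
 * The resulting layout, before ip_output() inserts the router-alert
 * option supplied separately via ipopts in igmp_intr(), is:
 *
 *	| struct ip (20) | struct igmp_report (8) | group records ... |
 *
 * with ip_len set to cover the full hdrlen + igmpreclen.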
*/ static struct mbuf * igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m) { struct igmp_report *igmp; struct ip *ip; int hdrlen, igmpreclen; KASSERT((m->m_flags & M_PKTHDR), ("%s: mbuf chain %p is !M_PKTHDR", __func__, m)); igmpreclen = m_length(m, NULL); hdrlen = sizeof(struct ip) + sizeof(struct igmp_report); if (m->m_flags & M_IGMPV3_HDR) { igmpreclen -= hdrlen; } else { M_PREPEND(m, hdrlen, M_NOWAIT); if (m == NULL) return (NULL); m->m_flags |= M_IGMPV3_HDR; } CTR2(KTR_IGMPV3, "%s: igmpreclen is %d", __func__, igmpreclen); m->m_data += sizeof(struct ip); m->m_len -= sizeof(struct ip); igmp = mtod(m, struct igmp_report *); igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT; igmp->ir_rsv1 = 0; igmp->ir_rsv2 = 0; igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs); igmp->ir_cksum = 0; igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen); m->m_pkthdr.vt_nrecs = 0; m->m_data -= sizeof(struct ip); m->m_len += sizeof(struct ip); ip = mtod(m, struct ip *); ip->ip_tos = IPTOS_PREC_INTERNETCONTROL; ip->ip_len = htons(hdrlen + igmpreclen); ip->ip_off = htons(IP_DF); ip->ip_p = IPPROTO_IGMP; ip->ip_sum = 0; ip->ip_src.s_addr = INADDR_ANY; if (m->m_flags & M_IGMP_LOOP) { struct in_ifaddr *ia; IFP_TO_IA(ifp, ia); if (ia != NULL) ip->ip_src = ia->ia_addr.sin_addr; } ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP); return (m); } #ifdef KTR static char * igmp_rec_type_to_str(const int type) { switch (type) { case IGMP_CHANGE_TO_EXCLUDE_MODE: return "TO_EX"; break; case IGMP_CHANGE_TO_INCLUDE_MODE: return "TO_IN"; break; case IGMP_MODE_IS_EXCLUDE: return "MODE_EX"; break; case IGMP_MODE_IS_INCLUDE: return "MODE_IN"; break; case IGMP_ALLOW_NEW_SOURCES: return "ALLOW_NEW"; break; case IGMP_BLOCK_OLD_SOURCES: return "BLOCK_OLD"; break; default: break; } return "unknown"; } #endif #ifdef VIMAGE static void vnet_igmp_init(const void *unused __unused) { netisr_register_vnet(&igmp_nh); } VNET_SYSINIT(vnet_igmp_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_igmp_init, NULL); static void vnet_igmp_uninit(const void *unused __unused) { /* This can happen when we shutdown the entire network stack. 
 */
	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);

	netisr_unregister_vnet(&igmp_nh);
}
VNET_SYSUNINIT(vnet_igmp_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY,
    vnet_igmp_uninit, NULL);
#endif

#ifdef DDB
DB_SHOW_COMMAND(igi_list, db_show_igi_list)
{
	struct igmp_ifsoftc *igi, *tigi;
	LIST_HEAD(_igi_list, igmp_ifsoftc) *igi_head;

	if (!have_addr) {
		db_printf("usage: show igi_list <addr>\n");
		return;
	}
	igi_head = (struct _igi_list *)addr;

	LIST_FOREACH_SAFE(igi, igi_head, igi_link, tigi) {
		db_printf("igmp_ifsoftc %p:\n", igi);
		db_printf("    ifp %p\n", igi->igi_ifp);
		db_printf("    version %u\n", igi->igi_version);
		db_printf("    v1_timer %u\n", igi->igi_v1_timer);
		db_printf("    v2_timer %u\n", igi->igi_v2_timer);
		db_printf("    v3_timer %u\n", igi->igi_v3_timer);
		db_printf("    flags %#x\n", igi->igi_flags);
		db_printf("    rv %u\n", igi->igi_rv);
		db_printf("    qi %u\n", igi->igi_qi);
		db_printf("    qri %u\n", igi->igi_qri);
		db_printf("    uri %u\n", igi->igi_uri);
		/* struct mbufq igi_gq; */
		db_printf("\n");
	}
}
#endif

static int
igmp_modevent(module_t mod, int type, void *unused __unused)
{

	switch (type) {
	case MOD_LOAD:
		CTR1(KTR_IGMPV3, "%s: initializing", __func__);
		IGMP_LOCK_INIT();
		m_raopt = igmp_ra_alloc();
		netisr_register(&igmp_nh);
		callout_init(&igmpslow_callout, 1);
		callout_reset(&igmpslow_callout, hz / IGMP_SLOWHZ,
		    igmp_slowtimo, NULL);
		callout_init(&igmpfast_callout, 1);
		callout_reset(&igmpfast_callout, hz / IGMP_FASTHZ,
		    igmp_fasttimo, NULL);
		break;
	case MOD_UNLOAD:
		CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
		netisr_unregister(&igmp_nh);
		m_free(m_raopt);
		m_raopt = NULL;
		IGMP_LOCK_DESTROY();
		break;
	default:
		return (EOPNOTSUPP);
	}

	return (0);
}

static moduledata_t igmp_mod = {
    "igmp",
    igmp_modevent,
    0
};
DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE);
diff --git a/sys/netinet6/mld6.c b/sys/netinet6/mld6.c
index 309c9d11204f..6a64d1830afd 100644
--- a/sys/netinet6/mld6.c
+++ b/sys/netinet6/mld6.c
@@ -1,3328 +1,3328 @@
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2009 Bruce Simpson.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $KAME: mld6.c,v 1.27 2001/04/04 05:17:30 itojun Exp $
 */

/*-
 * Copyright (c) 1988 Stephen Deering.
* Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Stephen Deering of Stanford University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)igmp.c 8.1 (Berkeley) 7/19/93 */ #include #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef KTR_MLD #define KTR_MLD KTR_INET6 #endif static void mli_delete_locked(struct ifnet *); static void mld_dispatch_packet(struct mbuf *); static void mld_dispatch_queue(struct mbufq *, int); static void mld_final_leave(struct in6_multi *, struct mld_ifsoftc *); static void mld_fasttimo_vnet(struct in6_multi_head *inmh); static int mld_handle_state_change(struct in6_multi *, struct mld_ifsoftc *); static int mld_initial_join(struct in6_multi *, struct mld_ifsoftc *, const int); #ifdef KTR static char * mld_rec_type_to_str(const int); #endif static void mld_set_version(struct mld_ifsoftc *, const int); static void mld_slowtimo_vnet(void); static int mld_v1_input_query(struct ifnet *, const struct ip6_hdr *, /*const*/ struct mld_hdr *); static int mld_v1_input_report(struct ifnet *, const struct ip6_hdr *, /*const*/ struct mld_hdr *); static void mld_v1_process_group_timer(struct in6_multi_head *, struct in6_multi *); static void mld_v1_process_querier_timers(struct mld_ifsoftc *); static int mld_v1_transmit_report(struct in6_multi *, const int); static void mld_v1_update_group(struct in6_multi *, const int); static void mld_v2_cancel_link_timers(struct mld_ifsoftc *); static void mld_v2_dispatch_general_query(struct mld_ifsoftc *); static struct mbuf * mld_v2_encap_report(struct ifnet *, struct mbuf *); static int mld_v2_enqueue_filter_change(struct mbufq *, struct in6_multi *); static int mld_v2_enqueue_group_record(struct mbufq *, struct in6_multi *, const int, const int, const int, const int); static int mld_v2_input_query(struct ifnet *, const 
struct ip6_hdr *, struct mbuf *, struct mldv2_query *, const int, const int); static int mld_v2_merge_state_changes(struct in6_multi *, struct mbufq *); static void mld_v2_process_group_timers(struct in6_multi_head *, struct mbufq *, struct mbufq *, struct in6_multi *, const int); static int mld_v2_process_group_query(struct in6_multi *, struct mld_ifsoftc *mli, int, struct mbuf *, struct mldv2_query *, const int); static int sysctl_mld_gsr(SYSCTL_HANDLER_ARGS); static int sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS); /* * Normative references: RFC 2710, RFC 3590, RFC 3810. * * Locking: * * The MLD subsystem lock ends up being system-wide for the moment, * but could be per-VIMAGE later on. * * The permitted lock order is: IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK. * Any may be taken independently; if any are held at the same * time, the above lock order must be followed. * * IN6_MULTI_LOCK covers in_multi. * * MLD_LOCK covers per-link state and any global variables in this file. * * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of * per-link state iterators. * * XXX LOR PREVENTION * A special case for IPv6 is the in6_setscope() routine. ip6_output() * will not accept an ifp; it wants an embedded scope ID, unlike * ip_output(), which happily takes the ifp given to it. The embedded * scope ID is only used by MLD to select the outgoing interface. * * During interface attach and detach, MLD will take MLD_LOCK *after* * the IF_AFDATA_LOCK. * As in6_setscope() takes IF_AFDATA_LOCK then SCOPE_LOCK, we can't call * it with MLD_LOCK held without triggering an LOR. A netisr with indirect * dispatch could work around this, but we'd rather not do that, as it * can introduce other races. * * As such, we exploit the fact that the scope ID is just the interface * index, and embed it in the IPv6 destination address accordingly. * This is potentially NOT VALID for MLDv1 reports, as they * are always sent to the multicast group itself; as MLDv2 * reports are always sent to ff02::16, this is not an issue * when MLDv2 is in use. * * This does not however eliminate the LOR when ip6_output() itself * calls in6_setscope() internally whilst MLD_LOCK is held. This will * trigger a LOR warning in WITNESS when the ifnet is detached. * * The right answer is probably to make IF_AFDATA_LOCK an rwlock, given * how it's used across the network stack. Here we're simply exploiting * the fact that MLD runs at a similar layer in the stack to scope6.c. * * VIMAGE: * * Each in6_multi corresponds to an ifp, and each ifp corresponds * to a vnet in ifp->if_vnet. */ static struct mtx mld_mtx; static MALLOC_DEFINE(M_MLD, "mld", "mld state"); #define MLD_EMBEDSCOPE(pin6, zoneid) \ if (IN6_IS_SCOPE_LINKLOCAL(pin6) || \ IN6_IS_ADDR_MC_INTFACELOCAL(pin6)) \ (pin6)->s6_addr16[1] = htons((zoneid) & 0xFFFF) \ /* * VIMAGE-wide globals. */ VNET_DEFINE_STATIC(struct timeval, mld_gsrdelay) = {10, 0}; VNET_DEFINE_STATIC(LIST_HEAD(, mld_ifsoftc), mli_head); VNET_DEFINE_STATIC(int, interface_timers_running6); VNET_DEFINE_STATIC(int, state_change_timers_running6); VNET_DEFINE_STATIC(int, current_state_timers_running6); #define V_mld_gsrdelay VNET(mld_gsrdelay) #define V_mli_head VNET(mli_head) #define V_interface_timers_running6 VNET(interface_timers_running6) #define V_state_change_timers_running6 VNET(state_change_timers_running6) #define V_current_state_timers_running6 VNET(current_state_timers_running6) SYSCTL_DECL(_net_inet6); /* Note: Not in any common header. 
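 */

#if 0
/*
 * Editor's illustrative sketch -- not part of the original change.  It
 * spells out what the MLD_EMBEDSCOPE() macro above does: for link-local
 * and interface-local multicast addresses, the 16-bit zone ID (here
 * simply the interface index) is parked in the second 16-bit word of
 * the address, and in6_clearscope() must undo this before the address
 * goes on the wire or is shown to userland.  The example_* name is
 * hypothetical.
 */
static void
example_embed_scope(struct in6_addr *pin6, uint16_t zoneid)
{

	if (IN6_IS_SCOPE_LINKLOCAL(pin6) ||
	    IN6_IS_ADDR_MC_INTFACELOCAL(pin6))
		pin6->s6_addr16[1] = htons(zoneid);
}
#endif

/* The MLD sysctl tree and module-global state follow.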
 */
SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "IPv6 Multicast Listener Discovery");

/*
 * Virtualized sysctls.
 */
SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    &VNET_NAME(mld_gsrdelay.tv_sec), 0, sysctl_mld_gsr, "I",
    "Rate limit for MLDv2 Group-and-Source queries in seconds");

/*
 * Non-virtualized sysctls.
 */
static SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo,
    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_mld_ifinfo,
    "Per-interface MLDv2 state");

static int mld_v1enable = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RWTUN,
    &mld_v1enable, 0, "Enable fallback to MLDv1");

static int mld_v2enable = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, v2enable, CTLFLAG_RWTUN,
    &mld_v2enable, 0, "Enable MLDv2");

static int mld_use_allow = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RWTUN,
    &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");

/*
 * Packed Router Alert option structure declaration.
 */
struct mld_raopt {
	struct ip6_hbh		hbh;
	struct ip6_opt		pad;
	struct ip6_opt_router	ra;
} __packed;

/*
 * Router Alert hop-by-hop option header.
 */
static struct mld_raopt mld_ra = {
	.hbh = { 0, 0 },
	.pad = { .ip6o_type = IP6OPT_PADN, 0 },
	.ra = {
	    .ip6or_type = IP6OPT_ROUTER_ALERT,
	    .ip6or_len = IP6OPT_RTALERT_LEN - 2,
	    .ip6or_value[0] = ((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
	    .ip6or_value[1] = (IP6OPT_RTALERT_MLD & 0xFF)
	}
};
static struct ip6_pktopts mld_po;

static __inline void
mld_save_context(struct mbuf *m, struct ifnet *ifp)
{

#ifdef VIMAGE
	m->m_pkthdr.PH_loc.ptr = ifp->if_vnet;
#endif /* VIMAGE */
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.flowid = ifp->if_index;
}

static __inline void
mld_scrub_context(struct mbuf *m)
{

	m->m_pkthdr.PH_loc.ptr = NULL;
	m->m_pkthdr.flowid = 0;
}

/*
 * Restore context from a queued output chain.
 * Return saved ifindex.
 *
 * VIMAGE: The assertion is there to make sure that we
 * actually called CURVNET_SET() with what's in the mbuf chain.
 */
static __inline uint32_t
mld_restore_context(struct mbuf *m)
{

#if defined(VIMAGE) && defined(INVARIANTS)
	KASSERT(curvnet == m->m_pkthdr.PH_loc.ptr,
	    ("%s: called when curvnet was not restored: curvnet %p m ptr %p",
	    __func__, curvnet, m->m_pkthdr.PH_loc.ptr));
#endif
	return (m->m_pkthdr.flowid);
}

/*
 * Retrieve or set threshold between group-source queries in seconds.
 *
 * VIMAGE: Assume curvnet set by caller.
 * SMPng: NOTE: Serialized by MLD lock.
 */
static int
sysctl_mld_gsr(SYSCTL_HANDLER_ARGS)
{
	int error;
	int i;

	error = sysctl_wire_old_buffer(req, sizeof(int));
	if (error)
		return (error);

	MLD_LOCK();

	i = V_mld_gsrdelay.tv_sec;

	error = sysctl_handle_int(oidp, &i, 0, req);
	if (error || !req->newptr)
		goto out_locked;

	if (i < -1 || i >= 60) {
		error = EINVAL;
		goto out_locked;
	}

	CTR2(KTR_MLD, "change mld_gsrdelay from %d to %d",
	    V_mld_gsrdelay.tv_sec, i);
	V_mld_gsrdelay.tv_sec = i;

out_locked:
	MLD_UNLOCK();
	return (error);
}

/*
 * Expose struct mld_ifsoftc to userland, keyed by ifindex.
 * For use by ifmcstat(8).
 *
 * VIMAGE: Assume curvnet set by caller. The node handler itself
 * is not directly virtualized.
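 */

#if 0
/*
 * Editor's illustrative sketch (userland) -- not part of the original
 * change.  It shows how a consumer such as ifmcstat(8) could read the
 * per-interface MLD state exported by the ifinfo node declared above:
 * resolve the node to a MIB vector, append the interface index as the
 * final name component, and read out a struct mld_ifinfo.  Header
 * choices and the example_* name are assumptions; error handling is
 * minimal.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <netinet/in.h>
#include <netinet6/mld6_var.h>
#include <stdio.h>

int
example_print_mld_ifinfo(u_int ifindex)
{
	struct mld_ifinfo info;
	int mib[CTL_MAXNAME];
	size_t miblen, len;

	miblen = CTL_MAXNAME;
	if (sysctlnametomib("net.inet6.mld.ifinfo", mib, &miblen) == -1)
		return (-1);
	mib[miblen] = ifindex;		/* name[0] seen by the handler */
	len = sizeof(info);
	if (sysctl(mib, miblen + 1, &info, &len, NULL, 0) == -1)
		return (-1);
	printf("ifindex %u: MLDv%u\n", ifindex, info.mli_version);
	return (0);
}
#endif

/* sysctl_mld_ifinfo(), documented above, follows.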
*/ static int sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS) { struct epoch_tracker et; int *name; int error; u_int namelen; struct ifnet *ifp; struct mld_ifsoftc *mli; name = (int *)arg1; namelen = arg2; if (req->newptr != NULL) return (EPERM); if (namelen != 1) return (EINVAL); error = sysctl_wire_old_buffer(req, sizeof(struct mld_ifinfo)); if (error) return (error); IN6_MULTI_LOCK(); IN6_MULTI_LIST_LOCK(); MLD_LOCK(); NET_EPOCH_ENTER(et); error = ENOENT; ifp = ifnet_byindex(name[0]); if (ifp == NULL) goto out_locked; LIST_FOREACH(mli, &V_mli_head, mli_link) { if (ifp == mli->mli_ifp) { struct mld_ifinfo info; info.mli_version = mli->mli_version; info.mli_v1_timer = mli->mli_v1_timer; info.mli_v2_timer = mli->mli_v2_timer; info.mli_flags = mli->mli_flags; info.mli_rv = mli->mli_rv; info.mli_qi = mli->mli_qi; info.mli_qri = mli->mli_qri; info.mli_uri = mli->mli_uri; error = SYSCTL_OUT(req, &info, sizeof(info)); break; } } out_locked: NET_EPOCH_EXIT(et); MLD_UNLOCK(); IN6_MULTI_LIST_UNLOCK(); IN6_MULTI_UNLOCK(); return (error); } /* * Dispatch an entire queue of pending packet chains. * VIMAGE: Assumes the vnet pointer has been set. */ static void mld_dispatch_queue(struct mbufq *mq, int limit) { struct mbuf *m; while ((m = mbufq_dequeue(mq)) != NULL) { CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, mq, m); mld_dispatch_packet(m); if (--limit == 0) break; } } /* * Filter outgoing MLD report state by group. * * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1) * and node-local addresses. However, kernel and socket consumers * always embed the KAME scope ID in the address provided, so strip it * when performing comparison. * Note: This is not the same as the *multicast* scope. * * Return zero if the given group is one for which MLD reports * should be suppressed, or non-zero if reports should be issued. */ static __inline int mld_is_addr_reported(const struct in6_addr *addr) { KASSERT(IN6_IS_ADDR_MULTICAST(addr), ("%s: not multicast", __func__)); if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL) return (0); if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) { struct in6_addr tmp = *addr; in6_clearscope(&tmp); if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes)) return (0); } return (1); } /* * Attach MLD when PF_INET6 is attached to an interface. Assumes that the * current VNET is set by the caller. */ struct mld_ifsoftc * mld_domifattach(struct ifnet *ifp) { struct mld_ifsoftc *mli; CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp, if_name(ifp)); mli = malloc(sizeof(struct mld_ifsoftc), M_MLD, M_WAITOK | M_ZERO); mli->mli_ifp = ifp; mli->mli_version = MLD_VERSION_2; mli->mli_flags = 0; mli->mli_rv = MLD_RV_INIT; mli->mli_qi = MLD_QI_INIT; mli->mli_qri = MLD_QRI_INIT; mli->mli_uri = MLD_URI_INIT; mbufq_init(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS); if ((ifp->if_flags & IFF_MULTICAST) == 0) mli->mli_flags |= MLIF_SILENT; if (mld_use_allow) mli->mli_flags |= MLIF_USEALLOW; MLD_LOCK(); LIST_INSERT_HEAD(&V_mli_head, mli, mli_link); MLD_UNLOCK(); return (mli); } /* * Hook for ifdetach. * * NOTE: Some finalization tasks need to run before the protocol domain * is detached, but also before the link layer does its cleanup. * Run before link-layer cleanup; cleanup groups, but do not free MLD state. * * SMPng: Caller must hold IN6_MULTI_LOCK(). * Must take IF_ADDR_LOCK() to cover if_multiaddrs iterator. * XXX This routine is also bitten by unlocked ifma_protospec access. 
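 */

#if 0
/*
 * Editor's worked example -- not part of the original change.  A few
 * concrete inputs to the mld_is_addr_reported() suppression policy
 * above:
 *
 *   ff01::1    interface-local all-nodes  -> suppressed (node-local scope)
 *   ff02::1    link-local all-nodes       -> suppressed (all-hosts group)
 *   ff02::16   all-MLDv2-capable routers  -> reported
 *   ff0e::101  global-scope group         -> reported
 *
 * Note that the all-nodes comparison is made on a copy with the KAME
 * scope ID stripped, as the function's own comment explains.
 */
#endif

/* mld_ifdetach(), documented above, follows.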
*/ void mld_ifdetach(struct ifnet *ifp, struct in6_multi_head *inmh) { struct epoch_tracker et; struct mld_ifsoftc *mli; struct ifmultiaddr *ifma; struct in6_multi *inm; CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp, if_name(ifp)); IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK(); mli = MLD_IFINFO(ifp); IF_ADDR_WLOCK(ifp); /* * Extract list of in6_multi associated with the detaching ifp * which the PF_INET6 layer is about to release. */ NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { inm = in6m_ifmultiaddr_get_inm(ifma); if (inm == NULL) continue; in6m_disconnect_locked(inmh, inm); if (mli->mli_version == MLD_VERSION_2) { in6m_clear_recorded(inm); /* * We need to release the final reference held * for issuing the INCLUDE {}. */ if (inm->in6m_state == MLD_LEAVING_MEMBER) { inm->in6m_state = MLD_NOT_MEMBER; in6m_rele_locked(inmh, inm); } } } NET_EPOCH_EXIT(et); IF_ADDR_WUNLOCK(ifp); MLD_UNLOCK(); } /* * Hook for domifdetach. * Runs after link-layer cleanup; free MLD state. * * SMPng: Normally called with IF_AFDATA_LOCK held. */ void mld_domifdetach(struct ifnet *ifp) { CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp, if_name(ifp)); MLD_LOCK(); mli_delete_locked(ifp); MLD_UNLOCK(); } static void mli_delete_locked(struct ifnet *ifp) { struct mld_ifsoftc *mli, *tmli; CTR3(KTR_MLD, "%s: freeing mld_ifsoftc for ifp %p(%s)", __func__, ifp, if_name(ifp)); MLD_LOCK_ASSERT(); LIST_FOREACH_SAFE(mli, &V_mli_head, mli_link, tmli) { if (mli->mli_ifp == ifp) { /* * Free deferred General Query responses. */ mbufq_drain(&mli->mli_gq); LIST_REMOVE(mli, mli_link); free(mli, M_MLD); return; } } } /* * Process a received MLDv1 general or address-specific query. * Assumes that the query header has been pulled up to sizeof(mld_hdr). * * NOTE: Can't be fully const correct as we temporarily embed scope ID in * mld_addr. This is OK as we own the mbuf chain. */ static int mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, /*const*/ struct mld_hdr *mld) { struct ifmultiaddr *ifma; struct mld_ifsoftc *mli; struct in6_multi *inm; int is_general_query; uint16_t timer; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif NET_EPOCH_ASSERT(); is_general_query = 0; if (!mld_v1enable) { CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp)); return (0); } /* * RFC3810 Section 6.2: MLD queries must originate from * a router's link-local address. */ if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &ip6->ip6_src), ifp, if_name(ifp)); return (0); } /* * Do address field validation upfront before we accept * the query. */ if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) { /* * MLDv1 General Query. * If this was not sent to the all-nodes group, ignore it. */ struct in6_addr dst; dst = ip6->ip6_dst; in6_clearscope(&dst); if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) return (EINVAL); is_general_query = 1; } else { /* * Embed scope ID of receiving interface in MLD query for * lookup whilst we don't hold other locks. */ in6_setscope(&mld->mld_addr, ifp, NULL); } IN6_MULTI_LIST_LOCK(); MLD_LOCK(); /* * Switch to MLDv1 host compatibility mode. 
*/ mli = MLD_IFINFO(ifp); KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp)); mld_set_version(mli, MLD_VERSION_1); timer = (ntohs(mld->mld_maxdelay) * MLD_FASTHZ) / MLD_TIMER_SCALE; if (timer == 0) timer = 1; if (is_general_query) { /* * For each reporting group joined on this * interface, kick the report timer. */ CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)", ifp, if_name(ifp)); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { inm = in6m_ifmultiaddr_get_inm(ifma); if (inm == NULL) continue; mld_v1_update_group(inm, timer); } } else { /* * MLDv1 Group-Specific Query. * If this is a group-specific MLDv1 query, we need only * look up the single group to process it. */ inm = in6m_lookup_locked(ifp, &mld->mld_addr); if (inm != NULL) { CTR3(KTR_MLD, "process v1 query %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp)); mld_v1_update_group(inm, timer); } /* XXX Clear embedded scope ID as userland won't expect it. */ in6_clearscope(&mld->mld_addr); } MLD_UNLOCK(); IN6_MULTI_LIST_UNLOCK(); return (0); } /* * Update the report timer on a group in response to an MLDv1 query. * * If we are becoming the reporting member for this group, start the timer. * If we already are the reporting member for this group, and timer is * below the threshold, reset it. * * We may be updating the group for the first time since we switched * to MLDv2. If we are, then we must clear any recorded source lists, * and transition to REPORTING state; the group timer is overloaded * for group and group-source query responses. * * Unlike MLDv2, the delay per group should be jittered * to avoid bursts of MLDv1 reports. */ static void mld_v1_update_group(struct in6_multi *inm, const int timer) { #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif CTR4(KTR_MLD, "%s: %s/%s timer=%d", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), if_name(inm->in6m_ifp), timer); IN6_MULTI_LIST_LOCK_ASSERT(); switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: break; case MLD_REPORTING_MEMBER: if (inm->in6m_timer != 0 && inm->in6m_timer <= timer) { CTR1(KTR_MLD, "%s: REPORTING and timer running, " "skipping.", __func__); break; } /* FALLTHROUGH */ case MLD_SG_QUERY_PENDING_MEMBER: case MLD_G_QUERY_PENDING_MEMBER: case MLD_IDLE_MEMBER: case MLD_LAZY_MEMBER: case MLD_AWAKENING_MEMBER: CTR1(KTR_MLD, "%s: ->REPORTING", __func__); inm->in6m_state = MLD_REPORTING_MEMBER; inm->in6m_timer = MLD_RANDOM_DELAY(timer); V_current_state_timers_running6 = 1; break; case MLD_SLEEPING_MEMBER: CTR1(KTR_MLD, "%s: ->AWAKENING", __func__); inm->in6m_state = MLD_AWAKENING_MEMBER; break; case MLD_LEAVING_MEMBER: break; } } /* * Process a received MLDv2 general, group-specific or * group-and-source-specific query. * * Assumes that mld points to a struct mldv2_query which is stored in * contiguous memory. * * Return 0 if successful, otherwise an appropriate error code is returned. */ static int mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, struct mbuf *m, struct mldv2_query *mld, const int off, const int icmp6len) { struct mld_ifsoftc *mli; struct in6_multi *inm; uint32_t maxdelay, nsrc, qqi; int is_general_query; uint16_t timer; uint8_t qrv; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif NET_EPOCH_ASSERT(); if (!mld_v2enable) { CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &ip6->ip6_src), ifp, if_name(ifp)); return (0); } /* * RFC3810 Section 6.2: MLD queries must originate from * a router's link-local address. 
 */
	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
		CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &ip6->ip6_src), ifp, if_name(ifp));
		return (0);
	}

	is_general_query = 0;

	CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, if_name(ifp));

	maxdelay = ntohs(mld->mld_maxdelay);	/* in milliseconds */
	if (maxdelay >= 32768) {
		maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
		    (MLD_MRC_EXP(maxdelay) + 3);
	}
	timer = (maxdelay * MLD_FASTHZ) / MLD_TIMER_SCALE;
	if (timer == 0)
		timer = 1;

	qrv = MLD_QRV(mld->mld_misc);
	if (qrv < 2) {
		CTR3(KTR_MLD, "%s: clamping qrv %d to %d", __func__,
		    qrv, MLD_RV_INIT);
		qrv = MLD_RV_INIT;
	}

	qqi = mld->mld_qqi;
	if (qqi >= 128) {
		qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
		    (MLD_QQIC_EXP(mld->mld_qqi) + 3);
	}

	nsrc = ntohs(mld->mld_numsrc);
	if (nsrc > MLD_MAX_GS_SOURCES)
		return (EMSGSIZE);
	if (icmp6len < sizeof(struct mldv2_query) +
	    (nsrc * sizeof(struct in6_addr)))
		return (EMSGSIZE);

	/*
	 * Do further input validation upfront to avoid resetting timers
	 * should we need to discard this query.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
		/*
		 * A general query with a source list has undefined
		 * behaviour; discard it.
		 */
		if (nsrc > 0)
			return (EINVAL);
		is_general_query = 1;
	} else {
		/*
		 * Embed scope ID of receiving interface in MLD query for
		 * lookup whilst we don't hold other locks (due to KAME
		 * locking lameness). We own this mbuf chain just now.
		 */
		in6_setscope(&mld->mld_addr, ifp, NULL);
	}

	IN6_MULTI_LIST_LOCK();
	MLD_LOCK();

	mli = MLD_IFINFO(ifp);
	KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));

	/*
	 * Discard the v2 query if we're in Compatibility Mode.
	 * The RFC is pretty clear that hosts need to stay in MLDv1 mode
	 * until the Old Version Querier Present timer expires.
	 */
	if (mli->mli_version != MLD_VERSION_2)
		goto out_locked;

	mld_set_version(mli, MLD_VERSION_2);
	mli->mli_rv = qrv;
	mli->mli_qi = qqi;
	mli->mli_qri = maxdelay;

	CTR4(KTR_MLD, "%s: qrv %d qi %d maxdelay %d", __func__, qrv, qqi,
	    maxdelay);

	if (is_general_query) {
		/*
		 * MLDv2 General Query.
		 *
		 * Schedule a current-state report on this ifp for
		 * all groups, possibly containing source lists.
		 *
		 * If there is a pending General Query response
		 * scheduled earlier than the selected delay, do
		 * not schedule any other reports.
		 * Otherwise, reset the interface timer.
		 */
		CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)",
		    ifp, if_name(ifp));
		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
			mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
			V_interface_timers_running6 = 1;
		}
	} else {
		/*
		 * MLDv2 Group-specific or Group-and-source-specific Query.
		 *
		 * Group-source-specific queries are throttled on
		 * a per-group basis to defeat denial-of-service attempts.
		 * Queries for groups we are not a member of on this
		 * link are simply ignored.
		 */
		inm = in6m_lookup_locked(ifp, &mld->mld_addr);
		if (inm == NULL)
			goto out_locked;
		if (nsrc > 0) {
			if (!ratecheck(&inm->in6m_lastgsrtv,
			    &V_mld_gsrdelay)) {
				CTR1(KTR_MLD, "%s: GS query throttled.",
				    __func__);
				goto out_locked;
			}
		}
		CTR2(KTR_MLD, "process v2 group query on ifp %p(%s)",
		    ifp, if_name(ifp));
		/*
		 * If there is a pending General Query response
		 * scheduled sooner than the selected delay, no
		 * further report need be scheduled.
		 * Otherwise, prepare to respond to the
		 * group-specific or group-and-source query.
		 */
		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer)
			mld_v2_process_group_query(inm, mli, timer, m, mld,
			    off);

		/* XXX Clear embedded scope ID as userland won't expect it.
*/ in6_clearscope(&mld->mld_addr); } out_locked: MLD_UNLOCK(); IN6_MULTI_LIST_UNLOCK(); return (0); } /* * Process a received MLDv2 group-specific or group-and-source-specific * query. * Return <0 if any error occurred. Currently this is ignored. */ static int mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli, int timer, struct mbuf *m0, struct mldv2_query *mld, const int off) { int retval; uint16_t nsrc; IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); retval = 0; switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: case MLD_SLEEPING_MEMBER: case MLD_LAZY_MEMBER: case MLD_AWAKENING_MEMBER: case MLD_IDLE_MEMBER: case MLD_LEAVING_MEMBER: return (retval); break; case MLD_REPORTING_MEMBER: case MLD_G_QUERY_PENDING_MEMBER: case MLD_SG_QUERY_PENDING_MEMBER: break; } nsrc = ntohs(mld->mld_numsrc); /* Length should be checked by calling function. */ KASSERT((m0->m_flags & M_PKTHDR) == 0 || m0->m_pkthdr.len >= off + sizeof(struct mldv2_query) + nsrc * sizeof(struct in6_addr), ("mldv2 packet is too short: (%d bytes < %zd bytes, m=%p)", m0->m_pkthdr.len, off + sizeof(struct mldv2_query) + nsrc * sizeof(struct in6_addr), m0)); /* * Deal with group-specific queries upfront. * If any group query is already pending, purge any recorded * source-list state if it exists, and schedule a query response * for this group-specific query. */ if (nsrc == 0) { if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER || inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) { in6m_clear_recorded(inm); timer = min(inm->in6m_timer, timer); } inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER; inm->in6m_timer = MLD_RANDOM_DELAY(timer); V_current_state_timers_running6 = 1; return (retval); } /* * Deal with the case where a group-and-source-specific query has * been received but a group-specific query is already pending. */ if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) { timer = min(inm->in6m_timer, timer); inm->in6m_timer = MLD_RANDOM_DELAY(timer); V_current_state_timers_running6 = 1; return (retval); } /* * Finally, deal with the case where a group-and-source-specific * query has been received, where a response to a previous g-s-r * query exists, or none exists. * In this case, we need to parse the source-list which the Querier * has provided us with and check if we have any source list filter * entries at T1 for these sources. If we do not, there is no need * schedule a report and the query may be dropped. * If we do, we must record them and schedule a current-state * report for those sources. */ if (inm->in6m_nsrc > 0) { struct in6_addr srcaddr; int i, nrecorded; int soff; soff = off + sizeof(struct mldv2_query); nrecorded = 0; for (i = 0; i < nsrc; i++) { m_copydata(m0, soff, sizeof(struct in6_addr), (caddr_t)&srcaddr); retval = in6m_record_source(inm, &srcaddr); if (retval < 0) break; nrecorded += retval; soff += sizeof(struct in6_addr); } if (nrecorded > 0) { CTR1(KTR_MLD, "%s: schedule response to SG query", __func__); inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER; inm->in6m_timer = MLD_RANDOM_DELAY(timer); V_current_state_timers_running6 = 1; } } return (retval); } /* * Process a received MLDv1 host membership report. * Assumes mld points to mld_hdr in pulled up mbuf chain. * * NOTE: Can't be fully const correct as we temporarily embed scope ID in * mld_addr. This is OK as we own the mbuf chain. 
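 */

#if 0
/*
 * Editor's illustrative sketch -- not part of the original change.  The
 * Maximum Response Code decoding used by mld_v2_input_query() above,
 * written out with plain bit operations (RFC 3810, Section 5.1.3):
 * codes below 32768 are literal; larger codes are floating point with
 * a 3-bit exponent and a 12-bit mantissa.  The result is the maximum
 * response delay in milliseconds.  The example_* name is hypothetical.
 */
static uint32_t
example_decode_mrc(uint16_t mrc)
{
	uint32_t exp, mant;

	if (mrc < 32768)
		return (mrc);
	mant = mrc & 0x0fff;
	exp = (mrc >> 12) & 0x0007;
	return ((mant | 0x1000) << (exp + 3));
}
#endif

/* mld_v1_input_report(), documented above, follows.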
*/ static int mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, /*const*/ struct mld_hdr *mld) { struct in6_addr src, dst; struct in6_ifaddr *ia; struct in6_multi *inm; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif NET_EPOCH_ASSERT(); if (!mld_v1enable) { CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp)); return (0); } if (ifp->if_flags & IFF_LOOPBACK) return (0); /* * MLDv1 reports must originate from a host's link-local address, * or the unspecified address (when booting). */ src = ip6->ip6_src; in6_clearscope(&src); if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) { CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &ip6->ip6_src), ifp, if_name(ifp)); return (EINVAL); } /* * RFC2710 Section 4: MLDv1 reports must pertain to a multicast * group, and must be directed to the group itself. */ dst = ip6->ip6_dst; in6_clearscope(&dst); if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) || !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) { CTR3(KTR_MLD, "ignore v1 query dst %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &ip6->ip6_dst), ifp, if_name(ifp)); return (EINVAL); } /* * Make sure we don't hear our own membership report, as fast * leave requires knowing that we are the only member of a * group. Assume we used the link-local address if available, * otherwise look for ::. * * XXX Note that scope ID comparison is needed for the address * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be * performed for the on-wire address. */ ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); if ((ia && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia))) || (ia == NULL && IN6_IS_ADDR_UNSPECIFIED(&src))) { if (ia != NULL) ifa_free(&ia->ia_ifa); return (0); } if (ia != NULL) ifa_free(&ia->ia_ifa); CTR3(KTR_MLD, "process v1 report %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp)); /* * Embed scope ID of receiving interface in MLD query for lookup * whilst we don't hold other locks (due to KAME locking lameness). */ if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) in6_setscope(&mld->mld_addr, ifp, NULL); IN6_MULTI_LIST_LOCK(); MLD_LOCK(); /* * MLDv1 report suppression. * If we are a member of this group, and our membership should be * reported, and our group timer is pending or about to be reset, * stop our group timer by transitioning to the 'lazy' state. */ inm = in6m_lookup_locked(ifp, &mld->mld_addr); if (inm != NULL) { struct mld_ifsoftc *mli; mli = inm->in6m_mli; KASSERT(mli != NULL, ("%s: no mli for ifp %p", __func__, ifp)); /* * If we are in MLDv2 host mode, do not allow the * other host's MLDv1 report to suppress our reports. */ if (mli->mli_version == MLD_VERSION_2) goto out_locked; inm->in6m_timer = 0; switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: case MLD_SLEEPING_MEMBER: break; case MLD_REPORTING_MEMBER: case MLD_IDLE_MEMBER: case MLD_AWAKENING_MEMBER: CTR3(KTR_MLD, "report suppressed for %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp)); case MLD_LAZY_MEMBER: inm->in6m_state = MLD_LAZY_MEMBER; break; case MLD_G_QUERY_PENDING_MEMBER: case MLD_SG_QUERY_PENDING_MEMBER: case MLD_LEAVING_MEMBER: break; } } out_locked: MLD_UNLOCK(); IN6_MULTI_LIST_UNLOCK(); /* XXX Clear embedded scope ID as userland won't expect it. */ in6_clearscope(&mld->mld_addr); return (0); } /* * MLD input path. * * Assume query messages which fit in a single ICMPv6 message header * have been pulled up. 
* Assume that userland will want to see the message, even if it * otherwise fails kernel input validation; do not free it. * Pullup may however free the mbuf chain m if it fails. * * Return IPPROTO_DONE if we freed m. Otherwise, return 0. */ int mld_input(struct mbuf **mp, int off, int icmp6len) { struct ifnet *ifp; struct ip6_hdr *ip6; struct mbuf *m; struct mld_hdr *mld; int mldlen; m = *mp; CTR3(KTR_MLD, "%s: called w/mbuf (%p,%d)", __func__, m, off); ifp = m->m_pkthdr.rcvif; /* Pullup to appropriate size. */ if (m->m_len < off + sizeof(*mld)) { m = m_pullup(m, off + sizeof(*mld)); if (m == NULL) { ICMP6STAT_INC(icp6s_badlen); return (IPPROTO_DONE); } } mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off); if (mld->mld_type == MLD_LISTENER_QUERY && icmp6len >= sizeof(struct mldv2_query)) { mldlen = sizeof(struct mldv2_query); } else { mldlen = sizeof(struct mld_hdr); } if (m->m_len < off + mldlen) { m = m_pullup(m, off + mldlen); if (m == NULL) { ICMP6STAT_INC(icp6s_badlen); return (IPPROTO_DONE); } } *mp = m; ip6 = mtod(m, struct ip6_hdr *); mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off); /* * Userland needs to see all of this traffic for implementing * the endpoint discovery portion of multicast routing. */ switch (mld->mld_type) { case MLD_LISTENER_QUERY: icmp6_ifstat_inc(ifp, ifs6_in_mldquery); if (icmp6len == sizeof(struct mld_hdr)) { if (mld_v1_input_query(ifp, ip6, mld) != 0) return (0); } else if (icmp6len >= sizeof(struct mldv2_query)) { if (mld_v2_input_query(ifp, ip6, m, (struct mldv2_query *)mld, off, icmp6len) != 0) return (0); } break; case MLD_LISTENER_REPORT: icmp6_ifstat_inc(ifp, ifs6_in_mldreport); if (mld_v1_input_report(ifp, ip6, mld) != 0) return (0); break; case MLDV2_LISTENER_REPORT: icmp6_ifstat_inc(ifp, ifs6_in_mldreport); break; case MLD_LISTENER_DONE: icmp6_ifstat_inc(ifp, ifs6_in_mlddone); break; default: break; } return (0); } /* * Fast timeout handler (global). * VIMAGE: Timeout handlers are expected to service all vimages. */ static struct callout mldfast_callout; static void mld_fasttimo(void *arg __unused) { struct epoch_tracker et; struct in6_multi_head inmh; VNET_ITERATOR_DECL(vnet_iter); SLIST_INIT(&inmh); NET_EPOCH_ENTER(et); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); mld_fasttimo_vnet(&inmh); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); NET_EPOCH_EXIT(et); in6m_release_list_deferred(&inmh); callout_reset(&mldfast_callout, hz / MLD_FASTHZ, mld_fasttimo, NULL); } /* * Fast timeout handler (per-vnet). * * VIMAGE: Assume caller has set up our curvnet. */ static void mld_fasttimo_vnet(struct in6_multi_head *inmh) { struct mbufq scq; /* State-change packets */ struct mbufq qrq; /* Query response packets */ struct ifnet *ifp; struct mld_ifsoftc *mli; struct ifmultiaddr *ifma; struct in6_multi *inm; int uri_fasthz; uri_fasthz = 0; /* * Quick check to see if any work needs to be done, in order to * minimize the overhead of fasttimo processing. * SMPng: XXX Unlocked reads. */ if (!V_current_state_timers_running6 && !V_interface_timers_running6 && !V_state_change_timers_running6) return; IN6_MULTI_LIST_LOCK(); MLD_LOCK(); /* * MLDv2 General Query response timer processing. */ if (V_interface_timers_running6) { CTR1(KTR_MLD, "%s: interface timers running", __func__); V_interface_timers_running6 = 0; LIST_FOREACH(mli, &V_mli_head, mli_link) { if (mli->mli_v2_timer == 0) { /* Do nothing. 
*/ } else if (--mli->mli_v2_timer == 0) { mld_v2_dispatch_general_query(mli); } else { V_interface_timers_running6 = 1; } } } if (!V_current_state_timers_running6 && !V_state_change_timers_running6) goto out_locked; V_current_state_timers_running6 = 0; V_state_change_timers_running6 = 0; CTR1(KTR_MLD, "%s: state change timers running", __func__); /* * MLD host report and state-change timer processing. * Note: Processing a v2 group timer may remove a node. */ LIST_FOREACH(mli, &V_mli_head, mli_link) { ifp = mli->mli_ifp; if (mli->mli_version == MLD_VERSION_2) { uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri * MLD_FASTHZ); mbufq_init(&qrq, MLD_MAX_G_GS_PACKETS); mbufq_init(&scq, MLD_MAX_STATE_CHANGE_PACKETS); } IF_ADDR_WLOCK(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { inm = in6m_ifmultiaddr_get_inm(ifma); if (inm == NULL) continue; switch (mli->mli_version) { case MLD_VERSION_1: mld_v1_process_group_timer(inmh, inm); break; case MLD_VERSION_2: mld_v2_process_group_timers(inmh, &qrq, &scq, inm, uri_fasthz); break; } } IF_ADDR_WUNLOCK(ifp); switch (mli->mli_version) { case MLD_VERSION_1: /* * Transmit reports for this lifecycle. This * is done while not holding IF_ADDR_LOCK * since this can call * in6ifa_ifpforlinklocal() which locks * IF_ADDR_LOCK internally as well as * ip6_output() to transmit a packet. */ while ((inm = SLIST_FIRST(inmh)) != NULL) { SLIST_REMOVE_HEAD(inmh, in6m_defer); (void)mld_v1_transmit_report(inm, MLD_LISTENER_REPORT); } break; case MLD_VERSION_2: mld_dispatch_queue(&qrq, 0); mld_dispatch_queue(&scq, 0); break; } } out_locked: MLD_UNLOCK(); IN6_MULTI_LIST_UNLOCK(); } /* * Update host report group timer. * Will update the global pending timer flags. */ static void mld_v1_process_group_timer(struct in6_multi_head *inmh, struct in6_multi *inm) { int report_timer_expired; IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); if (inm->in6m_timer == 0) { report_timer_expired = 0; } else if (--inm->in6m_timer == 0) { report_timer_expired = 1; } else { V_current_state_timers_running6 = 1; return; } switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: case MLD_IDLE_MEMBER: case MLD_LAZY_MEMBER: case MLD_SLEEPING_MEMBER: case MLD_AWAKENING_MEMBER: break; case MLD_REPORTING_MEMBER: if (report_timer_expired) { inm->in6m_state = MLD_IDLE_MEMBER; SLIST_INSERT_HEAD(inmh, inm, in6m_defer); } break; case MLD_G_QUERY_PENDING_MEMBER: case MLD_SG_QUERY_PENDING_MEMBER: case MLD_LEAVING_MEMBER: break; } } /* * Update a group's timers for MLDv2. * Will update the global pending timer flags. * Note: Unlocked read from mli. */ static void mld_v2_process_group_timers(struct in6_multi_head *inmh, struct mbufq *qrq, struct mbufq *scq, struct in6_multi *inm, const int uri_fasthz) { int query_response_timer_expired; int state_change_retransmit_timer_expired; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); query_response_timer_expired = 0; state_change_retransmit_timer_expired = 0; /* * During a transition from compatibility mode back to MLDv2, * a group record in REPORTING state may still have its group * timer active. This is a no-op in this function; it is easier * to deal with it here than to complicate the slow-timeout path. 
*/ if (inm->in6m_timer == 0) { query_response_timer_expired = 0; } else if (--inm->in6m_timer == 0) { query_response_timer_expired = 1; } else { V_current_state_timers_running6 = 1; } if (inm->in6m_sctimer == 0) { state_change_retransmit_timer_expired = 0; } else if (--inm->in6m_sctimer == 0) { state_change_retransmit_timer_expired = 1; } else { V_state_change_timers_running6 = 1; } /* We are in fasttimo, so be quick about it. */ if (!state_change_retransmit_timer_expired && !query_response_timer_expired) return; switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: case MLD_SLEEPING_MEMBER: case MLD_LAZY_MEMBER: case MLD_AWAKENING_MEMBER: case MLD_IDLE_MEMBER: break; case MLD_G_QUERY_PENDING_MEMBER: case MLD_SG_QUERY_PENDING_MEMBER: /* * Respond to a previously pending Group-Specific * or Group-and-Source-Specific query by enqueueing * the appropriate Current-State report for * immediate transmission. */ if (query_response_timer_expired) { int retval __unused; retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1, (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER), 0); CTR2(KTR_MLD, "%s: enqueue record = %d", __func__, retval); inm->in6m_state = MLD_REPORTING_MEMBER; in6m_clear_recorded(inm); } /* FALLTHROUGH */ case MLD_REPORTING_MEMBER: case MLD_LEAVING_MEMBER: if (state_change_retransmit_timer_expired) { /* * State-change retransmission timer fired. * If there are any further pending retransmissions, * set the global pending state-change flag, and * reset the timer. */ if (--inm->in6m_scrv > 0) { inm->in6m_sctimer = uri_fasthz; V_state_change_timers_running6 = 1; } /* * Retransmit the previously computed state-change * report. If there are no further pending * retransmissions, the mbuf queue will be consumed. * Update T0 state to T1 as we have now sent * a state-change. */ (void)mld_v2_merge_state_changes(inm, scq); in6m_commit(inm); CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), if_name(inm->in6m_ifp)); /* * If we are leaving the group for good, make sure * we release MLD's reference to it. * This release must be deferred using a SLIST, * as we are called from a loop which traverses * the in_ifmultiaddr TAILQ. */ if (inm->in6m_state == MLD_LEAVING_MEMBER && inm->in6m_scrv == 0) { inm->in6m_state = MLD_NOT_MEMBER; in6m_disconnect_locked(inmh, inm); in6m_rele_locked(inmh, inm); } } break; } } /* * Switch to a different version on the given interface, * as per Section 9.12. */ static void mld_set_version(struct mld_ifsoftc *mli, const int version) { int old_version_timer; MLD_LOCK_ASSERT(); CTR4(KTR_MLD, "%s: switching to v%d on ifp %p(%s)", __func__, version, mli->mli_ifp, if_name(mli->mli_ifp)); if (version == MLD_VERSION_1) { /* * Compute the "Older Version Querier Present" timer as per * Section 9.12. */ old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri; old_version_timer *= MLD_SLOWHZ; mli->mli_v1_timer = old_version_timer; } if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) { mli->mli_version = MLD_VERSION_1; mld_v2_cancel_link_timers(mli); } } /* * Cancel pending MLDv2 timers for the given link and all groups * joined on it; state-change, general-query, and group-query timers. 
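 */

#if 0
/*
 * Editor's illustrative sketch -- not part of the original change.  The
 * "Older Version Querier Present" timeout that mld_set_version() above
 * computes, per RFC 3810, Section 9.12:
 *
 *   OVQP = (Robustness Variable * Query Interval) + Query Response Interval
 *
 * Assuming the usual defaults (MLD_RV_INIT = 2, MLD_QI_INIT = 125 s,
 * MLD_QRI_INIT = 10 s), that is (2 * 125) + 10 = 260 seconds, scaled by
 * MLD_SLOWHZ into slow-timeout ticks; only after it expires does the
 * link revert from MLDv1 compatibility mode to MLDv2.  The example_*
 * name is hypothetical.
 */
static int
example_ovqp_ticks(int rv, int qi, int qri)
{

	return (((rv * qi) + qri) * MLD_SLOWHZ);
}
#endif

/* mld_v2_cancel_link_timers(), documented above, follows.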
*/ static void mld_v2_cancel_link_timers(struct mld_ifsoftc *mli) { struct epoch_tracker et; struct in6_multi_head inmh; struct ifmultiaddr *ifma; struct ifnet *ifp; struct in6_multi *inm; CTR3(KTR_MLD, "%s: cancel v2 timers on ifp %p(%s)", __func__, mli->mli_ifp, if_name(mli->mli_ifp)); SLIST_INIT(&inmh); IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); /* * Fast-track this potentially expensive operation * by checking all the global 'timer pending' flags. */ if (!V_interface_timers_running6 && !V_state_change_timers_running6 && !V_current_state_timers_running6) return; mli->mli_v2_timer = 0; ifp = mli->mli_ifp; IF_ADDR_WLOCK(ifp); NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { inm = in6m_ifmultiaddr_get_inm(ifma); if (inm == NULL) continue; switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: case MLD_IDLE_MEMBER: case MLD_LAZY_MEMBER: case MLD_SLEEPING_MEMBER: case MLD_AWAKENING_MEMBER: break; case MLD_LEAVING_MEMBER: /* * If we are leaving the group and switching * version, we need to release the final * reference held for issuing the INCLUDE {}. */ if (inm->in6m_refcount == 1) in6m_disconnect_locked(&inmh, inm); in6m_rele_locked(&inmh, inm); /* FALLTHROUGH */ case MLD_G_QUERY_PENDING_MEMBER: case MLD_SG_QUERY_PENDING_MEMBER: in6m_clear_recorded(inm); /* FALLTHROUGH */ case MLD_REPORTING_MEMBER: inm->in6m_sctimer = 0; inm->in6m_timer = 0; inm->in6m_state = MLD_REPORTING_MEMBER; /* * Free any pending MLDv2 state-change records. */ mbufq_drain(&inm->in6m_scq); break; } } NET_EPOCH_EXIT(et); IF_ADDR_WUNLOCK(ifp); in6m_release_list_deferred(&inmh); } /* * Global slowtimo handler. * VIMAGE: Timeout handlers are expected to service all vimages. */ static struct callout mldslow_callout; static void mld_slowtimo(void *arg __unused) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); mld_slowtimo_vnet(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); callout_reset(&mldslow_callout, hz / MLD_SLOWHZ, mld_slowtimo, NULL); } /* * Per-vnet slowtimo handler. */ static void mld_slowtimo_vnet(void) { struct mld_ifsoftc *mli; MLD_LOCK(); LIST_FOREACH(mli, &V_mli_head, mli_link) { mld_v1_process_querier_timers(mli); } MLD_UNLOCK(); } /* * Update the Older Version Querier Present timers for a link. * See Section 9.12 of RFC 3810. */ static void mld_v1_process_querier_timers(struct mld_ifsoftc *mli) { MLD_LOCK_ASSERT(); if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) { /* * MLDv1 Querier Present timer expired; revert to MLDv2. */ CTR5(KTR_MLD, "%s: transition from v%d -> v%d on %p(%s)", __func__, mli->mli_version, MLD_VERSION_2, mli->mli_ifp, if_name(mli->mli_ifp)); mli->mli_version = MLD_VERSION_2; } } /* * Transmit an MLDv1 report immediately. */ static int mld_v1_transmit_report(struct in6_multi *in6m, const int type) { struct ifnet *ifp; struct in6_ifaddr *ia; struct ip6_hdr *ip6; struct mbuf *mh, *md; struct mld_hdr *mld; NET_EPOCH_ASSERT(); IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); ifp = in6m->in6m_ifp; /* in process of being freed */ if (ifp == NULL) return (0); ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); /* ia may be NULL if link-local address is tentative. 
 */
	mh = m_gethdr(M_NOWAIT, MT_DATA);
	if (mh == NULL) {
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		return (ENOMEM);
	}
	md = m_get(M_NOWAIT, MT_DATA);
	if (md == NULL) {
		m_free(mh);
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		return (ENOMEM);
	}
	mh->m_next = md;

	/*
	 * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
	 * that ether_output() does not need to allocate another mbuf
	 * for the header in the most common case.
	 */
	M_ALIGN(mh, sizeof(struct ip6_hdr));
	mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
	mh->m_len = sizeof(struct ip6_hdr);

	ip6 = mtod(mh, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
	ip6->ip6_dst = in6m->in6m_addr;

	md->m_len = sizeof(struct mld_hdr);
	mld = mtod(md, struct mld_hdr *);
	mld->mld_type = type;
	mld->mld_code = 0;
	mld->mld_cksum = 0;
	mld->mld_maxdelay = 0;
	mld->mld_reserved = 0;
	mld->mld_addr = in6m->in6m_addr;
	in6_clearscope(&mld->mld_addr);
	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
	    sizeof(struct ip6_hdr), sizeof(struct mld_hdr));

	mld_save_context(mh, ifp);
	mh->m_flags |= M_MLDV1;

	mld_dispatch_packet(mh);

	if (ia != NULL)
		ifa_free(&ia->ia_ifa);
	return (0);
}

/*
 * Process a state change from the upper layer for the given IPv6 group.
 *
 * Each socket holds a reference on the in_multi in its own ip_moptions.
 * The socket layer will have made the necessary updates to the group
 * state; it is now up to MLD to issue a state change report if there
 * has been any change between T0 (when the last state-change was issued)
 * and T1 (now).
 *
 * We use the MLDv2 state machine at group level. The MLD module
 * however makes the decision as to which MLD protocol version to speak.
 * A state change *from* INCLUDE {} always means an initial join.
 * A state change *to* INCLUDE {} always means a final leave.
 *
 * If delay is non-zero, and the state change is an initial multicast
 * join, the state change report will be delayed by 'delay' ticks
 * in units of MLD_FASTHZ if MLDv1 is active on the link; otherwise
 * the initial MLDv2 state change report will be delayed by whichever
 * is sooner, a pending state-change timer or delay itself.
 *
 * VIMAGE: curvnet should have been set by caller, as this routine
 * is called from the socket option handlers.
 */
int
mld_change_state(struct in6_multi *inm, const int delay)
{
	struct mld_ifsoftc *mli;
	struct ifnet *ifp;
	int error;

	IN6_MULTI_LIST_LOCK_ASSERT();

	error = 0;

	/*
	 * Check if the in6_multi has already been disconnected.
	 */
	if (inm->in6m_ifp == NULL) {
		CTR1(KTR_MLD, "%s: inm is disconnected", __func__);
		return (0);
	}

	/*
	 * Try to detect if the upper layer just asked us to change state
	 * for an interface which has now gone away.
	 */
	KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__));
	ifp = inm->in6m_ifma->ifma_ifp;
	if (ifp == NULL)
		return (0);
	/*
	 * Sanity check that netinet6's notion of ifp is the
	 * same as net's.
	 */
	KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__));

	MLD_LOCK();
	mli = MLD_IFINFO(ifp);
	KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));

	/*
	 * If we detect a state transition to or from MCAST_UNDEFINED
	 * for this group, then we are starting or finishing an MLD
	 * life cycle for this group.
*/ if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) { CTR3(KTR_MLD, "%s: inm transition %d -> %d", __func__, inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode); if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) { CTR1(KTR_MLD, "%s: initial join", __func__); error = mld_initial_join(inm, mli, delay); goto out_locked; } else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) { CTR1(KTR_MLD, "%s: final leave", __func__); mld_final_leave(inm, mli); goto out_locked; } } else { CTR1(KTR_MLD, "%s: filter set change", __func__); } error = mld_handle_state_change(inm, mli); out_locked: MLD_UNLOCK(); return (error); } /* * Perform the initial join for an MLD group. * * When joining a group: * If the group should have its MLD traffic suppressed, do nothing. * MLDv1 starts sending MLDv1 host membership reports. * MLDv2 will schedule an MLDv2 state-change report containing the * initial state of the membership. * * If the delay argument is non-zero, then we must delay sending the * initial state change for delay ticks (in units of MLD_FASTHZ). */ static int mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli, const int delay) { struct epoch_tracker et; struct ifnet *ifp; struct mbufq *mq; int error, retval, syncstates; int odelay; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif CTR4(KTR_MLD, "%s: initial join %s on ifp %p(%s)", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), inm->in6m_ifp, if_name(inm->in6m_ifp)); error = 0; syncstates = 1; ifp = inm->in6m_ifp; IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); KASSERT(mli && mli->mli_ifp == ifp, ("%s: inconsistent ifp", __func__)); /* * Groups joined on loopback or marked as 'not reported', * enter the MLD_SILENT_MEMBER state and * are never reported in any protocol exchanges. * All other groups enter the appropriate state machine * for the version in use on this link. * A link marked as MLIF_SILENT causes MLD to be completely * disabled for the link. */ if ((ifp->if_flags & IFF_LOOPBACK) || (mli->mli_flags & MLIF_SILENT) || !mld_is_addr_reported(&inm->in6m_addr)) { CTR1(KTR_MLD, "%s: not kicking state machine for silent group", __func__); inm->in6m_state = MLD_SILENT_MEMBER; inm->in6m_timer = 0; } else { /* * Deal with overlapping in_multi lifecycle. * If this group was LEAVING, then make sure * we drop the reference we picked up to keep the * group around for the final INCLUDE {} enqueue. */ if (mli->mli_version == MLD_VERSION_2 && inm->in6m_state == MLD_LEAVING_MEMBER) { inm->in6m_refcount--; MPASS(inm->in6m_refcount > 0); } inm->in6m_state = MLD_REPORTING_MEMBER; switch (mli->mli_version) { case MLD_VERSION_1: /* * If a delay was provided, only use it if * it is greater than the delay normally * used for an MLDv1 state change report, * and delay sending the initial MLDv1 report * by not transitioning to the IDLE state. */ odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI * MLD_FASTHZ); if (delay) { inm->in6m_timer = max(delay, odelay); V_current_state_timers_running6 = 1; } else { inm->in6m_state = MLD_IDLE_MEMBER; NET_EPOCH_ENTER(et); error = mld_v1_transmit_report(inm, MLD_LISTENER_REPORT); NET_EPOCH_EXIT(et); if (error == 0) { inm->in6m_timer = odelay; V_current_state_timers_running6 = 1; } } break; case MLD_VERSION_2: /* * Defer update of T0 to T1, until the first copy * of the state change has been transmitted. */ syncstates = 0; /* * Immediately enqueue a State-Change Report for * this interface, freeing any previous reports. * Don't kick the timers if there is nothing to do, * or if an error occurred. 
*/ mq = &inm->in6m_scq; mbufq_drain(mq); retval = mld_v2_enqueue_group_record(mq, inm, 1, 0, 0, (mli->mli_flags & MLIF_USEALLOW)); CTR2(KTR_MLD, "%s: enqueue record = %d", __func__, retval); if (retval <= 0) { error = retval * -1; break; } /* * Schedule transmission of pending state-change * report up to RV times for this link. The timer * will fire at the next mld_fasttimo (~200ms), * giving us an opportunity to merge the reports. * * If a delay was provided to this function, only * use this delay if sooner than the existing one. */ KASSERT(mli->mli_rv > 1, ("%s: invalid robustness %d", __func__, mli->mli_rv)); inm->in6m_scrv = mli->mli_rv; if (delay) { if (inm->in6m_sctimer > 1) { inm->in6m_sctimer = min(inm->in6m_sctimer, delay); } else inm->in6m_sctimer = delay; } else inm->in6m_sctimer = 1; V_state_change_timers_running6 = 1; error = 0; break; } } /* * Only update the T0 state if state change is atomic, * i.e. we don't need to wait for a timer to fire before we * can consider the state change to have been communicated. */ if (syncstates) { in6m_commit(inm); CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), if_name(inm->in6m_ifp)); } return (error); } /* * Issue an intermediate state change during the life-cycle. */ static int mld_handle_state_change(struct in6_multi *inm, struct mld_ifsoftc *mli) { struct ifnet *ifp; int retval; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif CTR4(KTR_MLD, "%s: state change for %s on ifp %p(%s)", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), inm->in6m_ifp, if_name(inm->in6m_ifp)); ifp = inm->in6m_ifp; IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); KASSERT(mli && mli->mli_ifp == ifp, ("%s: inconsistent ifp", __func__)); if ((ifp->if_flags & IFF_LOOPBACK) || (mli->mli_flags & MLIF_SILENT) || !mld_is_addr_reported(&inm->in6m_addr) || (mli->mli_version != MLD_VERSION_2)) { if (!mld_is_addr_reported(&inm->in6m_addr)) { CTR1(KTR_MLD, "%s: not kicking state machine for silent group", __func__); } CTR1(KTR_MLD, "%s: nothing to do", __func__); in6m_commit(inm); CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), if_name(inm->in6m_ifp)); return (0); } mbufq_drain(&inm->in6m_scq); retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0, (mli->mli_flags & MLIF_USEALLOW)); CTR2(KTR_MLD, "%s: enqueue record = %d", __func__, retval); if (retval <= 0) return (-retval); /* * If record(s) were enqueued, start the state-change * report timer for this group. */ inm->in6m_scrv = mli->mli_rv; inm->in6m_sctimer = 1; V_state_change_timers_running6 = 1; return (0); } /* * Perform the final leave for a multicast address. * * When leaving a group: * MLDv1 sends a DONE message, if and only if we are the reporter. * MLDv2 enqueues a state-change report containing a transition * to INCLUDE {} for immediate transmission. */ static void mld_final_leave(struct in6_multi *inm, struct mld_ifsoftc *mli) { struct epoch_tracker et; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif CTR4(KTR_MLD, "%s: final leave %s on ifp %p(%s)", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), inm->in6m_ifp, if_name(inm->in6m_ifp)); IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: case MLD_LEAVING_MEMBER: /* Already leaving or left; do nothing. 
/*
 * Perform the final leave for a multicast address.
 *
 * When leaving a group:
 *  MLDv1 sends a DONE message, if and only if we are the reporter.
 *  MLDv2 enqueues a state-change report containing a transition
 *  to INCLUDE {} for immediate transmission.
 */
static void
mld_final_leave(struct in6_multi *inm, struct mld_ifsoftc *mli)
{
	struct epoch_tracker et;
#ifdef KTR
	char ip6tbuf[INET6_ADDRSTRLEN];
#endif

	CTR4(KTR_MLD, "%s: final leave %s on ifp %p(%s)", __func__,
	    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
	    inm->in6m_ifp, if_name(inm->in6m_ifp));

	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
	case MLD_LEAVING_MEMBER:
		/* Already leaving or left; do nothing. */
		CTR1(KTR_MLD,
		    "%s: not kicking state machine for silent group",
		    __func__);
		break;
	case MLD_REPORTING_MEMBER:
	case MLD_IDLE_MEMBER:
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_SG_QUERY_PENDING_MEMBER:
		if (mli->mli_version == MLD_VERSION_1) {
#ifdef INVARIANTS
			if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
			    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER)
				panic("%s: MLDv2 state reached, not MLDv2 mode",
				     __func__);
#endif
			NET_EPOCH_ENTER(et);
			mld_v1_transmit_report(inm, MLD_LISTENER_DONE);
			NET_EPOCH_EXIT(et);
			inm->in6m_state = MLD_NOT_MEMBER;
			V_current_state_timers_running6 = 1;
		} else if (mli->mli_version == MLD_VERSION_2) {
			/*
			 * Stop group timer and all pending reports.
			 * Immediately enqueue a state-change report
			 * TO_IN {} to be sent on the next fast timeout,
			 * giving us an opportunity to merge reports.
			 */
			mbufq_drain(&inm->in6m_scq);
			inm->in6m_timer = 0;
			inm->in6m_scrv = mli->mli_rv;
			CTR4(KTR_MLD, "%s: Leaving %s/%s with %d "
			    "pending retransmissions.", __func__,
			    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
			    if_name(inm->in6m_ifp), inm->in6m_scrv);
			if (inm->in6m_scrv == 0) {
				inm->in6m_state = MLD_NOT_MEMBER;
				inm->in6m_sctimer = 0;
			} else {
				int retval __diagused;

				in6m_acquire_locked(inm);

				retval = mld_v2_enqueue_group_record(
				    &inm->in6m_scq, inm, 1, 0, 0,
				    (mli->mli_flags & MLIF_USEALLOW));
				KASSERT(retval != 0,
				    ("%s: enqueue record = %d", __func__,
				     retval));

				inm->in6m_state = MLD_LEAVING_MEMBER;
				inm->in6m_sctimer = 1;
				V_state_change_timers_running6 = 1;
			}
			break;
		}
		break;
	case MLD_LAZY_MEMBER:
	case MLD_SLEEPING_MEMBER:
	case MLD_AWAKENING_MEMBER:
		/* Our reports are suppressed; do nothing. */
		break;
	}

	in6m_commit(inm);
	CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
	    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
	    if_name(inm->in6m_ifp));
	inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
	CTR3(KTR_MLD, "%s: T1 now MCAST_UNDEFINED for %p/%s",
	    __func__, &inm->in6m_addr, if_name(inm->in6m_ifp));
}
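/*
 * Editorial summary, not from the original source: how the function
 * below picks a record type.  For a state change, EXCLUDE at t1 yields
 * TO_EX and INCLUDE at t1 yields TO_IN (or ALLOW_NEW/BLOCK_OLD when
 * use_block_allow rewrites an initial join or final leave), while an
 * unchanged mode with sources present is handed off as a filter-list
 * change.  For a current-state record, the mode maps to MODE_EX or
 * MODE_IN.
 */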
/*
 * Enqueue an MLDv2 group record to the given output queue.
 *
 * If is_state_change is zero, a current-state record is appended.
 * If is_state_change is non-zero, a state-change report is appended.
 *
 * If is_group_query is non-zero, an mbuf packet chain is allocated.
 * If is_group_query is zero, and if there is a packet with free space
 * at the tail of the queue, it will be appended to providing there
 * is enough free space.
 * Otherwise a new mbuf packet chain is allocated.
 *
 * If is_source_query is non-zero, each source is checked to see if
 * it was recorded for a Group-Source query, and will be omitted if
 * it is not both in-mode and recorded.
 *
 * If use_block_allow is non-zero, state change reports for initial join
 * and final leave, on an inclusive mode group with a source list, will be
 * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively.
 *
 * The function will attempt to allocate leading space in the packet
 * for the IPv6+ICMP headers to be prepended without fragmenting the chain.
 *
 * If successful the size of all data appended to the queue is returned,
 * otherwise an error code less than zero is returned, or zero if
 * no record(s) were appended.
 */
static int
mld_v2_enqueue_group_record(struct mbufq *mq, struct in6_multi *inm,
    const int is_state_change, const int is_group_query,
    const int is_source_query, const int use_block_allow)
{
	struct mldv2_record mr;
	struct mldv2_record *pmr;
	struct ifnet *ifp;
	struct ip6_msource *ims, *nims;
	struct mbuf *m0, *m, *md;
	int is_filter_list_change;
	int minrec0len, m0srcs, msrcs, nbytes, off;
	int record_has_sources;
	int now;
	int type;
	uint8_t mode;
#ifdef KTR
	char ip6tbuf[INET6_ADDRSTRLEN];
#endif

	IN6_MULTI_LIST_LOCK_ASSERT();

	ifp = inm->in6m_ifp;
	is_filter_list_change = 0;
	m = NULL;
	m0 = NULL;
	m0srcs = 0;
	msrcs = 0;
	nbytes = 0;
	nims = NULL;
	record_has_sources = 1;
	pmr = NULL;
	type = MLD_DO_NOTHING;
	mode = inm->in6m_st[1].iss_fmode;

	/*
	 * If we did not transition out of ASM mode during t0->t1,
	 * and there are no source nodes to process, we can skip
	 * the generation of source records.
	 */
	if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 &&
	    inm->in6m_nsrc == 0)
		record_has_sources = 0;

	if (is_state_change) {
		/*
		 * Queue a state change record.
		 * If the mode did not change, and there are non-ASM
		 * listeners or source filters present,
		 * we potentially need to issue two records for the group.
		 * If there are ASM listeners, and there was no filter
		 * mode transition of any kind, do nothing.
		 *
		 * If we are transitioning to MCAST_UNDEFINED, we need
		 * not send any sources. A transition to/from this state is
		 * considered inclusive with some special treatment.
		 *
		 * If we are rewriting initial joins/leaves to use
		 * ALLOW/BLOCK, and the group's membership is inclusive,
		 * we need to send sources in all cases.
		 */
		if (mode != inm->in6m_st[0].iss_fmode) {
			if (mode == MCAST_EXCLUDE) {
				CTR1(KTR_MLD, "%s: change to EXCLUDE",
				    __func__);
				type = MLD_CHANGE_TO_EXCLUDE_MODE;
			} else {
				CTR1(KTR_MLD, "%s: change to INCLUDE",
				    __func__);
				if (use_block_allow) {
					/*
					 * XXX
					 * Here we're interested in state
					 * edges either direction between
					 * MCAST_UNDEFINED and MCAST_INCLUDE.
					 * Perhaps we should just check
					 * the group state, rather than
					 * the filter mode.
					 */
					if (mode == MCAST_UNDEFINED) {
						type = MLD_BLOCK_OLD_SOURCES;
					} else {
						type = MLD_ALLOW_NEW_SOURCES;
					}
				} else {
					type = MLD_CHANGE_TO_INCLUDE_MODE;
					if (mode == MCAST_UNDEFINED)
						record_has_sources = 0;
				}
			}
		} else {
			if (record_has_sources) {
				is_filter_list_change = 1;
			} else {
				type = MLD_DO_NOTHING;
			}
		}
	} else {
		/*
		 * Queue a current state record.
		 */
		if (mode == MCAST_EXCLUDE) {
			type = MLD_MODE_IS_EXCLUDE;
		} else if (mode == MCAST_INCLUDE) {
			type = MLD_MODE_IS_INCLUDE;
			KASSERT(inm->in6m_st[1].iss_asm == 0,
			    ("%s: inm %p is INCLUDE but ASM count is %d",
			     __func__, inm, inm->in6m_st[1].iss_asm));
		}
	}

	/*
	 * Generate the filter list changes using a separate function.
	 */
	if (is_filter_list_change)
		return (mld_v2_enqueue_filter_change(mq, inm));

	if (type == MLD_DO_NOTHING) {
		CTR3(KTR_MLD, "%s: nothing to do for %s/%s",
		    __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
		    if_name(inm->in6m_ifp));
		return (0);
	}

	/*
	 * If any sources are present, we must be able to fit at least
	 * one in the trailing space of the tail packet's mbuf,
	 * ideally more.
	 */
	minrec0len = sizeof(struct mldv2_record);
	if (record_has_sources)
		minrec0len += sizeof(struct in6_addr);

	CTR4(KTR_MLD, "%s: queueing %s for %s/%s", __func__,
	    mld_rec_type_to_str(type),
	    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
	    if_name(inm->in6m_ifp));
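	/*
	 * Editorial arithmetic, not from the original source: a group
	 * record header (struct mldv2_record) is 20 bytes and each
	 * source address 16 bytes, so minrec0len is 20 or 36 here.
	 * Assuming MLD_MTUSPACE covers the IPv6 header, the hop-by-hop
	 * Router Alert option and the MLDv2 report header (roughly 56
	 * bytes), a fresh packet on a 1500-byte MTU link has room for
	 * about (1500 - 56 - 20) / 16 = 89 sources in its first record.
	 */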
	/*
	 * Check if we have a packet in the tail of the queue for this
	 * group into which the first group record for this group will fit.
	 * Otherwise allocate a new packet.
	 * Always allocate leading space for IP6+RA+ICMPV6+REPORT.
	 * Note: Group records for G/GSR query responses MUST be sent
	 * in their own packet.
	 */
	m0 = mbufq_last(mq);
	if (!is_group_query &&
	    m0 != NULL &&
	    (m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
	    (m0->m_pkthdr.len + minrec0len) <
	     (ifp->if_mtu - MLD_MTUSPACE)) {
		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
			    sizeof(struct mldv2_record)) /
			    sizeof(struct in6_addr);
		m = m0;
		CTR1(KTR_MLD, "%s: use existing packet", __func__);
	} else {
		if (mbufq_full(mq)) {
			CTR1(KTR_MLD, "%s: outbound queue full", __func__);
			return (-ENOMEM);
		}
		m = NULL;
		m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
		    sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
		if (!is_state_change && !is_group_query)
			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (m == NULL)
			m = m_gethdr(M_NOWAIT, MT_DATA);
		if (m == NULL)
			return (-ENOMEM);

		mld_save_context(m, ifp);

		CTR1(KTR_MLD, "%s: allocated first packet", __func__);
	}

	/*
	 * Append group record.
	 * If we have sources, we don't know how many yet.
	 */
	mr.mr_type = type;
	mr.mr_datalen = 0;
	mr.mr_numsrc = 0;
	mr.mr_addr = inm->in6m_addr;
	in6_clearscope(&mr.mr_addr);
	if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
		if (m != m0)
			m_freem(m);
		CTR1(KTR_MLD, "%s: m_append() failed.", __func__);
		return (-ENOMEM);
	}
	nbytes += sizeof(struct mldv2_record);

	/*
	 * Append as many sources as will fit in the first packet.
	 * If we are appending to a new packet, the chain allocation
	 * may potentially use clusters; use m_getptr() in this case.
	 * If we are appending to an existing packet, we need to obtain
	 * a pointer to the group record after m_append(), in case a new
	 * mbuf was allocated.
	 *
	 * Only append sources which are in-mode at t1. If we are
	 * transitioning to MCAST_UNDEFINED state on the group, and
	 * use_block_allow is zero, do not include source entries.
	 * Otherwise, we need to include this source in the report.
	 *
	 * Only report recorded sources in our filter set when responding
	 * to a group-source query.
	 */
	if (record_has_sources) {
		if (m == m0) {
			md = m_last(m);
			pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
			    md->m_len - nbytes);
		} else {
			md = m_getptr(m, 0, &off);
			pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
			    off);
		}
		msrcs = 0;
		RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs,
		    nims) {
			CTR2(KTR_MLD, "%s: visit node %s", __func__,
			    ip6_sprintf(ip6tbuf, &ims->im6s_addr));
			now = im6s_get_mode(inm, ims, 1);
			CTR2(KTR_MLD, "%s: node is %d", __func__, now);
			if ((now != mode) ||
			    (now == mode && (!use_block_allow &&
			    mode == MCAST_UNDEFINED))) {
				CTR1(KTR_MLD, "%s: skip node", __func__);
				continue;
			}
			if (is_source_query && ims->im6s_stp == 0) {
				CTR1(KTR_MLD, "%s: skip unrecorded node",
				    __func__);
				continue;
			}
			CTR1(KTR_MLD, "%s: append node", __func__);
			if (!m_append(m, sizeof(struct in6_addr),
			    (void *)&ims->im6s_addr)) {
				if (m != m0)
					m_freem(m);
				CTR1(KTR_MLD, "%s: m_append() failed.",
				    __func__);
				return (-ENOMEM);
			}
			nbytes += sizeof(struct in6_addr);
			++msrcs;
			if (msrcs == m0srcs)
				break;
		}
		CTR2(KTR_MLD, "%s: msrcs is %d this packet", __func__,
		    msrcs);
		pmr->mr_numsrc = htons(msrcs);
		nbytes += (msrcs * sizeof(struct in6_addr));
	}

	if (is_source_query && msrcs == 0) {
		CTR1(KTR_MLD, "%s: no recorded sources to report", __func__);
		if (m != m0)
			m_freem(m);
		return (0);
	}

	/*
	 * We are good to go with first packet.
	 */
	if (m != m0) {
		CTR1(KTR_MLD, "%s: enqueueing first packet", __func__);
		m->m_pkthdr.vt_nrecs = 1;
		mbufq_enqueue(mq, m);
	} else
		m->m_pkthdr.vt_nrecs++;
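	/*
	 * Editorial note, not from the original source: the walk above
	 * uses RB_FOREACH_SAFE and leaves its cursor in nims when the
	 * packet fills up; the loop below resumes the same walk with
	 * RB_FOREACH_FROM, so no source is visited twice across packet
	 * boundaries.
	 */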
	/*
	 * No further work needed if no source list in packet(s).
	 */
	if (!record_has_sources)
		return (nbytes);

	/*
	 * Whilst sources remain to be announced, we need to allocate
	 * a new packet and fill out as many sources as will fit.
	 * Always try for a cluster first.
	 */
	while (nims != NULL) {
		if (mbufq_full(mq)) {
			CTR1(KTR_MLD, "%s: outbound queue full", __func__);
			return (-ENOMEM);
		}
		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (m == NULL)
			m = m_gethdr(M_NOWAIT, MT_DATA);
		if (m == NULL)
			return (-ENOMEM);
		mld_save_context(m, ifp);
		md = m_getptr(m, 0, &off);
		pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off);
		CTR1(KTR_MLD, "%s: allocated next packet", __func__);

		if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
			if (m != m0)
				m_freem(m);
			CTR1(KTR_MLD, "%s: m_append() failed.", __func__);
			return (-ENOMEM);
		}
		m->m_pkthdr.vt_nrecs = 1;
		nbytes += sizeof(struct mldv2_record);

		m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
		    sizeof(struct mldv2_record)) / sizeof(struct in6_addr);

		msrcs = 0;
		RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
			CTR2(KTR_MLD, "%s: visit node %s",
			    __func__, ip6_sprintf(ip6tbuf, &ims->im6s_addr));
			now = im6s_get_mode(inm, ims, 1);
			if ((now != mode) ||
			    (now == mode && (!use_block_allow &&
			    mode == MCAST_UNDEFINED))) {
				CTR1(KTR_MLD, "%s: skip node", __func__);
				continue;
			}
			if (is_source_query && ims->im6s_stp == 0) {
				CTR1(KTR_MLD, "%s: skip unrecorded node",
				    __func__);
				continue;
			}
			CTR1(KTR_MLD, "%s: append node", __func__);
			if (!m_append(m, sizeof(struct in6_addr),
			    (void *)&ims->im6s_addr)) {
				if (m != m0)
					m_freem(m);
				CTR1(KTR_MLD, "%s: m_append() failed.",
				    __func__);
				return (-ENOMEM);
			}
			++msrcs;
			if (msrcs == m0srcs)
				break;
		}
		pmr->mr_numsrc = htons(msrcs);
		nbytes += (msrcs * sizeof(struct in6_addr));

		CTR1(KTR_MLD, "%s: enqueueing next packet", __func__);
		mbufq_enqueue(mq, m);
	}

	return (nbytes);
}

/*
 * Type used to mark record pass completion.
 * We exploit the fact we can cast to this easily from the
 * current filter modes on each ip_msource node.
 */
typedef enum {
	REC_NONE = 0x00,	/* MCAST_UNDEFINED */
	REC_ALLOW = 0x01,	/* MCAST_INCLUDE */
	REC_BLOCK = 0x02,	/* MCAST_EXCLUDE */
	REC_FULL = REC_ALLOW | REC_BLOCK
} rectype_t;
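/*
 * Editorial note, not from the original source: REC_ALLOW and REC_BLOCK
 * mirror the numeric values of MCAST_INCLUDE (1) and MCAST_EXCLUDE (2),
 * which is what makes the casts in mld_v2_enqueue_filter_change() legal.
 * The inversion (~mode & REC_FULL) applied to sources that became
 * undefined at t1 then falls out naturally: on an INCLUDE-mode group,
 * (~REC_ALLOW & REC_FULL) == REC_BLOCK, so a source dropped from the
 * include list is reported as BLOCK_OLD; on an EXCLUDE-mode group,
 * (~REC_BLOCK & REC_FULL) == REC_ALLOW, so a source dropped from the
 * exclude list is reported as ALLOW_NEW.
 */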
/*
 * Enqueue an MLDv2 filter list change to the given output queue.
 *
 * Source list filter state is held in an RB-tree. When the filter list
 * for a group is changed without changing its mode, we need to compute
 * the deltas between T0 and T1 for each source in the filter set,
 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
 *
 * As we may potentially queue two record types, and the entire R-B tree
 * needs to be walked at once, we break this out into its own function
 * so we can generate a tightly packed queue of packets.
 *
 * XXX This could be written to only use one tree walk, although that makes
 * serializing into the mbuf chains a bit harder. For now we do two walks
 * which makes things easier on us, and it may or may not be harder on
 * the L2 cache.
 *
 * If successful the size of all data appended to the queue is returned,
 * otherwise an error code less than zero is returned, or zero if
 * no record(s) were appended.
 */
static int
mld_v2_enqueue_filter_change(struct mbufq *mq, struct in6_multi *inm)
{
	static const int MINRECLEN =
	    sizeof(struct mldv2_record) + sizeof(struct in6_addr);
	struct ifnet *ifp;
	struct mldv2_record mr;
	struct mldv2_record *pmr;
	struct ip6_msource *ims, *nims;
	struct mbuf *m, *m0, *md;
	int m0srcs, nbytes, npbytes, off, rsrcs, schanged;
	uint8_t mode, now, then;
	rectype_t crt, drt, nrt;
#ifdef KTR
	int nallow, nblock;
	char ip6tbuf[INET6_ADDRSTRLEN];
#endif

	IN6_MULTI_LIST_LOCK_ASSERT();

	if (inm->in6m_nsrc == 0 ||
	    (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0))
		return (0);

	ifp = inm->in6m_ifp;			/* interface */
	mode = inm->in6m_st[1].iss_fmode;	/* filter mode at t1 */
	crt = REC_NONE;	/* current group record type */
	drt = REC_NONE;	/* mask of completed group record types */
	nrt = REC_NONE;	/* record type for current node */
	m0srcs = 0;	/* # sources that will fit in current mbuf chain */
	npbytes = 0;	/* # of bytes appended this packet */
	nbytes = 0;	/* # of bytes appended to group's state-change queue */
	rsrcs = 0;	/* # sources encoded in current record */
	schanged = 0;	/* # nodes encoded in overall filter change */
#ifdef KTR
	nallow = 0;	/* # of source entries in ALLOW_NEW */
	nblock = 0;	/* # of source entries in BLOCK_OLD */
#endif
	nims = NULL;	/* next tree node pointer */

	/*
	 * For each possible filter record mode.
	 * The first kind of source we encounter tells us which
	 * is the first kind of record we start appending.
	 * If a node transitioned to UNDEFINED at t1, its mode is treated
	 * as the inverse of the group's filter mode.
	 */
	while (drt != REC_FULL) {
		do {
			m0 = mbufq_last(mq);
			if (m0 != NULL &&
			    (m0->m_pkthdr.vt_nrecs + 1 <=
			     MLD_V2_REPORT_MAXRECS) &&
			    (m0->m_pkthdr.len + MINRECLEN) <
			     (ifp->if_mtu - MLD_MTUSPACE)) {
				m = m0;
				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
					    sizeof(struct mldv2_record)) /
					    sizeof(struct in6_addr);
				CTR1(KTR_MLD,
				    "%s: use previous packet", __func__);
			} else {
				m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
				if (m == NULL)
					m = m_gethdr(M_NOWAIT, MT_DATA);
				if (m == NULL) {
					CTR1(KTR_MLD,
					    "%s: m_get*() failed", __func__);
					return (-ENOMEM);
				}
				m->m_pkthdr.vt_nrecs = 0;
				mld_save_context(m, ifp);
				m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
				    sizeof(struct mldv2_record)) /
				    sizeof(struct in6_addr);
				npbytes = 0;
				CTR1(KTR_MLD,
				    "%s: allocated new packet", __func__);
			}
			/*
			 * Append the MLD group record header to the
			 * current packet's data area.
			 * Recalculate pointer to free space for next
			 * group record, in case m_append() allocated
			 * a new mbuf or cluster.
			 */
			memset(&mr, 0, sizeof(mr));
			mr.mr_addr = inm->in6m_addr;
			in6_clearscope(&mr.mr_addr);
			if (!m_append(m, sizeof(mr), (void *)&mr)) {
				if (m != m0)
					m_freem(m);
				CTR1(KTR_MLD,
				    "%s: m_append() failed", __func__);
				return (-ENOMEM);
			}
			npbytes += sizeof(struct mldv2_record);
			if (m != m0) {
				/* new packet; offset in chain */
				md = m_getptr(m, npbytes -
				    sizeof(struct mldv2_record), &off);
				pmr = (struct mldv2_record *)(mtod(md,
				    uint8_t *) + off);
			} else {
				/* current packet; offset from last append */
				md = m_last(m);
				pmr = (struct mldv2_record *)(mtod(md,
				    uint8_t *) + md->m_len -
				    sizeof(struct mldv2_record));
			}
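			/*
			 * Editorial note, not from the original source:
			 * the enclosing while/do pair performs at most
			 * two full tree walks.  The first changed node
			 * fixes crt (ALLOW or BLOCK) for the current
			 * pass; when the walk completes, drt |= crt
			 * marks that record type done and
			 * crt = (~crt & REC_FULL) flips to the other
			 * type, so the outer loop exits once
			 * drt == REC_FULL.
			 */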
			/*
			 * Begin walking the tree for this record type
			 * pass, or continue from where we left off
			 * previously if we had to allocate a new packet.
			 * Only report deltas in-mode at t1.
			 * We need not report included sources as allowed
			 * if we are in inclusive mode on the group,
			 * however the converse is not true.
			 */
			rsrcs = 0;
			if (nims == NULL) {
				nims = RB_MIN(ip6_msource_tree,
				    &inm->in6m_srcs);
			}
			RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
				CTR2(KTR_MLD, "%s: visit node %s", __func__,
				    ip6_sprintf(ip6tbuf, &ims->im6s_addr));
				now = im6s_get_mode(inm, ims, 1);
				then = im6s_get_mode(inm, ims, 0);
				CTR3(KTR_MLD, "%s: mode: t0 %d, t1 %d",
				    __func__, then, now);
				if (now == then) {
					CTR1(KTR_MLD,
					    "%s: skip unchanged", __func__);
					continue;
				}
				if (mode == MCAST_EXCLUDE &&
				    now == MCAST_INCLUDE) {
					CTR1(KTR_MLD,
					    "%s: skip IN src on EX group",
					    __func__);
					continue;
				}
				nrt = (rectype_t)now;
				if (nrt == REC_NONE)
					nrt = (rectype_t)(~mode & REC_FULL);
				if (schanged++ == 0) {
					crt = nrt;
				} else if (crt != nrt)
					continue;
				if (!m_append(m, sizeof(struct in6_addr),
				    (void *)&ims->im6s_addr)) {
					if (m != m0)
						m_freem(m);
					CTR1(KTR_MLD,
					    "%s: m_append() failed", __func__);
					return (-ENOMEM);
				}
#ifdef KTR
				nallow += !!(crt == REC_ALLOW);
				nblock += !!(crt == REC_BLOCK);
#endif
				if (++rsrcs == m0srcs)
					break;
			}
			/*
			 * If we did not append any tree nodes on this
			 * pass, back out of allocations.
			 */
			if (rsrcs == 0) {
				npbytes -= sizeof(struct mldv2_record);
				if (m != m0) {
					CTR1(KTR_MLD,
					    "%s: m_free(m)", __func__);
					m_freem(m);
				} else {
					CTR1(KTR_MLD,
					    "%s: m_adj(m, -mr)", __func__);
					m_adj(m, -((int)sizeof(
					    struct mldv2_record)));
				}
				continue;
			}
			npbytes += (rsrcs * sizeof(struct in6_addr));
			if (crt == REC_ALLOW)
				pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
			else if (crt == REC_BLOCK)
				pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
			pmr->mr_numsrc = htons(rsrcs);
			/*
			 * Count the new group record, and enqueue this
			 * packet if it wasn't already queued.
			 */
			m->m_pkthdr.vt_nrecs++;
			if (m != m0)
				mbufq_enqueue(mq, m);
			nbytes += npbytes;
		} while (nims != NULL);
		drt |= crt;
		crt = (~crt & REC_FULL);
	}

	CTR3(KTR_MLD, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
	    nallow, nblock);

	return (nbytes);
}
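#if 0
/*
 * Purely illustrative sketch, not part of this file: the expected
 * calling pattern for mld_v2_merge_state_changes() below.  A per-link
 * timer handler merges each group's pending state changes into one
 * queue and dispatches it in a single burst.  The scaffolding here
 * (ifscq, the iteration placeholder, the queue limit of 16) is
 * hypothetical.
 */
	struct mbufq ifscq;

	mbufq_init(&ifscq, 16);		/* arbitrary queue limit */
	/* ... for each inm on the link with a pending state change ... */
	(void)mld_v2_merge_state_changes(inm, &ifscq);
	mld_dispatch_queue(&ifscq, MLD_MAX_RESPONSE_BURST);
#endif

/*
 * Merge a group's pending state-change queue into scq.  (Summary added
 * editorially; the function body below is authoritative.)  If further
 * retransmissions are still owed (in6m_scrv > 0), each pending message
 * is copied rather than dequeued so the per-group queue retains it for
 * the next pass.
 */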
static int
mld_v2_merge_state_changes(struct in6_multi *inm, struct mbufq *scq)
{
	struct mbufq *gq;
	struct mbuf *m;		/* pending state-change */
	struct mbuf *m0;	/* copy of pending state-change */
	struct mbuf *mt;	/* last state-change in packet */
	int docopy, domerge;
	u_int recslen;

	docopy = 0;
	domerge = 0;
	recslen = 0;

	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	/*
	 * If there are further pending retransmissions, make a writable
	 * copy of each queued state-change message before merging.
	 */
	if (inm->in6m_scrv > 0)
		docopy = 1;

	gq = &inm->in6m_scq;
#ifdef KTR
	if (mbufq_first(gq) == NULL) {
		CTR2(KTR_MLD, "%s: WARNING: queue for inm %p is empty",
		    __func__, inm);
	}
#endif

	m = mbufq_first(gq);
	while (m != NULL) {
		/*
		 * Only merge the report into the current packet if
		 * there is sufficient space to do so; an MLDv2 report
		 * packet may only contain 65,535 group records.
		 * Always use a simple mbuf chain concatenation to do this,
		 * as large state changes for single groups may have
		 * allocated clusters.
		 */
		domerge = 0;
		mt = mbufq_last(scq);
		if (mt != NULL) {
			recslen = m_length(m, NULL);

			if ((mt->m_pkthdr.vt_nrecs +
			    m->m_pkthdr.vt_nrecs <=
			    MLD_V2_REPORT_MAXRECS) &&
			    (mt->m_pkthdr.len + recslen <=
			    (inm->in6m_ifp->if_mtu - MLD_MTUSPACE)))
				domerge = 1;
		}

		if (!domerge && mbufq_full(gq)) {
			CTR2(KTR_MLD,
			    "%s: outbound queue full, skipping whole packet %p",
			    __func__, m);
			mt = m->m_nextpkt;
			if (!docopy)
				m_freem(m);
			m = mt;
			continue;
		}

		if (!docopy) {
			CTR2(KTR_MLD, "%s: dequeueing %p", __func__, m);
			m0 = mbufq_dequeue(gq);
			m = m0->m_nextpkt;
		} else {
			CTR2(KTR_MLD, "%s: copying %p", __func__, m);
			m0 = m_dup(m, M_NOWAIT);
			if (m0 == NULL)
				return (ENOMEM);
			m0->m_nextpkt = NULL;
			m = m->m_nextpkt;
		}

		if (!domerge) {
			CTR3(KTR_MLD, "%s: queueing %p to scq %p)",
			    __func__, m0, scq);
			mbufq_enqueue(scq, m0);
		} else {
			struct mbuf *mtl;	/* last mbuf of packet mt */

			CTR3(KTR_MLD, "%s: merging %p with ifscq tail %p)",
			    __func__, m0, mt);

			mtl = m_last(mt);
			m0->m_flags &= ~M_PKTHDR;
			mt->m_pkthdr.len += recslen;
			mt->m_pkthdr.vt_nrecs +=
			    m0->m_pkthdr.vt_nrecs;

			mtl->m_next = m0;
		}
	}

	return (0);
}

/*
 * Respond to a pending MLDv2 General Query.
 */
static void
mld_v2_dispatch_general_query(struct mld_ifsoftc *mli)
{
	struct ifmultiaddr *ifma;
	struct ifnet *ifp;
	struct in6_multi *inm;
	int retval __unused;

	NET_EPOCH_ASSERT();
	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	KASSERT(mli->mli_version == MLD_VERSION_2,
	    ("%s: called when version %d", __func__, mli->mli_version));

	/*
	 * Check that there are some packets queued. If so, send them first.
	 * For a large number of groups the reply to a general query can
	 * take many packets; we should finish sending them before starting
	 * to queue the new reply.
	 */
-	if (mbufq_len(&mli->mli_gq) != 0)
+	if (!mbufq_empty(&mli->mli_gq))
		goto send;

	ifp = mli->mli_ifp;

	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		inm = in6m_ifmultiaddr_get_inm(ifma);
		if (inm == NULL)
			continue;
		KASSERT(ifp == inm->in6m_ifp,
		    ("%s: inconsistent ifp", __func__));

		switch (inm->in6m_state) {
		case MLD_NOT_MEMBER:
		case MLD_SILENT_MEMBER:
			break;
		case MLD_REPORTING_MEMBER:
		case MLD_IDLE_MEMBER:
		case MLD_LAZY_MEMBER:
		case MLD_SLEEPING_MEMBER:
		case MLD_AWAKENING_MEMBER:
			inm->in6m_state = MLD_REPORTING_MEMBER;
			retval = mld_v2_enqueue_group_record(&mli->mli_gq,
			    inm, 0, 0, 0, 0);
			CTR2(KTR_MLD, "%s: enqueue record = %d",
			    __func__, retval);
			break;
		case MLD_G_QUERY_PENDING_MEMBER:
		case MLD_SG_QUERY_PENDING_MEMBER:
		case MLD_LEAVING_MEMBER:
			break;
		}
	}

send:
	mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST);

	/*
	 * Slew transmission of bursts over 500ms intervals.
	 */
	if (mbufq_first(&mli->mli_gq) != NULL) {
		mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
		    MLD_RESPONSE_BURST_INTERVAL);
		V_interface_timers_running6 = 1;
	}
}
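/*
 * Editorial note, not from the original source: taken together, the
 * function above sends at most MLD_MAX_RESPONSE_BURST packets per
 * invocation and, if the queue is still non-empty, re-arms mli_v2_timer
 * with a small random delay.  A general query response spanning many
 * packets therefore drains as a series of short bursts spread over
 * roughly 500 ms intervals rather than as one large flood.
 */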
/*
 * Transmit the next pending message in the output queue.
 *
 * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
 * MRT: Nothing needs to be done, as MLD traffic is always local to
 * a link and uses a link-scope multicast address.
 */
static void
mld_dispatch_packet(struct mbuf *m)
{
	struct ip6_moptions im6o;
	struct ifnet *ifp;
	struct ifnet *oifp;
	struct mbuf *m0;
	struct mbuf *md;
	struct ip6_hdr *ip6;
	struct mld_hdr *mld;
	int error;
	int off;
	int type;
	uint32_t ifindex;

	CTR2(KTR_MLD, "%s: transmit %p", __func__, m);
	NET_EPOCH_ASSERT();

	/*
	 * Set VNET image pointer from enqueued mbuf chain
	 * before doing anything else. Whilst we use interface
	 * indexes to guard against interface detach, they are
	 * unique to each VIMAGE and must be retrieved.
	 */
	ifindex = mld_restore_context(m);

	/*
	 * Check if the ifnet still exists. This limits the scope of
	 * any race in the absence of a global ifp lock for low cost
	 * (an array lookup).
	 */
	ifp = ifnet_byindex(ifindex);
	if (ifp == NULL) {
		CTR3(KTR_MLD, "%s: dropped %p as ifindex %u went away.",
		    __func__, m, ifindex);
		m_freem(m);
		IP6STAT_INC(ip6s_noroute);
		goto out;
	}

	im6o.im6o_multicast_hlim = 1;
	im6o.im6o_multicast_loop = (V_ip6_mrouter != NULL);
	im6o.im6o_multicast_ifp = ifp;

	if (m->m_flags & M_MLDV1) {
		m0 = m;
	} else {
		m0 = mld_v2_encap_report(ifp, m);
		if (m0 == NULL) {
			CTR2(KTR_MLD, "%s: dropped %p", __func__, m);
			IP6STAT_INC(ip6s_odropped);
			goto out;
		}
	}

	mld_scrub_context(m0);
	m_clrprotoflags(m);
	m0->m_pkthdr.rcvif = V_loif;

	ip6 = mtod(m0, struct ip6_hdr *);
#if 0
	(void)in6_setscope(&ip6->ip6_dst, ifp, NULL);	/* XXX LOR */
#else
	/*
	 * XXX XXX Break some KPI rules to prevent an LOR which would
	 * occur if we called in6_setscope() at transmission.
	 * See comments at top of file.
	 */
	MLD_EMBEDSCOPE(&ip6->ip6_dst, ifp->if_index);
#endif

	/*
	 * Retrieve the ICMPv6 type before handoff to ip6_output(),
	 * so we can bump the stats.
	 */
	md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
	mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off);
	type = mld->mld_type;

	oifp = NULL;
	error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, &im6o,
	    &oifp, NULL);
	if (error) {
		CTR3(KTR_MLD, "%s: ip6_output(%p) = %d", __func__, m0, error);
		goto out;
	}
	ICMP6STAT_INC(icp6s_outhist[type]);
	if (oifp != NULL) {
		icmp6_ifstat_inc(oifp, ifs6_out_msg);
		switch (type) {
		case MLD_LISTENER_REPORT:
		case MLDV2_LISTENER_REPORT:
			icmp6_ifstat_inc(oifp, ifs6_out_mldreport);
			break;
		case MLD_LISTENER_DONE:
			icmp6_ifstat_inc(oifp, ifs6_out_mlddone);
			break;
		}
	}
out:
	return;
}
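/*
 * Editorial note, not from the original source: for the MLDv2 case, the
 * packet handed to ip6_output() above reaches the wire as
 * [IPv6 | hop-by-hop Router Alert | MLDv2 report header | group records].
 * mld_v2_encap_report() below prepends the IPv6 and report headers; the
 * Router Alert hop-by-hop option is supplied separately through mld_po
 * (see mld_init() setting ip6po_hbh near the end of this file).
 */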
/*
 * Encapsulate an MLDv2 report.
 *
 * KAME IPv6 requires that hop-by-hop options be passed separately,
 * and that the IPv6 header be prepended in a separate mbuf.
 *
 * Returns a pointer to the new mbuf chain head, or NULL if the
 * allocation failed.
 */
static struct mbuf *
mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
{
	struct mbuf *mh;
	struct mldv2_report *mld;
	struct ip6_hdr *ip6;
	struct in6_ifaddr *ia;
	int mldreclen;

	KASSERT(ifp != NULL, ("%s: null ifp", __func__));
	KASSERT((m->m_flags & M_PKTHDR),
	    ("%s: mbuf chain %p is !M_PKTHDR", __func__, m));

	/*
	 * RFC3590: OK to send as :: or tentative during DAD.
	 */
	NET_EPOCH_ASSERT();
	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
	if (ia == NULL)
		CTR1(KTR_MLD, "%s: warning: ia is NULL", __func__);

	mh = m_gethdr(M_NOWAIT, MT_DATA);
	if (mh == NULL) {
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		m_freem(m);
		return (NULL);
	}
	M_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));

	mldreclen = m_length(m, NULL);
	CTR2(KTR_MLD, "%s: mldreclen is %d", __func__, mldreclen);

	mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
	mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
	    sizeof(struct mldv2_report) + mldreclen;

	ip6 = mtod(mh, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
	if (ia != NULL)
		ifa_free(&ia->ia_ifa);
	ip6->ip6_dst = in6addr_linklocal_allv2routers;
	/* scope ID will be set in netisr */

	mld = (struct mldv2_report *)(ip6 + 1);
	mld->mld_type = MLDV2_LISTENER_REPORT;
	mld->mld_code = 0;
	mld->mld_cksum = 0;
	mld->mld_v2_reserved = 0;
	mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs);
	m->m_pkthdr.vt_nrecs = 0;

	mh->m_next = m;
	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
	    sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
	return (mh);
}

#ifdef KTR
static char *
mld_rec_type_to_str(const int type)
{

	switch (type) {
	case MLD_CHANGE_TO_EXCLUDE_MODE:
		return "TO_EX";
		break;
	case MLD_CHANGE_TO_INCLUDE_MODE:
		return "TO_IN";
		break;
	case MLD_MODE_IS_EXCLUDE:
		return "MODE_EX";
		break;
	case MLD_MODE_IS_INCLUDE:
		return "MODE_IN";
		break;
	case MLD_ALLOW_NEW_SOURCES:
		return "ALLOW_NEW";
		break;
	case MLD_BLOCK_OLD_SOURCES:
		return "BLOCK_OLD";
		break;
	default:
		break;
	}
	return "unknown";
}
#endif

static void
mld_init(void *unused __unused)
{

	CTR1(KTR_MLD, "%s: initializing", __func__);

	MLD_LOCK_INIT();

	ip6_initpktopts(&mld_po);
	mld_po.ip6po_hlim = 1;
	mld_po.ip6po_hbh = &mld_ra.hbh;
	mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
	mld_po.ip6po_flags = IP6PO_DONTFRAG;

	callout_init(&mldslow_callout, 1);
	callout_reset(&mldslow_callout, hz / MLD_SLOWHZ, mld_slowtimo, NULL);
	callout_init(&mldfast_callout, 1);
	callout_reset(&mldfast_callout, hz / MLD_FASTHZ, mld_fasttimo, NULL);
}
SYSINIT(mld_init, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_init, NULL);

static void
mld_uninit(void *unused __unused)
{

	CTR1(KTR_MLD, "%s: tearing down", __func__);

	callout_drain(&mldslow_callout);
	callout_drain(&mldfast_callout);

	MLD_LOCK_DESTROY();
}
SYSUNINIT(mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_uninit, NULL);

static void
vnet_mld_init(const void *unused __unused)
{

	CTR1(KTR_MLD, "%s: initializing", __func__);

	LIST_INIT(&V_mli_head);
}
VNET_SYSINIT(vnet_mld_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_init,
    NULL);

static void
vnet_mld_uninit(const void *unused __unused)
{

	/* This can happen if we shutdown the network stack. */
	CTR1(KTR_MLD, "%s: tearing down", __func__);
}
VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_uninit,
    NULL);

static int
mld_modevent(module_t mod, int type, void *unused __unused)
{

	switch (type) {
	case MOD_LOAD:
	case MOD_UNLOAD:
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

static moduledata_t mld_mod = {
    "mld",
    mld_modevent,
    0
};
DECLARE_MODULE(mld, mld_mod, SI_SUB_PROTO_MC, SI_ORDER_ANY);