Index: head/sys/dev/altera/atse/if_atse.c =================================================================== --- head/sys/dev/altera/atse/if_atse.c (revision 357009) +++ head/sys/dev/altera/atse/if_atse.c (revision 357010) @@ -1,1601 +1,1602 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012, 2013 Bjoern A. Zeeb * Copyright (c) 2014 Robert N. M. Watson * Copyright (c) 2016-2017 Ruslan Bukin * All rights reserved. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-11-C-0249) * ("MRC2"), as part of the DARPA MRC research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Altera Triple-Speed Ethernet MegaCore, Function User Guide * UG-01008-3.0, Software Version: 12.0, June 2012. * Available at the time of writing at: * http://www.altera.com/literature/ug/ug_ethernet.pdf * * We are using an Marvell E1111 (Alaska) PHY on the DE4. See mii/e1000phy.c. */ /* * XXX-BZ NOTES: * - ifOutBroadcastPkts are only counted if both ether dst and src are all-1s; * seems an IP core bug, they count ether broadcasts as multicast. Is this * still the case? * - figure out why the TX FIFO fill status and intr did not work as expected. * - test 100Mbit/s and 10Mbit/s * - blacklist the one special factory programmed ethernet address (for now * hardcoded, later from loader?) * - resolve all XXX, left as reminders to shake out details later * - Jumbo frame support */ #include __FBSDID("$FreeBSD$"); #include "opt_device_polling.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define RX_QUEUE_SIZE 4096 #define TX_QUEUE_SIZE 4096 #define NUM_RX_MBUF 512 #define BUFRING_SIZE 8192 #include /* XXX once we'd do parallel attach, we need a global lock for this. */ #define ATSE_ETHERNET_OPTION_BITS_UNDEF 0 #define ATSE_ETHERNET_OPTION_BITS_READ 1 static int atse_ethernet_option_bits_flag = ATSE_ETHERNET_OPTION_BITS_UNDEF; static uint8_t atse_ethernet_option_bits[ALTERA_ETHERNET_OPTION_BITS_LEN]; /* * Softc and critical resource locking. */ #define ATSE_LOCK(_sc) mtx_lock(&(_sc)->atse_mtx) #define ATSE_UNLOCK(_sc) mtx_unlock(&(_sc)->atse_mtx) #define ATSE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->atse_mtx, MA_OWNED) #define ATSE_DEBUG #undef ATSE_DEBUG #ifdef ATSE_DEBUG #define DPRINTF(format, ...) printf(format, __VA_ARGS__) #else #define DPRINTF(format, ...) #endif /* * Register space access macros. */ static inline void csr_write_4(struct atse_softc *sc, uint32_t reg, uint32_t val4, const char *f, const int l) { val4 = htole32(val4); DPRINTF("[%s:%d] CSR W %s 0x%08x (0x%08x) = 0x%08x\n", f, l, "atse_mem_res", reg, reg * 4, val4); bus_write_4(sc->atse_mem_res, reg * 4, val4); } static inline uint32_t csr_read_4(struct atse_softc *sc, uint32_t reg, const char *f, const int l) { uint32_t val4; val4 = le32toh(bus_read_4(sc->atse_mem_res, reg * 4)); DPRINTF("[%s:%d] CSR R %s 0x%08x (0x%08x) = 0x%08x\n", f, l, "atse_mem_res", reg, reg * 4, val4); return (val4); } /* * See page 5-2 that it's all dword offsets and the MS 16 bits need to be zero * on write and ignored on read. */ static inline void pxx_write_2(struct atse_softc *sc, bus_addr_t bmcr, uint32_t reg, uint16_t val, const char *f, const int l, const char *s) { uint32_t val4; val4 = htole32(val & 0x0000ffff); DPRINTF("[%s:%d] %s W %s 0x%08x (0x%08jx) = 0x%08x\n", f, l, s, "atse_mem_res", reg, (bmcr + reg) * 4, val4); bus_write_4(sc->atse_mem_res, (bmcr + reg) * 4, val4); } static inline uint16_t pxx_read_2(struct atse_softc *sc, bus_addr_t bmcr, uint32_t reg, const char *f, const int l, const char *s) { uint32_t val4; uint16_t val; val4 = bus_read_4(sc->atse_mem_res, (bmcr + reg) * 4); val = le32toh(val4) & 0x0000ffff; DPRINTF("[%s:%d] %s R %s 0x%08x (0x%08jx) = 0x%04x\n", f, l, s, "atse_mem_res", reg, (bmcr + reg) * 4, val); return (val); } #define CSR_WRITE_4(sc, reg, val) \ csr_write_4((sc), (reg), (val), __func__, __LINE__) #define CSR_READ_4(sc, reg) \ csr_read_4((sc), (reg), __func__, __LINE__) #define PCS_WRITE_2(sc, reg, val) \ pxx_write_2((sc), sc->atse_bmcr0, (reg), (val), __func__, __LINE__, \ "PCS") #define PCS_READ_2(sc, reg) \ pxx_read_2((sc), sc->atse_bmcr0, (reg), __func__, __LINE__, "PCS") #define PHY_WRITE_2(sc, reg, val) \ pxx_write_2((sc), sc->atse_bmcr1, (reg), (val), __func__, __LINE__, \ "PHY") #define PHY_READ_2(sc, reg) \ pxx_read_2((sc), sc->atse_bmcr1, (reg), __func__, __LINE__, "PHY") static void atse_tick(void *); static int atse_detach(device_t); devclass_t atse_devclass; static int atse_rx_enqueue(struct atse_softc *sc, uint32_t n) { struct mbuf *m; int i; for (i = 0; i < n; i++) { m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) { device_printf(sc->dev, "%s: Can't alloc rx mbuf\n", __func__); return (-1); } m->m_pkthdr.len = m->m_len = m->m_ext.ext_size; xdma_enqueue_mbuf(sc->xchan_rx, &m, 0, 4, 4, XDMA_DEV_TO_MEM); } return (0); } static int atse_xdma_tx_intr(void *arg, xdma_transfer_status_t *status) { xdma_transfer_status_t st; struct atse_softc *sc; struct ifnet *ifp; struct mbuf *m; int err; sc = arg; ATSE_LOCK(sc); ifp = sc->atse_ifp; for (;;) { err = xdma_dequeue_mbuf(sc->xchan_tx, &m, &st); if (err != 0) { break; } if (st.error != 0) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } m_freem(m); sc->txcount--; } ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ATSE_UNLOCK(sc); return (0); } static int atse_xdma_rx_intr(void *arg, xdma_transfer_status_t *status) { xdma_transfer_status_t st; struct atse_softc *sc; struct ifnet *ifp; struct mbuf *m; int err; uint32_t cnt_processed; sc = arg; ATSE_LOCK(sc); ifp = sc->atse_ifp; cnt_processed = 0; for (;;) { err = xdma_dequeue_mbuf(sc->xchan_rx, &m, &st); if (err != 0) { break; } cnt_processed++; if (st.error != 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); continue; } m->m_pkthdr.len = m->m_len = st.transferred; m->m_pkthdr.rcvif = ifp; m_adj(m, ETHER_ALIGN); ATSE_UNLOCK(sc); (*ifp->if_input)(ifp, m); ATSE_LOCK(sc); } atse_rx_enqueue(sc, cnt_processed); ATSE_UNLOCK(sc); return (0); } static int atse_transmit_locked(struct ifnet *ifp) { struct atse_softc *sc; struct mbuf *m; struct buf_ring *br; int error; int enq; sc = ifp->if_softc; br = sc->br; enq = 0; while ((m = drbr_peek(ifp, br)) != NULL) { error = xdma_enqueue_mbuf(sc->xchan_tx, &m, 0, 4, 4, XDMA_MEM_TO_DEV); if (error != 0) { /* No space in request queue available yet. */ drbr_putback(ifp, br, m); break; } drbr_advance(ifp, br); sc->txcount++; enq++; /* If anyone is interested give them a copy. */ ETHER_BPF_MTAP(ifp, m); } if (enq > 0) xdma_queue_submit(sc->xchan_tx); return (0); } static int atse_transmit(struct ifnet *ifp, struct mbuf *m) { struct atse_softc *sc; struct buf_ring *br; int error; sc = ifp->if_softc; br = sc->br; ATSE_LOCK(sc); mtx_lock(&sc->br_mtx); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) { error = drbr_enqueue(ifp, sc->br, m); mtx_unlock(&sc->br_mtx); ATSE_UNLOCK(sc); return (error); } if ((sc->atse_flags & ATSE_FLAGS_LINK) == 0) { error = drbr_enqueue(ifp, sc->br, m); mtx_unlock(&sc->br_mtx); ATSE_UNLOCK(sc); return (error); } error = drbr_enqueue(ifp, br, m); if (error) { mtx_unlock(&sc->br_mtx); ATSE_UNLOCK(sc); return (error); } error = atse_transmit_locked(ifp); mtx_unlock(&sc->br_mtx); ATSE_UNLOCK(sc); return (error); } static void atse_qflush(struct ifnet *ifp) { struct atse_softc *sc; sc = ifp->if_softc; printf("%s\n", __func__); } static int atse_stop_locked(struct atse_softc *sc) { uint32_t mask, val4; struct ifnet *ifp; int i; ATSE_LOCK_ASSERT(sc); callout_stop(&sc->atse_tick); ifp = sc->atse_ifp; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /* Disable MAC transmit and receive datapath. */ mask = BASE_CFG_COMMAND_CONFIG_TX_ENA|BASE_CFG_COMMAND_CONFIG_RX_ENA; val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); val4 &= ~mask; CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4); /* Wait for bits to be cleared; i=100 is excessive. */ for (i = 0; i < 100; i++) { val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); if ((val4 & mask) == 0) { break; } DELAY(10); } if ((val4 & mask) != 0) { device_printf(sc->atse_dev, "Disabling MAC TX/RX timed out.\n"); /* Punt. */ } sc->atse_flags &= ~ATSE_FLAGS_LINK; return (0); } static u_int atse_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { uint64_t *h = arg; uint8_t *addr, x, y; int i, j; addr = LLADDR(sdl); x = 0; for (i = 0; i < ETHER_ADDR_LEN; i++) { y = addr[i] & 0x01; for (j = 1; j < 8; j++) y ^= (addr[i] >> j) & 0x01; x |= (y << i); } *h |= (1 << x); return (1); } static int atse_rxfilter_locked(struct atse_softc *sc) { struct ifnet *ifp; uint32_t val4; int i; /* XXX-BZ can we find out if we have the MHASH synthesized? */ val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); /* For simplicity always hash full 48 bits of addresses. */ if ((val4 & BASE_CFG_COMMAND_CONFIG_MHASH_SEL) != 0) val4 &= ~BASE_CFG_COMMAND_CONFIG_MHASH_SEL; ifp = sc->atse_ifp; if (ifp->if_flags & IFF_PROMISC) { val4 |= BASE_CFG_COMMAND_CONFIG_PROMIS_EN; } else { val4 &= ~BASE_CFG_COMMAND_CONFIG_PROMIS_EN; } CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4); if (ifp->if_flags & IFF_ALLMULTI) { /* Accept all multicast addresses. */ for (i = 0; i <= MHASH_LEN; i++) CSR_WRITE_4(sc, MHASH_START + i, 0x1); } else { /* * Can hold MHASH_LEN entries. * XXX-BZ bitstring.h would be more general. */ uint64_t h; /* * Re-build and re-program hash table. First build the * bit-field "yes" or "no" for each slot per address, then * do all the programming afterwards. */ h = 0; (void)if_foreach_llmaddr(ifp, atse_hash_maddr, &h); for (i = 0; i <= MHASH_LEN; i++) { CSR_WRITE_4(sc, MHASH_START + i, (h & (1 << i)) ? 0x01 : 0x00); } } return (0); } static int atse_ethernet_option_bits_read_fdt(device_t dev) { struct resource *res; device_t fdev; int i, rid; if (atse_ethernet_option_bits_flag & ATSE_ETHERNET_OPTION_BITS_READ) { return (0); } fdev = device_find_child(device_get_parent(dev), "cfi", 0); if (fdev == NULL) { return (ENOENT); } rid = 0; res = bus_alloc_resource_any(fdev, SYS_RES_MEMORY, &rid, RF_ACTIVE | RF_SHAREABLE); if (res == NULL) { return (ENXIO); } for (i = 0; i < ALTERA_ETHERNET_OPTION_BITS_LEN; i++) { atse_ethernet_option_bits[i] = bus_read_1(res, ALTERA_ETHERNET_OPTION_BITS_OFF + i); } bus_release_resource(fdev, SYS_RES_MEMORY, rid, res); atse_ethernet_option_bits_flag |= ATSE_ETHERNET_OPTION_BITS_READ; return (0); } static int atse_ethernet_option_bits_read(device_t dev) { int error; error = atse_ethernet_option_bits_read_fdt(dev); if (error == 0) return (0); device_printf(dev, "Cannot read Ethernet addresses from flash.\n"); return (error); } static int atse_get_eth_address(struct atse_softc *sc) { unsigned long hostid; uint32_t val4; int unit; /* * Make sure to only ever do this once. Otherwise a reset would * possibly change our ethernet address, which is not good at all. */ if (sc->atse_eth_addr[0] != 0x00 || sc->atse_eth_addr[1] != 0x00 || sc->atse_eth_addr[2] != 0x00) { return (0); } if ((atse_ethernet_option_bits_flag & ATSE_ETHERNET_OPTION_BITS_READ) == 0) { goto get_random; } val4 = atse_ethernet_option_bits[0] << 24; val4 |= atse_ethernet_option_bits[1] << 16; val4 |= atse_ethernet_option_bits[2] << 8; val4 |= atse_ethernet_option_bits[3]; /* They chose "safe". */ if (val4 != le32toh(0x00005afe)) { device_printf(sc->atse_dev, "Magic '5afe' is not safe: 0x%08x. " "Falling back to random numbers for hardware address.\n", val4); goto get_random; } sc->atse_eth_addr[0] = atse_ethernet_option_bits[4]; sc->atse_eth_addr[1] = atse_ethernet_option_bits[5]; sc->atse_eth_addr[2] = atse_ethernet_option_bits[6]; sc->atse_eth_addr[3] = atse_ethernet_option_bits[7]; sc->atse_eth_addr[4] = atse_ethernet_option_bits[8]; sc->atse_eth_addr[5] = atse_ethernet_option_bits[9]; /* Handle factory default ethernet addresss: 00:07:ed:ff:ed:15 */ if (sc->atse_eth_addr[0] == 0x00 && sc->atse_eth_addr[1] == 0x07 && sc->atse_eth_addr[2] == 0xed && sc->atse_eth_addr[3] == 0xff && sc->atse_eth_addr[4] == 0xed && sc->atse_eth_addr[5] == 0x15) { device_printf(sc->atse_dev, "Factory programmed Ethernet " "hardware address blacklisted. Falling back to random " "address to avoid collisions.\n"); device_printf(sc->atse_dev, "Please re-program your flash.\n"); goto get_random; } if (sc->atse_eth_addr[0] == 0x00 && sc->atse_eth_addr[1] == 0x00 && sc->atse_eth_addr[2] == 0x00 && sc->atse_eth_addr[3] == 0x00 && sc->atse_eth_addr[4] == 0x00 && sc->atse_eth_addr[5] == 0x00) { device_printf(sc->atse_dev, "All zero's Ethernet hardware " "address blacklisted. Falling back to random address.\n"); device_printf(sc->atse_dev, "Please re-program your flash.\n"); goto get_random; } if (ETHER_IS_MULTICAST(sc->atse_eth_addr)) { device_printf(sc->atse_dev, "Multicast Ethernet hardware " "address blacklisted. Falling back to random address.\n"); device_printf(sc->atse_dev, "Please re-program your flash.\n"); goto get_random; } /* * If we find an Altera prefixed address with a 0x0 ending * adjust by device unit. If not and this is not the first * Ethernet, go to random. */ unit = device_get_unit(sc->atse_dev); if (unit == 0x00) { return (0); } if (unit > 0x0f) { device_printf(sc->atse_dev, "We do not support Ethernet " "addresses for more than 16 MACs. Falling back to " "random hadware address.\n"); goto get_random; } if ((sc->atse_eth_addr[0] & ~0x2) != 0 || sc->atse_eth_addr[1] != 0x07 || sc->atse_eth_addr[2] != 0xed || (sc->atse_eth_addr[5] & 0x0f) != 0x0) { device_printf(sc->atse_dev, "Ethernet address not meeting our " "multi-MAC standards. Falling back to random hadware " "address.\n"); goto get_random; } sc->atse_eth_addr[5] |= (unit & 0x0f); return (0); get_random: /* * Fall back to random code we also use on bridge(4). */ getcredhostid(curthread->td_ucred, &hostid); if (hostid == 0) { arc4rand(sc->atse_eth_addr, ETHER_ADDR_LEN, 1); sc->atse_eth_addr[0] &= ~1;/* clear multicast bit */ sc->atse_eth_addr[0] |= 2; /* set the LAA bit */ } else { sc->atse_eth_addr[0] = 0x2; sc->atse_eth_addr[1] = (hostid >> 24) & 0xff; sc->atse_eth_addr[2] = (hostid >> 16) & 0xff; sc->atse_eth_addr[3] = (hostid >> 8 ) & 0xff; sc->atse_eth_addr[4] = hostid & 0xff; sc->atse_eth_addr[5] = sc->atse_unit & 0xff; } return (0); } static int atse_set_eth_address(struct atse_softc *sc, int n) { uint32_t v0, v1; v0 = (sc->atse_eth_addr[3] << 24) | (sc->atse_eth_addr[2] << 16) | (sc->atse_eth_addr[1] << 8) | sc->atse_eth_addr[0]; v1 = (sc->atse_eth_addr[5] << 8) | sc->atse_eth_addr[4]; if (n & ATSE_ETH_ADDR_DEF) { CSR_WRITE_4(sc, BASE_CFG_MAC_0, v0); CSR_WRITE_4(sc, BASE_CFG_MAC_1, v1); } if (n & ATSE_ETH_ADDR_SUPP1) { CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_0_0, v0); CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_0_1, v1); } if (n & ATSE_ETH_ADDR_SUPP2) { CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_1_0, v0); CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_1_1, v1); } if (n & ATSE_ETH_ADDR_SUPP3) { CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_2_0, v0); CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_2_1, v1); } if (n & ATSE_ETH_ADDR_SUPP4) { CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_3_0, v0); CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_3_1, v1); } return (0); } static int atse_reset(struct atse_softc *sc) { uint32_t val4, mask; uint16_t val; int i; /* 1. External PHY Initialization using MDIO. */ /* * We select the right MDIO space in atse_attach() and let MII do * anything else. */ /* 2. PCS Configuration Register Initialization. */ /* a. Set auto negotiation link timer to 1.6ms for SGMII. */ PCS_WRITE_2(sc, PCS_EXT_LINK_TIMER_0, 0x0D40); PCS_WRITE_2(sc, PCS_EXT_LINK_TIMER_1, 0x0003); /* b. Configure SGMII. */ val = PCS_EXT_IF_MODE_SGMII_ENA|PCS_EXT_IF_MODE_USE_SGMII_AN; PCS_WRITE_2(sc, PCS_EXT_IF_MODE, val); /* c. Enable auto negotiation. */ /* Ignore Bits 6,8,13; should be set,set,unset. */ val = PCS_READ_2(sc, PCS_CONTROL); val &= ~(PCS_CONTROL_ISOLATE|PCS_CONTROL_POWERDOWN); val &= ~PCS_CONTROL_LOOPBACK; /* Make this a -link1 option? */ val |= PCS_CONTROL_AUTO_NEGOTIATION_ENABLE; PCS_WRITE_2(sc, PCS_CONTROL, val); /* d. PCS reset. */ val = PCS_READ_2(sc, PCS_CONTROL); val |= PCS_CONTROL_RESET; PCS_WRITE_2(sc, PCS_CONTROL, val); /* Wait for reset bit to clear; i=100 is excessive. */ for (i = 0; i < 100; i++) { val = PCS_READ_2(sc, PCS_CONTROL); if ((val & PCS_CONTROL_RESET) == 0) { break; } DELAY(10); } if ((val & PCS_CONTROL_RESET) != 0) { device_printf(sc->atse_dev, "PCS reset timed out.\n"); return (ENXIO); } /* 3. MAC Configuration Register Initialization. */ /* a. Disable MAC transmit and receive datapath. */ mask = BASE_CFG_COMMAND_CONFIG_TX_ENA|BASE_CFG_COMMAND_CONFIG_RX_ENA; val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); val4 &= ~mask; /* Samples in the manual do have the SW_RESET bit set here, why? */ CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4); /* Wait for bits to be cleared; i=100 is excessive. */ for (i = 0; i < 100; i++) { val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); if ((val4 & mask) == 0) { break; } DELAY(10); } if ((val4 & mask) != 0) { device_printf(sc->atse_dev, "Disabling MAC TX/RX timed out.\n"); return (ENXIO); } /* b. MAC FIFO configuration. */ CSR_WRITE_4(sc, BASE_CFG_TX_SECTION_EMPTY, FIFO_DEPTH_TX - 16); CSR_WRITE_4(sc, BASE_CFG_TX_ALMOST_FULL, 3); CSR_WRITE_4(sc, BASE_CFG_TX_ALMOST_EMPTY, 8); CSR_WRITE_4(sc, BASE_CFG_RX_SECTION_EMPTY, FIFO_DEPTH_RX - 16); CSR_WRITE_4(sc, BASE_CFG_RX_ALMOST_FULL, 8); CSR_WRITE_4(sc, BASE_CFG_RX_ALMOST_EMPTY, 8); #if 0 CSR_WRITE_4(sc, BASE_CFG_TX_SECTION_FULL, 16); CSR_WRITE_4(sc, BASE_CFG_RX_SECTION_FULL, 16); #else /* For store-and-forward mode, set this threshold to 0. */ CSR_WRITE_4(sc, BASE_CFG_TX_SECTION_FULL, 0); CSR_WRITE_4(sc, BASE_CFG_RX_SECTION_FULL, 0); #endif /* c. MAC address configuration. */ /* Also intialize supplementary addresses to our primary one. */ /* XXX-BZ FreeBSD really needs to grow and API for using these. */ atse_get_eth_address(sc); atse_set_eth_address(sc, ATSE_ETH_ADDR_ALL); /* d. MAC function configuration. */ CSR_WRITE_4(sc, BASE_CFG_FRM_LENGTH, 1518); /* Default. */ CSR_WRITE_4(sc, BASE_CFG_TX_IPG_LENGTH, 12); CSR_WRITE_4(sc, BASE_CFG_PAUSE_QUANT, 0xFFFF); val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); /* * If 1000BASE-X/SGMII PCS is initialized, set the ETH_SPEED (bit 3) * and ENA_10 (bit 25) in command_config register to 0. If half duplex * is reported in the PHY/PCS status register, set the HD_ENA (bit 10) * to 1 in command_config register. * BZ: We shoot for 1000 instead. */ #if 0 val4 |= BASE_CFG_COMMAND_CONFIG_ETH_SPEED; #else val4 &= ~BASE_CFG_COMMAND_CONFIG_ETH_SPEED; #endif val4 &= ~BASE_CFG_COMMAND_CONFIG_ENA_10; #if 0 /* * We do not want to set this, otherwise, we could not even send * random raw ethernet frames for various other research. By default * FreeBSD will use the right ether source address. */ val4 |= BASE_CFG_COMMAND_CONFIG_TX_ADDR_INS; #endif val4 |= BASE_CFG_COMMAND_CONFIG_PAD_EN; val4 &= ~BASE_CFG_COMMAND_CONFIG_CRC_FWD; #if 0 val4 |= BASE_CFG_COMMAND_CONFIG_CNTL_FRM_ENA; #endif #if 1 val4 |= BASE_CFG_COMMAND_CONFIG_RX_ERR_DISC; #endif val &= ~BASE_CFG_COMMAND_CONFIG_LOOP_ENA; /* link0? */ CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4); /* * Make sure we do not enable 32bit alignment; FreeBSD cannot * cope with the additional padding (though we should!?). * Also make sure we get the CRC appended. */ val4 = CSR_READ_4(sc, TX_CMD_STAT); val4 &= ~(TX_CMD_STAT_OMIT_CRC|TX_CMD_STAT_TX_SHIFT16); CSR_WRITE_4(sc, TX_CMD_STAT, val4); val4 = CSR_READ_4(sc, RX_CMD_STAT); val4 &= ~RX_CMD_STAT_RX_SHIFT16; val4 |= RX_CMD_STAT_RX_SHIFT16; CSR_WRITE_4(sc, RX_CMD_STAT, val4); /* e. Reset MAC. */ val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); val4 |= BASE_CFG_COMMAND_CONFIG_SW_RESET; CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4); /* Wait for bits to be cleared; i=100 is excessive. */ for (i = 0; i < 100; i++) { val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); if ((val4 & BASE_CFG_COMMAND_CONFIG_SW_RESET) == 0) { break; } DELAY(10); } if ((val4 & BASE_CFG_COMMAND_CONFIG_SW_RESET) != 0) { device_printf(sc->atse_dev, "MAC reset timed out.\n"); return (ENXIO); } /* f. Enable MAC transmit and receive datapath. */ mask = BASE_CFG_COMMAND_CONFIG_TX_ENA|BASE_CFG_COMMAND_CONFIG_RX_ENA; val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); val4 |= mask; CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4); /* Wait for bits to be cleared; i=100 is excessive. */ for (i = 0; i < 100; i++) { val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); if ((val4 & mask) == mask) { break; } DELAY(10); } if ((val4 & mask) != mask) { device_printf(sc->atse_dev, "Enabling MAC TX/RX timed out.\n"); return (ENXIO); } return (0); } static void atse_init_locked(struct atse_softc *sc) { struct ifnet *ifp; struct mii_data *mii; uint8_t *eaddr; ATSE_LOCK_ASSERT(sc); ifp = sc->atse_ifp; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { return; } /* * Must update the ether address if changed. Given we do not handle * in atse_ioctl() but it's in the general framework, just always * do it here before atse_reset(). */ eaddr = IF_LLADDR(sc->atse_ifp); bcopy(eaddr, &sc->atse_eth_addr, ETHER_ADDR_LEN); /* Make things frind to halt, cleanup, ... */ atse_stop_locked(sc); atse_reset(sc); /* ... and fire up the engine again. */ atse_rxfilter_locked(sc); sc->atse_flags &= ATSE_FLAGS_LINK; /* Preserve. */ mii = device_get_softc(sc->atse_miibus); sc->atse_flags &= ~ATSE_FLAGS_LINK; mii_mediachg(mii); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&sc->atse_tick, hz, atse_tick, sc); } static void atse_init(void *xsc) { struct atse_softc *sc; /* * XXXRW: There is some argument that we should immediately do RX * processing after enabling interrupts, or one may not fire if there * are buffered packets. */ sc = (struct atse_softc *)xsc; ATSE_LOCK(sc); atse_init_locked(sc); ATSE_UNLOCK(sc); } static int atse_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct atse_softc *sc; struct ifreq *ifr; int error, mask; error = 0; sc = ifp->if_softc; ifr = (struct ifreq *)data; switch (command) { case SIOCSIFFLAGS: ATSE_LOCK(sc); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 && ((ifp->if_flags ^ sc->atse_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) atse_rxfilter_locked(sc); else atse_init_locked(sc); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) atse_stop_locked(sc); sc->atse_if_flags = ifp->if_flags; ATSE_UNLOCK(sc); break; case SIOCSIFCAP: ATSE_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; ATSE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: ATSE_LOCK(sc); atse_rxfilter_locked(sc); ATSE_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: { struct mii_data *mii; struct ifreq *ifr; mii = device_get_softc(sc->atse_miibus); ifr = (struct ifreq *)data; error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; } default: error = ether_ioctl(ifp, command, data); break; } return (error); } static void atse_tick(void *xsc) { struct atse_softc *sc; struct mii_data *mii; struct ifnet *ifp; sc = (struct atse_softc *)xsc; ATSE_LOCK_ASSERT(sc); ifp = sc->atse_ifp; mii = device_get_softc(sc->atse_miibus); mii_tick(mii); if ((sc->atse_flags & ATSE_FLAGS_LINK) == 0) { atse_miibus_statchg(sc->atse_dev); } callout_reset(&sc->atse_tick, hz, atse_tick, sc); } /* * Set media options. */ static int atse_ifmedia_upd(struct ifnet *ifp) { struct atse_softc *sc; struct mii_data *mii; struct mii_softc *miisc; int error; sc = ifp->if_softc; ATSE_LOCK(sc); mii = device_get_softc(sc->atse_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) { PHY_RESET(miisc); } error = mii_mediachg(mii); ATSE_UNLOCK(sc); return (error); } /* * Report current media status. */ static void atse_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct atse_softc *sc; struct mii_data *mii; sc = ifp->if_softc; ATSE_LOCK(sc); mii = device_get_softc(sc->atse_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; ATSE_UNLOCK(sc); } static struct atse_mac_stats_regs { const char *name; const char *descr; /* Mostly copied from Altera datasheet. */ } atse_mac_stats_regs[] = { [0x1a] = { "aFramesTransmittedOK", "The number of frames that are successfully transmitted including " "the pause frames." }, { "aFramesReceivedOK", "The number of frames that are successfully received including the " "pause frames." }, { "aFrameCheckSequenceErrors", "The number of receive frames with CRC error." }, { "aAlignmentErrors", "The number of receive frames with alignment error." }, { "aOctetsTransmittedOK", "The lower 32 bits of the number of data and padding octets that " "are successfully transmitted." }, { "aOctetsReceivedOK", "The lower 32 bits of the number of data and padding octets that " " are successfully received." }, { "aTxPAUSEMACCtrlFrames", "The number of pause frames transmitted." }, { "aRxPAUSEMACCtrlFrames", "The number received pause frames received." }, { "ifInErrors", "The number of errored frames received." }, { "ifOutErrors", "The number of transmit frames with either a FIFO overflow error, " "a FIFO underflow error, or a error defined by the user " "application." }, { "ifInUcastPkts", "The number of valid unicast frames received." }, { "ifInMulticastPkts", "The number of valid multicast frames received. The count does " "not include pause frames." }, { "ifInBroadcastPkts", "The number of valid broadcast frames received." }, { "ifOutDiscards", "This statistics counter is not in use. The MAC function does not " "discard frames that are written to the FIFO buffer by the user " "application." }, { "ifOutUcastPkts", "The number of valid unicast frames transmitted." }, { "ifOutMulticastPkts", "The number of valid multicast frames transmitted, excluding pause " "frames." }, { "ifOutBroadcastPkts", "The number of valid broadcast frames transmitted." }, { "etherStatsDropEvents", "The number of frames that are dropped due to MAC internal errors " "when FIFO buffer overflow persists." }, { "etherStatsOctets", "The lower 32 bits of the total number of octets received. This " "count includes both good and errored frames." }, { "etherStatsPkts", "The total number of good and errored frames received." }, { "etherStatsUndersizePkts", "The number of frames received with length less than 64 bytes. " "This count does not include errored frames." }, { "etherStatsOversizePkts", "The number of frames received that are longer than the value " "configured in the frm_length register. This count does not " "include errored frames." }, { "etherStatsPkts64Octets", "The number of 64-byte frames received. This count includes good " "and errored frames." }, { "etherStatsPkts65to127Octets", "The number of received good and errored frames between the length " "of 65 and 127 bytes." }, { "etherStatsPkts128to255Octets", "The number of received good and errored frames between the length " "of 128 and 255 bytes." }, { "etherStatsPkts256to511Octets", "The number of received good and errored frames between the length " "of 256 and 511 bytes." }, { "etherStatsPkts512to1023Octets", "The number of received good and errored frames between the length " "of 512 and 1023 bytes." }, { "etherStatsPkts1024to1518Octets", "The number of received good and errored frames between the length " "of 1024 and 1518 bytes." }, { "etherStatsPkts1519toXOctets", "The number of received good and errored frames between the length " "of 1519 and the maximum frame length configured in the frm_length " "register." }, { "etherStatsJabbers", "Too long frames with CRC error." }, { "etherStatsFragments", "Too short frames with CRC error." }, /* 0x39 unused, 0x3a/b non-stats. */ [0x3c] = /* Extended Statistics Counters */ { "msb_aOctetsTransmittedOK", "Upper 32 bits of the number of data and padding octets that are " "successfully transmitted." }, { "msb_aOctetsReceivedOK", "Upper 32 bits of the number of data and padding octets that are " "successfully received." }, { "msb_etherStatsOctets", "Upper 32 bits of the total number of octets received. This count " "includes both good and errored frames." } }; static int sysctl_atse_mac_stats_proc(SYSCTL_HANDLER_ARGS) { struct atse_softc *sc; int error, offset, s; sc = arg1; offset = arg2; s = CSR_READ_4(sc, offset); error = sysctl_handle_int(oidp, &s, 0, req); if (error || !req->newptr) { return (error); } return (0); } static struct atse_rx_err_stats_regs { const char *name; const char *descr; } atse_rx_err_stats_regs[] = { #define ATSE_RX_ERR_FIFO_THRES_EOP 0 /* FIFO threshold reached, on EOP. */ #define ATSE_RX_ERR_ELEN 1 /* Frame/payload length not valid. */ #define ATSE_RX_ERR_CRC32 2 /* CRC-32 error. */ #define ATSE_RX_ERR_FIFO_THRES_TRUNC 3 /* FIFO thresh., truncated frame. */ #define ATSE_RX_ERR_4 4 /* ? */ #define ATSE_RX_ERR_5 5 /* / */ { "rx_err_fifo_thres_eop", "FIFO threshold reached, reported on EOP." }, { "rx_err_fifo_elen", "Frame or payload length not valid." }, { "rx_err_fifo_crc32", "CRC-32 error." }, { "rx_err_fifo_thres_trunc", "FIFO threshold reached, truncated frame" }, { "rx_err_4", "?" }, { "rx_err_5", "?" }, }; static int sysctl_atse_rx_err_stats_proc(SYSCTL_HANDLER_ARGS) { struct atse_softc *sc; int error, offset, s; sc = arg1; offset = arg2; s = sc->atse_rx_err[offset]; error = sysctl_handle_int(oidp, &s, 0, req); if (error || !req->newptr) { return (error); } return (0); } static void atse_sysctl_stats_attach(device_t dev) { struct sysctl_ctx_list *sctx; struct sysctl_oid *soid; struct atse_softc *sc; int i; sc = device_get_softc(dev); sctx = device_get_sysctl_ctx(dev); soid = device_get_sysctl_tree(dev); /* MAC statistics. */ for (i = 0; i < nitems(atse_mac_stats_regs); i++) { if (atse_mac_stats_regs[i].name == NULL || atse_mac_stats_regs[i].descr == NULL) { continue; } SYSCTL_ADD_PROC(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, atse_mac_stats_regs[i].name, CTLTYPE_UINT|CTLFLAG_RD, sc, i, sysctl_atse_mac_stats_proc, "IU", atse_mac_stats_regs[i].descr); } /* rx_err[]. */ for (i = 0; i < ATSE_RX_ERR_MAX; i++) { if (atse_rx_err_stats_regs[i].name == NULL || atse_rx_err_stats_regs[i].descr == NULL) { continue; } SYSCTL_ADD_PROC(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, atse_rx_err_stats_regs[i].name, CTLTYPE_UINT|CTLFLAG_RD, sc, i, sysctl_atse_rx_err_stats_proc, "IU", atse_rx_err_stats_regs[i].descr); } } /* * Generic device handling routines. */ int atse_attach(device_t dev) { struct atse_softc *sc; struct ifnet *ifp; uint32_t caps; int error; sc = device_get_softc(dev); sc->dev = dev; /* Get xDMA controller */ sc->xdma_tx = xdma_ofw_get(sc->dev, "tx"); if (sc->xdma_tx == NULL) { device_printf(dev, "Can't find DMA controller.\n"); return (ENXIO); } /* * Only final (EOP) write can be less than "symbols per beat" value * so we have to defrag mbuf chain. * Chapter 15. On-Chip FIFO Memory Core. * Embedded Peripherals IP User Guide. */ caps = XCHAN_CAP_NOSEG; /* Alloc xDMA virtual channel. */ sc->xchan_tx = xdma_channel_alloc(sc->xdma_tx, caps); if (sc->xchan_tx == NULL) { device_printf(dev, "Can't alloc virtual DMA channel.\n"); return (ENXIO); } /* Setup interrupt handler. */ error = xdma_setup_intr(sc->xchan_tx, atse_xdma_tx_intr, sc, &sc->ih_tx); if (error) { device_printf(sc->dev, "Can't setup xDMA interrupt handler.\n"); return (ENXIO); } xdma_prep_sg(sc->xchan_tx, TX_QUEUE_SIZE, /* xchan requests queue size */ MCLBYTES, /* maxsegsize */ 8, /* maxnsegs */ 16, /* alignment */ 0, /* boundary */ BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR); /* Get RX xDMA controller */ sc->xdma_rx = xdma_ofw_get(sc->dev, "rx"); if (sc->xdma_rx == NULL) { device_printf(dev, "Can't find DMA controller.\n"); return (ENXIO); } /* Alloc xDMA virtual channel. */ sc->xchan_rx = xdma_channel_alloc(sc->xdma_rx, caps); if (sc->xchan_rx == NULL) { device_printf(dev, "Can't alloc virtual DMA channel.\n"); return (ENXIO); } /* Setup interrupt handler. */ error = xdma_setup_intr(sc->xchan_rx, atse_xdma_rx_intr, sc, &sc->ih_rx); if (error) { device_printf(sc->dev, "Can't setup xDMA interrupt handler.\n"); return (ENXIO); } xdma_prep_sg(sc->xchan_rx, RX_QUEUE_SIZE, /* xchan requests queue size */ MCLBYTES, /* maxsegsize */ 1, /* maxnsegs */ 16, /* alignment */ 0, /* boundary */ BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR); mtx_init(&sc->br_mtx, "buf ring mtx", NULL, MTX_DEF); sc->br = buf_ring_alloc(BUFRING_SIZE, M_DEVBUF, M_NOWAIT, &sc->br_mtx); if (sc->br == NULL) { return (ENOMEM); } atse_ethernet_option_bits_read(dev); mtx_init(&sc->atse_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->atse_tick, &sc->atse_mtx, 0); /* * We are only doing single-PHY with this driver currently. The * defaults would be right so that BASE_CFG_MDIO_ADDR0 points to the * 1st PHY address (0) apart from the fact that BMCR0 is always * the PCS mapping, so we always use BMCR1. See Table 5-1 0xA0-0xBF. */ #if 0 /* Always PCS. */ sc->atse_bmcr0 = MDIO_0_START; CSR_WRITE_4(sc, BASE_CFG_MDIO_ADDR0, 0x00); #endif /* Always use matching PHY for atse[0..]. */ sc->atse_phy_addr = device_get_unit(dev); sc->atse_bmcr1 = MDIO_1_START; CSR_WRITE_4(sc, BASE_CFG_MDIO_ADDR1, sc->atse_phy_addr); /* Reset the adapter. */ atse_reset(sc); /* Setup interface. */ ifp = sc->atse_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "if_alloc() failed\n"); error = ENOSPC; goto err; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | + IFF_NEEDSEPOCH; ifp->if_ioctl = atse_ioctl; ifp->if_transmit = atse_transmit; ifp->if_qflush = atse_qflush; ifp->if_init = atse_init; IFQ_SET_MAXLEN(&ifp->if_snd, ATSE_TX_LIST_CNT - 1); ifp->if_snd.ifq_drv_maxlen = ATSE_TX_LIST_CNT - 1; IFQ_SET_READY(&ifp->if_snd); /* MII setup. */ error = mii_attach(dev, &sc->atse_miibus, ifp, atse_ifmedia_upd, atse_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0); if (error != 0) { device_printf(dev, "attaching PHY failed: %d\n", error); goto err; } /* Call media-indepedent attach routine. */ ether_ifattach(ifp, sc->atse_eth_addr); /* Tell the upper layer(s) about vlan mtu support. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capenable = ifp->if_capabilities; err: if (error != 0) { atse_detach(dev); } if (error == 0) { atse_sysctl_stats_attach(dev); } atse_rx_enqueue(sc, NUM_RX_MBUF); xdma_queue_submit(sc->xchan_rx); return (error); } static int atse_detach(device_t dev) { struct atse_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); KASSERT(mtx_initialized(&sc->atse_mtx), ("%s: mutex not initialized", device_get_nameunit(dev))); ifp = sc->atse_ifp; /* Only cleanup if attach succeeded. */ if (device_is_attached(dev)) { ATSE_LOCK(sc); atse_stop_locked(sc); ATSE_UNLOCK(sc); callout_drain(&sc->atse_tick); ether_ifdetach(ifp); } if (sc->atse_miibus != NULL) { device_delete_child(dev, sc->atse_miibus); } if (ifp != NULL) { if_free(ifp); } mtx_destroy(&sc->atse_mtx); xdma_channel_free(sc->xchan_tx); xdma_channel_free(sc->xchan_rx); xdma_put(sc->xdma_tx); xdma_put(sc->xdma_rx); return (0); } /* Shared between nexus and fdt implementation. */ void atse_detach_resources(device_t dev) { struct atse_softc *sc; sc = device_get_softc(dev); if (sc->atse_mem_res != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, sc->atse_mem_rid, sc->atse_mem_res); sc->atse_mem_res = NULL; } } int atse_detach_dev(device_t dev) { int error; error = atse_detach(dev); if (error) { /* We are basically in undefined state now. */ device_printf(dev, "atse_detach() failed: %d\n", error); return (error); } atse_detach_resources(dev); return (0); } int atse_miibus_readreg(device_t dev, int phy, int reg) { struct atse_softc *sc; int val; sc = device_get_softc(dev); /* * We currently do not support re-mapping of MDIO space on-the-fly * but de-facto hard-code the phy#. */ if (phy != sc->atse_phy_addr) { return (0); } val = PHY_READ_2(sc, reg); return (val); } int atse_miibus_writereg(device_t dev, int phy, int reg, int data) { struct atse_softc *sc; sc = device_get_softc(dev); /* * We currently do not support re-mapping of MDIO space on-the-fly * but de-facto hard-code the phy#. */ if (phy != sc->atse_phy_addr) { return (0); } PHY_WRITE_2(sc, reg, data); return (0); } void atse_miibus_statchg(device_t dev) { struct atse_softc *sc; struct mii_data *mii; struct ifnet *ifp; uint32_t val4; sc = device_get_softc(dev); ATSE_LOCK_ASSERT(sc); mii = device_get_softc(sc->atse_miibus); ifp = sc->atse_ifp; if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { return; } val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); /* Assume no link. */ sc->atse_flags &= ~ATSE_FLAGS_LINK; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: val4 |= BASE_CFG_COMMAND_CONFIG_ENA_10; val4 &= ~BASE_CFG_COMMAND_CONFIG_ETH_SPEED; sc->atse_flags |= ATSE_FLAGS_LINK; break; case IFM_100_TX: val4 &= ~BASE_CFG_COMMAND_CONFIG_ENA_10; val4 &= ~BASE_CFG_COMMAND_CONFIG_ETH_SPEED; sc->atse_flags |= ATSE_FLAGS_LINK; break; case IFM_1000_T: val4 &= ~BASE_CFG_COMMAND_CONFIG_ENA_10; val4 |= BASE_CFG_COMMAND_CONFIG_ETH_SPEED; sc->atse_flags |= ATSE_FLAGS_LINK; break; default: break; } } if ((sc->atse_flags & ATSE_FLAGS_LINK) == 0) { /* Need to stop the MAC? */ return; } if (IFM_OPTIONS(mii->mii_media_active & IFM_FDX) != 0) { val4 &= ~BASE_CFG_COMMAND_CONFIG_HD_ENA; } else { val4 |= BASE_CFG_COMMAND_CONFIG_HD_ENA; } /* flow control? */ /* Make sure the MAC is activated. */ val4 |= BASE_CFG_COMMAND_CONFIG_TX_ENA; val4 |= BASE_CFG_COMMAND_CONFIG_RX_ENA; CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4); } MODULE_DEPEND(atse, ether, 1, 1, 1); MODULE_DEPEND(atse, miibus, 1, 1, 1); Index: head/sys/dev/beri/virtio/network/if_vtbe.c =================================================================== --- head/sys/dev/beri/virtio/network/if_vtbe.c (revision 357009) +++ head/sys/dev/beri/virtio/network/if_vtbe.c (revision 357010) @@ -1,651 +1,651 @@ /*- * Copyright (c) 2014 Ruslan Bukin * All rights reserved. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * BERI Virtio Networking Frontend */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pio_if.h" #define DPRINTF(fmt, args...) printf(fmt, ##args) #define READ4(_sc, _reg) \ bus_read_4((_sc)->res[0], _reg) #define WRITE4(_sc, _reg, _val) \ bus_write_4((_sc)->res[0], _reg, _val) #define VTBE_LOCK(sc) mtx_lock(&(sc)->mtx) #define VTBE_UNLOCK(sc) mtx_unlock(&(sc)->mtx) #define VTBE_ASSERT_LOCKED(sc) mtx_assert(&(sc)->mtx, MA_OWNED); #define VTBE_ASSERT_UNLOCKED(sc) mtx_assert(&(sc)->mtx, MA_NOTOWNED); /* * Driver data and defines. */ #define DESC_COUNT 256 struct vtbe_softc { struct resource *res[2]; bus_space_tag_t bst; bus_space_handle_t bsh; device_t dev; struct ifnet *ifp; int if_flags; struct mtx mtx; boolean_t is_attached; int beri_mem_offset; device_t pio_send; device_t pio_recv; int opened; struct vqueue_info vs_queues[2]; int vs_curq; int hdrsize; }; static struct resource_spec vtbe_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, { -1, 0 } }; static void vtbe_txfinish_locked(struct vtbe_softc *sc); static void vtbe_rxfinish_locked(struct vtbe_softc *sc); static void vtbe_stop_locked(struct vtbe_softc *sc); static int pio_enable_irq(struct vtbe_softc *sc, int enable); static void vtbe_txstart_locked(struct vtbe_softc *sc) { struct iovec iov[DESC_COUNT]; struct virtio_net_hdr *vnh; struct vqueue_info *vq; struct iovec *tiov; struct ifnet *ifp; struct mbuf *m; struct uio uio; int enqueued; int iolen; int error; int reg; int len; int n; VTBE_ASSERT_LOCKED(sc); /* RX queue */ vq = &sc->vs_queues[0]; if (!vq_has_descs(vq)) { return; } ifp = sc->ifp; if (ifp->if_drv_flags & IFF_DRV_OACTIVE) { return; } enqueued = 0; if (!vq_ring_ready(vq)) return; vq->vq_save_used = be16toh(vq->vq_used->idx); for (;;) { if (!vq_has_descs(vq)) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) { break; } n = vq_getchain(sc->beri_mem_offset, vq, iov, DESC_COUNT, NULL); KASSERT(n == 2, ("Unexpected amount of descriptors (%d)", n)); tiov = getcopy(iov, n); vnh = iov[0].iov_base; memset(vnh, 0, sc->hdrsize); len = iov[1].iov_len; uio.uio_resid = len; uio.uio_iov = &tiov[1]; uio.uio_segflg = UIO_SYSSPACE; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_rw = UIO_READ; error = m_mbuftouio(&uio, m, 0); if (error) panic("m_mbuftouio failed\n"); iolen = (len - uio.uio_resid + sc->hdrsize); free(tiov, M_DEVBUF); vq_relchain(vq, iov, n, iolen); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); BPF_MTAP(ifp, m); m_freem(m); ++enqueued; } if (enqueued != 0) { reg = htobe32(VIRTIO_MMIO_INT_VRING); WRITE4(sc, VIRTIO_MMIO_INTERRUPT_STATUS, reg); PIO_SET(sc->pio_send, Q_INTR, 1); } } static void vtbe_txstart(struct ifnet *ifp) { struct vtbe_softc *sc = ifp->if_softc; VTBE_LOCK(sc); vtbe_txstart_locked(sc); VTBE_UNLOCK(sc); } static void vtbe_stop_locked(struct vtbe_softc *sc) { struct ifnet *ifp; VTBE_ASSERT_LOCKED(sc); ifp = sc->ifp; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } static void vtbe_init_locked(struct vtbe_softc *sc) { struct ifnet *ifp = sc->ifp; VTBE_ASSERT_LOCKED(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; ifp->if_drv_flags |= IFF_DRV_RUNNING; } static void vtbe_init(void *if_softc) { struct vtbe_softc *sc = if_softc; VTBE_LOCK(sc); vtbe_init_locked(sc); VTBE_UNLOCK(sc); } static int vtbe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifmediareq *ifmr; struct vtbe_softc *sc; struct ifreq *ifr; int mask, error; sc = ifp->if_softc; ifr = (struct ifreq *)data; error = 0; switch (cmd) { case SIOCSIFFLAGS: VTBE_LOCK(sc); if (ifp->if_flags & IFF_UP) { pio_enable_irq(sc, 1); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { vtbe_init_locked(sc); } } else { pio_enable_irq(sc, 0); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { vtbe_stop_locked(sc); } } sc->if_flags = ifp->if_flags; VTBE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: ifmr = (struct ifmediareq *)data; ifmr->ifm_count = 1; ifmr->ifm_status = (IFM_AVALID | IFM_ACTIVE); ifmr->ifm_active = (IFM_ETHER | IFM_10G_T | IFM_FDX); ifmr->ifm_current = ifmr->ifm_active; break; case SIOCSIFCAP: mask = ifp->if_capenable ^ ifr->ifr_reqcap; if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; } break; case SIOCSIFADDR: pio_enable_irq(sc, 1); default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void vtbe_txfinish_locked(struct vtbe_softc *sc) { struct ifnet *ifp; VTBE_ASSERT_LOCKED(sc); ifp = sc->ifp; } static int vq_init(struct vtbe_softc *sc) { struct vqueue_info *vq; uint8_t *base; int size; int reg; int pfn; vq = &sc->vs_queues[sc->vs_curq]; vq->vq_qsize = DESC_COUNT; reg = READ4(sc, VIRTIO_MMIO_QUEUE_PFN); pfn = be32toh(reg); vq->vq_pfn = pfn; size = vring_size(vq->vq_qsize, VRING_ALIGN); base = paddr_map(sc->beri_mem_offset, (pfn << PAGE_SHIFT), size); /* First pages are descriptors */ vq->vq_desc = (struct vring_desc *)base; base += vq->vq_qsize * sizeof(struct vring_desc); /* Then avail ring */ vq->vq_avail = (struct vring_avail *)base; base += (2 + vq->vq_qsize + 1) * sizeof(uint16_t); /* Then it's rounded up to the next page */ base = (uint8_t *)roundup2((uintptr_t)base, VRING_ALIGN); /* And the last pages are the used ring */ vq->vq_used = (struct vring_used *)base; /* Mark queue as allocated, and start at 0 when we use it. */ vq->vq_flags = VQ_ALLOC; vq->vq_last_avail = 0; return (0); } static void vtbe_proc_rx(struct vtbe_softc *sc, struct vqueue_info *vq) { struct iovec iov[DESC_COUNT]; struct iovec *tiov; struct ifnet *ifp; struct uio uio; struct mbuf *m; int iolen; int i; int n; ifp = sc->ifp; n = vq_getchain(sc->beri_mem_offset, vq, iov, DESC_COUNT, NULL); KASSERT(n >= 1 && n <= DESC_COUNT, ("wrong n %d", n)); tiov = getcopy(iov, n); iolen = 0; for (i = 1; i < n; i++) { iolen += iov[i].iov_len; } uio.uio_resid = iolen; uio.uio_iov = &tiov[1]; uio.uio_segflg = UIO_SYSSPACE; uio.uio_iovcnt = (n - 1); uio.uio_rw = UIO_WRITE; if ((m = m_uiotombuf(&uio, M_NOWAIT, 0, ETHER_ALIGN, M_PKTHDR)) == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto done; } m->m_pkthdr.rcvif = ifp; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); CURVNET_SET(ifp->if_vnet); VTBE_UNLOCK(sc); (*ifp->if_input)(ifp, m); VTBE_LOCK(sc); CURVNET_RESTORE(); done: free(tiov, M_DEVBUF); vq_relchain(vq, iov, n, iolen + sc->hdrsize); } static void vtbe_rxfinish_locked(struct vtbe_softc *sc) { struct vqueue_info *vq; int reg; /* TX queue */ vq = &sc->vs_queues[1]; if (!vq_ring_ready(vq)) return; /* Process new descriptors */ vq->vq_save_used = be16toh(vq->vq_used->idx); while (vq_has_descs(vq)) { vtbe_proc_rx(sc, vq); } /* Interrupt the other side */ reg = htobe32(VIRTIO_MMIO_INT_VRING); WRITE4(sc, VIRTIO_MMIO_INTERRUPT_STATUS, reg); PIO_SET(sc->pio_send, Q_INTR, 1); } static void vtbe_intr(void *arg) { struct vtbe_softc *sc; int pending; uint32_t reg; sc = arg; VTBE_LOCK(sc); reg = PIO_READ(sc->pio_recv); /* Ack */ PIO_SET(sc->pio_recv, reg, 0); pending = htobe32(reg); if (pending & Q_SEL) { reg = READ4(sc, VIRTIO_MMIO_QUEUE_SEL); sc->vs_curq = be32toh(reg); } if (pending & Q_PFN) { vq_init(sc); } if (pending & Q_NOTIFY) { /* beri rx / arm tx notify */ vtbe_txfinish_locked(sc); } if (pending & Q_NOTIFY1) { vtbe_rxfinish_locked(sc); } VTBE_UNLOCK(sc); } static int vtbe_get_hwaddr(struct vtbe_softc *sc, uint8_t *hwaddr) { int rnd; /* * Generate MAC address, use 'bsd' + random 24 low-order bits. */ rnd = arc4random() & 0x00ffffff; hwaddr[0] = 'b'; hwaddr[1] = 's'; hwaddr[2] = 'd'; hwaddr[3] = rnd >> 16; hwaddr[4] = rnd >> 8; hwaddr[5] = rnd >> 0; return (0); } static int pio_enable_irq(struct vtbe_softc *sc, int enable) { /* * IRQ lines should be disabled while reprogram FPGA core. */ if (enable) { if (sc->opened == 0) { sc->opened = 1; PIO_SETUP_IRQ(sc->pio_recv, vtbe_intr, sc); } } else { if (sc->opened == 1) { PIO_TEARDOWN_IRQ(sc->pio_recv); sc->opened = 0; } } return (0); } static int vtbe_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "sri-cambridge,beri-vtnet")) return (ENXIO); device_set_desc(dev, "Virtio BERI Ethernet Controller"); return (BUS_PROBE_DEFAULT); } static int vtbe_attach(device_t dev) { uint8_t macaddr[ETHER_ADDR_LEN]; struct vtbe_softc *sc; struct ifnet *ifp; int reg; sc = device_get_softc(dev); sc->dev = dev; sc->hdrsize = sizeof(struct virtio_net_hdr); if (bus_alloc_resources(dev, vtbe_spec, sc->res)) { device_printf(dev, "could not allocate resources\n"); return (ENXIO); } /* Memory interface */ sc->bst = rman_get_bustag(sc->res[0]); sc->bsh = rman_get_bushandle(sc->res[0]); mtx_init(&sc->mtx, device_get_nameunit(sc->dev), MTX_NETWORK_LOCK, MTX_DEF); if (setup_offset(dev, &sc->beri_mem_offset) != 0) return (ENXIO); if (setup_pio(dev, "pio-send", &sc->pio_send) != 0) return (ENXIO); if (setup_pio(dev, "pio-recv", &sc->pio_recv) != 0) return (ENXIO); /* Setup MMIO */ /* Specify that we provide network device */ reg = htobe32(VIRTIO_ID_NETWORK); WRITE4(sc, VIRTIO_MMIO_DEVICE_ID, reg); /* The number of desc we support */ reg = htobe32(DESC_COUNT); WRITE4(sc, VIRTIO_MMIO_QUEUE_NUM_MAX, reg); /* Our features */ reg = htobe32(VIRTIO_NET_F_MAC | VIRTIO_F_NOTIFY_ON_EMPTY); WRITE4(sc, VIRTIO_MMIO_HOST_FEATURES, reg); /* Get MAC */ if (vtbe_get_hwaddr(sc, macaddr)) { device_printf(sc->dev, "can't get mac\n"); return (ENXIO); } /* Set up the ethernet interface. */ sc->ifp = ifp = if_alloc(IFT_ETHER); ifp->if_baudrate = IF_Gbps(10); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = (IFF_BROADCAST | IFF_SIMPLEX | - IFF_MULTICAST | IFF_PROMISC); + IFF_MULTICAST | IFF_PROMISC | IFF_NEEDSEPOCH); ifp->if_capabilities = IFCAP_VLAN_MTU; ifp->if_capenable = ifp->if_capabilities; ifp->if_start = vtbe_txstart; ifp->if_ioctl = vtbe_ioctl; ifp->if_init = vtbe_init; IFQ_SET_MAXLEN(&ifp->if_snd, DESC_COUNT - 1); ifp->if_snd.ifq_drv_maxlen = DESC_COUNT - 1; IFQ_SET_READY(&ifp->if_snd); ifp->if_hdrlen = sizeof(struct ether_vlan_header); /* All ready to run, attach the ethernet interface. */ ether_ifattach(ifp, macaddr); sc->is_attached = true; return (0); } static device_method_t vtbe_methods[] = { DEVMETHOD(device_probe, vtbe_probe), DEVMETHOD(device_attach, vtbe_attach), { 0, 0 } }; static driver_t vtbe_driver = { "vtbe", vtbe_methods, sizeof(struct vtbe_softc), }; static devclass_t vtbe_devclass; DRIVER_MODULE(vtbe, simplebus, vtbe_driver, vtbe_devclass, 0, 0); MODULE_DEPEND(vtbe, ether, 1, 1, 1); Index: head/sys/dev/dpaa/if_dtsec.c =================================================================== --- head/sys/dev/dpaa/if_dtsec.c (revision 357009) +++ head/sys/dev/dpaa/if_dtsec.c (revision 357010) @@ -1,856 +1,856 @@ /*- * Copyright (c) 2011-2012 Semihalf. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "miibus_if.h" #include #include #include #include #include "fman.h" #include "if_dtsec.h" #include "if_dtsec_im.h" #include "if_dtsec_rm.h" #define DTSEC_MIN_FRAME_SIZE 64 #define DTSEC_MAX_FRAME_SIZE 9600 #define DTSEC_REG_MAXFRM 0x110 /** * @group dTSEC private defines. * @{ */ /** * dTSEC FMan MAC exceptions info struct. */ struct dtsec_fm_mac_ex_str { const int num; const char *str; }; /** @} */ /** * @group FMan MAC routines. * @{ */ #define DTSEC_MAC_EXCEPTIONS_END (-1) /** * FMan MAC exceptions. */ static const struct dtsec_fm_mac_ex_str dtsec_fm_mac_exceptions[] = { { e_FM_MAC_EX_10G_MDIO_SCAN_EVENTMDIO, "MDIO scan event" }, { e_FM_MAC_EX_10G_MDIO_CMD_CMPL, "MDIO command completion" }, { e_FM_MAC_EX_10G_REM_FAULT, "Remote fault" }, { e_FM_MAC_EX_10G_LOC_FAULT, "Local fault" }, { e_FM_MAC_EX_10G_1TX_ECC_ER, "Transmit frame ECC error" }, { e_FM_MAC_EX_10G_TX_FIFO_UNFL, "Transmit FIFO underflow" }, { e_FM_MAC_EX_10G_TX_FIFO_OVFL, "Receive FIFO overflow" }, { e_FM_MAC_EX_10G_TX_ER, "Transmit frame error" }, { e_FM_MAC_EX_10G_RX_FIFO_OVFL, "Receive FIFO overflow" }, { e_FM_MAC_EX_10G_RX_ECC_ER, "Receive frame ECC error" }, { e_FM_MAC_EX_10G_RX_JAB_FRM, "Receive jabber frame" }, { e_FM_MAC_EX_10G_RX_OVRSZ_FRM, "Receive oversized frame" }, { e_FM_MAC_EX_10G_RX_RUNT_FRM, "Receive runt frame" }, { e_FM_MAC_EX_10G_RX_FRAG_FRM, "Receive fragment frame" }, { e_FM_MAC_EX_10G_RX_LEN_ER, "Receive payload length error" }, { e_FM_MAC_EX_10G_RX_CRC_ER, "Receive CRC error" }, { e_FM_MAC_EX_10G_RX_ALIGN_ER, "Receive alignment error" }, { e_FM_MAC_EX_1G_BAB_RX, "Babbling receive error" }, { e_FM_MAC_EX_1G_RX_CTL, "Receive control (pause frame) interrupt" }, { e_FM_MAC_EX_1G_GRATEFUL_TX_STP_COMPLET, "Graceful transmit stop " "complete" }, { e_FM_MAC_EX_1G_BAB_TX, "Babbling transmit error" }, { e_FM_MAC_EX_1G_TX_CTL, "Transmit control (pause frame) interrupt" }, { e_FM_MAC_EX_1G_TX_ERR, "Transmit error" }, { e_FM_MAC_EX_1G_LATE_COL, "Late collision" }, { e_FM_MAC_EX_1G_COL_RET_LMT, "Collision retry limit" }, { e_FM_MAC_EX_1G_TX_FIFO_UNDRN, "Transmit FIFO underrun" }, { e_FM_MAC_EX_1G_MAG_PCKT, "Magic Packet detected when dTSEC is in " "Magic Packet detection mode" }, { e_FM_MAC_EX_1G_MII_MNG_RD_COMPLET, "MII management read completion" }, { e_FM_MAC_EX_1G_MII_MNG_WR_COMPLET, "MII management write completion" }, { e_FM_MAC_EX_1G_GRATEFUL_RX_STP_COMPLET, "Graceful receive stop " "complete" }, { e_FM_MAC_EX_1G_TX_DATA_ERR, "Internal data error on transmit" }, { e_FM_MAC_EX_1G_RX_DATA_ERR, "Internal data error on receive" }, { e_FM_MAC_EX_1G_1588_TS_RX_ERR, "Time-Stamp Receive Error" }, { e_FM_MAC_EX_1G_RX_MIB_CNT_OVFL, "MIB counter overflow" }, { DTSEC_MAC_EXCEPTIONS_END, "" } }; static const char * dtsec_fm_mac_ex_to_str(e_FmMacExceptions exception) { int i; for (i = 0; dtsec_fm_mac_exceptions[i].num != exception && dtsec_fm_mac_exceptions[i].num != DTSEC_MAC_EXCEPTIONS_END; ++i) ; if (dtsec_fm_mac_exceptions[i].num == DTSEC_MAC_EXCEPTIONS_END) return (""); return (dtsec_fm_mac_exceptions[i].str); } static void dtsec_fm_mac_mdio_event_callback(t_Handle h_App, e_FmMacExceptions exception) { struct dtsec_softc *sc; sc = h_App; device_printf(sc->sc_dev, "MDIO event %i: %s.\n", exception, dtsec_fm_mac_ex_to_str(exception)); } static void dtsec_fm_mac_exception_callback(t_Handle app, e_FmMacExceptions exception) { struct dtsec_softc *sc; sc = app; device_printf(sc->sc_dev, "MAC exception %i: %s.\n", exception, dtsec_fm_mac_ex_to_str(exception)); } static void dtsec_fm_mac_free(struct dtsec_softc *sc) { if (sc->sc_mach == NULL) return; FM_MAC_Disable(sc->sc_mach, e_COMM_MODE_RX_AND_TX); FM_MAC_Free(sc->sc_mach); sc->sc_mach = NULL; } static int dtsec_fm_mac_init(struct dtsec_softc *sc, uint8_t *mac) { t_FmMacParams params; t_Error error; memset(¶ms, 0, sizeof(params)); memcpy(¶ms.addr, mac, sizeof(params.addr)); params.baseAddr = rman_get_bushandle(sc->sc_mem); params.enetMode = sc->sc_mac_enet_mode; params.macId = sc->sc_eth_id; params.mdioIrq = sc->sc_mac_mdio_irq; params.f_Event = dtsec_fm_mac_mdio_event_callback; params.f_Exception = dtsec_fm_mac_exception_callback; params.h_App = sc; params.h_Fm = sc->sc_fmh; sc->sc_mach = FM_MAC_Config(¶ms); if (sc->sc_mach == NULL) { device_printf(sc->sc_dev, "couldn't configure FM_MAC module.\n" ); return (ENXIO); } error = FM_MAC_ConfigResetOnInit(sc->sc_mach, TRUE); if (error != E_OK) { device_printf(sc->sc_dev, "couldn't enable reset on init " "feature.\n"); dtsec_fm_mac_free(sc); return (ENXIO); } /* Do not inform about pause frames */ error = FM_MAC_ConfigException(sc->sc_mach, e_FM_MAC_EX_1G_RX_CTL, FALSE); if (error != E_OK) { device_printf(sc->sc_dev, "couldn't disable pause frames " "exception.\n"); dtsec_fm_mac_free(sc); return (ENXIO); } error = FM_MAC_Init(sc->sc_mach); if (error != E_OK) { device_printf(sc->sc_dev, "couldn't initialize FM_MAC module." "\n"); dtsec_fm_mac_free(sc); return (ENXIO); } return (0); } /** @} */ /** * @group FMan PORT routines. * @{ */ static const char * dtsec_fm_port_ex_to_str(e_FmPortExceptions exception) { switch (exception) { case e_FM_PORT_EXCEPTION_IM_BUSY: return ("IM: RX busy"); default: return (""); } } void dtsec_fm_port_rx_exception_callback(t_Handle app, e_FmPortExceptions exception) { struct dtsec_softc *sc; sc = app; device_printf(sc->sc_dev, "RX exception: %i: %s.\n", exception, dtsec_fm_port_ex_to_str(exception)); } void dtsec_fm_port_tx_exception_callback(t_Handle app, e_FmPortExceptions exception) { struct dtsec_softc *sc; sc = app; device_printf(sc->sc_dev, "TX exception: %i: %s.\n", exception, dtsec_fm_port_ex_to_str(exception)); } e_FmPortType dtsec_fm_port_rx_type(enum eth_dev_type type) { switch (type) { case ETH_DTSEC: return (e_FM_PORT_TYPE_RX); case ETH_10GSEC: return (e_FM_PORT_TYPE_RX_10G); default: return (e_FM_PORT_TYPE_DUMMY); } } e_FmPortType dtsec_fm_port_tx_type(enum eth_dev_type type) { switch (type) { case ETH_DTSEC: return (e_FM_PORT_TYPE_TX); case ETH_10GSEC: return (e_FM_PORT_TYPE_TX_10G); default: return (e_FM_PORT_TYPE_DUMMY); } } static void dtsec_fm_port_free_both(struct dtsec_softc *sc) { if (sc->sc_rxph) { FM_PORT_Free(sc->sc_rxph); sc->sc_rxph = NULL; } if (sc->sc_txph) { FM_PORT_Free(sc->sc_txph); sc->sc_txph = NULL; } } /** @} */ /** * @group IFnet routines. * @{ */ static int dtsec_set_mtu(struct dtsec_softc *sc, unsigned int mtu) { mtu += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN; DTSEC_LOCK_ASSERT(sc); if (mtu >= DTSEC_MIN_FRAME_SIZE && mtu <= DTSEC_MAX_FRAME_SIZE) { bus_write_4(sc->sc_mem, DTSEC_REG_MAXFRM, mtu); return (mtu); } return (0); } static int dtsec_if_enable_locked(struct dtsec_softc *sc) { int error; DTSEC_LOCK_ASSERT(sc); error = FM_MAC_Enable(sc->sc_mach, e_COMM_MODE_RX_AND_TX); if (error != E_OK) return (EIO); error = FM_PORT_Enable(sc->sc_rxph); if (error != E_OK) return (EIO); error = FM_PORT_Enable(sc->sc_txph); if (error != E_OK) return (EIO); sc->sc_ifnet->if_drv_flags |= IFF_DRV_RUNNING; /* Refresh link state */ dtsec_miibus_statchg(sc->sc_dev); return (0); } static int dtsec_if_disable_locked(struct dtsec_softc *sc) { int error; DTSEC_LOCK_ASSERT(sc); error = FM_MAC_Disable(sc->sc_mach, e_COMM_MODE_RX_AND_TX); if (error != E_OK) return (EIO); error = FM_PORT_Disable(sc->sc_rxph); if (error != E_OK) return (EIO); error = FM_PORT_Disable(sc->sc_txph); if (error != E_OK) return (EIO); sc->sc_ifnet->if_drv_flags &= ~IFF_DRV_RUNNING; return (0); } static int dtsec_if_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct dtsec_softc *sc; struct ifreq *ifr; int error; sc = ifp->if_softc; ifr = (struct ifreq *)data; error = 0; /* Basic functionality to achieve media status reports */ switch (command) { case SIOCSIFMTU: DTSEC_LOCK(sc); if (dtsec_set_mtu(sc, ifr->ifr_mtu)) ifp->if_mtu = ifr->ifr_mtu; else error = EINVAL; DTSEC_UNLOCK(sc); break; case SIOCSIFFLAGS: DTSEC_LOCK(sc); if (sc->sc_ifnet->if_flags & IFF_UP) error = dtsec_if_enable_locked(sc); else error = dtsec_if_disable_locked(sc); DTSEC_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media, command); break; default: error = ether_ioctl(ifp, command, data); } return (error); } static void dtsec_if_tick(void *arg) { struct dtsec_softc *sc; sc = arg; /* TODO */ DTSEC_LOCK(sc); mii_tick(sc->sc_mii); callout_reset(&sc->sc_tick_callout, hz, dtsec_if_tick, sc); DTSEC_UNLOCK(sc); } static void dtsec_if_deinit_locked(struct dtsec_softc *sc) { DTSEC_LOCK_ASSERT(sc); DTSEC_UNLOCK(sc); callout_drain(&sc->sc_tick_callout); DTSEC_LOCK(sc); } static void dtsec_if_init_locked(struct dtsec_softc *sc) { int error; DTSEC_LOCK_ASSERT(sc); /* Set MAC address */ error = FM_MAC_ModifyMacAddr(sc->sc_mach, (t_EnetAddr *)IF_LLADDR(sc->sc_ifnet)); if (error != E_OK) { device_printf(sc->sc_dev, "couldn't set MAC address.\n"); goto err; } /* Start MII polling */ if (sc->sc_mii) callout_reset(&sc->sc_tick_callout, hz, dtsec_if_tick, sc); if (sc->sc_ifnet->if_flags & IFF_UP) { error = dtsec_if_enable_locked(sc); if (error != 0) goto err; } else { error = dtsec_if_disable_locked(sc); if (error != 0) goto err; } return; err: dtsec_if_deinit_locked(sc); device_printf(sc->sc_dev, "initialization error.\n"); return; } static void dtsec_if_init(void *data) { struct dtsec_softc *sc; sc = data; DTSEC_LOCK(sc); dtsec_if_init_locked(sc); DTSEC_UNLOCK(sc); } static void dtsec_if_start(struct ifnet *ifp) { struct dtsec_softc *sc; sc = ifp->if_softc; DTSEC_LOCK(sc); sc->sc_start_locked(sc); DTSEC_UNLOCK(sc); } static void dtsec_if_watchdog(struct ifnet *ifp) { /* TODO */ } /** @} */ /** * @group IFmedia routines. * @{ */ static int dtsec_ifmedia_upd(struct ifnet *ifp) { struct dtsec_softc *sc = ifp->if_softc; DTSEC_LOCK(sc); mii_mediachg(sc->sc_mii); DTSEC_UNLOCK(sc); return (0); } static void dtsec_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct dtsec_softc *sc = ifp->if_softc; DTSEC_LOCK(sc); mii_pollstat(sc->sc_mii); ifmr->ifm_active = sc->sc_mii->mii_media_active; ifmr->ifm_status = sc->sc_mii->mii_media_status; DTSEC_UNLOCK(sc); } /** @} */ /** * @group dTSEC bus interface. * @{ */ static void dtsec_configure_mode(struct dtsec_softc *sc) { char tunable[64]; snprintf(tunable, sizeof(tunable), "%s.independent_mode", device_get_nameunit(sc->sc_dev)); sc->sc_mode = DTSEC_MODE_REGULAR; TUNABLE_INT_FETCH(tunable, &sc->sc_mode); if (sc->sc_mode == DTSEC_MODE_REGULAR) { sc->sc_port_rx_init = dtsec_rm_fm_port_rx_init; sc->sc_port_tx_init = dtsec_rm_fm_port_tx_init; sc->sc_start_locked = dtsec_rm_if_start_locked; } else { sc->sc_port_rx_init = dtsec_im_fm_port_rx_init; sc->sc_port_tx_init = dtsec_im_fm_port_tx_init; sc->sc_start_locked = dtsec_im_if_start_locked; } device_printf(sc->sc_dev, "Configured for %s mode.\n", (sc->sc_mode == DTSEC_MODE_REGULAR) ? "regular" : "independent"); } int dtsec_attach(device_t dev) { struct dtsec_softc *sc; device_t parent; int error; struct ifnet *ifp; sc = device_get_softc(dev); parent = device_get_parent(dev); sc->sc_dev = dev; sc->sc_mac_mdio_irq = NO_IRQ; /* Check if MallocSmart allocator is ready */ if (XX_MallocSmartInit() != E_OK) return (ENXIO); /* Init locks */ mtx_init(&sc->sc_lock, device_get_nameunit(dev), "DTSEC Global Lock", MTX_DEF); mtx_init(&sc->sc_mii_lock, device_get_nameunit(dev), "DTSEC MII Lock", MTX_DEF); /* Init callouts */ callout_init(&sc->sc_tick_callout, CALLOUT_MPSAFE); /* Read configuraton */ if ((error = fman_get_handle(parent, &sc->sc_fmh)) != 0) return (error); if ((error = fman_get_muram_handle(parent, &sc->sc_muramh)) != 0) return (error); if ((error = fman_get_bushandle(parent, &sc->sc_fm_base)) != 0) return (error); /* Configure working mode */ dtsec_configure_mode(sc); /* If we are working in regular mode configure BMAN and QMAN */ if (sc->sc_mode == DTSEC_MODE_REGULAR) { /* Create RX buffer pool */ error = dtsec_rm_pool_rx_init(sc); if (error != 0) return (EIO); /* Create RX frame queue range */ error = dtsec_rm_fqr_rx_init(sc); if (error != 0) return (EIO); /* Create frame info pool */ error = dtsec_rm_fi_pool_init(sc); if (error != 0) return (EIO); /* Create TX frame queue range */ error = dtsec_rm_fqr_tx_init(sc); if (error != 0) return (EIO); } /* Init FMan MAC module. */ error = dtsec_fm_mac_init(sc, sc->sc_mac_addr); if (error != 0) { dtsec_detach(dev); return (ENXIO); } /* Init FMan TX port */ error = sc->sc_port_tx_init(sc, device_get_unit(sc->sc_dev)); if (error != 0) { dtsec_detach(dev); return (ENXIO); } /* Init FMan RX port */ error = sc->sc_port_rx_init(sc, device_get_unit(sc->sc_dev)); if (error != 0) { dtsec_detach(dev); return (ENXIO); } /* Create network interface for upper layers */ ifp = sc->sc_ifnet = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(sc->sc_dev, "if_alloc() failed.\n"); dtsec_detach(dev); return (ENOMEM); } ifp->if_softc = sc; ifp->if_mtu = ETHERMTU; /* TODO: Configure */ - ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST; + ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_NEEDSEPOCH; ifp->if_init = dtsec_if_init; ifp->if_start = dtsec_if_start; ifp->if_ioctl = dtsec_if_ioctl; ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; if (sc->sc_phy_addr >= 0) if_initname(ifp, device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev)); else if_initname(ifp, "dtsec_phy", device_get_unit(sc->sc_dev)); /* TODO */ #if 0 IFQ_SET_MAXLEN(&ifp->if_snd, TSEC_TX_NUM_DESC - 1); ifp->if_snd.ifq_drv_maxlen = TSEC_TX_NUM_DESC - 1; IFQ_SET_READY(&ifp->if_snd); #endif ifp->if_capabilities = IFCAP_JUMBO_MTU; /* TODO: HWCSUM */ ifp->if_capenable = ifp->if_capabilities; /* Attach PHY(s) */ error = mii_attach(sc->sc_dev, &sc->sc_mii_dev, ifp, dtsec_ifmedia_upd, dtsec_ifmedia_sts, BMSR_DEFCAPMASK, sc->sc_phy_addr, MII_OFFSET_ANY, 0); if (error) { device_printf(sc->sc_dev, "attaching PHYs failed: %d\n", error); dtsec_detach(sc->sc_dev); return (error); } sc->sc_mii = device_get_softc(sc->sc_mii_dev); /* Attach to stack */ ether_ifattach(ifp, sc->sc_mac_addr); return (0); } int dtsec_detach(device_t dev) { struct dtsec_softc *sc; if_t ifp; sc = device_get_softc(dev); ifp = sc->sc_ifnet; if (device_is_attached(dev)) { ether_ifdetach(ifp); /* Shutdown interface */ DTSEC_LOCK(sc); dtsec_if_deinit_locked(sc); DTSEC_UNLOCK(sc); } if (sc->sc_ifnet) { if_free(sc->sc_ifnet); sc->sc_ifnet = NULL; } if (sc->sc_mode == DTSEC_MODE_REGULAR) { /* Free RX/TX FQRs */ dtsec_rm_fqr_rx_free(sc); dtsec_rm_fqr_tx_free(sc); /* Free frame info pool */ dtsec_rm_fi_pool_free(sc); /* Free RX buffer pool */ dtsec_rm_pool_rx_free(sc); } dtsec_fm_mac_free(sc); dtsec_fm_port_free_both(sc); /* Destroy lock */ mtx_destroy(&sc->sc_lock); return (0); } int dtsec_suspend(device_t dev) { return (0); } int dtsec_resume(device_t dev) { return (0); } int dtsec_shutdown(device_t dev) { return (0); } /** @} */ /** * @group MII bus interface. * @{ */ int dtsec_miibus_readreg(device_t dev, int phy, int reg) { struct dtsec_softc *sc; sc = device_get_softc(dev); return (MIIBUS_READREG(sc->sc_mdio, phy, reg)); } int dtsec_miibus_writereg(device_t dev, int phy, int reg, int value) { struct dtsec_softc *sc; sc = device_get_softc(dev); return (MIIBUS_WRITEREG(sc->sc_mdio, phy, reg, value)); } void dtsec_miibus_statchg(device_t dev) { struct dtsec_softc *sc; e_EnetSpeed speed; bool duplex; int error; sc = device_get_softc(dev); DTSEC_LOCK_ASSERT(sc); duplex = ((sc->sc_mii->mii_media_active & IFM_GMASK) == IFM_FDX); switch (IFM_SUBTYPE(sc->sc_mii->mii_media_active)) { case IFM_1000_T: case IFM_1000_SX: speed = e_ENET_SPEED_1000; break; case IFM_100_TX: speed = e_ENET_SPEED_100; break; case IFM_10_T: speed = e_ENET_SPEED_10; break; default: speed = e_ENET_SPEED_10; } error = FM_MAC_AdjustLink(sc->sc_mach, speed, duplex); if (error != E_OK) device_printf(sc->sc_dev, "error while adjusting MAC speed.\n"); } /** @} */ Index: head/sys/dev/hyperv/netvsc/if_hn.c =================================================================== --- head/sys/dev/hyperv/netvsc/if_hn.c (revision 357009) +++ head/sys/dev/hyperv/netvsc/if_hn.c (revision 357010) @@ -1,7574 +1,7575 @@ /*- * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_hn.h" #include "opt_inet6.h" #include "opt_inet.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmbus_if.h" #define HN_IFSTART_SUPPORT #define HN_RING_CNT_DEF_MAX 8 #define HN_VFMAP_SIZE_DEF 8 #define HN_XPNT_VF_ATTWAIT_MIN 2 /* seconds */ /* YYY should get it from the underlying channel */ #define HN_TX_DESC_CNT 512 #define HN_RNDIS_PKT_LEN \ (sizeof(struct rndis_packet_msg) + \ HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE #define HN_TX_DATA_BOUNDARY PAGE_SIZE #define HN_TX_DATA_MAXSIZE IP_MAXPACKET #define HN_TX_DATA_SEGSIZE PAGE_SIZE /* -1 for RNDIS packet message */ #define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) #define HN_DIRECT_TX_SIZE_DEF 128 #define HN_EARLY_TXEOF_THRESH 8 #define HN_PKTBUF_LEN_DEF (16 * 1024) #define HN_LROENT_CNT_DEF 128 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) /* YYY 2*MTU is a bit rough, but should be good enough. */ #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) #define HN_LRO_ACKCNT_DEF 1 #define HN_LOCK_INIT(sc) \ sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) #define HN_LOCK(sc) \ do { \ while (sx_try_xlock(&(sc)->hn_lock) == 0) \ DELAY(1000); \ } while (0) #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) #define HN_CSUM_IP_HWASSIST(sc) \ ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) #define HN_CSUM_IP6_HWASSIST(sc) \ ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) #define HN_PKTSIZE_MIN(align) \ roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \ HN_RNDIS_PKT_LEN, (align)) #define HN_PKTSIZE(m, align) \ roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align)) #ifdef RSS #define HN_RING_IDX2CPU(sc, idx) rss_getcpu((idx) % rss_getnumbuckets()) #else #define HN_RING_IDX2CPU(sc, idx) (((sc)->hn_cpu + (idx)) % mp_ncpus) #endif struct hn_txdesc { #ifndef HN_USE_TXDESC_BUFRING SLIST_ENTRY(hn_txdesc) link; #endif STAILQ_ENTRY(hn_txdesc) agg_link; /* Aggregated txdescs, in sending order. */ STAILQ_HEAD(, hn_txdesc) agg_list; /* The oldest packet, if transmission aggregation happens. */ struct mbuf *m; struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ struct hn_nvs_sendctx send_ctx; uint32_t chim_index; int chim_size; bus_dmamap_t data_dmap; bus_addr_t rndis_pkt_paddr; struct rndis_packet_msg *rndis_pkt; bus_dmamap_t rndis_pkt_dmap; }; #define HN_TXD_FLAG_ONLIST 0x0001 #define HN_TXD_FLAG_DMAMAP 0x0002 #define HN_TXD_FLAG_ONAGG 0x0004 struct hn_rxinfo { uint32_t vlan_info; uint32_t csum_info; uint32_t hash_info; uint32_t hash_value; }; struct hn_rxvf_setarg { struct hn_rx_ring *rxr; struct ifnet *vf_ifp; }; #define HN_RXINFO_VLAN 0x0001 #define HN_RXINFO_CSUM 0x0002 #define HN_RXINFO_HASHINF 0x0004 #define HN_RXINFO_HASHVAL 0x0008 #define HN_RXINFO_ALL \ (HN_RXINFO_VLAN | \ HN_RXINFO_CSUM | \ HN_RXINFO_HASHINF | \ HN_RXINFO_HASHVAL) #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff #define HN_NDIS_RXCSUM_INFO_INVALID 0 #define HN_NDIS_HASH_INFO_INVALID 0 static int hn_probe(device_t); static int hn_attach(device_t); static int hn_detach(device_t); static int hn_shutdown(device_t); static void hn_chan_callback(struct vmbus_channel *, void *); static void hn_init(void *); static int hn_ioctl(struct ifnet *, u_long, caddr_t); #ifdef HN_IFSTART_SUPPORT static void hn_start(struct ifnet *); #endif static int hn_transmit(struct ifnet *, struct mbuf *); static void hn_xmit_qflush(struct ifnet *); static int hn_ifmedia_upd(struct ifnet *); static void hn_ifmedia_sts(struct ifnet *, struct ifmediareq *); static void hn_ifnet_event(void *, struct ifnet *, int); static void hn_ifaddr_event(void *, struct ifnet *); static void hn_ifnet_attevent(void *, struct ifnet *); static void hn_ifnet_detevent(void *, struct ifnet *); static void hn_ifnet_lnkevent(void *, struct ifnet *, int); static bool hn_ismyvf(const struct hn_softc *, const struct ifnet *); static void hn_rxvf_change(struct hn_softc *, struct ifnet *, bool); static void hn_rxvf_set(struct hn_softc *, struct ifnet *); static void hn_rxvf_set_task(void *, int); static void hn_xpnt_vf_input(struct ifnet *, struct mbuf *); static int hn_xpnt_vf_iocsetflags(struct hn_softc *); static int hn_xpnt_vf_iocsetcaps(struct hn_softc *, struct ifreq *); static void hn_xpnt_vf_saveifflags(struct hn_softc *); static bool hn_xpnt_vf_isready(struct hn_softc *); static void hn_xpnt_vf_setready(struct hn_softc *); static void hn_xpnt_vf_init_taskfunc(void *, int); static void hn_xpnt_vf_init(struct hn_softc *); static void hn_xpnt_vf_setenable(struct hn_softc *); static void hn_xpnt_vf_setdisable(struct hn_softc *, bool); static void hn_vf_rss_fixup(struct hn_softc *, bool); static void hn_vf_rss_restore(struct hn_softc *); static int hn_rndis_rxinfo(const void *, int, struct hn_rxinfo *); static void hn_rndis_rx_data(struct hn_rx_ring *, const void *, int); static void hn_rndis_rx_status(struct hn_softc *, const void *, int); static void hn_rndis_init_fixat(struct hn_softc *, int); static void hn_nvs_handle_notify(struct hn_softc *, const struct vmbus_chanpkt_hdr *); static void hn_nvs_handle_comp(struct hn_softc *, struct vmbus_channel *, const struct vmbus_chanpkt_hdr *); static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, struct vmbus_channel *, const struct vmbus_chanpkt_hdr *); static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, struct vmbus_channel *, uint64_t); #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); #if __FreeBSD_version < 1100095 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); #else static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); #ifndef RSS static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS); static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS); static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS); static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS); static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS); static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS); static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS); static void hn_stop(struct hn_softc *, bool); static void hn_init_locked(struct hn_softc *); static int hn_chan_attach(struct hn_softc *, struct vmbus_channel *); static void hn_chan_detach(struct hn_softc *, struct vmbus_channel *); static int hn_attach_subchans(struct hn_softc *); static void hn_detach_allchans(struct hn_softc *); static void hn_chan_rollup(struct hn_rx_ring *, struct hn_tx_ring *); static void hn_set_ring_inuse(struct hn_softc *, int); static int hn_synth_attach(struct hn_softc *, int); static void hn_synth_detach(struct hn_softc *); static int hn_synth_alloc_subchans(struct hn_softc *, int *); static bool hn_synth_attachable(const struct hn_softc *); static void hn_suspend(struct hn_softc *); static void hn_suspend_data(struct hn_softc *); static void hn_suspend_mgmt(struct hn_softc *); static void hn_resume(struct hn_softc *); static void hn_resume_data(struct hn_softc *); static void hn_resume_mgmt(struct hn_softc *); static void hn_suspend_mgmt_taskfunc(void *, int); static void hn_chan_drain(struct hn_softc *, struct vmbus_channel *); static void hn_disable_rx(struct hn_softc *); static void hn_drain_rxtx(struct hn_softc *, int); static void hn_polling(struct hn_softc *, u_int); static void hn_chan_polling(struct vmbus_channel *, u_int); static void hn_mtu_change_fixup(struct hn_softc *); static void hn_update_link_status(struct hn_softc *); static void hn_change_network(struct hn_softc *); static void hn_link_taskfunc(void *, int); static void hn_netchg_init_taskfunc(void *, int); static void hn_netchg_status_taskfunc(void *, int); static void hn_link_status(struct hn_softc *); static int hn_create_rx_data(struct hn_softc *, int); static void hn_destroy_rx_data(struct hn_softc *); static int hn_check_iplen(const struct mbuf *, int); static void hn_rxpkt_proto(const struct mbuf *, int *, int *); static int hn_set_rxfilter(struct hn_softc *, uint32_t); static int hn_rxfilter_config(struct hn_softc *); static int hn_rss_reconfig(struct hn_softc *); static void hn_rss_ind_fixup(struct hn_softc *); static void hn_rss_mbuf_hash(struct hn_softc *, uint32_t); static int hn_rxpkt(struct hn_rx_ring *, const void *, int, const struct hn_rxinfo *); static uint32_t hn_rss_type_fromndis(uint32_t); static uint32_t hn_rss_type_tondis(uint32_t); static int hn_tx_ring_create(struct hn_softc *, int); static void hn_tx_ring_destroy(struct hn_tx_ring *); static int hn_create_tx_data(struct hn_softc *, int); static void hn_fixup_tx_data(struct hn_softc *); static void hn_fixup_rx_data(struct hn_softc *); static void hn_destroy_tx_data(struct hn_softc *); static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); static void hn_txdesc_gc(struct hn_tx_ring *, struct hn_txdesc *); static int hn_encap(struct ifnet *, struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **); static int hn_txpkt(struct ifnet *, struct hn_tx_ring *, struct hn_txdesc *); static void hn_set_chim_size(struct hn_softc *, int); static void hn_set_tso_maxsize(struct hn_softc *, int, int); static bool hn_tx_ring_pending(struct hn_tx_ring *); static void hn_tx_ring_qflush(struct hn_tx_ring *); static void hn_resume_tx(struct hn_softc *, int); static void hn_set_txagg(struct hn_softc *); static void *hn_try_txagg(struct ifnet *, struct hn_tx_ring *, struct hn_txdesc *, int); static int hn_get_txswq_depth(const struct hn_tx_ring *); static void hn_txpkt_done(struct hn_nvs_sendctx *, struct hn_softc *, struct vmbus_channel *, const void *, int); static int hn_txpkt_sglist(struct hn_tx_ring *, struct hn_txdesc *); static int hn_txpkt_chim(struct hn_tx_ring *, struct hn_txdesc *); static int hn_xmit(struct hn_tx_ring *, int); static void hn_xmit_taskfunc(void *, int); static void hn_xmit_txeof(struct hn_tx_ring *); static void hn_xmit_txeof_taskfunc(void *, int); #ifdef HN_IFSTART_SUPPORT static int hn_start_locked(struct hn_tx_ring *, int); static void hn_start_taskfunc(void *, int); static void hn_start_txeof(struct hn_tx_ring *); static void hn_start_txeof_taskfunc(void *, int); #endif SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Hyper-V network interface"); /* Trust tcp segements verification on host side. */ static int hn_trust_hosttcp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, &hn_trust_hosttcp, 0, "Trust tcp segement verification on host side, " "when csum info is missing (global setting)"); /* Trust udp datagrams verification on host side. */ static int hn_trust_hostudp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, &hn_trust_hostudp, 0, "Trust udp datagram verification on host side, " "when csum info is missing (global setting)"); /* Trust ip packets verification on host side. */ static int hn_trust_hostip = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, &hn_trust_hostip, 0, "Trust ip packet verification on host side, " "when csum info is missing (global setting)"); /* * Offload UDP/IPv4 checksum. */ static int hn_enable_udp4cs = 1; SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp4cs, CTLFLAG_RDTUN, &hn_enable_udp4cs, 0, "Offload UDP/IPv4 checksum"); /* * Offload UDP/IPv6 checksum. */ static int hn_enable_udp6cs = 1; SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp6cs, CTLFLAG_RDTUN, &hn_enable_udp6cs, 0, "Offload UDP/IPv6 checksum"); /* Stats. */ static counter_u64_t hn_udpcs_fixup; SYSCTL_COUNTER_U64(_hw_hn, OID_AUTO, udpcs_fixup, CTLFLAG_RW, &hn_udpcs_fixup, "# of UDP checksum fixup"); /* * See hn_set_hlen(). * * This value is for Azure. For Hyper-V, set this above * 65536 to disable UDP datagram checksum fixup. */ static int hn_udpcs_fixup_mtu = 1420; SYSCTL_INT(_hw_hn, OID_AUTO, udpcs_fixup_mtu, CTLFLAG_RWTUN, &hn_udpcs_fixup_mtu, 0, "UDP checksum fixup MTU threshold"); /* Limit TSO burst size */ static int hn_tso_maxlen = IP_MAXPACKET; SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, &hn_tso_maxlen, 0, "TSO burst limit"); /* Limit chimney send size */ static int hn_tx_chimney_size = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, &hn_tx_chimney_size, 0, "Chimney send packet size limit"); /* Limit the size of packet for direct transmission */ static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); /* # of LRO entries per RX ring */ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, &hn_lro_entry_count, 0, "LRO entry count"); #endif #endif static int hn_tx_taskq_cnt = 1; SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN, &hn_tx_taskq_cnt, 0, "# of TX taskqueues"); #define HN_TX_TASKQ_M_INDEP 0 #define HN_TX_TASKQ_M_GLOBAL 1 #define HN_TX_TASKQ_M_EVTTQ 2 static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP; SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN, &hn_tx_taskq_mode, 0, "TX taskqueue modes: " "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs"); #ifndef HN_USE_TXDESC_BUFRING static int hn_use_txdesc_bufring = 0; #else static int hn_use_txdesc_bufring = 1; #endif SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); #ifdef HN_IFSTART_SUPPORT /* Use ifnet.if_start instead of ifnet.if_transmit */ static int hn_use_if_start = 0; SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, &hn_use_if_start, 0, "Use if_start TX method"); #endif /* # of channels to use */ static int hn_chan_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, &hn_chan_cnt, 0, "# of channels to use; each channel has one RX ring and one TX ring"); /* # of transmit rings to use */ static int hn_tx_ring_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, &hn_tx_ring_cnt, 0, "# of TX rings to use"); /* Software TX ring deptch */ static int hn_tx_swq_depth = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); /* Enable sorted LRO, and the depth of the per-channel mbuf queue */ #if __FreeBSD_version >= 1100095 static u_int hn_lro_mbufq_depth = 0; SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); #endif /* Packet transmission aggregation size limit */ static int hn_tx_agg_size = -1; SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN, &hn_tx_agg_size, 0, "Packet transmission aggregation size limit"); /* Packet transmission aggregation count limit */ static int hn_tx_agg_pkts = -1; SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN, &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit"); /* VF list */ SYSCTL_PROC(_hw_hn, OID_AUTO, vflist, CTLFLAG_RD | CTLTYPE_STRING, 0, 0, hn_vflist_sysctl, "A", "VF list"); /* VF mapping */ SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, CTLFLAG_RD | CTLTYPE_STRING, 0, 0, hn_vfmap_sysctl, "A", "VF mapping"); /* Transparent VF */ static int hn_xpnt_vf = 1; SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN, &hn_xpnt_vf, 0, "Transparent VF mod"); /* Accurate BPF support for Transparent VF */ static int hn_xpnt_vf_accbpf = 0; SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN, &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF"); /* Extra wait for transparent VF attach routing; unit seconds. */ static int hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN, &hn_xpnt_vf_attwait, 0, "Extra wait for transparent VF attach routing; unit: seconds"); static u_int hn_cpu_index; /* next CPU for channel */ static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */ static struct rmlock hn_vfmap_lock; static int hn_vfmap_size; static struct ifnet **hn_vfmap; #ifndef RSS static const uint8_t hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; #endif /* !RSS */ static const struct hyperv_guid hn_guid = { .hv_guid = { 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, 0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e } }; static device_method_t hn_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hn_probe), DEVMETHOD(device_attach, hn_attach), DEVMETHOD(device_detach, hn_detach), DEVMETHOD(device_shutdown, hn_shutdown), DEVMETHOD_END }; static driver_t hn_driver = { "hn", hn_methods, sizeof(struct hn_softc) }; static devclass_t hn_devclass; DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0); MODULE_VERSION(hn, 1); MODULE_DEPEND(hn, vmbus, 1, 1, 1); #if __FreeBSD_version >= 1100099 static void hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) { int i; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; } #endif static int hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID && txd->chim_size == 0, ("invalid rndis sglist txd")); return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt)); } static int hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd) { struct hn_nvs_rndis rndis; KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID && txd->chim_size > 0, ("invalid rndis chim txd")); rndis.nvs_type = HN_NVS_TYPE_RNDIS; rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA; rndis.nvs_chim_idx = txd->chim_index; rndis.nvs_chim_sz = txd->chim_size; return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC, &rndis, sizeof(rndis), &txd->send_ctx)); } static __inline uint32_t hn_chim_alloc(struct hn_softc *sc) { int i, bmap_cnt = sc->hn_chim_bmap_cnt; u_long *bmap = sc->hn_chim_bmap; uint32_t ret = HN_NVS_CHIM_IDX_INVALID; for (i = 0; i < bmap_cnt; ++i) { int idx; idx = ffsl(~bmap[i]); if (idx == 0) continue; --idx; /* ffsl is 1-based */ KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, ("invalid i %d and idx %d", i, idx)); if (atomic_testandset_long(&bmap[i], idx)) continue; ret = i * LONG_BIT + idx; break; } return (ret); } static __inline void hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) { u_long mask; uint32_t idx; idx = chim_idx / LONG_BIT; KASSERT(idx < sc->hn_chim_bmap_cnt, ("invalid chimney index 0x%x", chim_idx)); mask = 1UL << (chim_idx % LONG_BIT); KASSERT(sc->hn_chim_bmap[idx] & mask, ("index bitmap 0x%lx, chimney index %u, " "bitmap idx %d, bitmask 0x%lx", sc->hn_chim_bmap[idx], chim_idx, idx, mask)); atomic_clear_long(&sc->hn_chim_bmap[idx], mask); } #if defined(INET6) || defined(INET) #define PULLUP_HDR(m, len) \ do { \ if (__predict_false((m)->m_len < (len))) { \ (m) = m_pullup((m), (len)); \ if ((m) == NULL) \ return (NULL); \ } \ } while (0) /* * NOTE: If this function failed, the m_head would be freed. */ static __inline struct mbuf * hn_tso_fixup(struct mbuf *m_head) { struct ether_vlan_header *evl; struct tcphdr *th; int ehlen; KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable")); PULLUP_HDR(m_head, sizeof(*evl)); evl = mtod(m_head, struct ether_vlan_header *); if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ehlen = ETHER_HDR_LEN; m_head->m_pkthdr.l2hlen = ehlen; #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { struct ip *ip; int iphlen; PULLUP_HDR(m_head, ehlen + sizeof(*ip)); ip = mtodo(m_head, ehlen); iphlen = ip->ip_hl << 2; m_head->m_pkthdr.l3hlen = iphlen; PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); th = mtodo(m_head, ehlen + iphlen); ip->ip_len = 0; ip->ip_sum = 0; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { struct ip6_hdr *ip6; PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); ip6 = mtodo(m_head, ehlen); if (ip6->ip6_nxt != IPPROTO_TCP) { m_freem(m_head); return (NULL); } m_head->m_pkthdr.l3hlen = sizeof(*ip6); PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th)); th = mtodo(m_head, ehlen + sizeof(*ip6)); ip6->ip6_plen = 0; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); } #endif return (m_head); } /* * NOTE: If this function failed, the m_head would be freed. */ static __inline struct mbuf * hn_set_hlen(struct mbuf *m_head) { const struct ether_vlan_header *evl; int ehlen; PULLUP_HDR(m_head, sizeof(*evl)); evl = mtod(m_head, const struct ether_vlan_header *); if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ehlen = ETHER_HDR_LEN; m_head->m_pkthdr.l2hlen = ehlen; #ifdef INET if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP_UDP)) { const struct ip *ip; int iphlen; PULLUP_HDR(m_head, ehlen + sizeof(*ip)); ip = mtodo(m_head, ehlen); iphlen = ip->ip_hl << 2; m_head->m_pkthdr.l3hlen = iphlen; /* * UDP checksum offload does not work in Azure, if the * following conditions meet: * - sizeof(IP hdr + UDP hdr + payload) > 1420. * - IP_DF is not set in the IP hdr. * * Fallback to software checksum for these UDP datagrams. */ if ((m_head->m_pkthdr.csum_flags & CSUM_IP_UDP) && m_head->m_pkthdr.len > hn_udpcs_fixup_mtu + ehlen && (ntohs(ip->ip_off) & IP_DF) == 0) { uint16_t off = ehlen + iphlen; counter_u64_add(hn_udpcs_fixup, 1); PULLUP_HDR(m_head, off + sizeof(struct udphdr)); *(uint16_t *)(m_head->m_data + off + m_head->m_pkthdr.csum_data) = in_cksum_skip( m_head, m_head->m_pkthdr.len, off); m_head->m_pkthdr.csum_flags &= ~CSUM_IP_UDP; } } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { const struct ip6_hdr *ip6; PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); ip6 = mtodo(m_head, ehlen); if (ip6->ip6_nxt != IPPROTO_TCP && ip6->ip6_nxt != IPPROTO_UDP) { m_freem(m_head); return (NULL); } m_head->m_pkthdr.l3hlen = sizeof(*ip6); } #endif return (m_head); } /* * NOTE: If this function failed, the m_head would be freed. */ static __inline struct mbuf * hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn) { const struct tcphdr *th; int ehlen, iphlen; *tcpsyn = 0; ehlen = m_head->m_pkthdr.l2hlen; iphlen = m_head->m_pkthdr.l3hlen; PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); th = mtodo(m_head, ehlen + iphlen); if (th->th_flags & TH_SYN) *tcpsyn = 1; return (m_head); } #undef PULLUP_HDR #endif /* INET6 || INET */ static int hn_set_rxfilter(struct hn_softc *sc, uint32_t filter) { int error = 0; HN_LOCK_ASSERT(sc); if (sc->hn_rx_filter != filter) { error = hn_rndis_set_rxfilter(sc, filter); if (!error) sc->hn_rx_filter = filter; } return (error); } static int hn_rxfilter_config(struct hn_softc *sc) { struct ifnet *ifp = sc->hn_ifp; uint32_t filter; HN_LOCK_ASSERT(sc); /* * If the non-transparent mode VF is activated, we don't know how * its RX filter is configured, so stick the synthetic device in * the promiscous mode. */ if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) { filter = NDIS_PACKET_TYPE_PROMISCUOUS; } else { filter = NDIS_PACKET_TYPE_DIRECTED; if (ifp->if_flags & IFF_BROADCAST) filter |= NDIS_PACKET_TYPE_BROADCAST; /* TODO: support multicast list */ if ((ifp->if_flags & IFF_ALLMULTI) || !CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; } return (hn_set_rxfilter(sc, filter)); } static void hn_set_txagg(struct hn_softc *sc) { uint32_t size, pkts; int i; /* * Setup aggregation size. */ if (sc->hn_agg_size < 0) size = UINT32_MAX; else size = sc->hn_agg_size; if (sc->hn_rndis_agg_size < size) size = sc->hn_rndis_agg_size; /* NOTE: We only aggregate packets using chimney sending buffers. */ if (size > (uint32_t)sc->hn_chim_szmax) size = sc->hn_chim_szmax; if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) { /* Disable */ size = 0; pkts = 0; goto done; } /* NOTE: Type of the per TX ring setting is 'int'. */ if (size > INT_MAX) size = INT_MAX; /* * Setup aggregation packet count. */ if (sc->hn_agg_pkts < 0) pkts = UINT32_MAX; else pkts = sc->hn_agg_pkts; if (sc->hn_rndis_agg_pkts < pkts) pkts = sc->hn_rndis_agg_pkts; if (pkts <= 1) { /* Disable */ size = 0; pkts = 0; goto done; } /* NOTE: Type of the per TX ring setting is 'short'. */ if (pkts > SHRT_MAX) pkts = SHRT_MAX; done: /* NOTE: Type of the per TX ring setting is 'short'. */ if (sc->hn_rndis_agg_align > SHRT_MAX) { /* Disable */ size = 0; pkts = 0; } if (bootverbose) { if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n", size, pkts, sc->hn_rndis_agg_align); } for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; mtx_lock(&txr->hn_tx_lock); txr->hn_agg_szmax = size; txr->hn_agg_pktmax = pkts; txr->hn_agg_align = sc->hn_rndis_agg_align; mtx_unlock(&txr->hn_tx_lock); } } static int hn_get_txswq_depth(const struct hn_tx_ring *txr) { KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); if (hn_tx_swq_depth < txr->hn_txdesc_cnt) return txr->hn_txdesc_cnt; return hn_tx_swq_depth; } static int hn_rss_reconfig(struct hn_softc *sc) { int error; HN_LOCK_ASSERT(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) return (ENXIO); /* * Disable RSS first. * * NOTE: * Direct reconfiguration by setting the UNCHG flags does * _not_ work properly. */ if (bootverbose) if_printf(sc->hn_ifp, "disable RSS\n"); error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE); if (error) { if_printf(sc->hn_ifp, "RSS disable failed\n"); return (error); } /* * Reenable the RSS w/ the updated RSS key or indirect * table. */ if (bootverbose) if_printf(sc->hn_ifp, "reconfig RSS\n"); error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); if (error) { if_printf(sc->hn_ifp, "RSS reconfig failed\n"); return (error); } return (0); } static void hn_rss_ind_fixup(struct hn_softc *sc) { struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; int i, nchan; nchan = sc->hn_rx_ring_inuse; KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); /* * Check indirect table to make sure that all channels in it * can be used. */ for (i = 0; i < NDIS_HASH_INDCNT; ++i) { if (rss->rss_ind[i] >= nchan) { if_printf(sc->hn_ifp, "RSS indirect table %d fixup: %u -> %d\n", i, rss->rss_ind[i], nchan - 1); rss->rss_ind[i] = nchan - 1; } } } static int hn_ifmedia_upd(struct ifnet *ifp __unused) { return EOPNOTSUPP; } static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct hn_softc *sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } static void hn_rxvf_set_task(void *xarg, int pending __unused) { struct hn_rxvf_setarg *arg = xarg; arg->rxr->hn_rxvf_ifp = arg->vf_ifp; } static void hn_rxvf_set(struct hn_softc *sc, struct ifnet *vf_ifp) { struct hn_rx_ring *rxr; struct hn_rxvf_setarg arg; struct task task; int i; HN_LOCK_ASSERT(sc); TASK_INIT(&task, 0, hn_rxvf_set_task, &arg); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; if (i < sc->hn_rx_ring_inuse) { arg.rxr = rxr; arg.vf_ifp = vf_ifp; vmbus_chan_run_task(rxr->hn_chan, &task); } else { rxr->hn_rxvf_ifp = vf_ifp; } } } static bool hn_ismyvf(const struct hn_softc *sc, const struct ifnet *ifp) { const struct ifnet *hn_ifp; hn_ifp = sc->hn_ifp; if (ifp == hn_ifp) return (false); if (ifp->if_alloctype != IFT_ETHER) return (false); /* Ignore lagg/vlan interfaces */ if (strcmp(ifp->if_dname, "lagg") == 0 || strcmp(ifp->if_dname, "vlan") == 0) return (false); /* * During detach events ifp->if_addr might be NULL. * Make sure the bcmp() below doesn't panic on that: */ if (ifp->if_addr == NULL || hn_ifp->if_addr == NULL) return (false); if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0) return (false); return (true); } static void hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp, bool rxvf) { struct ifnet *hn_ifp; HN_LOCK(sc); if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) goto out; if (!hn_ismyvf(sc, ifp)) goto out; hn_ifp = sc->hn_ifp; if (rxvf) { if (sc->hn_flags & HN_FLAG_RXVF) goto out; sc->hn_flags |= HN_FLAG_RXVF; hn_rxfilter_config(sc); } else { if (!(sc->hn_flags & HN_FLAG_RXVF)) goto out; sc->hn_flags &= ~HN_FLAG_RXVF; if (hn_ifp->if_drv_flags & IFF_DRV_RUNNING) hn_rxfilter_config(sc); else hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE); } hn_nvs_set_datapath(sc, rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH); hn_rxvf_set(sc, rxvf ? ifp : NULL); if (rxvf) { hn_vf_rss_fixup(sc, true); hn_suspend_mgmt(sc); sc->hn_link_flags &= ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG); if_link_state_change(hn_ifp, LINK_STATE_DOWN); } else { hn_vf_rss_restore(sc); hn_resume_mgmt(sc); } devctl_notify("HYPERV_NIC_VF", hn_ifp->if_xname, rxvf ? "VF_UP" : "VF_DOWN", NULL); if (bootverbose) { if_printf(hn_ifp, "datapath is switched %s %s\n", rxvf ? "to" : "from", ifp->if_xname); } out: HN_UNLOCK(sc); } static void hn_ifnet_event(void *arg, struct ifnet *ifp, int event) { if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN) return; hn_rxvf_change(arg, ifp, event == IFNET_EVENT_UP); } static void hn_ifaddr_event(void *arg, struct ifnet *ifp) { hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP); } static int hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr) { struct ifnet *ifp, *vf_ifp; uint64_t tmp; int error; HN_LOCK_ASSERT(sc); ifp = sc->hn_ifp; vf_ifp = sc->hn_vf_ifp; /* * Fix up requested capabilities w/ supported capabilities, * since the supported capabilities could have been changed. */ ifr->ifr_reqcap &= ifp->if_capabilities; /* Pass SIOCSIFCAP to VF. */ error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr); /* * NOTE: * The error will be propagated to the callers, however, it * is _not_ useful here. */ /* * Merge VF's enabled capabilities. */ ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities; tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc); if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= tmp; else ifp->if_hwassist &= ~tmp; tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc); if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= tmp; else ifp->if_hwassist &= ~tmp; tmp = vf_ifp->if_hwassist & CSUM_IP_TSO; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= tmp; else ifp->if_hwassist &= ~tmp; tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= tmp; else ifp->if_hwassist &= ~tmp; return (error); } static int hn_xpnt_vf_iocsetflags(struct hn_softc *sc) { struct ifnet *vf_ifp; struct ifreq ifr; HN_LOCK_ASSERT(sc); vf_ifp = sc->hn_vf_ifp; memset(&ifr, 0, sizeof(ifr)); strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); ifr.ifr_flags = vf_ifp->if_flags & 0xffff; ifr.ifr_flagshigh = vf_ifp->if_flags >> 16; return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr)); } static void hn_xpnt_vf_saveifflags(struct hn_softc *sc) { struct ifnet *ifp = sc->hn_ifp; int allmulti = 0; HN_LOCK_ASSERT(sc); /* XXX vlan(4) style mcast addr maintenance */ if (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) allmulti = IFF_ALLMULTI; /* Always set the VF's if_flags */ sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti; } static void hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m) { struct rm_priotracker pt; struct ifnet *hn_ifp = NULL; struct mbuf *mn; /* * XXX racy, if hn(4) ever detached. */ rm_rlock(&hn_vfmap_lock, &pt); if (vf_ifp->if_index < hn_vfmap_size) hn_ifp = hn_vfmap[vf_ifp->if_index]; rm_runlock(&hn_vfmap_lock, &pt); if (hn_ifp != NULL) { for (mn = m; mn != NULL; mn = mn->m_nextpkt) { /* * Allow tapping on the VF. */ ETHER_BPF_MTAP(vf_ifp, mn); /* * Update VF stats. */ if ((vf_ifp->if_capenable & IFCAP_HWSTATS) == 0) { if_inc_counter(vf_ifp, IFCOUNTER_IBYTES, mn->m_pkthdr.len); } /* * XXX IFCOUNTER_IMCAST * This stat updating is kinda invasive, since it * requires two checks on the mbuf: the length check * and the ethernet header check. As of this write, * all multicast packets go directly to hn(4), which * makes imcast stat updating in the VF a try in vian. */ /* * Fix up rcvif and increase hn(4)'s ipackets. */ mn->m_pkthdr.rcvif = hn_ifp; if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); } /* * Go through hn(4)'s if_input. */ hn_ifp->if_input(hn_ifp, m); } else { /* * In the middle of the transition; free this * mbuf chain. */ while (m != NULL) { mn = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); m = mn; } } } static void hn_mtu_change_fixup(struct hn_softc *sc) { struct ifnet *ifp; HN_LOCK_ASSERT(sc); ifp = sc->hn_ifp; hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); #endif } static uint32_t hn_rss_type_fromndis(uint32_t rss_hash) { uint32_t types = 0; if (rss_hash & NDIS_HASH_IPV4) types |= RSS_TYPE_IPV4; if (rss_hash & NDIS_HASH_TCP_IPV4) types |= RSS_TYPE_TCP_IPV4; if (rss_hash & NDIS_HASH_IPV6) types |= RSS_TYPE_IPV6; if (rss_hash & NDIS_HASH_IPV6_EX) types |= RSS_TYPE_IPV6_EX; if (rss_hash & NDIS_HASH_TCP_IPV6) types |= RSS_TYPE_TCP_IPV6; if (rss_hash & NDIS_HASH_TCP_IPV6_EX) types |= RSS_TYPE_TCP_IPV6_EX; if (rss_hash & NDIS_HASH_UDP_IPV4_X) types |= RSS_TYPE_UDP_IPV4; return (types); } static uint32_t hn_rss_type_tondis(uint32_t types) { uint32_t rss_hash = 0; KASSERT((types & (RSS_TYPE_UDP_IPV6 | RSS_TYPE_UDP_IPV6_EX)) == 0, ("UDP6 and UDP6EX are not supported")); if (types & RSS_TYPE_IPV4) rss_hash |= NDIS_HASH_IPV4; if (types & RSS_TYPE_TCP_IPV4) rss_hash |= NDIS_HASH_TCP_IPV4; if (types & RSS_TYPE_IPV6) rss_hash |= NDIS_HASH_IPV6; if (types & RSS_TYPE_IPV6_EX) rss_hash |= NDIS_HASH_IPV6_EX; if (types & RSS_TYPE_TCP_IPV6) rss_hash |= NDIS_HASH_TCP_IPV6; if (types & RSS_TYPE_TCP_IPV6_EX) rss_hash |= NDIS_HASH_TCP_IPV6_EX; if (types & RSS_TYPE_UDP_IPV4) rss_hash |= NDIS_HASH_UDP_IPV4_X; return (rss_hash); } static void hn_rss_mbuf_hash(struct hn_softc *sc, uint32_t mbuf_hash) { int i; HN_LOCK_ASSERT(sc); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_mbuf_hash = mbuf_hash; } static void hn_vf_rss_fixup(struct hn_softc *sc, bool reconf) { struct ifnet *ifp, *vf_ifp; struct ifrsshash ifrh; struct ifrsskey ifrk; int error; uint32_t my_types, diff_types, mbuf_types = 0; HN_LOCK_ASSERT(sc); KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); if (sc->hn_rx_ring_inuse == 1) { /* No RSS on synthetic parts; done. */ return; } if ((sc->hn_rss_hcap & NDIS_HASH_FUNCTION_TOEPLITZ) == 0) { /* Synthetic parts do not support Toeplitz; done. */ return; } ifp = sc->hn_ifp; vf_ifp = sc->hn_vf_ifp; /* * Extract VF's RSS key. Only 40 bytes key for Toeplitz is * supported. */ memset(&ifrk, 0, sizeof(ifrk)); strlcpy(ifrk.ifrk_name, vf_ifp->if_xname, sizeof(ifrk.ifrk_name)); error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSKEY, (caddr_t)&ifrk); if (error) { if_printf(ifp, "%s SIOCGIFRSSKEY failed: %d\n", vf_ifp->if_xname, error); goto done; } if (ifrk.ifrk_func != RSS_FUNC_TOEPLITZ) { if_printf(ifp, "%s RSS function %u is not Toeplitz\n", vf_ifp->if_xname, ifrk.ifrk_func); goto done; } if (ifrk.ifrk_keylen != NDIS_HASH_KEYSIZE_TOEPLITZ) { if_printf(ifp, "%s invalid RSS Toeplitz key length %d\n", vf_ifp->if_xname, ifrk.ifrk_keylen); goto done; } /* * Extract VF's RSS hash. Only Toeplitz is supported. */ memset(&ifrh, 0, sizeof(ifrh)); strlcpy(ifrh.ifrh_name, vf_ifp->if_xname, sizeof(ifrh.ifrh_name)); error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSHASH, (caddr_t)&ifrh); if (error) { if_printf(ifp, "%s SIOCGRSSHASH failed: %d\n", vf_ifp->if_xname, error); goto done; } if (ifrh.ifrh_func != RSS_FUNC_TOEPLITZ) { if_printf(ifp, "%s RSS function %u is not Toeplitz\n", vf_ifp->if_xname, ifrh.ifrh_func); goto done; } my_types = hn_rss_type_fromndis(sc->hn_rss_hcap); if ((ifrh.ifrh_types & my_types) == 0) { /* This disables RSS; ignore it then */ if_printf(ifp, "%s intersection of RSS types failed. " "VF %#x, mine %#x\n", vf_ifp->if_xname, ifrh.ifrh_types, my_types); goto done; } diff_types = my_types ^ ifrh.ifrh_types; my_types &= ifrh.ifrh_types; mbuf_types = my_types; /* * Detect RSS hash value/type confliction. * * NOTE: * We don't disable the hash type, but stop delivery the hash * value/type through mbufs on RX path. * * XXX If HN_CAP_UDPHASH is set in hn_caps, then UDP 4-tuple * hash is delivered with type of TCP_IPV4. This means if * UDP_IPV4 is enabled, then TCP_IPV4 should be forced, at * least to hn_mbuf_hash. However, given that _all_ of the * NICs implement TCP_IPV4, this will _not_ impose any issues * here. */ if ((my_types & RSS_TYPE_IPV4) && (diff_types & ifrh.ifrh_types & (RSS_TYPE_TCP_IPV4 | RSS_TYPE_UDP_IPV4))) { /* Conflict; disable IPV4 hash type/value delivery. */ if_printf(ifp, "disable IPV4 mbuf hash delivery\n"); mbuf_types &= ~RSS_TYPE_IPV4; } if ((my_types & RSS_TYPE_IPV6) && (diff_types & ifrh.ifrh_types & (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 | RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX | RSS_TYPE_IPV6_EX))) { /* Conflict; disable IPV6 hash type/value delivery. */ if_printf(ifp, "disable IPV6 mbuf hash delivery\n"); mbuf_types &= ~RSS_TYPE_IPV6; } if ((my_types & RSS_TYPE_IPV6_EX) && (diff_types & ifrh.ifrh_types & (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 | RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX | RSS_TYPE_IPV6))) { /* Conflict; disable IPV6_EX hash type/value delivery. */ if_printf(ifp, "disable IPV6_EX mbuf hash delivery\n"); mbuf_types &= ~RSS_TYPE_IPV6_EX; } if ((my_types & RSS_TYPE_TCP_IPV6) && (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6_EX)) { /* Conflict; disable TCP_IPV6 hash type/value delivery. */ if_printf(ifp, "disable TCP_IPV6 mbuf hash delivery\n"); mbuf_types &= ~RSS_TYPE_TCP_IPV6; } if ((my_types & RSS_TYPE_TCP_IPV6_EX) && (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6)) { /* Conflict; disable TCP_IPV6_EX hash type/value delivery. */ if_printf(ifp, "disable TCP_IPV6_EX mbuf hash delivery\n"); mbuf_types &= ~RSS_TYPE_TCP_IPV6_EX; } if ((my_types & RSS_TYPE_UDP_IPV6) && (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6_EX)) { /* Conflict; disable UDP_IPV6 hash type/value delivery. */ if_printf(ifp, "disable UDP_IPV6 mbuf hash delivery\n"); mbuf_types &= ~RSS_TYPE_UDP_IPV6; } if ((my_types & RSS_TYPE_UDP_IPV6_EX) && (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6)) { /* Conflict; disable UDP_IPV6_EX hash type/value delivery. */ if_printf(ifp, "disable UDP_IPV6_EX mbuf hash delivery\n"); mbuf_types &= ~RSS_TYPE_UDP_IPV6_EX; } /* * Indirect table does not matter. */ sc->hn_rss_hash = (sc->hn_rss_hcap & NDIS_HASH_FUNCTION_MASK) | hn_rss_type_tondis(my_types); memcpy(sc->hn_rss.rss_key, ifrk.ifrk_key, sizeof(sc->hn_rss.rss_key)); sc->hn_flags |= HN_FLAG_HAS_RSSKEY; if (reconf) { error = hn_rss_reconfig(sc); if (error) { /* XXX roll-back? */ if_printf(ifp, "hn_rss_reconfig failed: %d\n", error); /* XXX keep going. */ } } done: /* Hash deliverability for mbufs. */ hn_rss_mbuf_hash(sc, hn_rss_type_tondis(mbuf_types)); } static void hn_vf_rss_restore(struct hn_softc *sc) { HN_LOCK_ASSERT(sc); KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); if (sc->hn_rx_ring_inuse == 1) goto done; /* * Restore hash types. Key does _not_ matter. */ if (sc->hn_rss_hash != sc->hn_rss_hcap) { int error; sc->hn_rss_hash = sc->hn_rss_hcap; error = hn_rss_reconfig(sc); if (error) { if_printf(sc->hn_ifp, "hn_rss_reconfig failed: %d\n", error); /* XXX keep going. */ } } done: /* Hash deliverability for mbufs. */ hn_rss_mbuf_hash(sc, NDIS_HASH_ALL); } static void hn_xpnt_vf_setready(struct hn_softc *sc) { struct ifnet *ifp, *vf_ifp; struct ifreq ifr; HN_LOCK_ASSERT(sc); ifp = sc->hn_ifp; vf_ifp = sc->hn_vf_ifp; /* * Mark the VF ready. */ sc->hn_vf_rdytick = 0; /* * Save information for restoration. */ sc->hn_saved_caps = ifp->if_capabilities; sc->hn_saved_tsomax = ifp->if_hw_tsomax; sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount; sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize; /* * Intersect supported/enabled capabilities. * * NOTE: * if_hwassist is not changed here. */ ifp->if_capabilities &= vf_ifp->if_capabilities; ifp->if_capenable &= ifp->if_capabilities; /* * Fix TSO settings. */ if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax) ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax; if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount) ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount; if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize) ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize; /* * Change VF's enabled capabilities. */ memset(&ifr, 0, sizeof(ifr)); strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); ifr.ifr_reqcap = ifp->if_capenable; hn_xpnt_vf_iocsetcaps(sc, &ifr); if (ifp->if_mtu != ETHERMTU) { int error; /* * Change VF's MTU. */ memset(&ifr, 0, sizeof(ifr)); strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); ifr.ifr_mtu = ifp->if_mtu; error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr); if (error) { if_printf(ifp, "%s SIOCSIFMTU %u failed\n", vf_ifp->if_xname, ifp->if_mtu); if (ifp->if_mtu > ETHERMTU) { if_printf(ifp, "change MTU to %d\n", ETHERMTU); /* * XXX * No need to adjust the synthetic parts' MTU; * failure of the adjustment will cause us * infinite headache. */ ifp->if_mtu = ETHERMTU; hn_mtu_change_fixup(sc); } } } } static bool hn_xpnt_vf_isready(struct hn_softc *sc) { HN_LOCK_ASSERT(sc); if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL) return (false); if (sc->hn_vf_rdytick == 0) return (true); if (sc->hn_vf_rdytick > ticks) return (false); /* Mark VF as ready. */ hn_xpnt_vf_setready(sc); return (true); } static void hn_xpnt_vf_setenable(struct hn_softc *sc) { int i; HN_LOCK_ASSERT(sc); /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ rm_wlock(&sc->hn_vf_lock); sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED; rm_wunlock(&sc->hn_vf_lock); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF; } static void hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf) { int i; HN_LOCK_ASSERT(sc); /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ rm_wlock(&sc->hn_vf_lock); sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED; if (clear_vf) sc->hn_vf_ifp = NULL; rm_wunlock(&sc->hn_vf_lock); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF; } static void hn_xpnt_vf_init(struct hn_softc *sc) { int error; HN_LOCK_ASSERT(sc); KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); if (bootverbose) { if_printf(sc->hn_ifp, "try bringing up %s\n", sc->hn_vf_ifp->if_xname); } /* * Bring the VF up. */ hn_xpnt_vf_saveifflags(sc); sc->hn_vf_ifp->if_flags |= IFF_UP; error = hn_xpnt_vf_iocsetflags(sc); if (error) { if_printf(sc->hn_ifp, "bringing up %s failed: %d\n", sc->hn_vf_ifp->if_xname, error); return; } /* * NOTE: * Datapath setting must happen _after_ bringing the VF up. */ hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); /* * NOTE: * Fixup RSS related bits _after_ the VF is brought up, since * many VFs generate RSS key during it's initialization. */ hn_vf_rss_fixup(sc, true); /* Mark transparent mode VF as enabled. */ hn_xpnt_vf_setenable(sc); } static void hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused) { struct hn_softc *sc = xsc; HN_LOCK(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) goto done; if (sc->hn_vf_ifp == NULL) goto done; if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) goto done; if (sc->hn_vf_rdytick != 0) { /* Mark VF as ready. */ hn_xpnt_vf_setready(sc); } if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) { /* * Delayed VF initialization. */ if (bootverbose) { if_printf(sc->hn_ifp, "delayed initialize %s\n", sc->hn_vf_ifp->if_xname); } hn_xpnt_vf_init(sc); } done: HN_UNLOCK(sc); } static void hn_ifnet_attevent(void *xsc, struct ifnet *ifp) { struct hn_softc *sc = xsc; HN_LOCK(sc); if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) goto done; if (!hn_ismyvf(sc, ifp)) goto done; if (sc->hn_vf_ifp != NULL) { if_printf(sc->hn_ifp, "%s was attached as VF\n", sc->hn_vf_ifp->if_xname); goto done; } if (hn_xpnt_vf && ifp->if_start != NULL) { /* * ifnet.if_start is _not_ supported by transparent * mode VF; mainly due to the IFF_DRV_OACTIVE flag. */ if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported " "in transparent VF mode.\n", ifp->if_xname); goto done; } rm_wlock(&hn_vfmap_lock); if (ifp->if_index >= hn_vfmap_size) { struct ifnet **newmap; int newsize; newsize = ifp->if_index + HN_VFMAP_SIZE_DEF; newmap = malloc(sizeof(struct ifnet *) * newsize, M_DEVBUF, M_WAITOK | M_ZERO); memcpy(newmap, hn_vfmap, sizeof(struct ifnet *) * hn_vfmap_size); free(hn_vfmap, M_DEVBUF); hn_vfmap = newmap; hn_vfmap_size = newsize; } KASSERT(hn_vfmap[ifp->if_index] == NULL, ("%s: ifindex %d was mapped to %s", ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname)); hn_vfmap[ifp->if_index] = sc->hn_ifp; rm_wunlock(&hn_vfmap_lock); /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ rm_wlock(&sc->hn_vf_lock); KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); sc->hn_vf_ifp = ifp; rm_wunlock(&sc->hn_vf_lock); if (hn_xpnt_vf) { int wait_ticks; /* * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp. * Save vf_ifp's current if_input for later restoration. */ sc->hn_vf_input = ifp->if_input; ifp->if_input = hn_xpnt_vf_input; /* * Stop link status management; use the VF's. */ hn_suspend_mgmt(sc); /* * Give VF sometime to complete its attach routing. */ wait_ticks = hn_xpnt_vf_attwait * hz; sc->hn_vf_rdytick = ticks + wait_ticks; taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init, wait_ticks); } done: HN_UNLOCK(sc); } static void hn_ifnet_detevent(void *xsc, struct ifnet *ifp) { struct hn_softc *sc = xsc; HN_LOCK(sc); if (sc->hn_vf_ifp == NULL) goto done; if (!hn_ismyvf(sc, ifp)) goto done; if (hn_xpnt_vf) { /* * Make sure that the delayed initialization is not running. * * NOTE: * - This lock _must_ be released, since the hn_vf_init task * will try holding this lock. * - It is safe to release this lock here, since the * hn_ifnet_attevent() is interlocked by the hn_vf_ifp. * * XXX racy, if hn(4) ever detached. */ HN_UNLOCK(sc); taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init); HN_LOCK(sc); KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved", sc->hn_ifp->if_xname)); ifp->if_input = sc->hn_vf_input; sc->hn_vf_input = NULL; if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) && (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); if (sc->hn_vf_rdytick == 0) { /* * The VF was ready; restore some settings. */ sc->hn_ifp->if_capabilities = sc->hn_saved_caps; /* * NOTE: * There is _no_ need to fixup if_capenable and * if_hwassist, since the if_capabilities before * restoration was an intersection of the VF's * if_capabilites and the synthetic device's * if_capabilites. */ sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax; sc->hn_ifp->if_hw_tsomaxsegcount = sc->hn_saved_tsosegcnt; sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz; } if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { /* * Restore RSS settings. */ hn_vf_rss_restore(sc); /* * Resume link status management, which was suspended * by hn_ifnet_attevent(). */ hn_resume_mgmt(sc); } } /* Mark transparent mode VF as disabled. */ hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */); rm_wlock(&hn_vfmap_lock); KASSERT(ifp->if_index < hn_vfmap_size, ("ifindex %d, vfmapsize %d", ifp->if_index, hn_vfmap_size)); if (hn_vfmap[ifp->if_index] != NULL) { KASSERT(hn_vfmap[ifp->if_index] == sc->hn_ifp, ("%s: ifindex %d was mapped to %s", ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname)); hn_vfmap[ifp->if_index] = NULL; } rm_wunlock(&hn_vfmap_lock); done: HN_UNLOCK(sc); } static void hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state) { struct hn_softc *sc = xsc; if (sc->hn_vf_ifp == ifp) if_link_state_change(sc->hn_ifp, link_state); } static int hn_probe(device_t dev) { if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) == 0) { device_set_desc(dev, "Hyper-V Network Interface"); return BUS_PROBE_DEFAULT; } return ENXIO; } static int hn_attach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; uint8_t eaddr[ETHER_ADDR_LEN]; struct ifnet *ifp = NULL; int error, ring_cnt, tx_ring_cnt; uint32_t mtu; sc->hn_dev = dev; sc->hn_prichan = vmbus_get_channel(dev); HN_LOCK_INIT(sc); rm_init(&sc->hn_vf_lock, "hnvf"); if (hn_xpnt_vf && hn_xpnt_vf_accbpf) sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; /* * Initialize these tunables once. */ sc->hn_agg_size = hn_tx_agg_size; sc->hn_agg_pkts = hn_tx_agg_pkts; /* * Setup taskqueue for transmission. */ if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) { int i; sc->hn_tx_taskqs = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), M_DEVBUF, M_WAITOK); for (i = 0; i < hn_tx_taskq_cnt; ++i) { sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_tx_taskqs[i]); taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET, "%s tx%d", device_get_nameunit(dev), i); } } else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) { sc->hn_tx_taskqs = hn_tx_taskque; } /* * Setup taskqueue for mangement tasks, e.g. link status. */ sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0); taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt", device_get_nameunit(dev)); TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc); TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc); TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, hn_netchg_status_taskfunc, sc); if (hn_xpnt_vf) { /* * Setup taskqueue for VF tasks, e.g. delayed VF bringing up. */ sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_vf_taskq); taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf", device_get_nameunit(dev)); TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0, hn_xpnt_vf_init_taskfunc, sc); } /* * Allocate ifnet and setup its name earlier, so that if_printf * can be used by functions, which will be called after * ether_ifattach(). */ ifp = sc->hn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); /* * Initialize ifmedia earlier so that it can be unconditionally * destroyed, if error happened later on. */ ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); /* * Figure out the # of RX rings (ring_cnt) and the # of TX rings * to use (tx_ring_cnt). * * NOTE: * The # of RX rings to use is same as the # of channels to use. */ ring_cnt = hn_chan_cnt; if (ring_cnt <= 0) { /* Default */ ring_cnt = mp_ncpus; if (ring_cnt > HN_RING_CNT_DEF_MAX) ring_cnt = HN_RING_CNT_DEF_MAX; } else if (ring_cnt > mp_ncpus) { ring_cnt = mp_ncpus; } #ifdef RSS if (ring_cnt > rss_getnumbuckets()) ring_cnt = rss_getnumbuckets(); #endif tx_ring_cnt = hn_tx_ring_cnt; if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) tx_ring_cnt = ring_cnt; #ifdef HN_IFSTART_SUPPORT if (hn_use_if_start) { /* ifnet.if_start only needs one TX ring. */ tx_ring_cnt = 1; } #endif /* * Set the leader CPU for channels. */ sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; /* * Create enough TX/RX rings, even if only limited number of * channels can be allocated. */ error = hn_create_tx_data(sc, tx_ring_cnt); if (error) goto failed; error = hn_create_rx_data(sc, ring_cnt); if (error) goto failed; /* * Create transaction context for NVS and RNDIS transactions. */ sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); if (sc->hn_xact == NULL) { error = ENXIO; goto failed; } /* * Install orphan handler for the revocation of this device's * primary channel. * * NOTE: * The processing order is critical here: * Install the orphan handler, _before_ testing whether this * device's primary channel has been revoked or not. */ vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact); if (vmbus_chan_is_revoked(sc->hn_prichan)) { error = ENXIO; goto failed; } /* * Attach the synthetic parts, i.e. NVS and RNDIS. */ error = hn_synth_attach(sc, ETHERMTU); if (error) goto failed; error = hn_rndis_get_eaddr(sc, eaddr); if (error) goto failed; error = hn_rndis_get_mtu(sc, &mtu); if (error) mtu = ETHERMTU; else if (bootverbose) device_printf(dev, "RNDIS mtu %u\n", mtu); #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring_inuse > 1) { /* * Reduce TCP segment aggregation limit for multiple * RX rings to increase ACK timeliness. */ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); } #endif /* * Fixup TX/RX stuffs after synthetic parts are attached. */ hn_fixup_tx_data(sc); hn_fixup_rx_data(sc); ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, &sc->hn_nvs_ver, 0, "NVS version"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_ndis_version_sysctl, "A", "NDIS version"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_caps_sysctl, "A", "capabilities"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_hwassist_sysctl, "A", "hwassist"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max", CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt", CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0, "max # of TSO segments"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz", CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0, "max size of TSO segment"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rxfilter_sysctl, "A", "rxfilter"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rss_hash_sysctl, "A", "RSS hash"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hashcap", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rss_hcap_sysctl, "A", "RSS hash capabilities"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "mbuf_hash", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rss_mbuf_sysctl, "A", "RSS hash for mbufs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); #ifndef RSS /* * Don't allow RSS key/indirect table changes, if RSS is defined. */ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_rss_key_sysctl, "IU", "RSS key"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_rss_ind_sysctl, "IU", "RSS indirect table"); #endif SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size", CTLFLAG_RD, &sc->hn_rndis_agg_size, 0, "RNDIS offered packet transmission aggregation size limit"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts", CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0, "RNDIS offered packet transmission aggregation count limit"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align", CTLFLAG_RD, &sc->hn_rndis_agg_align, 0, "RNDIS packet transmission aggregation alignment"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_txagg_size_sysctl, "I", "Packet transmission aggregation size, 0 -- disable, -1 -- auto"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_txagg_pkts_sysctl, "I", "Packet transmission aggregation packets, " "0 -- disable, -1 -- auto"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_polling_sysctl, "I", "Polling frequency: [100,1000000], 0 disable polling"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_vf_sysctl, "A", "Virtual Function's name"); if (!hn_xpnt_vf) { SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rxvf_sysctl, "A", "activated Virtual Function's name"); } else { SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_xpnt_vf_enabled_sysctl, "I", "Transparent VF enabled"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_xpnt_vf_accbpf_sysctl, "I", "Accurate BPF for transparent VF"); } /* * Setup the ifmedia, which has been initialized earlier. */ ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); /* XXX ifmedia_set really should do this for us */ sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; /* * Setup the ifnet for this interface. */ ifp->if_baudrate = IF_Gbps(10); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | + IFF_NEEDSEPOCH; ifp->if_ioctl = hn_ioctl; ifp->if_init = hn_init; #ifdef HN_IFSTART_SUPPORT if (hn_use_if_start) { int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); ifp->if_start = hn_start; IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); ifp->if_snd.ifq_drv_maxlen = qdepth - 1; IFQ_SET_READY(&ifp->if_snd); } else #endif { ifp->if_transmit = hn_transmit; ifp->if_qflush = hn_xmit_qflush; } ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE; #ifdef foo /* We can't diff IPv6 packets from IPv4 packets on RX path. */ ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; #endif if (sc->hn_caps & HN_CAP_VLAN) { /* XXX not sure about VLAN_MTU. */ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; } ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; if (ifp->if_hwassist & HN_CSUM_IP_MASK) ifp->if_capabilities |= IFCAP_TXCSUM; if (ifp->if_hwassist & HN_CSUM_IP6_MASK) ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; if (sc->hn_caps & HN_CAP_TSO4) { ifp->if_capabilities |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_IP_TSO; } if (sc->hn_caps & HN_CAP_TSO6) { ifp->if_capabilities |= IFCAP_TSO6; ifp->if_hwassist |= CSUM_IP6_TSO; } /* Enable all available capabilities by default. */ ifp->if_capenable = ifp->if_capabilities; /* * Disable IPv6 TSO and TXCSUM by default, they still can * be enabled through SIOCSIFCAP. */ ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO); if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { /* * Lock hn_set_tso_maxsize() to simplify its * internal logic. */ HN_LOCK(sc); hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); HN_UNLOCK(sc); ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; } ether_ifattach(ifp, eaddr); if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { if_printf(ifp, "TSO segcnt %u segsz %u\n", ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); } if (mtu < ETHERMTU) { if_printf(ifp, "fixup mtu %u -> %u\n", ifp->if_mtu, mtu); ifp->if_mtu = mtu; } /* Inform the upper layer about the long frame support. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); /* * Kick off link status check. */ sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; hn_update_link_status(sc); if (!hn_xpnt_vf) { sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event, hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY); sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event, hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY); } else { sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event, hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY); } /* * NOTE: * Subscribe ether_ifattach event, instead of ifnet_arrival event, * since interface's LLADDR is needed; interface LLADDR is not * available when ifnet_arrival event is triggered. */ sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event, hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY); sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event, hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY); return (0); failed: if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) hn_synth_detach(sc); hn_detach(dev); return (error); } static int hn_detach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); struct ifnet *ifp = sc->hn_ifp, *vf_ifp; if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { /* * In case that the vmbus missed the orphan handler * installation. */ vmbus_xact_ctx_orphan(sc->hn_xact); } if (sc->hn_ifaddr_evthand != NULL) EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand); if (sc->hn_ifnet_evthand != NULL) EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand); if (sc->hn_ifnet_atthand != NULL) { EVENTHANDLER_DEREGISTER(ether_ifattach_event, sc->hn_ifnet_atthand); } if (sc->hn_ifnet_dethand != NULL) { EVENTHANDLER_DEREGISTER(ifnet_departure_event, sc->hn_ifnet_dethand); } if (sc->hn_ifnet_lnkhand != NULL) EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand); vf_ifp = sc->hn_vf_ifp; __compiler_membar(); if (vf_ifp != NULL) hn_ifnet_detevent(sc, vf_ifp); if (device_is_attached(dev)) { HN_LOCK(sc); if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) hn_stop(sc, true); /* * NOTE: * hn_stop() only suspends data, so managment * stuffs have to be suspended manually here. */ hn_suspend_mgmt(sc); hn_synth_detach(sc); } HN_UNLOCK(sc); ether_ifdetach(ifp); } ifmedia_removeall(&sc->hn_media); hn_destroy_rx_data(sc); hn_destroy_tx_data(sc); if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) { int i; for (i = 0; i < hn_tx_taskq_cnt; ++i) taskqueue_free(sc->hn_tx_taskqs[i]); free(sc->hn_tx_taskqs, M_DEVBUF); } taskqueue_free(sc->hn_mgmt_taskq0); if (sc->hn_vf_taskq != NULL) taskqueue_free(sc->hn_vf_taskq); if (sc->hn_xact != NULL) { /* * Uninstall the orphan handler _before_ the xact is * destructed. */ vmbus_chan_unset_orphan(sc->hn_prichan); vmbus_xact_ctx_destroy(sc->hn_xact); } if_free(ifp); HN_LOCK_DESTROY(sc); rm_destroy(&sc->hn_vf_lock); return (0); } static int hn_shutdown(device_t dev) { return (0); } static void hn_link_status(struct hn_softc *sc) { uint32_t link_status; int error; error = hn_rndis_get_linkstatus(sc, &link_status); if (error) { /* XXX what to do? */ return; } if (link_status == NDIS_MEDIA_STATE_CONNECTED) sc->hn_link_flags |= HN_LINK_FLAG_LINKUP; else sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; if_link_state_change(sc->hn_ifp, (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ? LINK_STATE_UP : LINK_STATE_DOWN); } static void hn_link_taskfunc(void *xsc, int pending __unused) { struct hn_softc *sc = xsc; if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) return; hn_link_status(sc); } static void hn_netchg_init_taskfunc(void *xsc, int pending __unused) { struct hn_softc *sc = xsc; /* Prevent any link status checks from running. */ sc->hn_link_flags |= HN_LINK_FLAG_NETCHG; /* * Fake up a [link down --> link up] state change; 5 seconds * delay is used, which closely simulates miibus reaction * upon link down event. */ sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN); taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 5 * hz); } static void hn_netchg_status_taskfunc(void *xsc, int pending __unused) { struct hn_softc *sc = xsc; /* Re-allow link status checks. */ sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG; hn_link_status(sc); } static void hn_update_link_status(struct hn_softc *sc) { if (sc->hn_mgmt_taskq != NULL) taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task); } static void hn_change_network(struct hn_softc *sc) { if (sc->hn_mgmt_taskq != NULL) taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init); } static __inline int hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) { struct mbuf *m = *m_head; int error; KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim")); error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { struct mbuf *m_new; m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); if (m_new == NULL) return ENOBUFS; else *m_head = m = m_new; txr->hn_tx_collapsed++; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); } if (!error) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_PREWRITE); txd->flags |= HN_TXD_FLAG_DMAMAP; } return error; } static __inline int hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, ("put an onlist txd %#x", txd->flags)); KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, ("put an onagg txd %#x", txd->flags)); KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); if (atomic_fetchadd_int(&txd->refs, -1) != 1) return 0; if (!STAILQ_EMPTY(&txd->agg_list)) { struct hn_txdesc *tmp_txd; while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) { int freed; KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list), ("resursive aggregation on aggregated txdesc")); KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG), ("not aggregated txdesc")); KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("aggregated txdesc uses dmamap")); KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("aggregated txdesc consumes " "chimney sending buffer")); KASSERT(tmp_txd->chim_size == 0, ("aggregated txdesc has non-zero " "chimney sending size")); STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link); tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG; freed = hn_txdesc_put(txr, tmp_txd); KASSERT(freed, ("failed to free aggregated txdesc")); } } if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("chim txd uses dmamap")); hn_chim_free(txr->hn_sc, txd->chim_index); txd->chim_index = HN_NVS_CHIM_IDX_INVALID; txd->chim_size = 0; } else if (txd->flags & HN_TXD_FLAG_DMAMAP) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->hn_tx_data_dtag, txd->data_dmap); txd->flags &= ~HN_TXD_FLAG_DMAMAP; } if (txd->m != NULL) { m_freem(txd->m); txd->m = NULL; } txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); KASSERT(txr->hn_txdesc_avail >= 0 && txr->hn_txdesc_avail < txr->hn_txdesc_cnt, ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail++; SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); mtx_unlock_spin(&txr->hn_txlist_spin); #else /* HN_USE_TXDESC_BUFRING */ #ifdef HN_DEBUG atomic_add_int(&txr->hn_txdesc_avail, 1); #endif buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif /* !HN_USE_TXDESC_BUFRING */ return 1; } static __inline struct hn_txdesc * hn_txdesc_get(struct hn_tx_ring *txr) { struct hn_txdesc *txd; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); txd = SLIST_FIRST(&txr->hn_txlist); if (txd != NULL) { KASSERT(txr->hn_txdesc_avail > 0, ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail--; SLIST_REMOVE_HEAD(&txr->hn_txlist, link); } mtx_unlock_spin(&txr->hn_txlist_spin); #else txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); #endif if (txd != NULL) { #ifdef HN_USE_TXDESC_BUFRING #ifdef HN_DEBUG atomic_subtract_int(&txr->hn_txdesc_avail, 1); #endif #endif /* HN_USE_TXDESC_BUFRING */ KASSERT(txd->m == NULL && txd->refs == 0 && STAILQ_EMPTY(&txd->agg_list) && txd->chim_index == HN_NVS_CHIM_IDX_INVALID && txd->chim_size == 0 && (txd->flags & HN_TXD_FLAG_ONLIST) && (txd->flags & HN_TXD_FLAG_ONAGG) == 0 && (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); txd->flags &= ~HN_TXD_FLAG_ONLIST; txd->refs = 1; } return txd; } static __inline void hn_txdesc_hold(struct hn_txdesc *txd) { /* 0->1 transition will never work */ KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); atomic_add_int(&txd->refs, 1); } static __inline void hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd) { KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0, ("recursive aggregation on aggregating txdesc")); KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, ("already aggregated")); KASSERT(STAILQ_EMPTY(&txd->agg_list), ("recursive aggregation on to-be-aggregated txdesc")); txd->flags |= HN_TXD_FLAG_ONAGG; STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link); } static bool hn_tx_ring_pending(struct hn_tx_ring *txr) { bool pending = false; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) pending = true; mtx_unlock_spin(&txr->hn_txlist_spin); #else if (!buf_ring_full(txr->hn_txdesc_br)) pending = true; #endif return (pending); } static __inline void hn_txeof(struct hn_tx_ring *txr) { txr->hn_has_txeof = 0; txr->hn_txeof(txr); } static void hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan, const void *data __unused, int dlen __unused) { struct hn_txdesc *txd = sndc->hn_cbarg; struct hn_tx_ring *txr; txr = txd->txr; KASSERT(txr->hn_chan == chan, ("channel mismatch, on chan%u, should be chan%u", vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan))); txr->hn_has_txeof = 1; hn_txdesc_put(txr, txd); ++txr->hn_txdone_cnt; if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { txr->hn_txdone_cnt = 0; if (txr->hn_oactive) hn_txeof(txr); } } static void hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) { #if defined(INET) || defined(INET6) tcp_lro_flush_all(&rxr->hn_lro); #endif /* * NOTE: * 'txr' could be NULL, if multiple channels and * ifnet.if_start method are enabled. */ if (txr == NULL || !txr->hn_has_txeof) return; txr->hn_txdone_cnt = 0; hn_txeof(txr); } static __inline uint32_t hn_rndis_pktmsg_offset(uint32_t ofs) { KASSERT(ofs >= sizeof(struct rndis_packet_msg), ("invalid RNDIS packet msg offset %u", ofs)); return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); } static __inline void * hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, size_t pi_dlen, uint32_t pi_type) { const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); struct rndis_pktinfo *pi; KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); /* * Per-packet-info does not move; it only grows. * * NOTE: * rm_pktinfooffset in this phase counts from the beginning * of rndis_packet_msg. */ KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, ("%u pktinfo overflows RNDIS packet msg", pi_type)); pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + pkt->rm_pktinfolen); pkt->rm_pktinfolen += pi_size; pi->rm_size = pi_size; pi->rm_type = pi_type; pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; return (pi->rm_data); } static __inline int hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) { struct hn_txdesc *txd; struct mbuf *m; int error, pkts; txd = txr->hn_agg_txd; KASSERT(txd != NULL, ("no aggregate txdesc")); /* * Since hn_txpkt() will reset this temporary stat, save * it now, so that oerrors can be updated properly, if * hn_txpkt() ever fails. */ pkts = txr->hn_stat_pkts; /* * Since txd's mbuf will _not_ be freed upon hn_txpkt() * failure, save it for later freeing, if hn_txpkt() ever * fails. */ m = txd->m; error = hn_txpkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m is not. */ m_freem(m); txr->hn_flush_failed++; if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); } /* Reset all aggregation states. */ txr->hn_agg_txd = NULL; txr->hn_agg_szleft = 0; txr->hn_agg_pktleft = 0; txr->hn_agg_prevpkt = NULL; return (error); } static void * hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, int pktsize) { void *chim; if (txr->hn_agg_txd != NULL) { if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { struct hn_txdesc *agg_txd = txr->hn_agg_txd; struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; int olen; /* * Update the previous RNDIS packet's total length, * it can be increased due to the mandatory alignment * padding for this RNDIS packet. And update the * aggregating txdesc's chimney sending buffer size * accordingly. * * XXX * Zero-out the padding, as required by the RNDIS spec. */ olen = pkt->rm_len; pkt->rm_len = roundup2(olen, txr->hn_agg_align); agg_txd->chim_size += pkt->rm_len - olen; /* Link this txdesc to the parent. */ hn_txdesc_agg(agg_txd, txd); chim = (uint8_t *)pkt + pkt->rm_len; /* Save the current packet for later fixup. */ txr->hn_agg_prevpkt = chim; txr->hn_agg_pktleft--; txr->hn_agg_szleft -= pktsize; if (txr->hn_agg_szleft <= HN_PKTSIZE_MIN(txr->hn_agg_align)) { /* * Probably can't aggregate more packets, * flush this aggregating txdesc proactively. */ txr->hn_agg_pktleft = 0; } /* Done! */ return (chim); } hn_flush_txagg(ifp, txr); } KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); txr->hn_tx_chimney_tried++; txd->chim_index = hn_chim_alloc(txr->hn_sc); if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) return (NULL); txr->hn_tx_chimney++; chim = txr->hn_sc->hn_chim + (txd->chim_index * txr->hn_sc->hn_chim_szmax); if (txr->hn_agg_pktmax > 1 && txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { txr->hn_agg_txd = txd; txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; txr->hn_agg_prevpkt = chim; } return (chim); } /* * NOTE: * If this function fails, then both txd and m_head0 will be freed. */ static int hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) { bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; int error, nsegs, i; struct mbuf *m_head = *m_head0; struct rndis_packet_msg *pkt; uint32_t *pi_data; void *chim = NULL; int pkt_hlen, pkt_size; pkt = txd->rndis_pkt; pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); if (pkt_size < txr->hn_chim_size) { chim = hn_try_txagg(ifp, txr, txd, pkt_size); if (chim != NULL) pkt = chim; } else { if (txr->hn_agg_txd != NULL) hn_flush_txagg(ifp, txr); } pkt->rm_type = REMOTE_NDIS_PACKET_MSG; pkt->rm_len = m_head->m_pkthdr.len; pkt->rm_dataoffset = 0; pkt->rm_datalen = m_head->m_pkthdr.len; pkt->rm_oobdataoffset = 0; pkt->rm_oobdatalen = 0; pkt->rm_oobdataelements = 0; pkt->rm_pktinfooffset = sizeof(*pkt); pkt->rm_pktinfolen = 0; pkt->rm_vchandle = 0; pkt->rm_reserved = 0; if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { /* * Set the hash value for this packet, so that the host could * dispatch the TX done event for this packet back to this TX * ring's channel. */ pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); *pi_data = txr->hn_tx_idx; } if (m_head->m_flags & M_VLANTAG) { pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); *pi_data = NDIS_VLAN_INFO_MAKE( EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { #if defined(INET6) || defined(INET) pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { *pi_data = NDIS_LSO2_INFO_MAKEIPV4( m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, m_head->m_pkthdr.tso_segsz); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { *pi_data = NDIS_LSO2_INFO_MAKEIPV6( m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, m_head->m_pkthdr.tso_segsz); } #endif #endif /* INET6 || INET */ } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); if (m_head->m_pkthdr.csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP)) { *pi_data = NDIS_TXCSUM_INFO_IPV6; } else { *pi_data = NDIS_TXCSUM_INFO_IPV4; if (m_head->m_pkthdr.csum_flags & CSUM_IP) *pi_data |= NDIS_TXCSUM_INFO_IPCS; } if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) { *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS( m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); } else if (m_head->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) { *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS( m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); } } pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; /* Fixup RNDIS packet message total length */ pkt->rm_len += pkt_hlen; /* Convert RNDIS packet message offsets */ pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen); pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); /* * Fast path: Chimney sending. */ if (chim != NULL) { struct hn_txdesc *tgt_txd = txd; if (txr->hn_agg_txd != NULL) { tgt_txd = txr->hn_agg_txd; #ifdef INVARIANTS *m_head0 = NULL; #endif } KASSERT(pkt == chim, ("RNDIS pkt not in chimney sending buffer")); KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, ("chimney sending buffer is not used")); tgt_txd->chim_size += pkt->rm_len; m_copydata(m_head, 0, m_head->m_pkthdr.len, ((uint8_t *)chim) + pkt_hlen); txr->hn_gpa_cnt = 0; txr->hn_sendpkt = hn_txpkt_chim; goto done; } KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("chimney buffer is used")); KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); if (__predict_false(error)) { int freed; /* * This mbuf is not linked w/ the txd yet, so free it now. */ m_freem(m_head); *m_head0 = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon txdma error")); txr->hn_txdma_failed++; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return error; } *m_head0 = m_head; /* +1 RNDIS packet message */ txr->hn_gpa_cnt = nsegs + 1; /* send packet with page buffer */ txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; txr->hn_gpa[0].gpa_len = pkt_hlen; /* * Fill the page buffers with mbuf info after the page * buffer for RNDIS packet message. */ for (i = 0; i < nsegs; ++i) { struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; gpa->gpa_page = atop(segs[i].ds_addr); gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; gpa->gpa_len = segs[i].ds_len; } txd->chim_index = HN_NVS_CHIM_IDX_INVALID; txd->chim_size = 0; txr->hn_sendpkt = hn_txpkt_sglist; done: txd->m = m_head; /* Set the completion routine */ hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); /* Update temporary stats for later use. */ txr->hn_stat_pkts++; txr->hn_stat_size += m_head->m_pkthdr.len; if (m_head->m_flags & M_MCAST) txr->hn_stat_mcasts++; return 0; } /* * NOTE: * If this function fails, then txd will be freed, but the mbuf * associated w/ the txd will _not_ be freed. */ static int hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) { int error, send_failed = 0, has_bpf; again: has_bpf = bpf_peers_present(ifp->if_bpf); if (has_bpf) { /* * Make sure that this txd and any aggregated txds are not * freed before ETHER_BPF_MTAP. */ hn_txdesc_hold(txd); } error = txr->hn_sendpkt(txr, txd); if (!error) { if (has_bpf) { const struct hn_txdesc *tmp_txd; ETHER_BPF_MTAP(ifp, txd->m); STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) ETHER_BPF_MTAP(ifp, tmp_txd->m); } if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); #ifdef HN_IFSTART_SUPPORT if (!hn_use_if_start) #endif { if_inc_counter(ifp, IFCOUNTER_OBYTES, txr->hn_stat_size); if (txr->hn_stat_mcasts != 0) { if_inc_counter(ifp, IFCOUNTER_OMCASTS, txr->hn_stat_mcasts); } } txr->hn_pkts += txr->hn_stat_pkts; txr->hn_sends++; } if (has_bpf) hn_txdesc_put(txr, txd); if (__predict_false(error)) { int freed; /* * This should "really rarely" happen. * * XXX Too many RX to be acked or too many sideband * commands to run? Ask netvsc_channel_rollup() * to kick start later. */ txr->hn_has_txeof = 1; if (!send_failed) { txr->hn_send_failed++; send_failed = 1; /* * Try sending again after set hn_has_txeof; * in case that we missed the last * netvsc_channel_rollup(). */ goto again; } if_printf(ifp, "send failed\n"); /* * Caller will perform further processing on the * associated mbuf, so don't free it in hn_txdesc_put(); * only unload it from the DMA map in hn_txdesc_put(), * if it was loaded. */ txd->m = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon send error")); txr->hn_send_failed++; } /* Reset temporary stats, after this sending is done. */ txr->hn_stat_size = 0; txr->hn_stat_pkts = 0; txr->hn_stat_mcasts = 0; return (error); } /* * Append the specified data to the indicated mbuf chain, * Extend the mbuf chain if the new data does not fit in * existing space. * * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. * There should be an equivalent in the kernel mbuf code, * but there does not appear to be one yet. * * Differs from m_append() in that additional mbufs are * allocated with cluster size MJUMPAGESIZE, and filled * accordingly. * * Return 1 if able to complete the job; otherwise 0. */ static int hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; int remainder, space; for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; space = M_TRAILINGSPACE(m); if (space > 0) { /* * Copy into available space. */ if (space > remainder) space = remainder; bcopy(cp, mtod(m, caddr_t) + m->m_len, space); m->m_len += space; cp += space; remainder -= space; } while (remainder > 0) { /* * Allocate a new mbuf; could check space * and allocate a cluster instead. */ n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); if (n == NULL) break; n->m_len = min(MJUMPAGESIZE, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len; remainder -= n->m_len; m->m_next = n; m = n; } if (m0->m_flags & M_PKTHDR) m0->m_pkthdr.len += len - remainder; return (remainder == 0); } #if defined(INET) || defined(INET6) static __inline int hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) { #if __FreeBSD_version >= 1100095 if (hn_lro_mbufq_depth) { tcp_lro_queue_mbuf(lc, m); return 0; } #endif return tcp_lro_rx(lc, m, 0); } #endif static int hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, const struct hn_rxinfo *info) { struct ifnet *ifp, *hn_ifp = rxr->hn_ifp; struct mbuf *m_new; int size, do_lro = 0, do_csum = 1, is_vf = 0; int hash_type = M_HASHTYPE_NONE; int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE; ifp = hn_ifp; if (rxr->hn_rxvf_ifp != NULL) { /* * Non-transparent mode VF; pretend this packet is from * the VF. */ ifp = rxr->hn_rxvf_ifp; is_vf = 1; } else if (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF) { /* Transparent mode VF. */ is_vf = 1; } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { /* * NOTE: * See the NOTE of hn_rndis_init_fixat(). This * function can be reached, immediately after the * RNDIS is initialized but before the ifnet is * setup on the hn_attach() path; drop the unexpected * packets. */ return (0); } if (__predict_false(dlen < ETHER_HDR_LEN)) { if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1); return (0); } if (dlen <= MHLEN) { m_new = m_gethdr(M_NOWAIT, MT_DATA); if (m_new == NULL) { if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); return (0); } memcpy(mtod(m_new, void *), data, dlen); m_new->m_pkthdr.len = m_new->m_len = dlen; rxr->hn_small_pkts++; } else { /* * Get an mbuf with a cluster. For packets 2K or less, * get a standard 2K cluster. For anything larger, get a * 4K cluster. Any buffers larger than 4K can cause problems * if looped around to the Hyper-V TX channel, so avoid them. */ size = MCLBYTES; if (dlen > MCLBYTES) { /* 4096 */ size = MJUMPAGESIZE; } m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); if (m_new == NULL) { if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); return (0); } hv_m_append(m_new, dlen, data); } m_new->m_pkthdr.rcvif = ifp; if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0)) do_csum = 0; /* receive side checksum offload */ if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { /* IP csum offload */ if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); rxr->hn_csum_ip++; } /* TCP/UDP csum offload */ if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) rxr->hn_csum_tcp++; else rxr->hn_csum_udp++; } /* * XXX * As of this write (Oct 28th, 2016), host side will turn * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so * the do_lro setting here is actually _not_ accurate. We * depend on the RSS hash type check to reset do_lro. */ if ((info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) do_lro = 1; } else { hn_rxpkt_proto(m_new, &l3proto, &l4proto); if (l3proto == ETHERTYPE_IP) { if (l4proto == IPPROTO_TCP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_TCP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } do_lro = 1; } else if (l4proto == IPPROTO_UDP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_UDP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } } else if (l4proto != IPPROTO_DONE && do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); } } } if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( NDIS_VLAN_INFO_ID(info->vlan_info), NDIS_VLAN_INFO_PRI(info->vlan_info), NDIS_VLAN_INFO_CFI(info->vlan_info)); m_new->m_flags |= M_VLANTAG; } /* * If VF is activated (tranparent/non-transparent mode does not * matter here). * * - Disable LRO * * hn(4) will only receive broadcast packets, multicast packets, * TCP SYN and SYN|ACK (in Azure), LRO is useless for these * packet types. * * For non-transparent, we definitely _cannot_ enable LRO at * all, since the LRO flush will use hn(4) as the receiving * interface; i.e. hn_ifp->if_input(hn_ifp, m). */ if (is_vf) do_lro = 0; /* * If VF is activated (tranparent/non-transparent mode does not * matter here), do _not_ mess with unsupported hash types or * functions. */ if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { rxr->hn_rss_pkts++; m_new->m_pkthdr.flowid = info->hash_value; if (!is_vf) hash_type = M_HASHTYPE_OPAQUE_HASH; if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == NDIS_HASH_FUNCTION_TOEPLITZ) { uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK & rxr->hn_mbuf_hash); /* * NOTE: * do_lro is resetted, if the hash types are not TCP * related. See the comment in the above csum_flags * setup section. */ switch (type) { case NDIS_HASH_IPV4: hash_type = M_HASHTYPE_RSS_IPV4; do_lro = 0; break; case NDIS_HASH_TCP_IPV4: hash_type = M_HASHTYPE_RSS_TCP_IPV4; if (rxr->hn_rx_flags & HN_RX_FLAG_UDP_HASH) { int def_htype = M_HASHTYPE_OPAQUE_HASH; if (is_vf) def_htype = M_HASHTYPE_NONE; /* * UDP 4-tuple hash is delivered as * TCP 4-tuple hash. */ if (l3proto == ETHERTYPE_MAX) { hn_rxpkt_proto(m_new, &l3proto, &l4proto); } if (l3proto == ETHERTYPE_IP) { if (l4proto == IPPROTO_UDP && (rxr->hn_mbuf_hash & NDIS_HASH_UDP_IPV4_X)) { hash_type = M_HASHTYPE_RSS_UDP_IPV4; do_lro = 0; } else if (l4proto != IPPROTO_TCP) { hash_type = def_htype; do_lro = 0; } } else { hash_type = def_htype; do_lro = 0; } } break; case NDIS_HASH_IPV6: hash_type = M_HASHTYPE_RSS_IPV6; do_lro = 0; break; case NDIS_HASH_IPV6_EX: hash_type = M_HASHTYPE_RSS_IPV6_EX; do_lro = 0; break; case NDIS_HASH_TCP_IPV6: hash_type = M_HASHTYPE_RSS_TCP_IPV6; break; case NDIS_HASH_TCP_IPV6_EX: hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; break; } } } else if (!is_vf) { m_new->m_pkthdr.flowid = rxr->hn_rx_idx; hash_type = M_HASHTYPE_OPAQUE; } M_HASHTYPE_SET(m_new, hash_type); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if (hn_ifp != ifp) { const struct ether_header *eh; /* * Non-transparent mode VF is activated. */ /* * Allow tapping on hn(4). */ ETHER_BPF_MTAP(hn_ifp, m_new); /* * Update hn(4)'s stats. */ if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len); /* Checked at the beginning of this function. */ KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame")); eh = mtod(m_new, struct ether_header *); if (ETHER_IS_MULTICAST(eh->ether_dhost)) if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1); } rxr->hn_pkts++; if ((hn_ifp->if_capenable & IFCAP_LRO) && do_lro) { #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxr->hn_lro; if (lro->lro_cnt) { rxr->hn_lro_tried++; if (hn_lro_rx(lro, m_new) == 0) { /* DONE! */ return 0; } } #endif } ifp->if_input(ifp, m_new); return (0); } static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct hn_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data, ifr_vf; struct ifnet *vf_ifp; int mask, error = 0; struct ifrsskey *ifrk; struct ifrsshash *ifrh; uint32_t mtu; switch (cmd) { case SIOCSIFMTU: if (ifr->ifr_mtu > HN_MTU_MAX) { error = EINVAL; break; } HN_LOCK(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { HN_UNLOCK(sc); break; } if ((sc->hn_caps & HN_CAP_MTU) == 0) { /* Can't change MTU */ HN_UNLOCK(sc); error = EOPNOTSUPP; break; } if (ifp->if_mtu == ifr->ifr_mtu) { HN_UNLOCK(sc); break; } if (hn_xpnt_vf_isready(sc)) { vf_ifp = sc->hn_vf_ifp; ifr_vf = *ifr; strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname, sizeof(ifr_vf.ifr_name)); error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr_vf); if (error) { HN_UNLOCK(sc); if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n", vf_ifp->if_xname, ifr->ifr_mtu, error); break; } } /* * Suspend this interface before the synthetic parts * are ripped. */ hn_suspend(sc); /* * Detach the synthetics parts, i.e. NVS and RNDIS. */ hn_synth_detach(sc); /* * Reattach the synthetic parts, i.e. NVS and RNDIS, * with the new MTU setting. */ error = hn_synth_attach(sc, ifr->ifr_mtu); if (error) { HN_UNLOCK(sc); break; } error = hn_rndis_get_mtu(sc, &mtu); if (error) mtu = ifr->ifr_mtu; else if (bootverbose) if_printf(ifp, "RNDIS mtu %u\n", mtu); /* * Commit the requested MTU, after the synthetic parts * have been successfully attached. */ if (mtu >= ifr->ifr_mtu) { mtu = ifr->ifr_mtu; } else { if_printf(ifp, "fixup mtu %d -> %u\n", ifr->ifr_mtu, mtu); } ifp->if_mtu = mtu; /* * Synthetic parts' reattach may change the chimney * sending size; update it. */ if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) hn_set_chim_size(sc, sc->hn_chim_szmax); /* * Make sure that various parameters based on MTU are * still valid, after the MTU change. */ hn_mtu_change_fixup(sc); /* * All done! Resume the interface now. */ hn_resume(sc); if ((sc->hn_flags & HN_FLAG_RXVF) || (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { /* * Since we have reattached the NVS part, * change the datapath to VF again; in case * that it is lost, after the NVS was detached. */ hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); } HN_UNLOCK(sc); break; case SIOCSIFFLAGS: HN_LOCK(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { HN_UNLOCK(sc); break; } if (hn_xpnt_vf_isready(sc)) hn_xpnt_vf_saveifflags(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* * Caller meight hold mutex, e.g. * bpf; use busy-wait for the RNDIS * reply. */ HN_NO_SLEEPING(sc); hn_rxfilter_config(sc); HN_SLEEPING_OK(sc); if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) error = hn_xpnt_vf_iocsetflags(sc); } else { hn_init_locked(sc); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) hn_stop(sc, false); } sc->hn_if_flags = ifp->if_flags; HN_UNLOCK(sc); break; case SIOCSIFCAP: HN_LOCK(sc); if (hn_xpnt_vf_isready(sc)) { ifr_vf = *ifr; strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname, sizeof(ifr_vf.ifr_name)); error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf); HN_UNLOCK(sc); break; } /* * Fix up requested capabilities w/ supported capabilities, * since the supported capabilities could have been changed. */ mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); else ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); else ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); } /* TODO: flip RNDIS offload parameters for RXCSUM. */ if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; #ifdef foo /* We can't diff IPv6 packets from IPv4 packets on RX path. */ if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; #endif if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; else ifp->if_hwassist &= ~CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; else ifp->if_hwassist &= ~CSUM_IP6_TSO; } HN_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: HN_LOCK(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { HN_UNLOCK(sc); break; } if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* * Multicast uses mutex; use busy-wait for * the RNDIS reply. */ HN_NO_SLEEPING(sc); hn_rxfilter_config(sc); HN_SLEEPING_OK(sc); } /* XXX vlan(4) style mcast addr maintenance */ if (hn_xpnt_vf_isready(sc)) { int old_if_flags; old_if_flags = sc->hn_vf_ifp->if_flags; hn_xpnt_vf_saveifflags(sc); if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) && ((old_if_flags ^ sc->hn_vf_ifp->if_flags) & IFF_ALLMULTI)) error = hn_xpnt_vf_iocsetflags(sc); } HN_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: HN_LOCK(sc); if (hn_xpnt_vf_isready(sc)) { /* * SIOCGIFMEDIA expects ifmediareq, so don't * create and pass ifr_vf to the VF here; just * replace the ifr_name. */ vf_ifp = sc->hn_vf_ifp; strlcpy(ifr->ifr_name, vf_ifp->if_xname, sizeof(ifr->ifr_name)); error = vf_ifp->if_ioctl(vf_ifp, cmd, data); /* Restore the ifr_name. */ strlcpy(ifr->ifr_name, ifp->if_xname, sizeof(ifr->ifr_name)); HN_UNLOCK(sc); break; } HN_UNLOCK(sc); error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); break; case SIOCGIFRSSHASH: ifrh = (struct ifrsshash *)data; HN_LOCK(sc); if (sc->hn_rx_ring_inuse == 1) { HN_UNLOCK(sc); ifrh->ifrh_func = RSS_FUNC_NONE; ifrh->ifrh_types = 0; break; } if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; else ifrh->ifrh_func = RSS_FUNC_PRIVATE; ifrh->ifrh_types = hn_rss_type_fromndis(sc->hn_rss_hash); HN_UNLOCK(sc); break; case SIOCGIFRSSKEY: ifrk = (struct ifrsskey *)data; HN_LOCK(sc); if (sc->hn_rx_ring_inuse == 1) { HN_UNLOCK(sc); ifrk->ifrk_func = RSS_FUNC_NONE; ifrk->ifrk_keylen = 0; break; } if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; else ifrk->ifrk_func = RSS_FUNC_PRIVATE; ifrk->ifrk_keylen = NDIS_HASH_KEYSIZE_TOEPLITZ; memcpy(ifrk->ifrk_key, sc->hn_rss.rss_key, NDIS_HASH_KEYSIZE_TOEPLITZ); HN_UNLOCK(sc); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void hn_stop(struct hn_softc *sc, bool detaching) { struct ifnet *ifp = sc->hn_ifp; int i; HN_LOCK_ASSERT(sc); KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, ("synthetic parts were not attached")); /* Clear RUNNING bit ASAP. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); /* Disable polling. */ hn_polling(sc, 0); if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { KASSERT(sc->hn_vf_ifp != NULL, ("%s: VF is not attached", ifp->if_xname)); /* Mark transparent mode VF as disabled. */ hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */); /* * NOTE: * Datapath setting must happen _before_ bringing * the VF down. */ hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); /* * Bring the VF down. */ hn_xpnt_vf_saveifflags(sc); sc->hn_vf_ifp->if_flags &= ~IFF_UP; hn_xpnt_vf_iocsetflags(sc); } /* Suspend data transfers. */ hn_suspend_data(sc); /* Clear OACTIVE bit. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; /* * If the non-transparent mode VF is active, make sure * that the RX filter still allows packet reception. */ if (!detaching && (sc->hn_flags & HN_FLAG_RXVF)) hn_rxfilter_config(sc); } static void hn_init_locked(struct hn_softc *sc) { struct ifnet *ifp = sc->hn_ifp; int i; HN_LOCK_ASSERT(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) return; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; /* Configure RX filter */ hn_rxfilter_config(sc); /* Clear OACTIVE bit. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; /* Clear TX 'suspended' bit. */ hn_resume_tx(sc, sc->hn_tx_ring_inuse); if (hn_xpnt_vf_isready(sc)) { /* Initialize transparent VF. */ hn_xpnt_vf_init(sc); } /* Everything is ready; unleash! */ atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); /* Re-enable polling if requested. */ if (sc->hn_pollhz > 0) hn_polling(sc, sc->hn_pollhz); } static void hn_init(void *xsc) { struct hn_softc *sc = xsc; HN_LOCK(sc); hn_init_locked(sc); HN_UNLOCK(sc); } #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; unsigned int lenlim; int error; lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; error = sysctl_handle_int(oidp, &lenlim, 0, req); if (error || req->newptr == NULL) return error; HN_LOCK(sc); if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || lenlim > TCP_LRO_LENGTH_MAX) { HN_UNLOCK(sc); return EINVAL; } hn_set_lro_lenlim(sc, lenlim); HN_UNLOCK(sc); return 0; } static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ackcnt, error, i; /* * lro_ackcnt_lim is append count limit, * +1 to turn it into aggregation limit. */ ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; error = sysctl_handle_int(oidp, &ackcnt, 0, req); if (error || req->newptr == NULL) return error; if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) return EINVAL; /* * Convert aggregation limit back to append * count limit. */ --ackcnt; HN_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; HN_UNLOCK(sc); return 0; } #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int hcsum = arg2; int on, error, i; on = 0; if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) on = 1; error = sysctl_handle_int(oidp, &on, 0, req); if (error || req->newptr == NULL) return error; HN_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (on) rxr->hn_trust_hcsum |= hcsum; else rxr->hn_trust_hcsum &= ~hcsum; } HN_UNLOCK(sc); return 0; } static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int chim_size, error; chim_size = sc->hn_tx_ring[0].hn_chim_size; error = sysctl_handle_int(oidp, &chim_size, 0, req); if (error || req->newptr == NULL) return error; if (chim_size > sc->hn_chim_szmax || chim_size <= 0) return EINVAL; HN_LOCK(sc); hn_set_chim_size(sc, chim_size); HN_UNLOCK(sc); return 0; } #if __FreeBSD_version < 1100095 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((int *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; *((int *)((uint8_t *)rxr + ofs)) = 0; } return 0; } #else static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((uint64_t *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; } return 0; } #endif static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; u_long stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((u_long *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; *((u_long *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_tx_ring *txr; u_long stat; stat = 0; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { txr = &sc->hn_tx_ring[i]; stat += *((u_long *)((uint8_t *)txr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { txr = &sc->hn_tx_ring[i]; *((u_long *)((uint8_t *)txr + ofs)) = 0; } return 0; } static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error, conf; struct hn_tx_ring *txr; txr = &sc->hn_tx_ring[0]; conf = *((int *)((uint8_t *)txr + ofs)); error = sysctl_handle_int(oidp, &conf, 0, req); if (error || req->newptr == NULL) return error; HN_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { txr = &sc->hn_tx_ring[i]; *((int *)((uint8_t *)txr + ofs)) = conf; } HN_UNLOCK(sc); return 0; } static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error, size; size = sc->hn_agg_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || req->newptr == NULL) return (error); HN_LOCK(sc); sc->hn_agg_size = size; hn_set_txagg(sc); HN_UNLOCK(sc); return (0); } static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error, pkts; pkts = sc->hn_agg_pkts; error = sysctl_handle_int(oidp, &pkts, 0, req); if (error || req->newptr == NULL) return (error); HN_LOCK(sc); sc->hn_agg_pkts = pkts; hn_set_txagg(sc); HN_UNLOCK(sc); return (0); } static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int pkts; pkts = sc->hn_tx_ring[0].hn_agg_pktmax; return (sysctl_handle_int(oidp, &pkts, 0, req)); } static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int align; align = sc->hn_tx_ring[0].hn_agg_align; return (sysctl_handle_int(oidp, &align, 0, req)); } static void hn_chan_polling(struct vmbus_channel *chan, u_int pollhz) { if (pollhz == 0) vmbus_chan_poll_disable(chan); else vmbus_chan_poll_enable(chan, pollhz); } static void hn_polling(struct hn_softc *sc, u_int pollhz) { int nsubch = sc->hn_rx_ring_inuse - 1; HN_LOCK_ASSERT(sc); if (nsubch > 0) { struct vmbus_channel **subch; int i; subch = vmbus_subchan_get(sc->hn_prichan, nsubch); for (i = 0; i < nsubch; ++i) hn_chan_polling(subch[i], pollhz); vmbus_subchan_rel(subch, nsubch); } hn_chan_polling(sc->hn_prichan, pollhz); } static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int pollhz, error; pollhz = sc->hn_pollhz; error = sysctl_handle_int(oidp, &pollhz, 0, req); if (error || req->newptr == NULL) return (error); if (pollhz != 0 && (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX)) return (EINVAL); HN_LOCK(sc); if (sc->hn_pollhz != pollhz) { sc->hn_pollhz = pollhz; if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) hn_polling(sc, sc->hn_pollhz); } HN_UNLOCK(sc); return (0); } static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char verstr[16]; snprintf(verstr, sizeof(verstr), "%u.%u", HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); } static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char caps_str[128]; uint32_t caps; HN_LOCK(sc); caps = sc->hn_caps; HN_UNLOCK(sc); snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); } static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char assist_str[128]; uint32_t hwassist; HN_LOCK(sc); hwassist = sc->hn_ifp->if_hwassist; HN_UNLOCK(sc); snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); } static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char filter_str[128]; uint32_t filter; HN_LOCK(sc); filter = sc->hn_rx_filter; HN_UNLOCK(sc); snprintf(filter_str, sizeof(filter_str), "%b", filter, NDIS_PACKET_TYPES); return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); } #ifndef RSS static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error; HN_LOCK(sc); error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); if (error || req->newptr == NULL) goto back; if ((sc->hn_flags & HN_FLAG_RXVF) || (hn_xpnt_vf && sc->hn_vf_ifp != NULL)) { /* * RSS key is synchronized w/ VF's, don't allow users * to change it. */ error = EBUSY; goto back; } error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); if (error) goto back; sc->hn_flags |= HN_FLAG_HAS_RSSKEY; if (sc->hn_rx_ring_inuse > 1) { error = hn_rss_reconfig(sc); } else { /* Not RSS capable, at least for now; just save the RSS key. */ error = 0; } back: HN_UNLOCK(sc); return (error); } static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error; HN_LOCK(sc); error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); if (error || req->newptr == NULL) goto back; /* * Don't allow RSS indirect table change, if this interface is not * RSS capable currently. */ if (sc->hn_rx_ring_inuse == 1) { error = EOPNOTSUPP; goto back; } error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); if (error) goto back; sc->hn_flags |= HN_FLAG_HAS_RSSIND; hn_rss_ind_fixup(sc); error = hn_rss_reconfig(sc); back: HN_UNLOCK(sc); return (error); } #endif /* !RSS */ static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char hash_str[128]; uint32_t hash; HN_LOCK(sc); hash = sc->hn_rss_hash; HN_UNLOCK(sc); snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); } static int hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char hash_str[128]; uint32_t hash; HN_LOCK(sc); hash = sc->hn_rss_hcap; HN_UNLOCK(sc); snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); } static int hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char hash_str[128]; uint32_t hash; HN_LOCK(sc); hash = sc->hn_rx_ring[0].hn_mbuf_hash; HN_UNLOCK(sc); snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); } static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char vf_name[IFNAMSIZ + 1]; struct ifnet *vf_ifp; HN_LOCK(sc); vf_name[0] = '\0'; vf_ifp = sc->hn_vf_ifp; if (vf_ifp != NULL) snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); HN_UNLOCK(sc); return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); } static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char vf_name[IFNAMSIZ + 1]; struct ifnet *vf_ifp; HN_LOCK(sc); vf_name[0] = '\0'; vf_ifp = sc->hn_rx_ring[0].hn_rxvf_ifp; if (vf_ifp != NULL) snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); HN_UNLOCK(sc); return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); } static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS) { struct rm_priotracker pt; struct sbuf *sb; int error, i; bool first; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); rm_rlock(&hn_vfmap_lock, &pt); first = true; for (i = 0; i < hn_vfmap_size; ++i) { struct ifnet *ifp; if (hn_vfmap[i] == NULL) continue; ifp = ifnet_byindex(i); if (ifp != NULL) { if (first) sbuf_printf(sb, "%s", ifp->if_xname); else sbuf_printf(sb, " %s", ifp->if_xname); first = false; } } rm_runlock(&hn_vfmap_lock, &pt); error = sbuf_finish(sb); sbuf_delete(sb); return (error); } static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS) { struct rm_priotracker pt; struct sbuf *sb; int error, i; bool first; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); rm_rlock(&hn_vfmap_lock, &pt); first = true; for (i = 0; i < hn_vfmap_size; ++i) { struct ifnet *ifp, *hn_ifp; hn_ifp = hn_vfmap[i]; if (hn_ifp == NULL) continue; ifp = ifnet_byindex(i); if (ifp != NULL) { if (first) { sbuf_printf(sb, "%s:%s", ifp->if_xname, hn_ifp->if_xname); } else { sbuf_printf(sb, " %s:%s", ifp->if_xname, hn_ifp->if_xname); } first = false; } } rm_runlock(&hn_vfmap_lock, &pt); error = sbuf_finish(sb); sbuf_delete(sb); return (error); } static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error, onoff = 0; if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) onoff = 1; error = sysctl_handle_int(oidp, &onoff, 0, req); if (error || req->newptr == NULL) return (error); HN_LOCK(sc); /* NOTE: hn_vf_lock for hn_transmit() */ rm_wlock(&sc->hn_vf_lock); if (onoff) sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; else sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF; rm_wunlock(&sc->hn_vf_lock); HN_UNLOCK(sc); return (0); } static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int enabled = 0; if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) enabled = 1; return (sysctl_handle_int(oidp, &enabled, 0, req)); } static int hn_check_iplen(const struct mbuf *m, int hoff) { const struct ip *ip; int len, iphlen, iplen; const struct tcphdr *th; int thoff; /* TCP data offset */ len = hoff + sizeof(struct ip); /* The packet must be at least the size of an IP header. */ if (m->m_pkthdr.len < len) return IPPROTO_DONE; /* The fixed IP header must reside completely in the first mbuf. */ if (m->m_len < len) return IPPROTO_DONE; ip = mtodo(m, hoff); /* Bound check the packet's stated IP header length. */ iphlen = ip->ip_hl << 2; if (iphlen < sizeof(struct ip)) /* minimum header length */ return IPPROTO_DONE; /* The full IP header must reside completely in the one mbuf. */ if (m->m_len < hoff + iphlen) return IPPROTO_DONE; iplen = ntohs(ip->ip_len); /* * Check that the amount of data in the buffers is as * at least much as the IP header would have us expect. */ if (m->m_pkthdr.len < hoff + iplen) return IPPROTO_DONE; /* * Ignore IP fragments. */ if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) return IPPROTO_DONE; /* * The TCP/IP or UDP/IP header must be entirely contained within * the first fragment of a packet. */ switch (ip->ip_p) { case IPPROTO_TCP: if (iplen < iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); thoff = th->th_off << 2; if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + thoff) return IPPROTO_DONE; break; case IPPROTO_UDP: if (iplen < iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; break; default: if (iplen < iphlen) return IPPROTO_DONE; break; } return ip->ip_p; } static void hn_rxpkt_proto(const struct mbuf *m_new, int *l3proto, int *l4proto) { const struct ether_header *eh; uint16_t etype; int hoff; hoff = sizeof(*eh); /* Checked at the beginning of this function. */ KASSERT(m_new->m_len >= hoff, ("not ethernet frame")); eh = mtod(m_new, const struct ether_header *); etype = ntohs(eh->ether_type); if (etype == ETHERTYPE_VLAN) { const struct ether_vlan_header *evl; hoff = sizeof(*evl); if (m_new->m_len < hoff) return; evl = mtod(m_new, const struct ether_vlan_header *); etype = ntohs(evl->evl_proto); } *l3proto = etype; if (etype == ETHERTYPE_IP) *l4proto = hn_check_iplen(m_new, hoff); else *l4proto = IPPROTO_DONE; } static int hn_create_rx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; device_t dev = sc->hn_dev; #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 int lroent_cnt; #endif #endif int i; /* * Create RXBUF for reception. * * NOTE: * - It is shared by all channels. * - A large enough buffer is allocated, certain version of NVSes * may further limit the usable space. */ sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->hn_rxbuf == NULL) { device_printf(sc->hn_dev, "allocate rxbuf failed\n"); return (ENOMEM); } sc->hn_rx_ring_cnt = ring_cnt; sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, M_DEVBUF, M_WAITOK | M_ZERO); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 lroent_cnt = hn_lro_entry_count; if (lroent_cnt < TCP_LRO_ENTRIES) lroent_cnt = TCP_LRO_ENTRIES; if (bootverbose) device_printf(dev, "LRO: entry count %d\n", lroent_cnt); #endif #endif /* INET || INET6 */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Create dev.hn.UNIT.rx sysctl tree */ sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, &rxr->hn_br_dma, BUS_DMA_WAITOK); if (rxr->hn_br == NULL) { device_printf(dev, "allocate bufring failed\n"); return (ENOMEM); } if (hn_trust_hosttcp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; if (hn_trust_hostudp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; if (hn_trust_hostip) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; rxr->hn_mbuf_hash = NDIS_HASH_ALL; rxr->hn_ifp = sc->hn_ifp; if (i < sc->hn_tx_ring_cnt) rxr->hn_txr = &sc->hn_tx_ring[i]; rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); rxr->hn_rx_idx = i; rxr->hn_rxbuf = sc->hn_rxbuf; /* * Initialize LRO. */ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, hn_lro_mbufq_depth); #else tcp_lro_init(&rxr->hn_lro); rxr->hn_lro.ifp = sc->hn_ifp; #endif #if __FreeBSD_version >= 1100099 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; #endif #endif /* INET || INET6 */ if (sc->hn_rx_sysctl_tree != NULL) { char name[16]; /* * Create per RX ring sysctl tree: * dev.hn.UNIT.rx.RINGID */ snprintf(name, sizeof(name), "%d", i); rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (rxr->hn_rx_sysctl_tree != NULL) { SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "packets", CTLFLAG_RW, &rxr->hn_pkts, "# of packets received"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "rss_pkts", CTLFLAG_RW, &rxr->hn_rss_pkts, "# of packets w/ RSS info received"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "pktbuf_len", CTLFLAG_RD, &rxr->hn_pktbuf_len, 0, "Temporary channel packet buffer length"); } } } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_queued), #if __FreeBSD_version < 1100095 hn_rx_stat_int_sysctl, #else hn_rx_stat_u64_sysctl, #endif "LU", "LRO queued"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), #if __FreeBSD_version < 1100095 hn_rx_stat_int_sysctl, #else hn_rx_stat_u64_sysctl, #endif "LU", "LRO flushed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro_tried), hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); #if __FreeBSD_version >= 1100099 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_lenlim_sysctl, "IU", "Max # of data bytes to be aggregated by LRO"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_ackcnt_sysctl, "I", "Max # of ACKs to be aggregated by LRO"); #endif SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, hn_trust_hcsum_sysctl, "I", "Trust tcp segement verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, hn_trust_hcsum_sysctl, "I", "Trust udp datagram verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, hn_trust_hcsum_sysctl, "I", "Trust ip packet verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_ip), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_tcp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_udp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_trusted), hn_rx_stat_ulong_sysctl, "LU", "# of packets that we trust host's csum verification"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_small_pkts), hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_ack_failed), hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); return (0); } static void hn_destroy_rx_data(struct hn_softc *sc) { int i; if (sc->hn_rxbuf != NULL) { if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); else device_printf(sc->hn_dev, "RXBUF is referenced\n"); sc->hn_rxbuf = NULL; } if (sc->hn_rx_ring_cnt == 0) return; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (rxr->hn_br == NULL) continue; if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); } else { device_printf(sc->hn_dev, "%dth channel bufring is referenced", i); } rxr->hn_br = NULL; #if defined(INET) || defined(INET6) tcp_lro_free(&rxr->hn_lro); #endif free(rxr->hn_pktbuf, M_DEVBUF); } free(sc->hn_rx_ring, M_DEVBUF); sc->hn_rx_ring = NULL; sc->hn_rx_ring_cnt = 0; sc->hn_rx_ring_inuse = 0; } static int hn_tx_ring_create(struct hn_softc *sc, int id) { struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; device_t dev = sc->hn_dev; bus_dma_tag_t parent_dtag; int error, i; txr->hn_sc = sc; txr->hn_tx_idx = id; #ifndef HN_USE_TXDESC_BUFRING mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); #endif mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); txr->hn_txdesc_cnt = HN_TX_DESC_CNT; txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, M_DEVBUF, M_WAITOK | M_ZERO); #ifndef HN_USE_TXDESC_BUFRING SLIST_INIT(&txr->hn_txlist); #else txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, M_WAITOK, &txr->hn_tx_lock); #endif if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) { txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ( device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id)); } else { txr->hn_tx_taskq = sc->hn_tx_taskqs[id % hn_tx_taskq_cnt]; } #ifdef HN_IFSTART_SUPPORT if (hn_use_if_start) { txr->hn_txeof = hn_start_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); } else #endif { int br_depth; txr->hn_txeof = hn_xmit_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); br_depth = hn_get_txswq_depth(txr); txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, M_WAITOK, &txr->hn_tx_lock); } txr->hn_direct_tx_size = hn_direct_tx_size; /* * Always schedule transmission instead of trying to do direct * transmission. This one gives the best performance so far. */ txr->hn_sched_tx = 1; parent_dtag = bus_get_dma_tag(dev); /* DMA tag for RNDIS packet messages. */ error = bus_dma_tag_create(parent_dtag, /* parent */ HN_RNDIS_PKT_ALIGN, /* alignment */ HN_RNDIS_PKT_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_RNDIS_PKT_LEN, /* maxsize */ 1, /* nsegments */ HN_RNDIS_PKT_LEN, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_rndis_dtag); if (error) { device_printf(dev, "failed to create rndis dmatag\n"); return error; } /* DMA tag for data. */ error = bus_dma_tag_create(parent_dtag, /* parent */ 1, /* alignment */ HN_TX_DATA_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_TX_DATA_MAXSIZE, /* maxsize */ HN_TX_DATA_SEGCNT_MAX, /* nsegments */ HN_TX_DATA_SEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_data_dtag); if (error) { device_printf(dev, "failed to create data dmatag\n"); return error; } for (i = 0; i < txr->hn_txdesc_cnt; ++i) { struct hn_txdesc *txd = &txr->hn_txdesc[i]; txd->txr = txr; txd->chim_index = HN_NVS_CHIM_IDX_INVALID; STAILQ_INIT(&txd->agg_list); /* * Allocate and load RNDIS packet message. */ error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, (void **)&txd->rndis_pkt, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &txd->rndis_pkt_dmap); if (error) { device_printf(dev, "failed to allocate rndis_packet_msg, %d\n", i); return error; } error = bus_dmamap_load(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap, txd->rndis_pkt, HN_RNDIS_PKT_LEN, hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "failed to load rndis_packet_msg, %d\n", i); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); return error; } /* DMA map for TX data. */ error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, &txd->data_dmap); if (error) { device_printf(dev, "failed to allocate tx data dmamap\n"); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); return error; } /* All set, put it to list */ txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); #else buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif } txr->hn_txdesc_avail = txr->hn_txdesc_cnt; if (sc->hn_tx_sysctl_tree != NULL) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; char name[16]; /* * Create per TX ring sysctl tree: * dev.hn.UNIT.tx.RINGID */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); snprintf(name, sizeof(name), "%d", id); txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (txr->hn_tx_sysctl_tree != NULL) { child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); #ifdef HN_DEBUG SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", CTLFLAG_RD, &txr->hn_txdesc_avail, 0, "# of available TX descs"); #endif #ifdef HN_IFSTART_SUPPORT if (!hn_use_if_start) #endif { SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", CTLFLAG_RD, &txr->hn_oactive, 0, "over active"); } SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", CTLFLAG_RW, &txr->hn_pkts, "# of packets transmitted"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", CTLFLAG_RW, &txr->hn_sends, "# of sends"); } } return 0; } static void hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) { struct hn_tx_ring *txr = txd->txr; KASSERT(txd->m == NULL, ("still has mbuf installed")); KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); } static void hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT(txd->refs == 0 || txd->refs == 1, ("invalid txd refs %d", txd->refs)); /* Aggregated txds will be freed by their aggregating txd. */ if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { int freed; freed = hn_txdesc_put(txr, txd); KASSERT(freed, ("can't free txdesc")); } } static void hn_tx_ring_destroy(struct hn_tx_ring *txr) { int i; if (txr->hn_txdesc == NULL) return; /* * NOTE: * Because the freeing of aggregated txds will be deferred * to the aggregating txd, two passes are used here: * - The first pass GCes any pending txds. This GC is necessary, * since if the channels are revoked, hypervisor will not * deliver send-done for all pending txds. * - The second pass frees the busdma stuffs, i.e. after all txds * were freed. */ for (i = 0; i < txr->hn_txdesc_cnt; ++i) hn_txdesc_gc(txr, &txr->hn_txdesc[i]); for (i = 0; i < txr->hn_txdesc_cnt; ++i) hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); if (txr->hn_tx_data_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_data_dtag); if (txr->hn_tx_rndis_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); #ifdef HN_USE_TXDESC_BUFRING buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); #endif free(txr->hn_txdesc, M_DEVBUF); txr->hn_txdesc = NULL; if (txr->hn_mbuf_br != NULL) buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); #ifndef HN_USE_TXDESC_BUFRING mtx_destroy(&txr->hn_txlist_spin); #endif mtx_destroy(&txr->hn_tx_lock); } static int hn_create_tx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int i; /* * Create TXBUF for chimney sending. * * NOTE: It is shared by all channels. */ sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->hn_chim == NULL) { device_printf(sc->hn_dev, "allocate txbuf failed\n"); return (ENOMEM); } sc->hn_tx_ring_cnt = ring_cnt; sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, M_DEVBUF, M_WAITOK | M_ZERO); ctx = device_get_sysctl_ctx(sc->hn_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); /* Create dev.hn.UNIT.tx sysctl tree */ sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { int error; error = hn_tx_ring_create(sc, i); if (error) return error; } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_no_txdescs), hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_send_failed), hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_txdma_failed), hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_flush_failed), hn_tx_stat_ulong_sysctl, "LU", "# of packet transmission aggregation flush failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_collapsed), hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, "# of total TX descs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", CTLFLAG_RD, &sc->hn_chim_szmax, 0, "Chimney send packet size upper boundary"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_chim_size_sysctl, "I", "Chimney send packet size limit"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_direct_tx_size), hn_tx_conf_int_sysctl, "I", "Size of the packet for direct transmission"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_sched_tx), hn_tx_conf_int_sysctl, "I", "Always schedule transmission " "instead of doing direct transmission"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, "Applied packet transmission aggregation size"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_txagg_pktmax_sysctl, "I", "Applied packet transmission aggregation packets"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_txagg_align_sysctl, "I", "Applied packet transmission aggregation alignment"); return 0; } static void hn_set_chim_size(struct hn_softc *sc, int chim_size) { int i; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) sc->hn_tx_ring[i].hn_chim_size = chim_size; } static void hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) { struct ifnet *ifp = sc->hn_ifp; u_int hw_tsomax; int tso_minlen; HN_LOCK_ASSERT(sc); if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) return; KASSERT(sc->hn_ndis_tso_sgmin >= 2, ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); tso_minlen = sc->hn_ndis_tso_sgmin * mtu; KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && sc->hn_ndis_tso_szmax <= IP_MAXPACKET, ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); if (tso_maxlen < tso_minlen) tso_maxlen = tso_minlen; else if (tso_maxlen > IP_MAXPACKET) tso_maxlen = IP_MAXPACKET; if (tso_maxlen > sc->hn_ndis_tso_szmax) tso_maxlen = sc->hn_ndis_tso_szmax; hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); if (hn_xpnt_vf_isready(sc)) { if (hw_tsomax > sc->hn_vf_ifp->if_hw_tsomax) hw_tsomax = sc->hn_vf_ifp->if_hw_tsomax; } ifp->if_hw_tsomax = hw_tsomax; if (bootverbose) if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); } static void hn_fixup_tx_data(struct hn_softc *sc) { uint64_t csum_assist; int i; hn_set_chim_size(sc, sc->hn_chim_szmax); if (hn_tx_chimney_size > 0 && hn_tx_chimney_size < sc->hn_chim_szmax) hn_set_chim_size(sc, hn_tx_chimney_size); csum_assist = 0; if (sc->hn_caps & HN_CAP_IPCS) csum_assist |= CSUM_IP; if (sc->hn_caps & HN_CAP_TCP4CS) csum_assist |= CSUM_IP_TCP; if ((sc->hn_caps & HN_CAP_UDP4CS) && hn_enable_udp4cs) csum_assist |= CSUM_IP_UDP; if (sc->hn_caps & HN_CAP_TCP6CS) csum_assist |= CSUM_IP6_TCP; if ((sc->hn_caps & HN_CAP_UDP6CS) && hn_enable_udp6cs) csum_assist |= CSUM_IP6_UDP; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) sc->hn_tx_ring[i].hn_csum_assist = csum_assist; if (sc->hn_caps & HN_CAP_HASHVAL) { /* * Support HASHVAL pktinfo on TX path. */ if (bootverbose) if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; } } static void hn_fixup_rx_data(struct hn_softc *sc) { if (sc->hn_caps & HN_CAP_UDPHASH) { int i; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_UDP_HASH; } } static void hn_destroy_tx_data(struct hn_softc *sc) { int i; if (sc->hn_chim != NULL) { if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); } else { device_printf(sc->hn_dev, "chimney sending buffer is referenced"); } sc->hn_chim = NULL; } if (sc->hn_tx_ring_cnt == 0) return; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) hn_tx_ring_destroy(&sc->hn_tx_ring[i]); free(sc->hn_tx_ring, M_DEVBUF); sc->hn_tx_ring = NULL; sc->hn_tx_ring_cnt = 0; sc->hn_tx_ring_inuse = 0; } #ifdef HN_IFSTART_SUPPORT static void hn_start_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static int hn_start_locked(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; int sched = 0; KASSERT(hn_use_if_start, ("hn_start_locked is called, when if_start is disabled")); KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); mtx_assert(&txr->hn_tx_lock, MA_OWNED); KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); if (__predict_false(txr->hn_suspended)) return (0); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (0); while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { struct hn_txdesc *txd; struct mbuf *m_head; int error; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); sched = 1; break; } #if defined(INET6) || defined(INET) if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { m_head = hn_tso_fixup(m_head); if (__predict_false(m_head == NULL)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); continue; } } else if (m_head->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { m_head = hn_set_hlen(m_head); if (__predict_false(m_head == NULL)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); continue; } } #endif txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } error = hn_encap(ifp, txr, txd, &m_head); if (error) { /* Both txd and m_head are freed */ KASSERT(txr->hn_agg_txd == NULL, ("encap failed w/ pending aggregating txdesc")); continue; } if (txr->hn_agg_pktleft == 0) { if (txr->hn_agg_txd != NULL) { KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); error = hn_flush_txagg(ifp, txr); if (__predict_false(error)) { atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } else { KASSERT(m_head != NULL, ("mbuf was freed")); error = hn_txpkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } } #ifdef INVARIANTS else { KASSERT(txr->hn_agg_txd != NULL, ("no aggregating txdesc")); KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); } #endif } /* Flush pending aggerated transmission. */ if (txr->hn_agg_txd != NULL) hn_flush_txagg(ifp, txr); return (sched); } static void hn_start(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } static void hn_start_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_start_txeof(struct hn_tx_ring *txr) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the OACTIVE earlier, with the hope, that * others could catch up. The task will clear the * flag again with the hn_tx_lock to avoid possible * races. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } #endif /* HN_IFSTART_SUPPORT */ static int hn_xmit(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; struct mbuf *m_head; int sched = 0; mtx_assert(&txr->hn_tx_lock, MA_OWNED); #ifdef HN_IFSTART_SUPPORT KASSERT(hn_use_if_start == 0, ("hn_xmit is called, when if_start is enabled")); #endif KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); if (__predict_false(txr->hn_suspended)) return (0); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) return (0); while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { struct hn_txdesc *txd; int error; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); sched = 1; break; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } error = hn_encap(ifp, txr, txd, &m_head); if (error) { /* Both txd and m_head are freed; discard */ KASSERT(txr->hn_agg_txd == NULL, ("encap failed w/ pending aggregating txdesc")); drbr_advance(ifp, txr->hn_mbuf_br); continue; } if (txr->hn_agg_pktleft == 0) { if (txr->hn_agg_txd != NULL) { KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); error = hn_flush_txagg(ifp, txr); if (__predict_false(error)) { txr->hn_oactive = 1; break; } } else { KASSERT(m_head != NULL, ("mbuf was freed")); error = hn_txpkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } } } #ifdef INVARIANTS else { KASSERT(txr->hn_agg_txd != NULL, ("no aggregating txdesc")); KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); } #endif /* Sent */ drbr_advance(ifp, txr->hn_mbuf_br); } /* Flush pending aggerated transmission. */ if (txr->hn_agg_txd != NULL) hn_flush_txagg(ifp, txr); return (sched); } static int hn_transmit(struct ifnet *ifp, struct mbuf *m) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr; int error, idx = 0; if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { struct rm_priotracker pt; rm_rlock(&sc->hn_vf_lock, &pt); if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { struct mbuf *m_bpf = NULL; int obytes, omcast; obytes = m->m_pkthdr.len; omcast = (m->m_flags & M_MCAST) != 0; if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) { if (bpf_peers_present(ifp->if_bpf)) { m_bpf = m_copypacket(m, M_NOWAIT); if (m_bpf == NULL) { /* * Failed to grab a shallow * copy; tap now. */ ETHER_BPF_MTAP(ifp, m); } } } else { ETHER_BPF_MTAP(ifp, m); } error = sc->hn_vf_ifp->if_transmit(sc->hn_vf_ifp, m); rm_runlock(&sc->hn_vf_lock, &pt); if (m_bpf != NULL) { if (!error) ETHER_BPF_MTAP(ifp, m_bpf); m_freem(m_bpf); } if (error == ENOBUFS) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); } else if (error) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } else { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes); if (omcast) { if_inc_counter(ifp, IFCOUNTER_OMCASTS, omcast); } } return (error); } rm_runlock(&sc->hn_vf_lock, &pt); } #if defined(INET6) || defined(INET) /* * Perform TSO packet header fixup or get l2/l3 header length now, * since packet headers should be cache-hot. */ if (m->m_pkthdr.csum_flags & CSUM_TSO) { m = hn_tso_fixup(m); if (__predict_false(m == NULL)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return EIO; } } else if (m->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { m = hn_set_hlen(m); if (__predict_false(m == NULL)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return EIO; } } #endif /* * Select the TX ring based on flowid */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { #ifdef RSS uint32_t bid; if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), &bid) == 0) idx = bid % sc->hn_tx_ring_inuse; else #endif { #if defined(INET6) || defined(INET) int tcpsyn = 0; if (m->m_pkthdr.len < 128 && (m->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) && (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { m = hn_check_tcpsyn(m, &tcpsyn); if (__predict_false(m == NULL)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (EIO); } } #else const int tcpsyn = 0; #endif if (tcpsyn) idx = 0; else idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; } } txr = &sc->hn_tx_ring[idx]; error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); if (error) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); return error; } if (txr->hn_oactive) return 0; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return 0; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); return 0; } static void hn_tx_ring_qflush(struct hn_tx_ring *txr) { struct mbuf *m; mtx_lock(&txr->hn_tx_lock); while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) m_freem(m); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_qflush(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; struct rm_priotracker pt; int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) hn_tx_ring_qflush(&sc->hn_tx_ring[i]); if_qflush(ifp); rm_rlock(&sc->hn_vf_lock, &pt); if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) sc->hn_vf_ifp->if_qflush(sc->hn_vf_ifp); rm_runlock(&sc->hn_vf_lock, &pt); } static void hn_xmit_txeof(struct hn_tx_ring *txr) { if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; txr->hn_oactive = 0; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the oactive earlier, with the hope, that * others could catch up. The task will clear the * oactive again with the hn_tx_lock to avoid possible * races. */ txr->hn_oactive = 0; taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_xmit_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); txr->hn_oactive = 0; hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static int hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) { struct vmbus_chan_br cbr; struct hn_rx_ring *rxr; struct hn_tx_ring *txr = NULL; int idx, error; idx = vmbus_chan_subidx(chan); /* * Link this channel to RX/TX ring. */ KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should > 0 && < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("RX ring %d already attached", idx)); rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; rxr->hn_chan = chan; if (bootverbose) { if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n", idx, vmbus_chan_id(chan)); } if (idx < sc->hn_tx_ring_inuse) { txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("TX ring %d already attached", idx)); txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; txr->hn_chan = chan; if (bootverbose) { if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n", idx, vmbus_chan_id(chan)); } } /* Bind this channel to a proper CPU. */ vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx)); /* * Open this channel */ cbr.cbr = rxr->hn_br; cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr; cbr.cbr_txsz = HN_TXBR_SIZE; cbr.cbr_rxsz = HN_RXBR_SIZE; error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr); if (error) { if (error == EISCONN) { if_printf(sc->hn_ifp, "bufring is connected after " "chan%u open failure\n", vmbus_chan_id(chan)); rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; } else { if_printf(sc->hn_ifp, "open chan%u failed: %d\n", vmbus_chan_id(chan), error); } } return (error); } static void hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan) { struct hn_rx_ring *rxr; int idx, error; idx = vmbus_chan_subidx(chan); /* * Link this channel to RX/TX ring. */ KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should > 0 && < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED), ("RX ring %d is not attached", idx)); rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; if (idx < sc->hn_tx_ring_inuse) { struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED), ("TX ring %d is not attached attached", idx)); txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; } /* * Close this channel. * * NOTE: * Channel closing does _not_ destroy the target channel. */ error = vmbus_chan_close_direct(chan); if (error == EISCONN) { if_printf(sc->hn_ifp, "chan%u bufring is connected " "after being closed\n", vmbus_chan_id(chan)); rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; } else if (error) { if_printf(sc->hn_ifp, "chan%u close failed: %d\n", vmbus_chan_id(chan), error); } } static int hn_attach_subchans(struct hn_softc *sc) { struct vmbus_channel **subchans; int subchan_cnt = sc->hn_rx_ring_inuse - 1; int i, error = 0; KASSERT(subchan_cnt > 0, ("no sub-channels")); /* Attach the sub-channels. */ subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) { int error1; error1 = hn_chan_attach(sc, subchans[i]); if (error1) { error = error1; /* Move on; all channels will be detached later. */ } } vmbus_subchan_rel(subchans, subchan_cnt); if (error) { if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error); } else { if (bootverbose) { if_printf(sc->hn_ifp, "%d sub-channels attached\n", subchan_cnt); } } return (error); } static void hn_detach_allchans(struct hn_softc *sc) { struct vmbus_channel **subchans; int subchan_cnt = sc->hn_rx_ring_inuse - 1; int i; if (subchan_cnt == 0) goto back; /* Detach the sub-channels. */ subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) hn_chan_detach(sc, subchans[i]); vmbus_subchan_rel(subchans, subchan_cnt); back: /* * Detach the primary channel, _after_ all sub-channels * are detached. */ hn_chan_detach(sc, sc->hn_prichan); /* Wait for sub-channels to be destroyed, if any. */ vmbus_subchan_drain(sc->hn_prichan); #ifdef INVARIANTS for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { KASSERT((sc->hn_rx_ring[i].hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("%dth RX ring is still attached", i)); } for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { KASSERT((sc->hn_tx_ring[i].hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("%dth TX ring is still attached", i)); } #endif } static int hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch) { struct vmbus_channel **subchans; int nchan, rxr_cnt, error; nchan = *nsubch + 1; if (nchan == 1) { /* * Multiple RX/TX rings are not requested. */ *nsubch = 0; return (0); } /* * Query RSS capabilities, e.g. # of RX rings, and # of indirect * table entries. */ error = hn_rndis_query_rsscaps(sc, &rxr_cnt); if (error) { /* No RSS; this is benign. */ *nsubch = 0; return (0); } if (bootverbose) { if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", rxr_cnt, nchan); } if (nchan > rxr_cnt) nchan = rxr_cnt; if (nchan == 1) { if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); *nsubch = 0; return (0); } /* * Allocate sub-channels from NVS. */ *nsubch = nchan - 1; error = hn_nvs_alloc_subchans(sc, nsubch); if (error || *nsubch == 0) { /* Failed to allocate sub-channels. */ *nsubch = 0; return (0); } /* * Wait for all sub-channels to become ready before moving on. */ subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); vmbus_subchan_rel(subchans, *nsubch); return (0); } static bool hn_synth_attachable(const struct hn_softc *sc) { int i; if (sc->hn_flags & HN_FLAG_ERRORS) return (false); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) return (false); } return (true); } /* * Make sure that the RX filter is zero after the successful * RNDIS initialization. * * NOTE: * Under certain conditions on certain versions of Hyper-V, * the RNDIS rxfilter is _not_ zero on the hypervisor side * after the successful RNDIS initialization, which breaks * the assumption of any following code (well, it breaks the * RNDIS API contract actually). Clear the RNDIS rxfilter * explicitly, drain packets sneaking through, and drain the * interrupt taskqueues scheduled due to the stealth packets. */ static void hn_rndis_init_fixat(struct hn_softc *sc, int nchan) { hn_disable_rx(sc); hn_drain_rxtx(sc, nchan); } static int hn_synth_attach(struct hn_softc *sc, int mtu) { #define ATTACHED_NVS 0x0002 #define ATTACHED_RNDIS 0x0004 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; int error, nsubch, nchan = 1, i, rndis_inited; uint32_t old_caps, attached = 0; KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, ("synthetic parts were attached")); if (!hn_synth_attachable(sc)) return (ENXIO); /* Save capabilities for later verification. */ old_caps = sc->hn_caps; sc->hn_caps = 0; /* Clear RSS stuffs. */ sc->hn_rss_ind_size = 0; sc->hn_rss_hash = 0; sc->hn_rss_hcap = 0; /* * Attach the primary channel _before_ attaching NVS and RNDIS. */ error = hn_chan_attach(sc, sc->hn_prichan); if (error) goto failed; /* * Attach NVS. */ error = hn_nvs_attach(sc, mtu); if (error) goto failed; attached |= ATTACHED_NVS; /* * Attach RNDIS _after_ NVS is attached. */ error = hn_rndis_attach(sc, mtu, &rndis_inited); if (rndis_inited) attached |= ATTACHED_RNDIS; if (error) goto failed; /* * Make sure capabilities are not changed. */ if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) { if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n", old_caps, sc->hn_caps); error = ENXIO; goto failed; } /* * Allocate sub-channels for multi-TX/RX rings. * * NOTE: * The # of RX rings that can be used is equivalent to the # of * channels to be requested. */ nsubch = sc->hn_rx_ring_cnt - 1; error = hn_synth_alloc_subchans(sc, &nsubch); if (error) goto failed; /* NOTE: _Full_ synthetic parts detach is required now. */ sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED; /* * Set the # of TX/RX rings that could be used according to * the # of channels that NVS offered. */ nchan = nsubch + 1; hn_set_ring_inuse(sc, nchan); if (nchan == 1) { /* Only the primary channel can be used; done */ goto back; } /* * Attach the sub-channels. * * NOTE: hn_set_ring_inuse() _must_ have been called. */ error = hn_attach_subchans(sc); if (error) goto failed; /* * Configure RSS key and indirect table _after_ all sub-channels * are attached. */ if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) { /* * RSS key is not set yet; set it to the default RSS key. */ if (bootverbose) if_printf(sc->hn_ifp, "setup default RSS key\n"); #ifdef RSS rss_getkey(rss->rss_key); #else memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key)); #endif sc->hn_flags |= HN_FLAG_HAS_RSSKEY; } if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) { /* * RSS indirect table is not set yet; set it up in round- * robin fashion. */ if (bootverbose) { if_printf(sc->hn_ifp, "setup default RSS indirect " "table\n"); } for (i = 0; i < NDIS_HASH_INDCNT; ++i) { uint32_t subidx; #ifdef RSS subidx = rss_get_indirection_to_bucket(i); #else subidx = i; #endif rss->rss_ind[i] = subidx % nchan; } sc->hn_flags |= HN_FLAG_HAS_RSSIND; } else { /* * # of usable channels may be changed, so we have to * make sure that all entries in RSS indirect table * are valid. * * NOTE: hn_set_ring_inuse() _must_ have been called. */ hn_rss_ind_fixup(sc); } sc->hn_rss_hash = sc->hn_rss_hcap; if ((sc->hn_flags & HN_FLAG_RXVF) || (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { /* NOTE: Don't reconfigure RSS; will do immediately. */ hn_vf_rss_fixup(sc, false); } error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); if (error) goto failed; back: /* * Fixup transmission aggregation setup. */ hn_set_txagg(sc); hn_rndis_init_fixat(sc, nchan); return (0); failed: if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { hn_rndis_init_fixat(sc, nchan); hn_synth_detach(sc); } else { if (attached & ATTACHED_RNDIS) { hn_rndis_init_fixat(sc, nchan); hn_rndis_detach(sc); } if (attached & ATTACHED_NVS) hn_nvs_detach(sc); hn_chan_detach(sc, sc->hn_prichan); /* Restore old capabilities. */ sc->hn_caps = old_caps; } return (error); #undef ATTACHED_RNDIS #undef ATTACHED_NVS } /* * NOTE: * The interface must have been suspended though hn_suspend(), before * this function get called. */ static void hn_synth_detach(struct hn_softc *sc) { KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, ("synthetic parts were not attached")); /* Detach the RNDIS first. */ hn_rndis_detach(sc); /* Detach NVS. */ hn_nvs_detach(sc); /* Detach all of the channels. */ hn_detach_allchans(sc); if (vmbus_current_version >= VMBUS_VERSION_WIN10 && sc->hn_rxbuf_gpadl != 0) { /* * Host is post-Win2016, disconnect RXBUF from primary channel here. */ int error; error = vmbus_chan_gpadl_disconnect(sc->hn_prichan, sc->hn_rxbuf_gpadl); if (error) { if_printf(sc->hn_ifp, "rxbuf gpadl disconn failed: %d\n", error); sc->hn_flags |= HN_FLAG_RXBUF_REF; } sc->hn_rxbuf_gpadl = 0; } if (vmbus_current_version >= VMBUS_VERSION_WIN10 && sc->hn_chim_gpadl != 0) { /* * Host is post-Win2016, disconnect chimney sending buffer from * primary channel here. */ int error; error = vmbus_chan_gpadl_disconnect(sc->hn_prichan, sc->hn_chim_gpadl); if (error) { if_printf(sc->hn_ifp, "chim gpadl disconn failed: %d\n", error); sc->hn_flags |= HN_FLAG_CHIM_REF; } sc->hn_chim_gpadl = 0; } sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED; } static void hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt) { KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt, ("invalid ring count %d", ring_cnt)); if (sc->hn_tx_ring_cnt > ring_cnt) sc->hn_tx_ring_inuse = ring_cnt; else sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_rx_ring_inuse = ring_cnt; #ifdef RSS if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) { if_printf(sc->hn_ifp, "# of RX rings (%d) does not match " "# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse, rss_getnumbuckets()); } #endif if (bootverbose) { if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n", sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); } } static void hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan) { /* * NOTE: * The TX bufring will not be drained by the hypervisor, * if the primary channel is revoked. */ while (!vmbus_chan_rx_empty(chan) || (!vmbus_chan_is_revoked(sc->hn_prichan) && !vmbus_chan_tx_empty(chan))) pause("waitch", 1); vmbus_chan_intr_drain(chan); } static void hn_disable_rx(struct hn_softc *sc) { /* * Disable RX by clearing RX filter forcefully. */ sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE; hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */ /* * Give RNDIS enough time to flush all pending data packets. */ pause("waitrx", (200 * hz) / 1000); } /* * NOTE: * RX/TX _must_ have been suspended/disabled, before this function * is called. */ static void hn_drain_rxtx(struct hn_softc *sc, int nchan) { struct vmbus_channel **subch = NULL; int nsubch; /* * Drain RX/TX bufrings and interrupts. */ nsubch = nchan - 1; if (nsubch > 0) subch = vmbus_subchan_get(sc->hn_prichan, nsubch); if (subch != NULL) { int i; for (i = 0; i < nsubch; ++i) hn_chan_drain(sc, subch[i]); } hn_chan_drain(sc, sc->hn_prichan); if (subch != NULL) vmbus_subchan_rel(subch, nsubch); } static void hn_suspend_data(struct hn_softc *sc) { struct hn_tx_ring *txr; int i; HN_LOCK_ASSERT(sc); /* * Suspend TX. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; mtx_lock(&txr->hn_tx_lock); txr->hn_suspended = 1; mtx_unlock(&txr->hn_tx_lock); /* No one is able send more packets now. */ /* * Wait for all pending sends to finish. * * NOTE: * We will _not_ receive all pending send-done, if the * primary channel is revoked. */ while (hn_tx_ring_pending(txr) && !vmbus_chan_is_revoked(sc->hn_prichan)) pause("hnwtx", 1 /* 1 tick */); } /* * Disable RX. */ hn_disable_rx(sc); /* * Drain RX/TX. */ hn_drain_rxtx(sc, sc->hn_rx_ring_inuse); /* * Drain any pending TX tasks. * * NOTE: * The above hn_drain_rxtx() can dispatch TX tasks, so the TX * tasks will have to be drained _after_ the above hn_drain_rxtx(). */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused) { ((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL; } static void hn_suspend_mgmt(struct hn_softc *sc) { struct task task; HN_LOCK_ASSERT(sc); /* * Make sure that hn_mgmt_taskq0 can nolonger be accessed * through hn_mgmt_taskq. */ TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc); vmbus_chan_run_task(sc->hn_prichan, &task); /* * Make sure that all pending management tasks are completed. */ taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init); taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status); taskqueue_drain_all(sc->hn_mgmt_taskq0); } static void hn_suspend(struct hn_softc *sc) { /* Disable polling. */ hn_polling(sc, 0); /* * If the non-transparent mode VF is activated, the synthetic * device is receiving packets, so the data path of the * synthetic device must be suspended. */ if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || (sc->hn_flags & HN_FLAG_RXVF)) hn_suspend_data(sc); hn_suspend_mgmt(sc); } static void hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt) { int i; KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt, ("invalid TX ring count %d", tx_ring_cnt)); for (i = 0; i < tx_ring_cnt; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; mtx_lock(&txr->hn_tx_lock); txr->hn_suspended = 0; mtx_unlock(&txr->hn_tx_lock); } } static void hn_resume_data(struct hn_softc *sc) { int i; HN_LOCK_ASSERT(sc); /* * Re-enable RX. */ hn_rxfilter_config(sc); /* * Make sure to clear suspend status on "all" TX rings, * since hn_tx_ring_inuse can be changed after * hn_suspend_data(). */ hn_resume_tx(sc, sc->hn_tx_ring_cnt); #ifdef HN_IFSTART_SUPPORT if (!hn_use_if_start) #endif { /* * Flush unused drbrs, since hn_tx_ring_inuse may be * reduced. */ for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i) hn_tx_ring_qflush(&sc->hn_tx_ring[i]); } /* * Kick start TX. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; /* * Use txeof task, so that any pending oactive can be * cleared properly. */ taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_resume_mgmt(struct hn_softc *sc) { sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; /* * Kick off network change detection, if it was pending. * If no network change was pending, start link status * checks, which is more lightweight than network change * detection. */ if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) hn_change_network(sc); else hn_update_link_status(sc); } static void hn_resume(struct hn_softc *sc) { /* * If the non-transparent mode VF is activated, the synthetic * device have to receive packets, so the data path of the * synthetic device must be resumed. */ if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || (sc->hn_flags & HN_FLAG_RXVF)) hn_resume_data(sc); /* * Don't resume link status change if VF is attached/activated. * - In the non-transparent VF mode, the synthetic device marks * link down until the VF is deactivated; i.e. VF is down. * - In transparent VF mode, VF's media status is used until * the VF is detached. */ if ((sc->hn_flags & HN_FLAG_RXVF) == 0 && !(hn_xpnt_vf && sc->hn_vf_ifp != NULL)) hn_resume_mgmt(sc); /* * Re-enable polling if this interface is running and * the polling is requested. */ if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0) hn_polling(sc, sc->hn_pollhz); } static void hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen) { const struct rndis_status_msg *msg; int ofs; if (dlen < sizeof(*msg)) { if_printf(sc->hn_ifp, "invalid RNDIS status\n"); return; } msg = data; switch (msg->rm_status) { case RNDIS_STATUS_MEDIA_CONNECT: case RNDIS_STATUS_MEDIA_DISCONNECT: hn_update_link_status(sc); break; case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG: case RNDIS_STATUS_LINK_SPEED_CHANGE: /* Not really useful; ignore. */ break; case RNDIS_STATUS_NETWORK_CHANGE: ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset); if (dlen < ofs + msg->rm_stbuflen || msg->rm_stbuflen < sizeof(uint32_t)) { if_printf(sc->hn_ifp, "network changed\n"); } else { uint32_t change; memcpy(&change, ((const uint8_t *)msg) + ofs, sizeof(change)); if_printf(sc->hn_ifp, "network changed, change %u\n", change); } hn_change_network(sc); break; default: if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n", msg->rm_status); break; } } static int hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info) { const struct rndis_pktinfo *pi = info_data; uint32_t mask = 0; while (info_dlen != 0) { const void *data; uint32_t dlen; if (__predict_false(info_dlen < sizeof(*pi))) return (EINVAL); if (__predict_false(info_dlen < pi->rm_size)) return (EINVAL); info_dlen -= pi->rm_size; if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) return (EINVAL); if (__predict_false(pi->rm_size < pi->rm_pktinfooffset)) return (EINVAL); dlen = pi->rm_size - pi->rm_pktinfooffset; data = pi->rm_data; switch (pi->rm_type) { case NDIS_PKTINFO_TYPE_VLAN: if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) return (EINVAL); info->vlan_info = *((const uint32_t *)data); mask |= HN_RXINFO_VLAN; break; case NDIS_PKTINFO_TYPE_CSUM: if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) return (EINVAL); info->csum_info = *((const uint32_t *)data); mask |= HN_RXINFO_CSUM; break; case HN_NDIS_PKTINFO_TYPE_HASHVAL: if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) return (EINVAL); info->hash_value = *((const uint32_t *)data); mask |= HN_RXINFO_HASHVAL; break; case HN_NDIS_PKTINFO_TYPE_HASHINF: if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) return (EINVAL); info->hash_info = *((const uint32_t *)data); mask |= HN_RXINFO_HASHINF; break; default: goto next; } if (mask == HN_RXINFO_ALL) { /* All found; done */ break; } next: pi = (const struct rndis_pktinfo *) ((const uint8_t *)pi + pi->rm_size); } /* * Final fixup. * - If there is no hash value, invalidate the hash info. */ if ((mask & HN_RXINFO_HASHVAL) == 0) info->hash_info = HN_NDIS_HASH_INFO_INVALID; return (0); } static __inline bool hn_rndis_check_overlap(int off, int len, int check_off, int check_len) { if (off < check_off) { if (__predict_true(off + len <= check_off)) return (false); } else if (off > check_off) { if (__predict_true(check_off + check_len <= off)) return (false); } return (true); } static void hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) { const struct rndis_packet_msg *pkt; struct hn_rxinfo info; int data_off, pktinfo_off, data_len, pktinfo_len; /* * Check length. */ if (__predict_false(dlen < sizeof(*pkt))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n"); return; } pkt = data; if (__predict_false(dlen < pkt->rm_len)) { if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, " "dlen %d, msglen %u\n", dlen, pkt->rm_len); return; } if (__predict_false(pkt->rm_len < pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, " "msglen %u, data %u, oob %u, pktinfo %u\n", pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen, pkt->rm_pktinfolen); return; } if (__predict_false(pkt->rm_datalen == 0)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n"); return; } /* * Check offests. */ #define IS_OFFSET_INVALID(ofs) \ ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \ ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)) /* XXX Hyper-V does not meet data offset alignment requirement */ if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "data offset %u\n", pkt->rm_dataoffset); return; } if (__predict_false(pkt->rm_oobdataoffset > 0 && IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob offset %u\n", pkt->rm_oobdataoffset); return; } if (__predict_true(pkt->rm_pktinfooffset > 0) && __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo offset %u\n", pkt->rm_pktinfooffset); return; } #undef IS_OFFSET_INVALID data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset); data_len = pkt->rm_datalen; pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset); pktinfo_len = pkt->rm_pktinfolen; /* * Check OOB coverage. */ if (__predict_false(pkt->rm_oobdatalen != 0)) { int oob_off, oob_len; if_printf(rxr->hn_ifp, "got oobdata\n"); oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset); oob_len = pkt->rm_oobdatalen; if (__predict_false(oob_off + oob_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overflow, msglen %u, oob abs %d len %d\n", pkt->rm_len, oob_off, oob_len); return; } /* * Check against data. */ if (hn_rndis_check_overlap(oob_off, oob_len, data_off, data_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overlaps data, oob abs %d len %d, " "data abs %d len %d\n", oob_off, oob_len, data_off, data_len); return; } /* * Check against pktinfo. */ if (pktinfo_len != 0 && hn_rndis_check_overlap(oob_off, oob_len, pktinfo_off, pktinfo_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overlaps pktinfo, oob abs %d len %d, " "pktinfo abs %d len %d\n", oob_off, oob_len, pktinfo_off, pktinfo_len); return; } } /* * Check per-packet-info coverage and find useful per-packet-info. */ info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; info.hash_info = HN_NDIS_HASH_INFO_INVALID; if (__predict_true(pktinfo_len != 0)) { bool overlap; int error; if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo overflow, msglen %u, " "pktinfo abs %d len %d\n", pkt->rm_len, pktinfo_off, pktinfo_len); return; } /* * Check packet info coverage. */ overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, data_off, data_len); if (__predict_false(overlap)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo overlap data, pktinfo abs %d len %d, " "data abs %d len %d\n", pktinfo_off, pktinfo_len, data_off, data_len); return; } /* * Find useful per-packet-info. */ error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, pktinfo_len, &info); if (__predict_false(error)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " "pktinfo\n"); return; } } if (__predict_false(data_off + data_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "data overflow, msglen %u, data abs %d len %d\n", pkt->rm_len, data_off, data_len); return; } hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); } static __inline void hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen) { const struct rndis_msghdr *hdr; if (__predict_false(dlen < sizeof(*hdr))) { if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); return; } hdr = data; if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) { /* Hot data path. */ hn_rndis_rx_data(rxr, data, dlen); /* Done! */ return; } if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG) hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen); else hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen); } static void hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt) { const struct hn_nvs_hdr *hdr; if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) { if_printf(sc->hn_ifp, "invalid nvs notify\n"); return; } hdr = VMBUS_CHANPKT_CONST_DATA(pkt); if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) { /* Useless; ignore */ return; } if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type); } static void hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkt) { struct hn_nvs_sendctx *sndc; sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid; sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt), VMBUS_CHANPKT_DATALEN(pkt)); /* * NOTE: * 'sndc' CAN NOT be accessed anymore, since it can be freed by * its callback. */ } static void hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkthdr) { const struct vmbus_chanpkt_rxbuf *pkt; const struct hn_nvs_hdr *nvs_hdr; int count, i, hlen; if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) { if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n"); return; } nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr); /* Make sure that this is a RNDIS message. */ if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) { if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n", nvs_hdr->nvs_type); return; } hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen); if (__predict_false(hlen < sizeof(*pkt))) { if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n"); return; } pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr; if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) { if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n", pkt->cp_rxbuf_id); return; } count = pkt->cp_rxbuf_cnt; if (__predict_false(hlen < __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) { if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count); return; } /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ for (i = 0; i < count; ++i) { int ofs, len; ofs = pkt->cp_rxbuf[i].rb_ofs; len = pkt->cp_rxbuf[i].rb_len; if (__predict_false(ofs + len > HN_RXBUF_SIZE)) { if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, " "ofs %d, len %d\n", i, ofs, len); continue; } hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len); } /* * Ack the consumed RXBUF associated w/ this channel packet, * so that this RXBUF can be recycled by the hypervisor. */ hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid); } static void hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, uint64_t tid) { struct hn_nvs_rndis_ack ack; int retries, error; ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK; ack.nvs_status = HN_NVS_STATUS_OK; retries = 0; again: error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid); if (__predict_false(error == EAGAIN)) { /* * NOTE: * This should _not_ happen in real world, since the * consumption of the TX bufring from the TX path is * controlled. */ if (rxr->hn_ack_failed == 0) if_printf(rxr->hn_ifp, "RXBUF ack retry\n"); rxr->hn_ack_failed++; retries++; if (retries < 10) { DELAY(100); goto again; } /* RXBUF leaks! */ if_printf(rxr->hn_ifp, "RXBUF ack failed\n"); } } static void hn_chan_callback(struct vmbus_channel *chan, void *xrxr) { struct hn_rx_ring *rxr = xrxr; struct hn_softc *sc = rxr->hn_ifp->if_softc; for (;;) { struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf; int error, pktlen; pktlen = rxr->hn_pktbuf_len; error = vmbus_chan_recv_pkt(chan, pkt, &pktlen); if (__predict_false(error == ENOBUFS)) { void *nbuf; int nlen; /* * Expand channel packet buffer. * * XXX * Use M_WAITOK here, since allocation failure * is fatal. */ nlen = rxr->hn_pktbuf_len * 2; while (nlen < pktlen) nlen *= 2; nbuf = malloc(nlen, M_DEVBUF, M_WAITOK); if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n", rxr->hn_pktbuf_len, nlen); free(rxr->hn_pktbuf, M_DEVBUF); rxr->hn_pktbuf = nbuf; rxr->hn_pktbuf_len = nlen; /* Retry! */ continue; } else if (__predict_false(error == EAGAIN)) { /* No more channel packets; done! */ break; } KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error)); switch (pkt->cph_type) { case VMBUS_CHANPKT_TYPE_COMP: hn_nvs_handle_comp(sc, chan, pkt); break; case VMBUS_CHANPKT_TYPE_RXBUF: hn_nvs_handle_rxbuf(rxr, chan, pkt); break; case VMBUS_CHANPKT_TYPE_INBAND: hn_nvs_handle_notify(sc, pkt); break; default: if_printf(rxr->hn_ifp, "unknown chan pkt %u\n", pkt->cph_type); break; } } hn_chan_rollup(rxr, rxr->hn_txr); } static void hn_sysinit(void *arg __unused) { int i; hn_udpcs_fixup = counter_u64_alloc(M_WAITOK); #ifdef HN_IFSTART_SUPPORT /* * Don't use ifnet.if_start if transparent VF mode is requested; * mainly due to the IFF_DRV_OACTIVE flag. */ if (hn_xpnt_vf && hn_use_if_start) { hn_use_if_start = 0; printf("hn: tranparent VF mode, if_transmit will be used, " "instead of if_start\n"); } #endif if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) { printf("hn: invalid transparent VF attach routing " "wait timeout %d, reset to %d\n", hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN); hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; } /* * Initialize VF map. */ rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE); hn_vfmap_size = HN_VFMAP_SIZE_DEF; hn_vfmap = malloc(sizeof(struct ifnet *) * hn_vfmap_size, M_DEVBUF, M_WAITOK | M_ZERO); /* * Fix the # of TX taskqueues. */ if (hn_tx_taskq_cnt <= 0) hn_tx_taskq_cnt = 1; else if (hn_tx_taskq_cnt > mp_ncpus) hn_tx_taskq_cnt = mp_ncpus; /* * Fix the TX taskqueue mode. */ switch (hn_tx_taskq_mode) { case HN_TX_TASKQ_M_INDEP: case HN_TX_TASKQ_M_GLOBAL: case HN_TX_TASKQ_M_EVTTQ: break; default: hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP; break; } if (vm_guest != VM_GUEST_HV) return; if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL) return; hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), M_DEVBUF, M_WAITOK); for (i = 0; i < hn_tx_taskq_cnt; ++i) { hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &hn_tx_taskque[i]); taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET, "hn tx%d", i); } } SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL); static void hn_sysuninit(void *arg __unused) { if (hn_tx_taskque != NULL) { int i; for (i = 0; i < hn_tx_taskq_cnt; ++i) taskqueue_free(hn_tx_taskque[i]); free(hn_tx_taskque, M_DEVBUF); } if (hn_vfmap != NULL) free(hn_vfmap, M_DEVBUF); rm_destroy(&hn_vfmap_lock); counter_u64_free(hn_udpcs_fixup); } SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL); Index: head/sys/dev/if_ndis/if_ndis.c =================================================================== --- head/sys/dev/if_ndis/if_ndis.c (revision 357009) +++ head/sys/dev/if_ndis/if_ndis.c (revision 357010) @@ -1,3423 +1,3424 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * WPA support originally contributed by Arvind Srinivasan * then hacked upon mercilessly by my. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NDIS_DEBUG #ifdef NDIS_DEBUG #define DPRINTF(x) do { if (ndis_debug > 0) printf x; } while (0) int ndis_debug = 0; SYSCTL_INT(_debug, OID_AUTO, ndis, CTLFLAG_RW, &ndis_debug, 0, "if_ndis debug level"); #else #define DPRINTF(x) #endif SYSCTL_DECL(_hw_ndisusb); int ndisusb_halt = 1; SYSCTL_INT(_hw_ndisusb, OID_AUTO, halt, CTLFLAG_RW, &ndisusb_halt, 0, "Halt NDIS USB driver when it's attached"); /* 0 - 30 dBm to mW conversion table */ static const uint16_t dBm2mW[] = { 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 25, 28, 32, 35, 40, 45, 50, 56, 63, 71, 79, 89, 100, 112, 126, 141, 158, 178, 200, 224, 251, 282, 316, 355, 398, 447, 501, 562, 631, 708, 794, 891, 1000 }; MODULE_DEPEND(ndis, ether, 1, 1, 1); MODULE_DEPEND(ndis, wlan, 1, 1, 1); MODULE_DEPEND(ndis, ndisapi, 1, 1, 1); MODULE_VERSION(ndis, 1); int ndis_attach (device_t); int ndis_detach (device_t); int ndis_suspend (device_t); int ndis_resume (device_t); void ndis_shutdown (device_t); int ndisdrv_modevent (module_t, int, void *); static void ndis_txeof (ndis_handle, ndis_packet *, ndis_status); static void ndis_rxeof (ndis_handle, ndis_packet **, uint32_t); static void ndis_rxeof_eth (ndis_handle, ndis_handle, char *, void *, uint32_t, void *, uint32_t, uint32_t); static void ndis_rxeof_done (ndis_handle); static void ndis_rxeof_xfr (kdpc *, ndis_handle, void *, void *); static void ndis_rxeof_xfr_done (ndis_handle, ndis_packet *, uint32_t, uint32_t); static void ndis_linksts (ndis_handle, ndis_status, void *, uint32_t); static void ndis_linksts_done (ndis_handle); /* We need to wrap these functions for amd64. */ static funcptr ndis_txeof_wrap; static funcptr ndis_rxeof_wrap; static funcptr ndis_rxeof_eth_wrap; static funcptr ndis_rxeof_done_wrap; static funcptr ndis_rxeof_xfr_wrap; static funcptr ndis_rxeof_xfr_done_wrap; static funcptr ndis_linksts_wrap; static funcptr ndis_linksts_done_wrap; static funcptr ndis_ticktask_wrap; static funcptr ndis_ifstarttask_wrap; static funcptr ndis_resettask_wrap; static funcptr ndis_inputtask_wrap; static struct ieee80211vap *ndis_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void ndis_vap_delete (struct ieee80211vap *); static void ndis_tick (void *); static void ndis_ticktask (device_object *, void *); static int ndis_raw_xmit (struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static void ndis_update_mcast (struct ieee80211com *); static void ndis_update_promisc (struct ieee80211com *); static void ndis_ifstart (struct ifnet *); static void ndis_ifstarttask (device_object *, void *); static void ndis_resettask (device_object *, void *); static void ndis_inputtask (device_object *, void *); static int ndis_ifioctl (struct ifnet *, u_long, caddr_t); static int ndis_newstate (struct ieee80211vap *, enum ieee80211_state, int); static int ndis_nettype_chan (uint32_t); static int ndis_nettype_mode (uint32_t); static void ndis_scan (void *); static void ndis_scan_results (struct ndis_softc *); static void ndis_scan_start (struct ieee80211com *); static void ndis_scan_end (struct ieee80211com *); static void ndis_set_channel (struct ieee80211com *); static void ndis_scan_curchan (struct ieee80211_scan_state *, unsigned long); static void ndis_scan_mindwell (struct ieee80211_scan_state *); static void ndis_init (void *); static void ndis_stop (struct ndis_softc *); static int ndis_ifmedia_upd (struct ifnet *); static void ndis_ifmedia_sts (struct ifnet *, struct ifmediareq *); static int ndis_get_bssid_list (struct ndis_softc *, ndis_80211_bssid_list_ex **); static int ndis_get_assoc (struct ndis_softc *, ndis_wlan_bssid_ex **); static int ndis_probe_offload (struct ndis_softc *); static int ndis_set_offload (struct ndis_softc *); static void ndis_getstate_80211 (struct ndis_softc *); static void ndis_setstate_80211 (struct ndis_softc *); static void ndis_auth_and_assoc (struct ndis_softc *, struct ieee80211vap *); static void ndis_media_status (struct ifnet *, struct ifmediareq *); static int ndis_set_cipher (struct ndis_softc *, int); static int ndis_set_wpa (struct ndis_softc *, void *, int); static int ndis_add_key (struct ieee80211vap *, const struct ieee80211_key *); static int ndis_del_key (struct ieee80211vap *, const struct ieee80211_key *); static void ndis_setmulti (struct ndis_softc *); static void ndis_map_sclist (void *, bus_dma_segment_t *, int, bus_size_t, int); static int ndis_ifattach(struct ndis_softc *); static int ndis_80211attach(struct ndis_softc *); static int ndis_80211ioctl(struct ieee80211com *, u_long , void *); static int ndis_80211transmit(struct ieee80211com *, struct mbuf *); static void ndis_80211parent(struct ieee80211com *); static int ndisdrv_loaded = 0; /* * This routine should call windrv_load() once for each driver * image. This will do the relocation and dynalinking for the * image, and create a Windows driver object which will be * saved in our driver database. */ int ndisdrv_modevent(mod, cmd, arg) module_t mod; int cmd; void *arg; { int error = 0; switch (cmd) { case MOD_LOAD: ndisdrv_loaded++; if (ndisdrv_loaded > 1) break; windrv_wrap((funcptr)ndis_rxeof, &ndis_rxeof_wrap, 3, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_eth, &ndis_rxeof_eth_wrap, 8, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_done, &ndis_rxeof_done_wrap, 1, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_xfr, &ndis_rxeof_xfr_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_xfr_done, &ndis_rxeof_xfr_done_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_txeof, &ndis_txeof_wrap, 3, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_linksts, &ndis_linksts_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_linksts_done, &ndis_linksts_done_wrap, 1, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_ticktask, &ndis_ticktask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_ifstarttask, &ndis_ifstarttask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_resettask, &ndis_resettask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_inputtask, &ndis_inputtask_wrap, 2, WINDRV_WRAP_STDCALL); break; case MOD_UNLOAD: ndisdrv_loaded--; if (ndisdrv_loaded > 0) break; /* fallthrough */ case MOD_SHUTDOWN: windrv_unwrap(ndis_rxeof_wrap); windrv_unwrap(ndis_rxeof_eth_wrap); windrv_unwrap(ndis_rxeof_done_wrap); windrv_unwrap(ndis_rxeof_xfr_wrap); windrv_unwrap(ndis_rxeof_xfr_done_wrap); windrv_unwrap(ndis_txeof_wrap); windrv_unwrap(ndis_linksts_wrap); windrv_unwrap(ndis_linksts_done_wrap); windrv_unwrap(ndis_ticktask_wrap); windrv_unwrap(ndis_ifstarttask_wrap); windrv_unwrap(ndis_resettask_wrap); windrv_unwrap(ndis_inputtask_wrap); break; default: error = EINVAL; break; } return (error); } struct mclist_ctx { uint8_t *mclist; int mclistsz; }; static u_int ndis_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { struct mclist_ctx *ctx = arg; if (cnt < ctx->mclistsz) bcopy(LLADDR(sdl), ctx->mclist + (ETHER_ADDR_LEN * cnt), ETHER_ADDR_LEN); return (1); } /* * Program the 64-bit multicast hash filter. */ static void ndis_setmulti(sc) struct ndis_softc *sc; { struct ifnet *ifp; struct mclist_ctx ctx; int len, error; if (!NDIS_INITIALIZED(sc)) return; if (sc->ndis_80211) return; ifp = sc->ifp; if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) { sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set allmulti failed: %d\n", error); return; } if (if_llmaddr_count(ifp) == 0) return; len = sizeof(ctx.mclistsz); ndis_get_info(sc, OID_802_3_MAXIMUM_LIST_SIZE, &ctx.mclistsz, &len); ctx.mclist = malloc(ETHER_ADDR_LEN * ctx.mclistsz, M_TEMP, M_NOWAIT | M_ZERO); if (ctx.mclist == NULL) { sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; goto out; } sc->ndis_filter |= NDIS_PACKET_TYPE_MULTICAST; len = if_foreach_llmaddr(ifp, ndis_copy_maddr, &ctx); if (len > ctx.mclistsz) { sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; sc->ndis_filter &= ~NDIS_PACKET_TYPE_MULTICAST; goto out; } len = len * ETHER_ADDR_LEN; error = ndis_set_info(sc, OID_802_3_MULTICAST_LIST, ctx.mclist, &len); if (error) { device_printf(sc->ndis_dev, "set mclist failed: %d\n", error); sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; sc->ndis_filter &= ~NDIS_PACKET_TYPE_MULTICAST; } out: free(ctx.mclist, M_TEMP); len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set multi failed: %d\n", error); } static int ndis_set_offload(sc) struct ndis_softc *sc; { ndis_task_offload *nto; ndis_task_offload_hdr *ntoh; ndis_task_tcpip_csum *nttc; struct ifnet *ifp; int len, error; if (!NDIS_INITIALIZED(sc)) return (EINVAL); if (sc->ndis_80211) return (EINVAL); /* See if there's anything to set. */ ifp = sc->ifp; error = ndis_probe_offload(sc); if (error) return (error); if (sc->ndis_hwassist == 0 && ifp->if_capabilities == 0) return (0); len = sizeof(ndis_task_offload_hdr) + sizeof(ndis_task_offload) + sizeof(ndis_task_tcpip_csum); ntoh = malloc(len, M_TEMP, M_NOWAIT|M_ZERO); if (ntoh == NULL) return (ENOMEM); ntoh->ntoh_vers = NDIS_TASK_OFFLOAD_VERSION; ntoh->ntoh_len = sizeof(ndis_task_offload_hdr); ntoh->ntoh_offset_firsttask = sizeof(ndis_task_offload_hdr); ntoh->ntoh_encapfmt.nef_encaphdrlen = sizeof(struct ether_header); ntoh->ntoh_encapfmt.nef_encap = NDIS_ENCAP_IEEE802_3; ntoh->ntoh_encapfmt.nef_flags = NDIS_ENCAPFLAG_FIXEDHDRLEN; nto = (ndis_task_offload *)((char *)ntoh + ntoh->ntoh_offset_firsttask); nto->nto_vers = NDIS_TASK_OFFLOAD_VERSION; nto->nto_len = sizeof(ndis_task_offload); nto->nto_task = NDIS_TASK_TCPIP_CSUM; nto->nto_offset_nexttask = 0; nto->nto_taskbuflen = sizeof(ndis_task_tcpip_csum); nttc = (ndis_task_tcpip_csum *)nto->nto_taskbuf; if (ifp->if_capenable & IFCAP_TXCSUM) nttc->nttc_v4tx = sc->ndis_v4tx; if (ifp->if_capenable & IFCAP_RXCSUM) nttc->nttc_v4rx = sc->ndis_v4rx; error = ndis_set_info(sc, OID_TCP_TASK_OFFLOAD, ntoh, &len); free(ntoh, M_TEMP); return (error); } static int ndis_probe_offload(sc) struct ndis_softc *sc; { ndis_task_offload *nto; ndis_task_offload_hdr *ntoh; ndis_task_tcpip_csum *nttc = NULL; struct ifnet *ifp; int len, error, dummy; ifp = sc->ifp; len = sizeof(dummy); error = ndis_get_info(sc, OID_TCP_TASK_OFFLOAD, &dummy, &len); if (error != ENOSPC) return (error); ntoh = malloc(len, M_TEMP, M_NOWAIT|M_ZERO); if (ntoh == NULL) return (ENOMEM); ntoh->ntoh_vers = NDIS_TASK_OFFLOAD_VERSION; ntoh->ntoh_len = sizeof(ndis_task_offload_hdr); ntoh->ntoh_encapfmt.nef_encaphdrlen = sizeof(struct ether_header); ntoh->ntoh_encapfmt.nef_encap = NDIS_ENCAP_IEEE802_3; ntoh->ntoh_encapfmt.nef_flags = NDIS_ENCAPFLAG_FIXEDHDRLEN; error = ndis_get_info(sc, OID_TCP_TASK_OFFLOAD, ntoh, &len); if (error) { free(ntoh, M_TEMP); return (error); } if (ntoh->ntoh_vers != NDIS_TASK_OFFLOAD_VERSION) { free(ntoh, M_TEMP); return (EINVAL); } nto = (ndis_task_offload *)((char *)ntoh + ntoh->ntoh_offset_firsttask); while (1) { switch (nto->nto_task) { case NDIS_TASK_TCPIP_CSUM: nttc = (ndis_task_tcpip_csum *)nto->nto_taskbuf; break; /* Don't handle these yet. */ case NDIS_TASK_IPSEC: case NDIS_TASK_TCP_LARGESEND: default: break; } if (nto->nto_offset_nexttask == 0) break; nto = (ndis_task_offload *)((char *)nto + nto->nto_offset_nexttask); } if (nttc == NULL) { free(ntoh, M_TEMP); return (ENOENT); } sc->ndis_v4tx = nttc->nttc_v4tx; sc->ndis_v4rx = nttc->nttc_v4rx; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_IP_CSUM) sc->ndis_hwassist |= CSUM_IP; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_TCP_CSUM) sc->ndis_hwassist |= CSUM_TCP; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_UDP_CSUM) sc->ndis_hwassist |= CSUM_UDP; if (sc->ndis_hwassist) ifp->if_capabilities |= IFCAP_TXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_IP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_TCP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_UDP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; free(ntoh, M_TEMP); return (0); } static int ndis_nettype_chan(uint32_t type) { switch (type) { case NDIS_80211_NETTYPE_11FH: return (IEEE80211_CHAN_FHSS); case NDIS_80211_NETTYPE_11DS: return (IEEE80211_CHAN_B); case NDIS_80211_NETTYPE_11OFDM5: return (IEEE80211_CHAN_A); case NDIS_80211_NETTYPE_11OFDM24: return (IEEE80211_CHAN_G); } DPRINTF(("unknown channel nettype %d\n", type)); return (IEEE80211_CHAN_B); /* Default to 11B chan */ } static int ndis_nettype_mode(uint32_t type) { switch (type) { case NDIS_80211_NETTYPE_11FH: return (IEEE80211_MODE_FH); case NDIS_80211_NETTYPE_11DS: return (IEEE80211_MODE_11B); case NDIS_80211_NETTYPE_11OFDM5: return (IEEE80211_MODE_11A); case NDIS_80211_NETTYPE_11OFDM24: return (IEEE80211_MODE_11G); } DPRINTF(("unknown mode nettype %d\n", type)); return (IEEE80211_MODE_AUTO); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ int ndis_attach(device_t dev) { struct ndis_softc *sc; driver_object *pdrv; device_object *pdo; int error = 0, len; int i; sc = device_get_softc(dev); mtx_init(&sc->ndis_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); KeInitializeSpinLock(&sc->ndis_rxlock); KeInitializeSpinLock(&sc->ndisusb_tasklock); KeInitializeSpinLock(&sc->ndisusb_xferdonelock); InitializeListHead(&sc->ndis_shlist); InitializeListHead(&sc->ndisusb_tasklist); InitializeListHead(&sc->ndisusb_xferdonelist); callout_init(&sc->ndis_stat_callout, 1); mbufq_init(&sc->ndis_rxqueue, INT_MAX); /* XXXGL: sane maximum */ /* Create sysctl registry nodes */ ndis_create_sysctls(sc); /* Find the PDO for this device instance. */ if (sc->ndis_iftype == PCIBus) pdrv = windrv_lookup(0, "PCI Bus"); else if (sc->ndis_iftype == PCMCIABus) pdrv = windrv_lookup(0, "PCCARD Bus"); else pdrv = windrv_lookup(0, "USB Bus"); pdo = windrv_find_pdo(pdrv, dev); /* * Create a new functional device object for this * device. This is what creates the miniport block * for this device instance. */ if (NdisAddDevice(sc->ndis_dobj, pdo) != STATUS_SUCCESS) { device_printf(dev, "failed to create FDO!\n"); error = ENXIO; goto fail; } /* Tell the user what version of the API the driver is using. */ device_printf(dev, "NDIS API version: %d.%d\n", sc->ndis_chars->nmc_version_major, sc->ndis_chars->nmc_version_minor); /* Do resource conversion. */ if (sc->ndis_iftype == PCMCIABus || sc->ndis_iftype == PCIBus) ndis_convert_res(sc); else sc->ndis_block->nmb_rlist = NULL; /* Install our RX and TX interrupt handlers. */ sc->ndis_block->nmb_senddone_func = ndis_txeof_wrap; sc->ndis_block->nmb_pktind_func = ndis_rxeof_wrap; sc->ndis_block->nmb_ethrxindicate_func = ndis_rxeof_eth_wrap; sc->ndis_block->nmb_ethrxdone_func = ndis_rxeof_done_wrap; sc->ndis_block->nmb_tdcond_func = ndis_rxeof_xfr_done_wrap; /* Override the status handler so we can detect link changes. */ sc->ndis_block->nmb_status_func = ndis_linksts_wrap; sc->ndis_block->nmb_statusdone_func = ndis_linksts_done_wrap; /* Set up work item handlers. */ sc->ndis_tickitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_startitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_resetitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_inputitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndisusb_xferdoneitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndisusb_taskitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); KeInitializeDpc(&sc->ndis_rxdpc, ndis_rxeof_xfr_wrap, sc->ndis_block); /* Call driver's init routine. */ if (ndis_init_nic(sc)) { device_printf(dev, "init handler failed\n"); error = ENXIO; goto fail; } /* * Figure out how big to make the TX buffer pool. */ len = sizeof(sc->ndis_maxpkts); if (ndis_get_info(sc, OID_GEN_MAXIMUM_SEND_PACKETS, &sc->ndis_maxpkts, &len)) { device_printf(dev, "failed to get max TX packets\n"); error = ENXIO; goto fail; } /* * If this is a deserialized miniport, we don't have * to honor the OID_GEN_MAXIMUM_SEND_PACKETS result. */ if (!NDIS_SERIALIZED(sc->ndis_block)) sc->ndis_maxpkts = NDIS_TXPKTS; /* Enforce some sanity, just in case. */ if (sc->ndis_maxpkts == 0) sc->ndis_maxpkts = 10; sc->ndis_txarray = malloc(sizeof(ndis_packet *) * sc->ndis_maxpkts, M_DEVBUF, M_NOWAIT|M_ZERO); /* Allocate a pool of ndis_packets for TX encapsulation. */ NdisAllocatePacketPool(&i, &sc->ndis_txpool, sc->ndis_maxpkts, PROTOCOL_RESERVED_SIZE_IN_PACKET); if (i != NDIS_STATUS_SUCCESS) { sc->ndis_txpool = NULL; device_printf(dev, "failed to allocate TX packet pool"); error = ENOMEM; goto fail; } sc->ndis_txpending = sc->ndis_maxpkts; sc->ndis_oidcnt = 0; /* Get supported oid list. */ ndis_get_supported_oids(sc, &sc->ndis_oids, &sc->ndis_oidcnt); /* If the NDIS module requested scatter/gather, init maps. */ if (sc->ndis_sc) ndis_init_dma(sc); /* * See if the OID_802_11_CONFIGURATION OID is * supported by this driver. If it is, then this an 802.11 * wireless driver, and we should set up media for wireless. */ for (i = 0; i < sc->ndis_oidcnt; i++) if (sc->ndis_oids[i] == OID_802_11_CONFIGURATION) { sc->ndis_80211 = 1; break; } if (sc->ndis_80211) error = ndis_80211attach(sc); else error = ndis_ifattach(sc); fail: if (error) { ndis_detach(dev); return (error); } if (sc->ndis_iftype == PNPBus && ndisusb_halt == 0) return (error); DPRINTF(("attach done.\n")); /* We're done talking to the NIC for now; halt it. */ ndis_halt_nic(sc); DPRINTF(("halting done.\n")); return (error); } static int ndis_80211attach(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; ndis_80211_rates_ex rates; struct ndis_80211_nettype_list *ntl; uint32_t arg; int mode, i, r, len, nonettypes = 1; uint8_t bands[IEEE80211_MODE_BYTES] = { 0 }; callout_init(&sc->ndis_scan_callout, 1); ic->ic_softc = sc; ic->ic_ioctl = ndis_80211ioctl; ic->ic_name = device_get_nameunit(sc->ndis_dev); ic->ic_opmode = IEEE80211_M_STA; ic->ic_phytype = IEEE80211_T_DS; ic->ic_caps = IEEE80211_C_8023ENCAP | IEEE80211_C_STA | IEEE80211_C_IBSS; setbit(ic->ic_modecaps, IEEE80211_MODE_AUTO); len = 0; r = ndis_get_info(sc, OID_802_11_NETWORK_TYPES_SUPPORTED, NULL, &len); if (r != ENOSPC) goto nonettypes; ntl = malloc(len, M_DEVBUF, M_WAITOK | M_ZERO); r = ndis_get_info(sc, OID_802_11_NETWORK_TYPES_SUPPORTED, ntl, &len); if (r != 0) { free(ntl, M_DEVBUF); goto nonettypes; } for (i = 0; i < ntl->ntl_items; i++) { mode = ndis_nettype_mode(ntl->ntl_type[i]); if (mode) { nonettypes = 0; setbit(ic->ic_modecaps, mode); setbit(bands, mode); } else device_printf(sc->ndis_dev, "Unknown nettype %d\n", ntl->ntl_type[i]); } free(ntl, M_DEVBUF); nonettypes: /* Default to 11b channels if the card did not supply any */ if (nonettypes) { setbit(ic->ic_modecaps, IEEE80211_MODE_11B); setbit(bands, IEEE80211_MODE_11B); } len = sizeof(rates); bzero((char *)&rates, len); r = ndis_get_info(sc, OID_802_11_SUPPORTED_RATES, (void *)rates, &len); if (r != 0) device_printf(sc->ndis_dev, "get rates failed: 0x%x\n", r); /* * Since the supported rates only up to 8 can be supported, * if this is not 802.11b we're just going to be faking it * all up to heck. */ #define TESTSETRATE(x, y) \ do { \ int i; \ for (i = 0; i < ic->ic_sup_rates[x].rs_nrates; i++) { \ if (ic->ic_sup_rates[x].rs_rates[i] == (y)) \ break; \ } \ if (i == ic->ic_sup_rates[x].rs_nrates) { \ ic->ic_sup_rates[x].rs_rates[i] = (y); \ ic->ic_sup_rates[x].rs_nrates++; \ } \ } while (0) #define SETRATE(x, y) \ ic->ic_sup_rates[x].rs_rates[ic->ic_sup_rates[x].rs_nrates] = (y) #define INCRATE(x) \ ic->ic_sup_rates[x].rs_nrates++ ic->ic_curmode = IEEE80211_MODE_AUTO; if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) ic->ic_sup_rates[IEEE80211_MODE_11A].rs_nrates = 0; if (isset(ic->ic_modecaps, IEEE80211_MODE_11B)) ic->ic_sup_rates[IEEE80211_MODE_11B].rs_nrates = 0; if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) ic->ic_sup_rates[IEEE80211_MODE_11G].rs_nrates = 0; for (i = 0; i < len; i++) { switch (rates[i] & IEEE80211_RATE_VAL) { case 2: case 4: case 11: case 10: case 22: if (isclr(ic->ic_modecaps, IEEE80211_MODE_11B)) { /* Lazy-init 802.11b. */ setbit(ic->ic_modecaps, IEEE80211_MODE_11B); ic->ic_sup_rates[IEEE80211_MODE_11B]. rs_nrates = 0; } SETRATE(IEEE80211_MODE_11B, rates[i]); INCRATE(IEEE80211_MODE_11B); break; default: if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) { SETRATE(IEEE80211_MODE_11A, rates[i]); INCRATE(IEEE80211_MODE_11A); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) { SETRATE(IEEE80211_MODE_11G, rates[i]); INCRATE(IEEE80211_MODE_11G); } break; } } /* * If the hardware supports 802.11g, it most * likely supports 802.11b and all of the * 802.11b and 802.11g speeds, so maybe we can * just cheat here. Just how in the heck do * we detect turbo modes, though? */ if (isset(ic->ic_modecaps, IEEE80211_MODE_11B)) { TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|2); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|4); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|11); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|22); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) { TESTSETRATE(IEEE80211_MODE_11G, 48); TESTSETRATE(IEEE80211_MODE_11G, 72); TESTSETRATE(IEEE80211_MODE_11G, 96); TESTSETRATE(IEEE80211_MODE_11G, 108); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) { TESTSETRATE(IEEE80211_MODE_11A, 48); TESTSETRATE(IEEE80211_MODE_11A, 72); TESTSETRATE(IEEE80211_MODE_11A, 96); TESTSETRATE(IEEE80211_MODE_11A, 108); } #undef SETRATE #undef INCRATE #undef TESTSETRATE ieee80211_init_channels(ic, NULL, bands); /* * To test for WPA support, we need to see if we can * set AUTHENTICATION_MODE to WPA and read it back * successfully. */ i = sizeof(arg); arg = NDIS_80211_AUTHMODE_WPA; r = ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); if (r == 0) { r = ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); if (r == 0 && arg == NDIS_80211_AUTHMODE_WPA) ic->ic_caps |= IEEE80211_C_WPA; } /* * To test for supported ciphers, we set each * available encryption type in descending order. * If ENC3 works, then we have WEP, TKIP and AES. * If only ENC2 works, then we have WEP and TKIP. * If only ENC1 works, then we have just WEP. */ i = sizeof(arg); arg = NDIS_80211_WEPSTAT_ENC3ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) { ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP | IEEE80211_CRYPTO_TKIP | IEEE80211_CRYPTO_AES_CCM; goto got_crypto; } arg = NDIS_80211_WEPSTAT_ENC2ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) { ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP | IEEE80211_CRYPTO_TKIP; goto got_crypto; } arg = NDIS_80211_WEPSTAT_ENC1ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP; got_crypto: i = sizeof(arg); r = ndis_get_info(sc, OID_802_11_POWER_MODE, &arg, &i); if (r == 0) ic->ic_caps |= IEEE80211_C_PMGT; r = ndis_get_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &i); if (r == 0) ic->ic_caps |= IEEE80211_C_TXPMGT; /* * Get station address from the driver. */ len = sizeof(ic->ic_macaddr); ndis_get_info(sc, OID_802_3_CURRENT_ADDRESS, &ic->ic_macaddr, &len); ieee80211_ifattach(ic); ic->ic_raw_xmit = ndis_raw_xmit; ic->ic_scan_start = ndis_scan_start; ic->ic_scan_end = ndis_scan_end; ic->ic_set_channel = ndis_set_channel; ic->ic_scan_curchan = ndis_scan_curchan; ic->ic_scan_mindwell = ndis_scan_mindwell; ic->ic_bsschan = IEEE80211_CHAN_ANYC; ic->ic_vap_create = ndis_vap_create; ic->ic_vap_delete = ndis_vap_delete; ic->ic_update_mcast = ndis_update_mcast; ic->ic_update_promisc = ndis_update_promisc; ic->ic_transmit = ndis_80211transmit; ic->ic_parent = ndis_80211parent; if (bootverbose) ieee80211_announce(ic); return (0); } static int ndis_ifattach(struct ndis_softc *sc) { struct ifnet *ifp; u_char eaddr[ETHER_ADDR_LEN]; int len; ifp = if_alloc(IFT_ETHER); if (ifp == NULL) return (ENOSPC); sc->ifp = ifp; ifp->if_softc = sc; /* Check for task offload support. */ ndis_probe_offload(sc); /* * Get station address from the driver. */ len = sizeof(eaddr); ndis_get_info(sc, OID_802_3_CURRENT_ADDRESS, eaddr, &len); if_initname(ifp, device_get_name(sc->ndis_dev), device_get_unit(sc->ndis_dev)); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | + IFF_NEEDSEPOCH; ifp->if_ioctl = ndis_ifioctl; ifp->if_start = ndis_ifstart; ifp->if_init = ndis_init; ifp->if_baudrate = 10000000; IFQ_SET_MAXLEN(&ifp->if_snd, 50); ifp->if_snd.ifq_drv_maxlen = 25; IFQ_SET_READY(&ifp->if_snd); ifp->if_capenable = ifp->if_capabilities; ifp->if_hwassist = sc->ndis_hwassist; ifmedia_init(&sc->ifmedia, IFM_IMASK, ndis_ifmedia_upd, ndis_ifmedia_sts); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL); ifmedia_set(&sc->ifmedia, IFM_ETHER|IFM_AUTO); ether_ifattach(ifp, eaddr); return (0); } static struct ieee80211vap * ndis_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct ndis_vap *nvp; struct ieee80211vap *vap; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return NULL; nvp = malloc(sizeof(struct ndis_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &nvp->vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); /* override with driver methods */ nvp->newstate = vap->iv_newstate; vap->iv_newstate = ndis_newstate; /* complete setup */ ieee80211_vap_attach(vap, ieee80211_media_change, ndis_media_status, mac); ic->ic_opmode = opmode; /* install key handing routines */ vap->iv_key_set = ndis_add_key; vap->iv_key_delete = ndis_del_key; return vap; } static void ndis_vap_delete(struct ieee80211vap *vap) { struct ndis_vap *nvp = NDIS_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct ndis_softc *sc = ic->ic_softc; ndis_stop(sc); callout_drain(&sc->ndis_scan_callout); ieee80211_vap_detach(vap); free(nvp, M_80211_VAP); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ int ndis_detach(device_t dev) { struct ifnet *ifp; struct ndis_softc *sc; driver_object *drv; sc = device_get_softc(dev); NDIS_LOCK(sc); if (!sc->ndis_80211) ifp = sc->ifp; else ifp = NULL; if (ifp != NULL) ifp->if_flags &= ~IFF_UP; if (device_is_attached(dev)) { NDIS_UNLOCK(sc); ndis_stop(sc); if (sc->ndis_80211) ieee80211_ifdetach(&sc->ndis_ic); else if (ifp != NULL) ether_ifdetach(ifp); } else NDIS_UNLOCK(sc); if (sc->ndis_tickitem != NULL) IoFreeWorkItem(sc->ndis_tickitem); if (sc->ndis_startitem != NULL) IoFreeWorkItem(sc->ndis_startitem); if (sc->ndis_resetitem != NULL) IoFreeWorkItem(sc->ndis_resetitem); if (sc->ndis_inputitem != NULL) IoFreeWorkItem(sc->ndis_inputitem); if (sc->ndisusb_xferdoneitem != NULL) IoFreeWorkItem(sc->ndisusb_xferdoneitem); if (sc->ndisusb_taskitem != NULL) IoFreeWorkItem(sc->ndisusb_taskitem); bus_generic_detach(dev); ndis_unload_driver(sc); if (sc->ndis_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->ndis_irq); if (sc->ndis_res_io) bus_release_resource(dev, SYS_RES_IOPORT, sc->ndis_io_rid, sc->ndis_res_io); if (sc->ndis_res_mem) bus_release_resource(dev, SYS_RES_MEMORY, sc->ndis_mem_rid, sc->ndis_res_mem); if (sc->ndis_res_altmem) bus_release_resource(dev, SYS_RES_MEMORY, sc->ndis_altmem_rid, sc->ndis_res_altmem); if (ifp != NULL) if_free(ifp); if (sc->ndis_sc) ndis_destroy_dma(sc); if (sc->ndis_txarray) free(sc->ndis_txarray, M_DEVBUF); if (!sc->ndis_80211) ifmedia_removeall(&sc->ifmedia); if (sc->ndis_txpool != NULL) NdisFreePacketPool(sc->ndis_txpool); /* Destroy the PDO for this device. */ if (sc->ndis_iftype == PCIBus) drv = windrv_lookup(0, "PCI Bus"); else if (sc->ndis_iftype == PCMCIABus) drv = windrv_lookup(0, "PCCARD Bus"); else drv = windrv_lookup(0, "USB Bus"); if (drv == NULL) panic("couldn't find driver object"); windrv_destroy_pdo(drv, dev); if (sc->ndis_iftype == PCIBus) bus_dma_tag_destroy(sc->ndis_parent_tag); return (0); } int ndis_suspend(dev) device_t dev; { struct ndis_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; #ifdef notdef if (NDIS_INITIALIZED(sc)) ndis_stop(sc); #endif return (0); } int ndis_resume(dev) device_t dev; { struct ndis_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; if (NDIS_INITIALIZED(sc)) ndis_init(sc); return (0); } /* * The following bunch of routines are here to support drivers that * use the NdisMEthIndicateReceive()/MiniportTransferData() mechanism. * The NdisMEthIndicateReceive() handler runs at DISPATCH_LEVEL for * serialized miniports, or IRQL <= DISPATCH_LEVEL for deserialized * miniports. */ static void ndis_rxeof_eth(adapter, ctx, addr, hdr, hdrlen, lookahead, lookaheadlen, pktlen) ndis_handle adapter; ndis_handle ctx; char *addr; void *hdr; uint32_t hdrlen; void *lookahead; uint32_t lookaheadlen; uint32_t pktlen; { ndis_miniport_block *block; uint8_t irql = 0; uint32_t status; ndis_buffer *b; ndis_packet *p; struct mbuf *m; ndis_ethpriv *priv; block = adapter; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return; /* Save the data provided to us so far. */ m->m_len = lookaheadlen + hdrlen; m->m_pkthdr.len = pktlen + hdrlen; m->m_next = NULL; m_copyback(m, 0, hdrlen, hdr); m_copyback(m, hdrlen, lookaheadlen, lookahead); /* Now create a fake NDIS_PACKET to hold the data */ NdisAllocatePacket(&status, &p, block->nmb_rxpool); if (status != NDIS_STATUS_SUCCESS) { m_freem(m); return; } p->np_m0 = m; b = IoAllocateMdl(m->m_data, m->m_pkthdr.len, FALSE, FALSE, NULL); if (b == NULL) { NdisFreePacket(p); m_freem(m); return; } p->np_private.npp_head = p->np_private.npp_tail = b; p->np_private.npp_totlen = m->m_pkthdr.len; /* Save the packet RX context somewhere. */ priv = (ndis_ethpriv *)&p->np_protocolreserved; priv->nep_ctx = ctx; if (!NDIS_SERIALIZED(block)) KeAcquireSpinLock(&block->nmb_lock, &irql); InsertTailList((&block->nmb_packetlist), (&p->np_list)); if (!NDIS_SERIALIZED(block)) KeReleaseSpinLock(&block->nmb_lock, irql); } /* * NdisMEthIndicateReceiveComplete() handler, runs at DISPATCH_LEVEL * for serialized miniports, or IRQL <= DISPATCH_LEVEL for deserialized * miniports. */ static void ndis_rxeof_done(adapter) ndis_handle adapter; { struct ndis_softc *sc; ndis_miniport_block *block; block = adapter; /* Schedule transfer/RX of queued packets. */ sc = device_get_softc(block->nmb_physdeviceobj->do_devext); KeInsertQueueDpc(&sc->ndis_rxdpc, NULL, NULL); } /* * MiniportTransferData() handler, runs at DISPATCH_LEVEL. */ static void ndis_rxeof_xfr(dpc, adapter, sysarg1, sysarg2) kdpc *dpc; ndis_handle adapter; void *sysarg1; void *sysarg2; { ndis_miniport_block *block; struct ndis_softc *sc; ndis_packet *p; list_entry *l; uint32_t status; ndis_ethpriv *priv; struct ifnet *ifp; struct mbuf *m; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; KeAcquireSpinLockAtDpcLevel(&block->nmb_lock); l = block->nmb_packetlist.nle_flink; while(!IsListEmpty(&block->nmb_packetlist)) { l = RemoveHeadList((&block->nmb_packetlist)); p = CONTAINING_RECORD(l, ndis_packet, np_list); InitializeListHead((&p->np_list)); priv = (ndis_ethpriv *)&p->np_protocolreserved; m = p->np_m0; p->np_softc = sc; p->np_m0 = NULL; KeReleaseSpinLockFromDpcLevel(&block->nmb_lock); status = MSCALL6(sc->ndis_chars->nmc_transferdata_func, p, &p->np_private.npp_totlen, block, priv->nep_ctx, m->m_len, m->m_pkthdr.len - m->m_len); KeAcquireSpinLockAtDpcLevel(&block->nmb_lock); /* * If status is NDIS_STATUS_PENDING, do nothing and * wait for a callback to the ndis_rxeof_xfr_done() * handler. */ m->m_len = m->m_pkthdr.len; m->m_pkthdr.rcvif = ifp; if (status == NDIS_STATUS_SUCCESS) { IoFreeMdl(p->np_private.npp_head); NdisFreePacket(p); KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock); mbufq_enqueue(&sc->ndis_rxqueue, m); KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock); IoQueueWorkItem(sc->ndis_inputitem, (io_workitem_func)ndis_inputtask_wrap, WORKQUEUE_CRITICAL, sc); } if (status == NDIS_STATUS_FAILURE) m_freem(m); /* Advance to next packet */ l = block->nmb_packetlist.nle_flink; } KeReleaseSpinLockFromDpcLevel(&block->nmb_lock); } /* * NdisMTransferDataComplete() handler, runs at DISPATCH_LEVEL. */ static void ndis_rxeof_xfr_done(adapter, packet, status, len) ndis_handle adapter; ndis_packet *packet; uint32_t status; uint32_t len; { ndis_miniport_block *block; struct ndis_softc *sc; struct ifnet *ifp; struct mbuf *m; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; m = packet->np_m0; IoFreeMdl(packet->np_private.npp_head); NdisFreePacket(packet); if (status != NDIS_STATUS_SUCCESS) { m_freem(m); return; } m->m_len = m->m_pkthdr.len; m->m_pkthdr.rcvif = ifp; KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock); mbufq_enqueue(&sc->ndis_rxqueue, m); KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock); IoQueueWorkItem(sc->ndis_inputitem, (io_workitem_func)ndis_inputtask_wrap, WORKQUEUE_CRITICAL, sc); } /* * A frame has been uploaded: pass the resulting mbuf chain up to * the higher level protocols. * * When handling received NDIS packets, the 'status' field in the * out-of-band portion of the ndis_packet has special meaning. In the * most common case, the underlying NDIS driver will set this field * to NDIS_STATUS_SUCCESS, which indicates that it's ok for us to * take possession of it. We then change the status field to * NDIS_STATUS_PENDING to tell the driver that we now own the packet, * and that we will return it at some point in the future via the * return packet handler. * * If the driver hands us a packet with a status of NDIS_STATUS_RESOURCES, * this means the driver is running out of packet/buffer resources and * wants to maintain ownership of the packet. In this case, we have to * copy the packet data into local storage and let the driver keep the * packet. */ static void ndis_rxeof(adapter, packets, pktcnt) ndis_handle adapter; ndis_packet **packets; uint32_t pktcnt; { struct ndis_softc *sc; ndis_miniport_block *block; ndis_packet *p; uint32_t s; ndis_tcpip_csum *csum; struct ifnet *ifp; struct mbuf *m0, *m; int i; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; /* * There's a slim chance the driver may indicate some packets * before we're completely ready to handle them. If we detect this, * we need to return them to the miniport and ignore them. */ if (!sc->ndis_running) { for (i = 0; i < pktcnt; i++) { p = packets[i]; if (p->np_oob.npo_status == NDIS_STATUS_SUCCESS) { p->np_refcnt++; ndis_return_packet(p); } } return; } for (i = 0; i < pktcnt; i++) { p = packets[i]; /* Stash the softc here so ptom can use it. */ p->np_softc = sc; if (ndis_ptom(&m0, p)) { device_printf(sc->ndis_dev, "ptom failed\n"); if (p->np_oob.npo_status == NDIS_STATUS_SUCCESS) ndis_return_packet(p); } else { #ifdef notdef if (p->np_oob.npo_status == NDIS_STATUS_RESOURCES) { m = m_dup(m0, M_NOWAIT); /* * NOTE: we want to destroy the mbuf here, but * we don't actually want to return it to the * driver via the return packet handler. By * bumping np_refcnt, we can prevent the * ndis_return_packet() routine from actually * doing anything. */ p->np_refcnt++; m_freem(m0); if (m == NULL) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); else m0 = m; } else p->np_oob.npo_status = NDIS_STATUS_PENDING; #endif m = m_dup(m0, M_NOWAIT); if (p->np_oob.npo_status == NDIS_STATUS_RESOURCES) p->np_refcnt++; else p->np_oob.npo_status = NDIS_STATUS_PENDING; m_freem(m0); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); continue; } m0 = m; m0->m_pkthdr.rcvif = ifp; /* Deal with checksum offload. */ if (ifp->if_capenable & IFCAP_RXCSUM && p->np_ext.npe_info[ndis_tcpipcsum_info] != NULL) { s = (uintptr_t) p->np_ext.npe_info[ndis_tcpipcsum_info]; csum = (ndis_tcpip_csum *)&s; if (csum->u.ntc_rxflags & NDIS_RXCSUM_IP_PASSED) m0->m_pkthdr.csum_flags |= CSUM_IP_CHECKED|CSUM_IP_VALID; if (csum->u.ntc_rxflags & (NDIS_RXCSUM_TCP_PASSED | NDIS_RXCSUM_UDP_PASSED)) { m0->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m0->m_pkthdr.csum_data = 0xFFFF; } } KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock); mbufq_enqueue(&sc->ndis_rxqueue, m0); KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock); IoQueueWorkItem(sc->ndis_inputitem, (io_workitem_func)ndis_inputtask_wrap, WORKQUEUE_CRITICAL, sc); } } } /* * This routine is run at PASSIVE_LEVEL. We use this routine to pass * packets into the stack in order to avoid calling (*ifp->if_input)() * with any locks held (at DISPATCH_LEVEL, we'll be holding the * 'dispatch level' per-cpu sleep lock). */ static void ndis_inputtask(device_object *dobj, void *arg) { ndis_miniport_block *block; struct ndis_softc *sc = arg; struct mbuf *m; uint8_t irql; block = dobj->do_devext; KeAcquireSpinLock(&sc->ndis_rxlock, &irql); while ((m = mbufq_dequeue(&sc->ndis_rxqueue)) != NULL) { KeReleaseSpinLock(&sc->ndis_rxlock, irql); if ((sc->ndis_80211 != 0)) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) vap->iv_deliver_data(vap, vap->iv_bss, m); } else { struct ifnet *ifp = sc->ifp; (*ifp->if_input)(ifp, m); } KeAcquireSpinLock(&sc->ndis_rxlock, &irql); } KeReleaseSpinLock(&sc->ndis_rxlock, irql); } /* * A frame was downloaded to the chip. It's safe for us to clean up * the list buffers. */ static void ndis_txeof(adapter, packet, status) ndis_handle adapter; ndis_packet *packet; ndis_status status; { struct ndis_softc *sc; ndis_miniport_block *block; struct ifnet *ifp; int idx; struct mbuf *m; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; m = packet->np_m0; idx = packet->np_txidx; if (sc->ndis_sc) bus_dmamap_unload(sc->ndis_ttag, sc->ndis_tmaps[idx]); ndis_free_packet(packet); m_freem(m); NDIS_LOCK(sc); sc->ndis_txarray[idx] = NULL; sc->ndis_txpending++; if (!sc->ndis_80211) { struct ifnet *ifp = sc->ifp; if (status == NDIS_STATUS_SUCCESS) if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->ndis_tx_timer = 0; NDIS_UNLOCK(sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); DPRINTF(("%s: ndis_ifstarttask_wrap sc=%p\n", __func__, sc)); } static void ndis_linksts(adapter, status, sbuf, slen) ndis_handle adapter; ndis_status status; void *sbuf; uint32_t slen; { ndis_miniport_block *block; struct ndis_softc *sc; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); sc->ndis_sts = status; /* Event list is all full up, drop this one. */ NDIS_LOCK(sc); if (sc->ndis_evt[sc->ndis_evtpidx].ne_sts) { NDIS_UNLOCK(sc); return; } /* Cache the event. */ if (slen) { sc->ndis_evt[sc->ndis_evtpidx].ne_buf = malloc(slen, M_TEMP, M_NOWAIT); if (sc->ndis_evt[sc->ndis_evtpidx].ne_buf == NULL) { NDIS_UNLOCK(sc); return; } bcopy((char *)sbuf, sc->ndis_evt[sc->ndis_evtpidx].ne_buf, slen); } sc->ndis_evt[sc->ndis_evtpidx].ne_sts = status; sc->ndis_evt[sc->ndis_evtpidx].ne_len = slen; NDIS_EVTINC(sc->ndis_evtpidx); NDIS_UNLOCK(sc); } static void ndis_linksts_done(adapter) ndis_handle adapter; { ndis_miniport_block *block; struct ndis_softc *sc; struct ifnet *ifp; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; if (!NDIS_INITIALIZED(sc)) return; switch (sc->ndis_sts) { case NDIS_STATUS_MEDIA_CONNECT: IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); break; case NDIS_STATUS_MEDIA_DISCONNECT: if (sc->ndis_link) IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); break; default: break; } } static void ndis_tick(xsc) void *xsc; { struct ndis_softc *sc; sc = xsc; if (sc->ndis_hang_timer && --sc->ndis_hang_timer == 0) { IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); sc->ndis_hang_timer = sc->ndis_block->nmb_checkforhangsecs; } if (sc->ndis_tx_timer && --sc->ndis_tx_timer == 0) { if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); device_printf(sc->ndis_dev, "watchdog timeout\n"); IoQueueWorkItem(sc->ndis_resetitem, (io_workitem_func)ndis_resettask_wrap, WORKQUEUE_CRITICAL, sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); } callout_reset(&sc->ndis_stat_callout, hz, ndis_tick, sc); } static void ndis_ticktask(device_object *d, void *xsc) { struct ndis_softc *sc = xsc; ndis_checkforhang_handler hangfunc; uint8_t rval; NDIS_LOCK(sc); if (!NDIS_INITIALIZED(sc)) { NDIS_UNLOCK(sc); return; } NDIS_UNLOCK(sc); hangfunc = sc->ndis_chars->nmc_checkhang_func; if (hangfunc != NULL) { rval = MSCALL1(hangfunc, sc->ndis_block->nmb_miniportadapterctx); if (rval == TRUE) { ndis_reset_nic(sc); return; } } NDIS_LOCK(sc); if (sc->ndis_link == 0 && sc->ndis_sts == NDIS_STATUS_MEDIA_CONNECT) { sc->ndis_link = 1; if (sc->ndis_80211 != 0) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) { NDIS_UNLOCK(sc); ndis_getstate_80211(sc); ieee80211_new_state(vap, IEEE80211_S_RUN, -1); NDIS_LOCK(sc); if_link_state_change(vap->iv_ifp, LINK_STATE_UP); } } else if_link_state_change(sc->ifp, LINK_STATE_UP); } if (sc->ndis_link == 1 && sc->ndis_sts == NDIS_STATUS_MEDIA_DISCONNECT) { sc->ndis_link = 0; if (sc->ndis_80211 != 0) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) { NDIS_UNLOCK(sc); ieee80211_new_state(vap, IEEE80211_S_SCAN, 0); NDIS_LOCK(sc); if_link_state_change(vap->iv_ifp, LINK_STATE_DOWN); } } else if_link_state_change(sc->ifp, LINK_STATE_DOWN); } NDIS_UNLOCK(sc); } static void ndis_map_sclist(arg, segs, nseg, mapsize, error) void *arg; bus_dma_segment_t *segs; int nseg; bus_size_t mapsize; int error; { struct ndis_sc_list *sclist; int i; if (error || arg == NULL) return; sclist = arg; sclist->nsl_frags = nseg; for (i = 0; i < nseg; i++) { sclist->nsl_elements[i].nse_addr.np_quad = segs[i].ds_addr; sclist->nsl_elements[i].nse_len = segs[i].ds_len; } } static int ndis_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { /* no support; just discard */ m_freem(m); ieee80211_free_node(ni); return (0); } static void ndis_update_mcast(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; ndis_setmulti(sc); } static void ndis_update_promisc(struct ieee80211com *ic) { /* not supported */ } static void ndis_ifstarttask(device_object *d, void *arg) { struct ndis_softc *sc = arg; DPRINTF(("%s: sc=%p, ifp=%p\n", __func__, sc, sc->ifp)); if (sc->ndis_80211) return; struct ifnet *ifp = sc->ifp; if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ndis_ifstart(ifp); } /* * Main transmit routine. To make NDIS drivers happy, we need to * transform mbuf chains into NDIS packets and feed them to the * send packet routines. Most drivers allow you to send several * packets at once (up to the maxpkts limit). Unfortunately, rather * that accepting them in the form of a linked list, they expect * a contiguous array of pointers to packets. * * For those drivers which use the NDIS scatter/gather DMA mechanism, * we need to perform busdma work here. Those that use map registers * will do the mapping themselves on a buffer by buffer basis. */ static void ndis_ifstart(struct ifnet *ifp) { struct ndis_softc *sc; struct mbuf *m = NULL; ndis_packet **p0 = NULL, *p = NULL; ndis_tcpip_csum *csum; int pcnt = 0, status; sc = ifp->if_softc; NDIS_LOCK(sc); if (!sc->ndis_link || ifp->if_drv_flags & IFF_DRV_OACTIVE) { NDIS_UNLOCK(sc); return; } p0 = &sc->ndis_txarray[sc->ndis_txidx]; while(sc->ndis_txpending) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; NdisAllocatePacket(&status, &sc->ndis_txarray[sc->ndis_txidx], sc->ndis_txpool); if (status != NDIS_STATUS_SUCCESS) break; if (ndis_mtop(m, &sc->ndis_txarray[sc->ndis_txidx])) { IFQ_DRV_PREPEND(&ifp->if_snd, m); NDIS_UNLOCK(sc); return; } /* * Save pointer to original mbuf * so we can free it later. */ p = sc->ndis_txarray[sc->ndis_txidx]; p->np_txidx = sc->ndis_txidx; p->np_m0 = m; p->np_oob.npo_status = NDIS_STATUS_PENDING; /* * Do scatter/gather processing, if driver requested it. */ if (sc->ndis_sc) { bus_dmamap_load_mbuf(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], m, ndis_map_sclist, &p->np_sclist, BUS_DMA_NOWAIT); bus_dmamap_sync(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], BUS_DMASYNC_PREREAD); p->np_ext.npe_info[ndis_sclist_info] = &p->np_sclist; } /* Handle checksum offload. */ if (ifp->if_capenable & IFCAP_TXCSUM && m->m_pkthdr.csum_flags) { csum = (ndis_tcpip_csum *) &p->np_ext.npe_info[ndis_tcpipcsum_info]; csum->u.ntc_txflags = NDIS_TXCSUM_DO_IPV4; if (m->m_pkthdr.csum_flags & CSUM_IP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_IP; if (m->m_pkthdr.csum_flags & CSUM_TCP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_TCP; if (m->m_pkthdr.csum_flags & CSUM_UDP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_UDP; p->np_private.npp_flags = NDIS_PROTOCOL_ID_TCP_IP; } NDIS_INC(sc); sc->ndis_txpending--; pcnt++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ if (!sc->ndis_80211) /* XXX handle 80211 */ BPF_MTAP(ifp, m); /* * The array that p0 points to must appear contiguous, * so we must not wrap past the end of sc->ndis_txarray[]. * If it looks like we're about to wrap, break out here * so the this batch of packets can be transmitted, then * wait for txeof to ask us to send the rest. */ if (sc->ndis_txidx == 0) break; } if (pcnt == 0) { NDIS_UNLOCK(sc); return; } if (sc->ndis_txpending == 0) ifp->if_drv_flags |= IFF_DRV_OACTIVE; /* * Set a timeout in case the chip goes out to lunch. */ sc->ndis_tx_timer = 5; NDIS_UNLOCK(sc); /* * According to NDIS documentation, if a driver exports * a MiniportSendPackets() routine, we prefer that over * a MiniportSend() routine (which sends just a single * packet). */ if (sc->ndis_chars->nmc_sendmulti_func != NULL) ndis_send_packets(sc, p0, pcnt); else ndis_send_packet(sc, p); return; } static int ndis_80211transmit(struct ieee80211com *ic, struct mbuf *m) { struct ndis_softc *sc = ic->ic_softc; ndis_packet **p0 = NULL, *p = NULL; int status; NDIS_LOCK(sc); if (!sc->ndis_link || !sc->ndis_running) { NDIS_UNLOCK(sc); return (ENXIO); } if (sc->ndis_txpending == 0) { NDIS_UNLOCK(sc); return (ENOBUFS); } p0 = &sc->ndis_txarray[sc->ndis_txidx]; NdisAllocatePacket(&status, &sc->ndis_txarray[sc->ndis_txidx], sc->ndis_txpool); if (status != NDIS_STATUS_SUCCESS) { NDIS_UNLOCK(sc); return (ENOBUFS); } if (ndis_mtop(m, &sc->ndis_txarray[sc->ndis_txidx])) { NDIS_UNLOCK(sc); return (ENOBUFS); } /* * Save pointer to original mbuf * so we can free it later. */ p = sc->ndis_txarray[sc->ndis_txidx]; p->np_txidx = sc->ndis_txidx; p->np_m0 = m; p->np_oob.npo_status = NDIS_STATUS_PENDING; /* * Do scatter/gather processing, if driver requested it. */ if (sc->ndis_sc) { bus_dmamap_load_mbuf(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], m, ndis_map_sclist, &p->np_sclist, BUS_DMA_NOWAIT); bus_dmamap_sync(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], BUS_DMASYNC_PREREAD); p->np_ext.npe_info[ndis_sclist_info] = &p->np_sclist; } NDIS_INC(sc); sc->ndis_txpending--; /* * Set a timeout in case the chip goes out to lunch. */ sc->ndis_tx_timer = 5; NDIS_UNLOCK(sc); /* * According to NDIS documentation, if a driver exports * a MiniportSendPackets() routine, we prefer that over * a MiniportSend() routine (which sends just a single * packet). */ if (sc->ndis_chars->nmc_sendmulti_func != NULL) ndis_send_packets(sc, p0, 1); else ndis_send_packet(sc, p); return (0); } static void ndis_80211parent(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; /*NDIS_LOCK(sc);*/ if (ic->ic_nrunning > 0) { if (!sc->ndis_running) ndis_init(sc); } else if (sc->ndis_running) ndis_stop(sc); /*NDIS_UNLOCK(sc);*/ } static void ndis_init(void *xsc) { struct ndis_softc *sc = xsc; int i, len, error; /* * Avoid reintializing the link unnecessarily. * This should be dealt with in a better way by * fixing the upper layer modules so they don't * call ifp->if_init() quite as often. */ if (sc->ndis_link) return; /* * Cancel pending I/O and free all RX/TX buffers. */ ndis_stop(sc); if (!(sc->ndis_iftype == PNPBus && ndisusb_halt == 0)) { error = ndis_init_nic(sc); if (error != 0) { device_printf(sc->ndis_dev, "failed to initialize the device: %d\n", error); return; } } /* Program the packet filter */ sc->ndis_filter = NDIS_PACKET_TYPE_DIRECTED | NDIS_PACKET_TYPE_BROADCAST; if (sc->ndis_80211) { struct ieee80211com *ic = &sc->ndis_ic; if (ic->ic_promisc > 0) sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; } else { struct ifnet *ifp = sc->ifp; if (ifp->if_flags & IFF_PROMISC) sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; } len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set filter failed: %d\n", error); /* * Set lookahead. */ if (sc->ndis_80211) i = ETHERMTU; else i = sc->ifp->if_mtu; len = sizeof(i); ndis_set_info(sc, OID_GEN_CURRENT_LOOKAHEAD, &i, &len); /* * Program the multicast filter, if necessary. */ ndis_setmulti(sc); /* Setup task offload. */ ndis_set_offload(sc); NDIS_LOCK(sc); sc->ndis_txidx = 0; sc->ndis_txpending = sc->ndis_maxpkts; sc->ndis_link = 0; if (!sc->ndis_80211) { if_link_state_change(sc->ifp, LINK_STATE_UNKNOWN); sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->ndis_tx_timer = 0; /* * Some drivers don't set this value. The NDIS spec says * the default checkforhang timeout is "approximately 2 * seconds." We use 3 seconds, because it seems for some * drivers, exactly 2 seconds is too fast. */ if (sc->ndis_block->nmb_checkforhangsecs == 0) sc->ndis_block->nmb_checkforhangsecs = 3; sc->ndis_hang_timer = sc->ndis_block->nmb_checkforhangsecs; callout_reset(&sc->ndis_stat_callout, hz, ndis_tick, sc); sc->ndis_running = 1; NDIS_UNLOCK(sc); /* XXX force handling */ if (sc->ndis_80211) ieee80211_start_all(&sc->ndis_ic); /* start all vap's */ } /* * Set media options. */ static int ndis_ifmedia_upd(ifp) struct ifnet *ifp; { struct ndis_softc *sc; sc = ifp->if_softc; if (NDIS_INITIALIZED(sc)) ndis_init(sc); return (0); } /* * Report current media status. */ static void ndis_ifmedia_sts(ifp, ifmr) struct ifnet *ifp; struct ifmediareq *ifmr; { struct ndis_softc *sc; uint32_t media_info; ndis_media_state linkstate; int len; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; sc = ifp->if_softc; if (!NDIS_INITIALIZED(sc)) return; len = sizeof(linkstate); ndis_get_info(sc, OID_GEN_MEDIA_CONNECT_STATUS, (void *)&linkstate, &len); len = sizeof(media_info); ndis_get_info(sc, OID_GEN_LINK_SPEED, (void *)&media_info, &len); if (linkstate == nmc_connected) ifmr->ifm_status |= IFM_ACTIVE; switch (media_info) { case 100000: ifmr->ifm_active |= IFM_10_T; break; case 1000000: ifmr->ifm_active |= IFM_100_TX; break; case 10000000: ifmr->ifm_active |= IFM_1000_T; break; default: device_printf(sc->ndis_dev, "unknown speed: %d\n", media_info); break; } } static int ndis_set_cipher(struct ndis_softc *sc, int cipher) { struct ieee80211com *ic = &sc->ndis_ic; int rval = 0, len; uint32_t arg, save; len = sizeof(arg); if (cipher == WPA_CSE_WEP40 || cipher == WPA_CSE_WEP104) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_WEP)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC1ENABLED; } if (cipher == WPA_CSE_TKIP) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_TKIP)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC2ENABLED; } if (cipher == WPA_CSE_CCMP) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_AES_CCM)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC3ENABLED; } DPRINTF(("Setting cipher to %d\n", arg)); save = arg; rval = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); if (rval) return (rval); /* Check that the cipher was set correctly. */ len = sizeof(save); rval = ndis_get_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); if (rval != 0 || arg != save) return (ENODEV); return (0); } /* * WPA is hairy to set up. Do the work in a separate routine * so we don't clutter the setstate function too much. * Important yet undocumented fact: first we have to set the * authentication mode, _then_ we enable the ciphers. If one * of the WPA authentication modes isn't enabled, the driver * might not permit the TKIP or AES ciphers to be selected. */ static int ndis_set_wpa(sc, ie, ielen) struct ndis_softc *sc; void *ie; int ielen; { struct ieee80211_ie_wpa *w; struct ndis_ie *n; char *pos; uint32_t arg; int i; /* * Apparently, the only way for us to know what ciphers * and key management/authentication mode to use is for * us to inspect the optional information element (IE) * stored in the 802.11 state machine. This IE should be * supplied by the WPA supplicant. */ w = (struct ieee80211_ie_wpa *)ie; /* Check for the right kind of IE. */ if (w->wpa_id != IEEE80211_ELEMID_VENDOR) { DPRINTF(("Incorrect IE type %d\n", w->wpa_id)); return (EINVAL); } /* Skip over the ucast cipher OIDs. */ pos = (char *)&w->wpa_uciphers[0]; pos += w->wpa_uciphercnt * sizeof(struct ndis_ie); /* Skip over the authmode count. */ pos += sizeof(u_int16_t); /* * Check for the authentication modes. I'm * pretty sure there's only supposed to be one. */ n = (struct ndis_ie *)pos; if (n->ni_val == WPA_ASE_NONE) arg = NDIS_80211_AUTHMODE_WPANONE; if (n->ni_val == WPA_ASE_8021X_UNSPEC) arg = NDIS_80211_AUTHMODE_WPA; if (n->ni_val == WPA_ASE_8021X_PSK) arg = NDIS_80211_AUTHMODE_WPAPSK; DPRINTF(("Setting WPA auth mode to %d\n", arg)); i = sizeof(arg); if (ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i)) return (ENOTSUP); i = sizeof(arg); ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); /* Now configure the desired ciphers. */ /* First, set up the multicast group cipher. */ n = (struct ndis_ie *)&w->wpa_mcipher[0]; if (ndis_set_cipher(sc, n->ni_val)) return (ENOTSUP); /* Now start looking around for the unicast ciphers. */ pos = (char *)&w->wpa_uciphers[0]; n = (struct ndis_ie *)pos; for (i = 0; i < w->wpa_uciphercnt; i++) { if (ndis_set_cipher(sc, n->ni_val)) return (ENOTSUP); n++; } return (0); } static void ndis_media_status(struct ifnet *ifp, struct ifmediareq *imr) { struct ieee80211vap *vap = ifp->if_softc; struct ndis_softc *sc = vap->iv_ic->ic_softc; uint32_t txrate; int len; if (!NDIS_INITIALIZED(sc)) return; len = sizeof(txrate); if (ndis_get_info(sc, OID_GEN_LINK_SPEED, &txrate, &len) == 0) vap->iv_bss->ni_txrate = txrate / 5000; ieee80211_media_status(ifp, imr); } static void ndis_setstate_80211(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); ndis_80211_macaddr bssid; ndis_80211_config config; int rval = 0, len; uint32_t arg; if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: NDIS not initialized\n", __func__)); return; } /* Disassociate and turn off radio. */ len = sizeof(arg); arg = 1; ndis_set_info(sc, OID_802_11_DISASSOCIATE, &arg, &len); /* Set network infrastructure mode. */ len = sizeof(arg); if (ic->ic_opmode == IEEE80211_M_IBSS) arg = NDIS_80211_NET_INFRA_IBSS; else arg = NDIS_80211_NET_INFRA_BSS; rval = ndis_set_info(sc, OID_802_11_INFRASTRUCTURE_MODE, &arg, &len); if (rval) device_printf (sc->ndis_dev, "set infra failed: %d\n", rval); /* Set power management */ len = sizeof(arg); if (vap->iv_flags & IEEE80211_F_PMGTON) arg = NDIS_80211_POWERMODE_FAST_PSP; else arg = NDIS_80211_POWERMODE_CAM; ndis_set_info(sc, OID_802_11_POWER_MODE, &arg, &len); /* Set TX power */ if ((ic->ic_caps & IEEE80211_C_TXPMGT) && ic->ic_txpowlimit < nitems(dBm2mW)) { arg = dBm2mW[ic->ic_txpowlimit]; len = sizeof(arg); ndis_set_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &len); } /* * Default encryption mode to off, authentication * to open and privacy to 'accept everything.' */ len = sizeof(arg); arg = NDIS_80211_WEPSTAT_DISABLED; ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); len = sizeof(arg); arg = NDIS_80211_AUTHMODE_OPEN; ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); /* * Note that OID_802_11_PRIVACY_FILTER is optional: * not all drivers implement it. */ len = sizeof(arg); arg = NDIS_80211_PRIVFILT_8021XWEP; ndis_set_info(sc, OID_802_11_PRIVACY_FILTER, &arg, &len); len = sizeof(config); bzero((char *)&config, len); config.nc_length = len; config.nc_fhconfig.ncf_length = sizeof(ndis_80211_config_fh); rval = ndis_get_info(sc, OID_802_11_CONFIGURATION, &config, &len); /* * Some drivers expect us to initialize these values, so * provide some defaults. */ if (config.nc_beaconperiod == 0) config.nc_beaconperiod = 100; if (config.nc_atimwin == 0) config.nc_atimwin = 100; if (config.nc_fhconfig.ncf_dwelltime == 0) config.nc_fhconfig.ncf_dwelltime = 200; if (rval == 0 && ic->ic_bsschan != IEEE80211_CHAN_ANYC) { int chan, chanflag; chan = ieee80211_chan2ieee(ic, ic->ic_bsschan); chanflag = config.nc_dsconfig > 2500000 ? IEEE80211_CHAN_2GHZ : IEEE80211_CHAN_5GHZ; if (chan != ieee80211_mhz2ieee(config.nc_dsconfig / 1000, 0)) { config.nc_dsconfig = ic->ic_bsschan->ic_freq * 1000; len = sizeof(config); config.nc_length = len; config.nc_fhconfig.ncf_length = sizeof(ndis_80211_config_fh); DPRINTF(("Setting channel to %ukHz\n", config.nc_dsconfig)); rval = ndis_set_info(sc, OID_802_11_CONFIGURATION, &config, &len); if (rval) device_printf(sc->ndis_dev, "couldn't change " "DS config to %ukHz: %d\n", config.nc_dsconfig, rval); } } else if (rval) device_printf(sc->ndis_dev, "couldn't retrieve " "channel info: %d\n", rval); /* Set the BSSID to our value so the driver doesn't associate */ len = IEEE80211_ADDR_LEN; bcopy(vap->iv_myaddr, bssid, len); DPRINTF(("Setting BSSID to %6D\n", (uint8_t *)&bssid, ":")); rval = ndis_set_info(sc, OID_802_11_BSSID, &bssid, &len); if (rval) device_printf(sc->ndis_dev, "setting BSSID failed: %d\n", rval); } static void ndis_auth_and_assoc(struct ndis_softc *sc, struct ieee80211vap *vap) { struct ieee80211_node *ni = vap->iv_bss; ndis_80211_ssid ssid; ndis_80211_macaddr bssid; ndis_80211_wep wep; int i, rval = 0, len, error; uint32_t arg; if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: NDIS not initialized\n", __func__)); return; } /* Initial setup */ ndis_setstate_80211(sc); /* Set network infrastructure mode. */ len = sizeof(arg); if (vap->iv_opmode == IEEE80211_M_IBSS) arg = NDIS_80211_NET_INFRA_IBSS; else arg = NDIS_80211_NET_INFRA_BSS; rval = ndis_set_info(sc, OID_802_11_INFRASTRUCTURE_MODE, &arg, &len); if (rval) device_printf (sc->ndis_dev, "set infra failed: %d\n", rval); /* Set RTS threshold */ len = sizeof(arg); arg = vap->iv_rtsthreshold; ndis_set_info(sc, OID_802_11_RTS_THRESHOLD, &arg, &len); /* Set fragmentation threshold */ len = sizeof(arg); arg = vap->iv_fragthreshold; ndis_set_info(sc, OID_802_11_FRAGMENTATION_THRESHOLD, &arg, &len); /* Set WEP */ if (vap->iv_flags & IEEE80211_F_PRIVACY && !(vap->iv_flags & IEEE80211_F_WPA)) { int keys_set = 0; if (ni->ni_authmode == IEEE80211_AUTH_SHARED) { len = sizeof(arg); arg = NDIS_80211_AUTHMODE_SHARED; DPRINTF(("Setting shared auth\n")); ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); } for (i = 0; i < IEEE80211_WEP_NKID; i++) { if (vap->iv_nw_keys[i].wk_keylen) { if (vap->iv_nw_keys[i].wk_cipher->ic_cipher != IEEE80211_CIPHER_WEP) continue; bzero((char *)&wep, sizeof(wep)); wep.nw_keylen = vap->iv_nw_keys[i].wk_keylen; /* * 5, 13 and 16 are the only valid * key lengths. Anything in between * will be zero padded out to the * next highest boundary. */ if (vap->iv_nw_keys[i].wk_keylen < 5) wep.nw_keylen = 5; else if (vap->iv_nw_keys[i].wk_keylen > 5 && vap->iv_nw_keys[i].wk_keylen < 13) wep.nw_keylen = 13; else if (vap->iv_nw_keys[i].wk_keylen > 13 && vap->iv_nw_keys[i].wk_keylen < 16) wep.nw_keylen = 16; wep.nw_keyidx = i; wep.nw_length = (sizeof(uint32_t) * 3) + wep.nw_keylen; if (i == vap->iv_def_txkey) wep.nw_keyidx |= NDIS_80211_WEPKEY_TX; bcopy(vap->iv_nw_keys[i].wk_key, wep.nw_keydata, wep.nw_length); len = sizeof(wep); DPRINTF(("Setting WEP key %d\n", i)); rval = ndis_set_info(sc, OID_802_11_ADD_WEP, &wep, &len); if (rval) device_printf(sc->ndis_dev, "set wepkey failed: %d\n", rval); keys_set++; } } if (keys_set) { DPRINTF(("Setting WEP on\n")); arg = NDIS_80211_WEPSTAT_ENABLED; len = sizeof(arg); rval = ndis_set_info(sc, OID_802_11_WEP_STATUS, &arg, &len); if (rval) device_printf(sc->ndis_dev, "enable WEP failed: %d\n", rval); if (vap->iv_flags & IEEE80211_F_DROPUNENC) arg = NDIS_80211_PRIVFILT_8021XWEP; else arg = NDIS_80211_PRIVFILT_ACCEPTALL; len = sizeof(arg); ndis_set_info(sc, OID_802_11_PRIVACY_FILTER, &arg, &len); } } /* Set up WPA. */ if ((vap->iv_flags & IEEE80211_F_WPA) && vap->iv_appie_assocreq != NULL) { struct ieee80211_appie *ie = vap->iv_appie_assocreq; error = ndis_set_wpa(sc, ie->ie_data, ie->ie_len); if (error != 0) device_printf(sc->ndis_dev, "WPA setup failed\n"); } #ifdef notyet /* Set network type. */ arg = 0; switch (vap->iv_curmode) { case IEEE80211_MODE_11A: arg = NDIS_80211_NETTYPE_11OFDM5; break; case IEEE80211_MODE_11B: arg = NDIS_80211_NETTYPE_11DS; break; case IEEE80211_MODE_11G: arg = NDIS_80211_NETTYPE_11OFDM24; break; default: device_printf(sc->ndis_dev, "unknown mode: %d\n", vap->iv_curmode); } if (arg) { DPRINTF(("Setting network type to %d\n", arg)); len = sizeof(arg); rval = ndis_set_info(sc, OID_802_11_NETWORK_TYPE_IN_USE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "set nettype failed: %d\n", rval); } #endif /* * If the user selected a specific BSSID, try * to use that one. This is useful in the case where * there are several APs in range with the same network * name. To delete the BSSID, we use the broadcast * address as the BSSID. * Note that some drivers seem to allow setting a BSSID * in ad-hoc mode, which has the effect of forcing the * NIC to create an ad-hoc cell with a specific BSSID, * instead of a randomly chosen one. However, the net80211 * code makes the assumtion that the BSSID setting is invalid * when you're in ad-hoc mode, so we don't allow that here. */ len = IEEE80211_ADDR_LEN; if (vap->iv_flags & IEEE80211_F_DESBSSID && vap->iv_opmode != IEEE80211_M_IBSS) bcopy(ni->ni_bssid, bssid, len); else bcopy(ieee80211broadcastaddr, bssid, len); DPRINTF(("Setting BSSID to %6D\n", (uint8_t *)&bssid, ":")); rval = ndis_set_info(sc, OID_802_11_BSSID, &bssid, &len); if (rval) device_printf(sc->ndis_dev, "setting BSSID failed: %d\n", rval); /* Set SSID -- always do this last. */ #ifdef NDIS_DEBUG if (ndis_debug > 0) { printf("Setting ESSID to "); ieee80211_print_essid(ni->ni_essid, ni->ni_esslen); printf("\n"); } #endif len = sizeof(ssid); bzero((char *)&ssid, len); ssid.ns_ssidlen = ni->ni_esslen; if (ssid.ns_ssidlen == 0) { ssid.ns_ssidlen = 1; } else bcopy(ni->ni_essid, ssid.ns_ssid, ssid.ns_ssidlen); rval = ndis_set_info(sc, OID_802_11_SSID, &ssid, &len); if (rval) device_printf (sc->ndis_dev, "set ssid failed: %d\n", rval); return; } static int ndis_get_bssid_list(sc, bl) struct ndis_softc *sc; ndis_80211_bssid_list_ex **bl; { int len, error; len = sizeof(uint32_t) + (sizeof(ndis_wlan_bssid_ex) * 16); *bl = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO); if (*bl == NULL) return (ENOMEM); error = ndis_get_info(sc, OID_802_11_BSSID_LIST, *bl, &len); if (error == ENOSPC) { free(*bl, M_DEVBUF); *bl = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO); if (*bl == NULL) return (ENOMEM); error = ndis_get_info(sc, OID_802_11_BSSID_LIST, *bl, &len); } if (error) { DPRINTF(("%s: failed to read\n", __func__)); free(*bl, M_DEVBUF); return (error); } return (0); } static int ndis_get_assoc(struct ndis_softc *sc, ndis_wlan_bssid_ex **assoc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap; struct ieee80211_node *ni; ndis_80211_bssid_list_ex *bl; ndis_wlan_bssid_ex *bs; ndis_80211_macaddr bssid; int i, len, error; if (!sc->ndis_link) return (ENOENT); len = sizeof(bssid); error = ndis_get_info(sc, OID_802_11_BSSID, &bssid, &len); if (error) { device_printf(sc->ndis_dev, "failed to get bssid\n"); return (ENOENT); } vap = TAILQ_FIRST(&ic->ic_vaps); ni = vap->iv_bss; error = ndis_get_bssid_list(sc, &bl); if (error) return (error); bs = (ndis_wlan_bssid_ex *)&bl->nblx_bssid[0]; for (i = 0; i < bl->nblx_items; i++) { if (bcmp(bs->nwbx_macaddr, bssid, sizeof(bssid)) == 0) { *assoc = malloc(bs->nwbx_len, M_TEMP, M_NOWAIT); if (*assoc == NULL) { free(bl, M_TEMP); return (ENOMEM); } bcopy((char *)bs, (char *)*assoc, bs->nwbx_len); free(bl, M_TEMP); if (ic->ic_opmode == IEEE80211_M_STA) ni->ni_associd = 1 | 0xc000; /* fake associd */ return (0); } bs = (ndis_wlan_bssid_ex *)((char *)bs + bs->nwbx_len); } free(bl, M_TEMP); return (ENOENT); } static void ndis_getstate_80211(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_node *ni = vap->iv_bss; ndis_wlan_bssid_ex *bs; int rval, len, i = 0; int chanflag; uint32_t arg; if (!NDIS_INITIALIZED(sc)) return; if ((rval = ndis_get_assoc(sc, &bs)) != 0) return; /* We're associated, retrieve info on the current bssid. */ ic->ic_curmode = ndis_nettype_mode(bs->nwbx_nettype); chanflag = ndis_nettype_chan(bs->nwbx_nettype); IEEE80211_ADDR_COPY(ni->ni_bssid, bs->nwbx_macaddr); /* Get SSID from current association info. */ bcopy(bs->nwbx_ssid.ns_ssid, ni->ni_essid, bs->nwbx_ssid.ns_ssidlen); ni->ni_esslen = bs->nwbx_ssid.ns_ssidlen; if (ic->ic_caps & IEEE80211_C_PMGT) { len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_POWER_MODE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get power mode failed: %d\n", rval); if (arg == NDIS_80211_POWERMODE_CAM) vap->iv_flags &= ~IEEE80211_F_PMGTON; else vap->iv_flags |= IEEE80211_F_PMGTON; } /* Get TX power */ if (ic->ic_caps & IEEE80211_C_TXPMGT) { len = sizeof(arg); ndis_get_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &len); for (i = 0; i < nitems(dBm2mW); i++) if (dBm2mW[i] >= arg) break; ic->ic_txpowlimit = i; } /* * Use the current association information to reflect * what channel we're on. */ ic->ic_curchan = ieee80211_find_channel(ic, bs->nwbx_config.nc_dsconfig / 1000, chanflag); if (ic->ic_curchan == NULL) ic->ic_curchan = &ic->ic_channels[0]; ni->ni_chan = ic->ic_curchan; ic->ic_bsschan = ic->ic_curchan; free(bs, M_TEMP); /* * Determine current authentication mode. */ len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get authmode status failed: %d\n", rval); else { vap->iv_flags &= ~IEEE80211_F_WPA; switch (arg) { case NDIS_80211_AUTHMODE_OPEN: ni->ni_authmode = IEEE80211_AUTH_OPEN; break; case NDIS_80211_AUTHMODE_SHARED: ni->ni_authmode = IEEE80211_AUTH_SHARED; break; case NDIS_80211_AUTHMODE_AUTO: ni->ni_authmode = IEEE80211_AUTH_AUTO; break; case NDIS_80211_AUTHMODE_WPA: case NDIS_80211_AUTHMODE_WPAPSK: case NDIS_80211_AUTHMODE_WPANONE: ni->ni_authmode = IEEE80211_AUTH_WPA; vap->iv_flags |= IEEE80211_F_WPA1; break; case NDIS_80211_AUTHMODE_WPA2: case NDIS_80211_AUTHMODE_WPA2PSK: ni->ni_authmode = IEEE80211_AUTH_WPA; vap->iv_flags |= IEEE80211_F_WPA2; break; default: ni->ni_authmode = IEEE80211_AUTH_NONE; break; } } len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_WEP_STATUS, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get wep status failed: %d\n", rval); if (arg == NDIS_80211_WEPSTAT_ENABLED) vap->iv_flags |= IEEE80211_F_PRIVACY|IEEE80211_F_DROPUNENC; else vap->iv_flags &= ~(IEEE80211_F_PRIVACY|IEEE80211_F_DROPUNENC); } static int ndis_ifioctl(ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct ndis_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; int i, error = 0; /*NDIS_LOCK(sc);*/ switch (command) { case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { if (sc->ndis_running && ifp->if_flags & IFF_PROMISC && !(sc->ndis_if_flags & IFF_PROMISC)) { sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; i = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &i); } else if (sc->ndis_running && !(ifp->if_flags & IFF_PROMISC) && sc->ndis_if_flags & IFF_PROMISC) { sc->ndis_filter &= ~NDIS_PACKET_TYPE_PROMISCUOUS; i = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &i); } else ndis_init(sc); } else { if (sc->ndis_running) ndis_stop(sc); } sc->ndis_if_flags = ifp->if_flags; error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: ndis_setmulti(sc); error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); break; case SIOCSIFCAP: ifp->if_capenable = ifr->ifr_reqcap; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist = sc->ndis_hwassist; else ifp->if_hwassist = 0; ndis_set_offload(sc); break; default: error = ether_ioctl(ifp, command, data); break; } /*NDIS_UNLOCK(sc);*/ return(error); } static int ndis_80211ioctl(struct ieee80211com *ic, u_long cmd, void *data) { struct ndis_softc *sc = ic->ic_softc; struct ifreq *ifr = data; struct ndis_oid_data oid; struct ndis_evt evt; void *oidbuf = NULL; int error = 0; if ((error = priv_check(curthread, PRIV_DRIVER)) != 0) return (error); switch (cmd) { case SIOCGDRVSPEC: case SIOCSDRVSPEC: error = copyin(ifr_data_get_ptr(ifr), &oid, sizeof(oid)); if (error) break; oidbuf = malloc(oid.len, M_TEMP, M_WAITOK | M_ZERO); error = copyin((caddr_t)ifr_data_get_ptr(ifr) + sizeof(oid), oidbuf, oid.len); } if (error) { free(oidbuf, M_TEMP); return (error); } switch (cmd) { case SIOCGDRVSPEC: error = ndis_get_info(sc, oid.oid, oidbuf, &oid.len); break; case SIOCSDRVSPEC: error = ndis_set_info(sc, oid.oid, oidbuf, &oid.len); break; case SIOCGPRIVATE_0: NDIS_LOCK(sc); if (sc->ndis_evt[sc->ndis_evtcidx].ne_sts == 0) { error = ENOENT; NDIS_UNLOCK(sc); break; } error = copyin(ifr_data_get_ptr(ifr), &evt, sizeof(evt)); if (error) { NDIS_UNLOCK(sc); break; } if (evt.ne_len < sc->ndis_evt[sc->ndis_evtcidx].ne_len) { error = ENOSPC; NDIS_UNLOCK(sc); break; } error = copyout(&sc->ndis_evt[sc->ndis_evtcidx], ifr_data_get_ptr(ifr), sizeof(uint32_t) * 2); if (error) { NDIS_UNLOCK(sc); break; } if (sc->ndis_evt[sc->ndis_evtcidx].ne_len) { error = copyout(sc->ndis_evt[sc->ndis_evtcidx].ne_buf, (caddr_t)ifr_data_get_ptr(ifr) + (sizeof(uint32_t) * 2), sc->ndis_evt[sc->ndis_evtcidx].ne_len); if (error) { NDIS_UNLOCK(sc); break; } free(sc->ndis_evt[sc->ndis_evtcidx].ne_buf, M_TEMP); sc->ndis_evt[sc->ndis_evtcidx].ne_buf = NULL; } sc->ndis_evt[sc->ndis_evtcidx].ne_len = 0; sc->ndis_evt[sc->ndis_evtcidx].ne_sts = 0; NDIS_EVTINC(sc->ndis_evtcidx); NDIS_UNLOCK(sc); break; default: error = ENOTTY; break; } switch (cmd) { case SIOCGDRVSPEC: case SIOCSDRVSPEC: error = copyout(&oid, ifr_data_get_ptr(ifr), sizeof(oid)); if (error) break; error = copyout(oidbuf, (caddr_t)ifr_data_get_ptr(ifr) + sizeof(oid), oid.len); } free(oidbuf, M_TEMP); return (error); } int ndis_del_key(struct ieee80211vap *vap, const struct ieee80211_key *key) { struct ndis_softc *sc = vap->iv_ic->ic_softc; ndis_80211_key rkey; int len, error = 0; bzero((char *)&rkey, sizeof(rkey)); len = sizeof(rkey); rkey.nk_len = len; rkey.nk_keyidx = key->wk_keyix; bcopy(vap->iv_ifp->if_broadcastaddr, rkey.nk_bssid, IEEE80211_ADDR_LEN); error = ndis_set_info(sc, OID_802_11_REMOVE_KEY, &rkey, &len); if (error) return (0); return (1); } /* * In theory this could be called for any key, but we'll * only use it for WPA TKIP or AES keys. These need to be * set after initial authentication with the AP. */ static int ndis_add_key(struct ieee80211vap *vap, const struct ieee80211_key *key) { struct ndis_softc *sc = vap->iv_ic->ic_softc; ndis_80211_key rkey; int len, error = 0; switch (key->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_TKIP: len = sizeof(ndis_80211_key); bzero((char *)&rkey, sizeof(rkey)); rkey.nk_len = len; rkey.nk_keylen = key->wk_keylen; if (key->wk_flags & IEEE80211_KEY_SWMIC) rkey.nk_keylen += 16; /* key index - gets weird in NDIS */ if (key->wk_keyix != IEEE80211_KEYIX_NONE) rkey.nk_keyidx = key->wk_keyix; else rkey.nk_keyidx = 0; if (key->wk_flags & IEEE80211_KEY_XMIT) rkey.nk_keyidx |= 1 << 31; if (key->wk_flags & IEEE80211_KEY_GROUP) { bcopy(ieee80211broadcastaddr, rkey.nk_bssid, IEEE80211_ADDR_LEN); } else { bcopy(vap->iv_bss->ni_bssid, rkey.nk_bssid, IEEE80211_ADDR_LEN); /* pairwise key */ rkey.nk_keyidx |= 1 << 30; } /* need to set bit 29 based on keyrsc */ rkey.nk_keyrsc = key->wk_keyrsc[0]; /* XXX need tid */ if (rkey.nk_keyrsc) rkey.nk_keyidx |= 1 << 29; if (key->wk_flags & IEEE80211_KEY_SWMIC) { bcopy(key->wk_key, rkey.nk_keydata, 16); bcopy(key->wk_key + 24, rkey.nk_keydata + 16, 8); bcopy(key->wk_key + 16, rkey.nk_keydata + 24, 8); } else bcopy(key->wk_key, rkey.nk_keydata, key->wk_keylen); error = ndis_set_info(sc, OID_802_11_ADD_KEY, &rkey, &len); break; case IEEE80211_CIPHER_WEP: error = 0; break; /* * I don't know how to set up keys for the AES * cipher yet. Is it the same as TKIP? */ case IEEE80211_CIPHER_AES_CCM: default: error = ENOTTY; break; } /* We need to return 1 for success, 0 for failure. */ if (error) return (0); return (1); } static void ndis_resettask(d, arg) device_object *d; void *arg; { struct ndis_softc *sc; sc = arg; ndis_reset_nic(sc); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void ndis_stop(struct ndis_softc *sc) { int i; callout_drain(&sc->ndis_stat_callout); NDIS_LOCK(sc); sc->ndis_tx_timer = 0; sc->ndis_link = 0; if (!sc->ndis_80211) sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->ndis_running = 0; NDIS_UNLOCK(sc); if (sc->ndis_iftype != PNPBus || (sc->ndis_iftype == PNPBus && !(sc->ndisusb_status & NDISUSB_STATUS_DETACH) && ndisusb_halt != 0)) ndis_halt_nic(sc); NDIS_LOCK(sc); for (i = 0; i < NDIS_EVENTS; i++) { if (sc->ndis_evt[i].ne_sts && sc->ndis_evt[i].ne_buf != NULL) { free(sc->ndis_evt[i].ne_buf, M_TEMP); sc->ndis_evt[i].ne_buf = NULL; } sc->ndis_evt[i].ne_sts = 0; sc->ndis_evt[i].ne_len = 0; } sc->ndis_evtcidx = 0; sc->ndis_evtpidx = 0; NDIS_UNLOCK(sc); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ void ndis_shutdown(dev) device_t dev; { struct ndis_softc *sc; sc = device_get_softc(dev); ndis_stop(sc); } static int ndis_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct ndis_vap *nvp = NDIS_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct ndis_softc *sc = ic->ic_softc; enum ieee80211_state ostate; DPRINTF(("%s: %s -> %s\n", __func__, ieee80211_state_name[vap->iv_state], ieee80211_state_name[nstate])); ostate = vap->iv_state; vap->iv_state = nstate; switch (nstate) { /* pass on to net80211 */ case IEEE80211_S_INIT: case IEEE80211_S_SCAN: return nvp->newstate(vap, nstate, arg); case IEEE80211_S_ASSOC: if (ostate != IEEE80211_S_AUTH) { IEEE80211_UNLOCK(ic); ndis_auth_and_assoc(sc, vap); IEEE80211_LOCK(ic); } break; case IEEE80211_S_AUTH: IEEE80211_UNLOCK(ic); ndis_auth_and_assoc(sc, vap); if (vap->iv_state == IEEE80211_S_AUTH) /* XXX */ ieee80211_new_state(vap, IEEE80211_S_ASSOC, 0); IEEE80211_LOCK(ic); break; default: break; } return (0); } static void ndis_scan(void *arg) { struct ieee80211vap *vap = arg; ieee80211_scan_done(vap); } static void ndis_scan_results(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); ndis_80211_bssid_list_ex *bl; ndis_wlan_bssid_ex *wb; struct ieee80211_scanparams sp; struct ieee80211_frame wh; struct ieee80211_channel *saved_chan; int i, j; int rssi, noise, freq, chanflag; uint8_t ssid[2+IEEE80211_NWID_LEN]; uint8_t rates[2+IEEE80211_RATE_MAXSIZE]; uint8_t *frm, *efrm; saved_chan = ic->ic_curchan; noise = -96; if (ndis_get_bssid_list(sc, &bl)) return; DPRINTF(("%s: %d results\n", __func__, bl->nblx_items)); wb = &bl->nblx_bssid[0]; for (i = 0; i < bl->nblx_items; i++) { memset(&sp, 0, sizeof(sp)); memcpy(wh.i_addr2, wb->nwbx_macaddr, sizeof(wh.i_addr2)); memcpy(wh.i_addr3, wb->nwbx_macaddr, sizeof(wh.i_addr3)); rssi = 100 * (wb->nwbx_rssi - noise) / (-32 - noise); rssi = max(0, min(rssi, 100)); /* limit 0 <= rssi <= 100 */ if (wb->nwbx_privacy) sp.capinfo |= IEEE80211_CAPINFO_PRIVACY; sp.bintval = wb->nwbx_config.nc_beaconperiod; switch (wb->nwbx_netinfra) { case NDIS_80211_NET_INFRA_IBSS: sp.capinfo |= IEEE80211_CAPINFO_IBSS; break; case NDIS_80211_NET_INFRA_BSS: sp.capinfo |= IEEE80211_CAPINFO_ESS; break; } sp.rates = &rates[0]; for (j = 0; j < IEEE80211_RATE_MAXSIZE; j++) { /* XXX - check units */ if (wb->nwbx_supportedrates[j] == 0) break; rates[2 + j] = wb->nwbx_supportedrates[j] & 0x7f; } rates[1] = j; sp.ssid = (uint8_t *)&ssid[0]; memcpy(sp.ssid + 2, &wb->nwbx_ssid.ns_ssid, wb->nwbx_ssid.ns_ssidlen); sp.ssid[1] = wb->nwbx_ssid.ns_ssidlen; chanflag = ndis_nettype_chan(wb->nwbx_nettype); freq = wb->nwbx_config.nc_dsconfig / 1000; sp.chan = sp.bchan = ieee80211_mhz2ieee(freq, chanflag); /* Hack ic->ic_curchan to be in sync with the scan result */ ic->ic_curchan = ieee80211_find_channel(ic, freq, chanflag); if (ic->ic_curchan == NULL) ic->ic_curchan = &ic->ic_channels[0]; /* Process extended info from AP */ if (wb->nwbx_len > sizeof(ndis_wlan_bssid)) { frm = (uint8_t *)&wb->nwbx_ies; efrm = frm + wb->nwbx_ielen; if (efrm - frm < 12) goto done; sp.tstamp = frm; frm += 8; sp.bintval = le16toh(*(uint16_t *)frm); frm += 2; sp.capinfo = le16toh(*(uint16_t *)frm); frm += 2; sp.ies = frm; sp.ies_len = efrm - frm; } done: DPRINTF(("scan: bssid %s chan %dMHz (%d/%d) rssi %d\n", ether_sprintf(wb->nwbx_macaddr), freq, sp.bchan, chanflag, rssi)); ieee80211_add_scan(vap, ic->ic_curchan, &sp, &wh, 0, rssi, noise); wb = (ndis_wlan_bssid_ex *)((char *)wb + wb->nwbx_len); } free(bl, M_DEVBUF); /* Restore the channel after messing with it */ ic->ic_curchan = saved_chan; } static void ndis_scan_start(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; struct ieee80211vap *vap; struct ieee80211_scan_state *ss; ndis_80211_ssid ssid; int error, len; ss = ic->ic_scan; vap = TAILQ_FIRST(&ic->ic_vaps); if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: scan aborted\n", __func__)); ieee80211_cancel_scan(vap); return; } len = sizeof(ssid); bzero((char *)&ssid, len); if (ss->ss_nssid == 0) ssid.ns_ssidlen = 1; else { /* Perform a directed scan */ ssid.ns_ssidlen = ss->ss_ssid[0].len; bcopy(ss->ss_ssid[0].ssid, ssid.ns_ssid, ssid.ns_ssidlen); } error = ndis_set_info(sc, OID_802_11_SSID, &ssid, &len); if (error) DPRINTF(("%s: set ESSID failed\n", __func__)); len = 0; error = ndis_set_info(sc, OID_802_11_BSSID_LIST_SCAN, NULL, &len); if (error) { DPRINTF(("%s: scan command failed\n", __func__)); ieee80211_cancel_scan(vap); return; } /* Set a timer to collect the results */ callout_reset(&sc->ndis_scan_callout, hz * 3, ndis_scan, vap); } static void ndis_set_channel(struct ieee80211com *ic) { /* ignore */ } static void ndis_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { /* ignore */ } static void ndis_scan_mindwell(struct ieee80211_scan_state *ss) { /* NB: don't try to abort scan; wait for firmware to finish */ } static void ndis_scan_end(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; ndis_scan_results(sc); } Index: head/sys/dev/ntb/if_ntb/if_ntb.c =================================================================== --- head/sys/dev/ntb/if_ntb/if_ntb.c (revision 357009) +++ head/sys/dev/ntb/if_ntb/if_ntb.c (revision 357010) @@ -1,513 +1,514 @@ /*- * Copyright (c) 2016 Alexander Motin * Copyright (C) 2013 Intel Corporation * Copyright (C) 2015 EMC Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * The Non-Transparent Bridge (NTB) is a device that allows you to connect * two or more systems using a PCI-e links, providing remote memory access. * * This module contains a driver for simulated Ethernet device, using * underlying NTB Transport device. * * NOTE: Much of the code in this module is shared with Linux. Any patches may * be picked up and redistributed in Linux with a dual GPL/BSD license. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../ntb_transport.h" #define KTR_NTB KTR_SPARE3 #define NTB_MEDIATYPE (IFM_ETHER | IFM_AUTO | IFM_FDX) #define NTB_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP) #define NTB_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6) #define NTB_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \ CSUM_PSEUDO_HDR | \ CSUM_IP_CHECKED | CSUM_IP_VALID | \ CSUM_SCTP_VALID) static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW, 0, "if_ntb"); static unsigned g_if_ntb_num_queues = UINT_MAX; SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN, &g_if_ntb_num_queues, 0, "Number of queues per interface"); struct ntb_net_queue { struct ntb_net_ctx *sc; if_t ifp; struct ntb_transport_qp *qp; struct buf_ring *br; struct task tx_task; struct taskqueue *tx_tq; struct mtx tx_lock; struct callout queue_full; }; struct ntb_net_ctx { if_t ifp; struct ifmedia media; u_char eaddr[ETHER_ADDR_LEN]; int num_queues; struct ntb_net_queue *queues; int mtu; }; static int ntb_net_probe(device_t dev); static int ntb_net_attach(device_t dev); static int ntb_net_detach(device_t dev); static void ntb_net_init(void *arg); static int ntb_ifmedia_upd(struct ifnet *); static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *); static int ntb_ioctl(if_t ifp, u_long command, caddr_t data); static int ntb_transmit(if_t ifp, struct mbuf *m); static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len); static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len); static void ntb_net_event_handler(void *data, enum ntb_link_event status); static void ntb_handle_tx(void *arg, int pending); static void ntb_qp_full(void *arg); static void ntb_qflush(if_t ifp); static void create_random_local_eui48(u_char *eaddr); static int ntb_net_probe(device_t dev) { device_set_desc(dev, "NTB Network Interface"); return (0); } static int ntb_net_attach(device_t dev) { struct ntb_net_ctx *sc = device_get_softc(dev); struct ntb_net_queue *q; if_t ifp; struct ntb_queue_handlers handlers = { ntb_net_rx_handler, ntb_net_tx_handler, ntb_net_event_handler }; int i; ifp = sc->ifp = if_gethandle(IFT_ETHER); if (ifp == NULL) { printf("ntb: Cannot allocate ifnet structure\n"); return (ENOMEM); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setdev(ifp, dev); sc->num_queues = min(g_if_ntb_num_queues, ntb_transport_queue_count(dev)); sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue), M_DEVBUF, M_WAITOK | M_ZERO); sc->mtu = INT_MAX; for (i = 0; i < sc->num_queues; i++) { q = &sc->queues[i]; q->sc = sc; q->ifp = ifp; q->qp = ntb_transport_create_queue(dev, i, &handlers, q); if (q->qp == NULL) break; sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp)); mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF); q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock); TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q); q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT, taskqueue_thread_enqueue, &q->tx_tq); taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d", device_get_nameunit(dev), i); callout_init(&q->queue_full, 1); } sc->num_queues = i; device_printf(dev, "%d queue(s)\n", sc->num_queues); if_setinitfn(ifp, ntb_net_init); if_setsoftc(ifp, sc); - if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); + if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | + IFF_NEEDSEPOCH); if_setioctlfn(ifp, ntb_ioctl); if_settransmitfn(ifp, ntb_transmit); if_setqflushfn(ifp, ntb_qflush); create_random_local_eui48(sc->eaddr); ether_ifattach(ifp, sc->eaddr); if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_JUMBO_MTU | IFCAP_LINKSTATE); if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE); if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN); ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd, ntb_ifmedia_sts); ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL); ifmedia_set(&sc->media, NTB_MEDIATYPE); for (i = 0; i < sc->num_queues; i++) ntb_transport_link_up(sc->queues[i].qp); return (0); } static int ntb_net_detach(device_t dev) { struct ntb_net_ctx *sc = device_get_softc(dev); struct ntb_net_queue *q; int i; for (i = 0; i < sc->num_queues; i++) ntb_transport_link_down(sc->queues[i].qp); ether_ifdetach(sc->ifp); if_free(sc->ifp); ifmedia_removeall(&sc->media); for (i = 0; i < sc->num_queues; i++) { q = &sc->queues[i]; ntb_transport_free_queue(q->qp); buf_ring_free(q->br, M_DEVBUF); callout_drain(&q->queue_full); taskqueue_drain_all(q->tx_tq); mtx_destroy(&q->tx_lock); } free(sc->queues, M_DEVBUF); return (0); } /* Network device interface */ static void ntb_net_init(void *arg) { struct ntb_net_ctx *sc = arg; if_t ifp = sc->ifp; if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); if_setbaudrate(ifp, ntb_transport_link_speed(sc->queues[0].qp)); if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ? LINK_STATE_UP : LINK_STATE_DOWN); } static int ntb_ioctl(if_t ifp, u_long command, caddr_t data) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; int error = 0; switch (command) { case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCSIFMTU: { if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) { error = EINVAL; break; } if_setmtu(ifp, ifr->ifr_mtu); break; } case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->media, command); break; case SIOCSIFCAP: if (ifr->ifr_reqcap & IFCAP_RXCSUM) if_setcapenablebit(ifp, IFCAP_RXCSUM, 0); else if_setcapenablebit(ifp, 0, IFCAP_RXCSUM); if (ifr->ifr_reqcap & IFCAP_TXCSUM) { if_setcapenablebit(ifp, IFCAP_TXCSUM, 0); if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0); } else { if_setcapenablebit(ifp, 0, IFCAP_TXCSUM); if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES); } if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6) if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0); else if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6); if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) { if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0); if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0); } else { if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6); if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6); } break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int ntb_ifmedia_upd(struct ifnet *ifp) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ifmedia *ifm = &sc->media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); return (0); } static void ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct ntb_net_ctx *sc = if_getsoftc(ifp); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = NTB_MEDIATYPE; if (ntb_transport_link_query(sc->queues[0].qp)) ifmr->ifm_status |= IFM_ACTIVE; } static void ntb_transmit_locked(struct ntb_net_queue *q) { if_t ifp = q->ifp; struct mbuf *m; int rc, len; short mflags; CTR0(KTR_NTB, "TX: ntb_transmit_locked"); while ((m = drbr_peek(ifp, q->br)) != NULL) { CTR1(KTR_NTB, "TX: start mbuf %p", m); if_etherbpfmtap(ifp, m); len = m->m_pkthdr.len; mflags = m->m_flags; rc = ntb_transport_tx_enqueue(q->qp, m, m, len); if (rc != 0) { CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc); if (rc == EAGAIN) { drbr_putback(ifp, q->br, m); callout_reset_sbt(&q->queue_full, SBT_1MS / 4, SBT_1MS / 4, ntb_qp_full, q, 0); } else { m_freem(m); drbr_advance(ifp, q->br); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } break; } drbr_advance(ifp, q->br); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if (mflags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } } static int ntb_transmit(if_t ifp, struct mbuf *m) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ntb_net_queue *q; int error, i; CTR0(KTR_NTB, "TX: ntb_transmit"); if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % sc->num_queues; else i = curcpu % sc->num_queues; q = &sc->queues[i]; error = drbr_enqueue(ifp, q->br, m); if (error) return (error); if (mtx_trylock(&q->tx_lock)) { ntb_transmit_locked(q); mtx_unlock(&q->tx_lock); } else taskqueue_enqueue(q->tx_tq, &q->tx_task); return (0); } static void ntb_handle_tx(void *arg, int pending) { struct ntb_net_queue *q = arg; mtx_lock(&q->tx_lock); ntb_transmit_locked(q); mtx_unlock(&q->tx_lock); } static void ntb_qp_full(void *arg) { struct ntb_net_queue *q = arg; CTR0(KTR_NTB, "TX: qp_full callout"); if (ntb_transport_tx_free_entry(q->qp) > 0) taskqueue_enqueue(q->tx_tq, &q->tx_task); else callout_schedule_sbt(&q->queue_full, SBT_1MS / 4, SBT_1MS / 4, 0); } static void ntb_qflush(if_t ifp) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ntb_net_queue *q; struct mbuf *m; int i; for (i = 0; i < sc->num_queues; i++) { q = &sc->queues[i]; mtx_lock(&q->tx_lock); while ((m = buf_ring_dequeue_sc(q->br)) != NULL) m_freem(m); mtx_unlock(&q->tx_lock); } if_qflush(ifp); } /* Network Device Callbacks */ static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len) { m_freem(data); CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data); } static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len) { struct ntb_net_queue *q = qp_data; struct ntb_net_ctx *sc = q->sc; struct mbuf *m = data; if_t ifp = q->ifp; uint16_t proto; CTR1(KTR_NTB, "RX: rx handler (%d)", len); if (len < 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } m->m_pkthdr.rcvif = ifp; if (sc->num_queues > 1) { m->m_pkthdr.flowid = q - sc->queues; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); } if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { m_copydata(m, 12, 2, (void *)&proto); switch (ntohs(proto)) { case ETHERTYPE_IP: if (if_getcapenable(ifp) & IFCAP_RXCSUM) { m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = NTB_CSUM_SET; } break; case ETHERTYPE_IPV6: if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) { m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = NTB_CSUM_SET; } break; } } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_input(ifp, m); } static void ntb_net_event_handler(void *data, enum ntb_link_event status) { struct ntb_net_queue *q = data; if_setbaudrate(q->ifp, ntb_transport_link_speed(q->qp)); if_link_state_change(q->ifp, (status == NTB_LINK_UP) ? LINK_STATE_UP : LINK_STATE_DOWN); } /* Helper functions */ /* TODO: This too should really be part of the kernel */ #define EUI48_MULTICAST 1 << 0 #define EUI48_LOCALLY_ADMINISTERED 1 << 1 static void create_random_local_eui48(u_char *eaddr) { static uint8_t counter = 0; eaddr[0] = EUI48_LOCALLY_ADMINISTERED; arc4rand(&eaddr[1], 4, 0); eaddr[5] = counter++; } static device_method_t ntb_net_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ntb_net_probe), DEVMETHOD(device_attach, ntb_net_attach), DEVMETHOD(device_detach, ntb_net_detach), DEVMETHOD_END }; devclass_t ntb_net_devclass; static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods, sizeof(struct ntb_net_ctx)); DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, ntb_net_devclass, NULL, NULL); MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1); MODULE_VERSION(if_ntb, 1); Index: head/sys/dev/sbni/if_sbni.c =================================================================== --- head/sys/dev/sbni/if_sbni.c (revision 357009) +++ head/sys/dev/sbni/if_sbni.c (revision 357010) @@ -1,1277 +1,1278 @@ /*- * Copyright (c) 1997-2001 Granch, Ltd. All rights reserved. * Author: Denis I.Timofeev * * Redistributon and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Device driver for Granch SBNI12 leased line adapters * * Revision 2.0.0 1997/08/06 * Initial revision by Alexey Zverev * * Revision 2.0.1 1997/08/11 * Additional internal statistics support (tx statistics) * * Revision 2.0.2 1997/11/05 * if_bpf bug has been fixed * * Revision 2.0.3 1998/12/20 * Memory leakage has been eliminated in * the sbni_st and sbni_timeout routines. * * Revision 3.0 2000/08/10 by Yaroslav Polyakov * Support for PCI cards. 4.1 modification. * * Revision 3.1 2000/09/12 * Removed extra #defines around bpf functions * * Revision 4.0 2000/11/23 by Denis Timofeev * Completely redesigned the buffer management * * Revision 4.1 2001/01/21 * Support for PCI Dual cards and new SBNI12D-10, -11 Dual/ISA cards * * Written with reference to NE2000 driver developed by David Greenman. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void sbni_init(void *); static void sbni_init_locked(struct sbni_softc *); static void sbni_start(struct ifnet *); static void sbni_start_locked(struct ifnet *); static int sbni_ioctl(struct ifnet *, u_long, caddr_t); static void sbni_stop(struct sbni_softc *); static void handle_channel(struct sbni_softc *); static void card_start(struct sbni_softc *); static int recv_frame(struct sbni_softc *); static void send_frame(struct sbni_softc *); static int upload_data(struct sbni_softc *, u_int, u_int, u_int, u_int32_t); static int skip_tail(struct sbni_softc *, u_int, u_int32_t); static void interpret_ack(struct sbni_softc *, u_int); static void download_data(struct sbni_softc *, u_int32_t *); static void prepare_to_send(struct sbni_softc *); static void drop_xmit_queue(struct sbni_softc *); static int get_rx_buf(struct sbni_softc *); static void indicate_pkt(struct sbni_softc *); static void change_level(struct sbni_softc *); static int check_fhdr(struct sbni_softc *, u_int *, u_int *, u_int *, u_int *, u_int32_t *); static int append_frame_to_pkt(struct sbni_softc *, u_int, u_int32_t); static void timeout_change_level(struct sbni_softc *); static void send_frame_header(struct sbni_softc *, u_int32_t *); static void set_initial_values(struct sbni_softc *, struct sbni_flags); static u_int32_t calc_crc32(u_int32_t, caddr_t, u_int); static callout_func_t sbni_timeout; static __inline u_char sbni_inb(struct sbni_softc *, enum sbni_reg); static __inline void sbni_outb(struct sbni_softc *, enum sbni_reg, u_char); static __inline void sbni_insb(struct sbni_softc *, u_char *, u_int); static __inline void sbni_outsb(struct sbni_softc *, u_char *, u_int); static u_int32_t crc32tab[]; #ifdef SBNI_DUAL_COMPOUND static struct mtx headlist_lock; MTX_SYSINIT(headlist_lock, &headlist_lock, "sbni headlist", MTX_DEF); static struct sbni_softc *sbni_headlist; #endif /* -------------------------------------------------------------------------- */ static __inline u_char sbni_inb(struct sbni_softc *sc, enum sbni_reg reg) { return bus_space_read_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + reg); } static __inline void sbni_outb(struct sbni_softc *sc, enum sbni_reg reg, u_char value) { bus_space_write_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + reg, value); } static __inline void sbni_insb(struct sbni_softc *sc, u_char *to, u_int len) { bus_space_read_multi_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + DAT, to, len); } static __inline void sbni_outsb(struct sbni_softc *sc, u_char *from, u_int len) { bus_space_write_multi_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + DAT, from, len); } /* Valid combinations in CSR0 (for probing): VALID_DECODER 0000,0011,1011,1010 ; 0 ; - TR_REQ ; 1 ; + TR_RDY ; 2 ; - TR_RDY TR_REQ ; 3 ; + BU_EMP ; 4 ; + BU_EMP TR_REQ ; 5 ; + BU_EMP TR_RDY ; 6 ; - BU_EMP TR_RDY TR_REQ ; 7 ; + RC_RDY ; 8 ; + RC_RDY TR_REQ ; 9 ; + RC_RDY TR_RDY ; 10 ; - RC_RDY TR_RDY TR_REQ ; 11 ; - RC_RDY BU_EMP ; 12 ; - RC_RDY BU_EMP TR_REQ ; 13 ; - RC_RDY BU_EMP TR_RDY ; 14 ; - RC_RDY BU_EMP TR_RDY TR_REQ ; 15 ; - */ #define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200) int sbni_probe(struct sbni_softc *sc) { u_char csr0; csr0 = sbni_inb(sc, CSR0); if (csr0 != 0xff && csr0 != 0x00) { csr0 &= ~EN_INT; if (csr0 & BU_EMP) csr0 |= EN_INT; if (VALID_DECODER & (1 << (csr0 >> 4))) return (0); } return (ENXIO); } /* * Install interface into kernel networking data structures */ int sbni_attach(struct sbni_softc *sc, int unit, struct sbni_flags flags) { struct ifnet *ifp; u_char csr0; ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) return (ENOMEM); sbni_outb(sc, CSR0, 0); set_initial_values(sc, flags); /* Initialize ifnet structure */ ifp->if_softc = sc; if_initname(ifp, "sbni", unit); ifp->if_init = sbni_init; ifp->if_start = sbni_start; ifp->if_ioctl = sbni_ioctl; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); /* report real baud rate */ csr0 = sbni_inb(sc, CSR0); ifp->if_baudrate = (csr0 & 0x01 ? 500000 : 2000000) / (1 << flags.rate); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | + IFF_NEEDSEPOCH; mtx_init(&sc->lock, ifp->if_xname, MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->wch, &sc->lock, 0); ether_ifattach(ifp, sc->enaddr); /* device attach does transition from UNCONFIGURED to IDLE state */ if_printf(ifp, "speed %ju, rxl ", (uintmax_t)ifp->if_baudrate); if (sc->delta_rxl) printf("auto\n"); else printf("%d (fixed)\n", sc->cur_rxl_index); return (0); } void sbni_detach(struct sbni_softc *sc) { SBNI_LOCK(sc); sbni_stop(sc); SBNI_UNLOCK(sc); callout_drain(&sc->wch); ether_ifdetach(sc->ifp); if (sc->irq_handle) bus_teardown_intr(sc->dev, sc->irq_res, sc->irq_handle); mtx_destroy(&sc->lock); if_free(sc->ifp); } void sbni_release_resources(struct sbni_softc *sc) { if (sc->irq_res) bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); if (sc->io_res && sc->io_off == 0) bus_release_resource(sc->dev, SYS_RES_IOPORT, sc->io_rid, sc->io_res); } /* -------------------------------------------------------------------------- */ static void sbni_init(void *xsc) { struct sbni_softc *sc; sc = (struct sbni_softc *)xsc; SBNI_LOCK(sc); sbni_init_locked(sc); SBNI_UNLOCK(sc); } static void sbni_init_locked(struct sbni_softc *sc) { struct ifnet *ifp; ifp = sc->ifp; /* * kludge to avoid multiple initialization when more than once * protocols configured */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; card_start(sc); callout_reset(&sc->wch, hz/SBNI_HZ, sbni_timeout, sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; /* attempt to start output */ sbni_start_locked(ifp); } static void sbni_start(struct ifnet *ifp) { struct sbni_softc *sc = ifp->if_softc; SBNI_LOCK(sc); sbni_start_locked(ifp); SBNI_UNLOCK(sc); } static void sbni_start_locked(struct ifnet *ifp) { struct sbni_softc *sc = ifp->if_softc; if (sc->tx_frameno == 0) prepare_to_send(sc); } static void sbni_stop(struct sbni_softc *sc) { sbni_outb(sc, CSR0, 0); drop_xmit_queue(sc); if (sc->rx_buf_p) { m_freem(sc->rx_buf_p); sc->rx_buf_p = NULL; } callout_stop(&sc->wch); sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } /* -------------------------------------------------------------------------- */ /* interrupt handler */ /* * SBNI12D-10, -11/ISA boards within "common interrupt" mode could not * be looked as two independent single-channel devices. Every channel seems * as Ethernet interface but interrupt handler must be common. Really, first * channel ("master") driver only registers the handler. In it's struct softc * it has got pointer to "slave" channel's struct softc and handles that's * interrupts too. * softc of successfully attached ISA SBNI boards is linked to list. * While next board driver is initialized, it scans this list. If one * has found softc with same irq and ioaddr different by 4 then it assumes * this board to be "master". */ void sbni_intr(void *arg) { struct sbni_softc *sc; int repeat; sc = (struct sbni_softc *)arg; do { repeat = 0; SBNI_LOCK(sc); if (sbni_inb(sc, CSR0) & (RC_RDY | TR_RDY)) { handle_channel(sc); repeat = 1; } SBNI_UNLOCK(sc); if (sc->slave_sc) { /* second channel present */ SBNI_LOCK(sc->slave_sc); if (sbni_inb(sc->slave_sc, CSR0) & (RC_RDY | TR_RDY)) { handle_channel(sc->slave_sc); repeat = 1; } SBNI_UNLOCK(sc->slave_sc); } } while (repeat); } static void handle_channel(struct sbni_softc *sc) { int req_ans; u_char csr0; sbni_outb(sc, CSR0, (sbni_inb(sc, CSR0) & ~EN_INT) | TR_REQ); sc->timer_ticks = CHANGE_LEVEL_START_TICKS; for (;;) { csr0 = sbni_inb(sc, CSR0); if ((csr0 & (RC_RDY | TR_RDY)) == 0) break; req_ans = !(sc->state & FL_PREV_OK); if (csr0 & RC_RDY) req_ans = recv_frame(sc); /* * TR_RDY always equals 1 here because we have owned the marker, * and we set TR_REQ when disabled interrupts */ csr0 = sbni_inb(sc, CSR0); if ((csr0 & TR_RDY) == 0 || (csr0 & RC_RDY) != 0) if_printf(sc->ifp, "internal error!\n"); /* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */ if (req_ans || sc->tx_frameno != 0) send_frame(sc); else { /* send the marker without any data */ sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) & ~TR_REQ); } } sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) | EN_INT); } /* * Routine returns 1 if it need to acknoweledge received frame. * Empty frame received without errors won't be acknoweledged. */ static int recv_frame(struct sbni_softc *sc) { u_int32_t crc; u_int framelen, frameno, ack; u_int is_first, frame_ok; crc = CRC32_INITIAL; if (check_fhdr(sc, &framelen, &frameno, &ack, &is_first, &crc)) { frame_ok = framelen > 4 ? upload_data(sc, framelen, frameno, is_first, crc) : skip_tail(sc, framelen, crc); if (frame_ok) interpret_ack(sc, ack); } else { framelen = 0; frame_ok = 0; } sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) ^ CT_ZER); if (frame_ok) { sc->state |= FL_PREV_OK; if (framelen > 4) sc->in_stats.all_rx_number++; } else { sc->state &= ~FL_PREV_OK; change_level(sc); sc->in_stats.all_rx_number++; sc->in_stats.bad_rx_number++; } return (!frame_ok || framelen > 4); } static void send_frame(struct sbni_softc *sc) { u_int32_t crc; u_char csr0; crc = CRC32_INITIAL; if (sc->state & FL_NEED_RESEND) { /* if frame was sended but not ACK'ed - resend it */ if (sc->trans_errors) { sc->trans_errors--; if (sc->framelen != 0) sc->in_stats.resend_tx_number++; } else { /* cannot xmit with many attempts */ drop_xmit_queue(sc); goto do_send; } } else sc->trans_errors = TR_ERROR_COUNT; send_frame_header(sc, &crc); sc->state |= FL_NEED_RESEND; /* * FL_NEED_RESEND will be cleared after ACK, but if empty * frame sended then in prepare_to_send next frame */ if (sc->framelen) { download_data(sc, &crc); sc->in_stats.all_tx_number++; sc->state |= FL_WAIT_ACK; } sbni_outsb(sc, (u_char *)&crc, sizeof crc); do_send: csr0 = sbni_inb(sc, CSR0); sbni_outb(sc, CSR0, csr0 & ~TR_REQ); if (sc->tx_frameno) { /* next frame exists - request to send */ sbni_outb(sc, CSR0, csr0 | TR_REQ); } } static void download_data(struct sbni_softc *sc, u_int32_t *crc_p) { struct mbuf *m; caddr_t data_p; u_int data_len, pos, slice; data_p = NULL; /* initialized to avoid warn */ pos = 0; for (m = sc->tx_buf_p; m != NULL && pos < sc->pktlen; m = m->m_next) { if (pos + m->m_len > sc->outpos) { data_len = m->m_len - (sc->outpos - pos); data_p = mtod(m, caddr_t) + (sc->outpos - pos); goto do_copy; } else pos += m->m_len; } data_len = 0; do_copy: pos = 0; do { if (data_len) { slice = min(data_len, sc->framelen - pos); sbni_outsb(sc, data_p, slice); *crc_p = calc_crc32(*crc_p, data_p, slice); pos += slice; if (data_len -= slice) data_p += slice; else { do { m = m->m_next; } while (m != NULL && m->m_len == 0); if (m) { data_len = m->m_len; data_p = mtod(m, caddr_t); } } } else { /* frame too short - zero padding */ pos = sc->framelen - pos; while (pos--) { sbni_outb(sc, DAT, 0); *crc_p = CRC32(0, *crc_p); } return; } } while (pos < sc->framelen); } static int upload_data(struct sbni_softc *sc, u_int framelen, u_int frameno, u_int is_first, u_int32_t crc) { int frame_ok; if (is_first) { sc->wait_frameno = frameno; sc->inppos = 0; } if (sc->wait_frameno == frameno) { if (sc->inppos + framelen <= ETHER_MAX_LEN) { frame_ok = append_frame_to_pkt(sc, framelen, crc); /* * if CRC is right but framelen incorrect then transmitter * error was occurred... drop entire packet */ } else if ((frame_ok = skip_tail(sc, framelen, crc)) != 0) { sc->wait_frameno = 0; sc->inppos = 0; if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); /* now skip all frames until is_first != 0 */ } } else frame_ok = skip_tail(sc, framelen, crc); if (is_first && !frame_ok) { /* * Frame has been violated, but we have stored * is_first already... Drop entire packet. */ sc->wait_frameno = 0; if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); } return (frame_ok); } static __inline void send_complete(struct sbni_softc *); static __inline void send_complete(struct sbni_softc *sc) { m_freem(sc->tx_buf_p); sc->tx_buf_p = NULL; if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1); } static void interpret_ack(struct sbni_softc *sc, u_int ack) { if (ack == FRAME_SENT_OK) { sc->state &= ~FL_NEED_RESEND; if (sc->state & FL_WAIT_ACK) { sc->outpos += sc->framelen; if (--sc->tx_frameno) { sc->framelen = min( sc->maxframe, sc->pktlen - sc->outpos); } else { send_complete(sc); prepare_to_send(sc); } } } sc->state &= ~FL_WAIT_ACK; } /* * Glue received frame with previous fragments of packet. * Indicate packet when last frame would be accepted. */ static int append_frame_to_pkt(struct sbni_softc *sc, u_int framelen, u_int32_t crc) { caddr_t p; if (sc->inppos + framelen > ETHER_MAX_LEN) return (0); if (!sc->rx_buf_p && !get_rx_buf(sc)) return (0); p = sc->rx_buf_p->m_data + sc->inppos; sbni_insb(sc, p, framelen); if (calc_crc32(crc, p, framelen) != CRC32_REMAINDER) return (0); sc->inppos += framelen - 4; if (--sc->wait_frameno == 0) { /* last frame received */ indicate_pkt(sc); if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1); } return (1); } /* * Prepare to start output on adapter. Current priority must be set to splimp * before this routine is called. * Transmitter will be actually activated when marker has been accepted. */ static void prepare_to_send(struct sbni_softc *sc) { struct mbuf *m; u_int len; /* sc->tx_buf_p == NULL here! */ if (sc->tx_buf_p) printf("sbni: memory leak!\n"); sc->outpos = 0; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); for (;;) { IF_DEQUEUE(&sc->ifp->if_snd, sc->tx_buf_p); if (!sc->tx_buf_p) { /* nothing to transmit... */ sc->pktlen = 0; sc->tx_frameno = 0; sc->framelen = 0; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; return; } for (len = 0, m = sc->tx_buf_p; m; m = m->m_next) len += m->m_len; if (len != 0) break; m_freem(sc->tx_buf_p); } if (len < SBNI_MIN_LEN) len = SBNI_MIN_LEN; sc->pktlen = len; sc->tx_frameno = howmany(len, sc->maxframe); sc->framelen = min(len, sc->maxframe); sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) | TR_REQ); sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; BPF_MTAP(sc->ifp, sc->tx_buf_p); } static void drop_xmit_queue(struct sbni_softc *sc) { struct mbuf *m; if (sc->tx_buf_p) { m_freem(sc->tx_buf_p); sc->tx_buf_p = NULL; if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); } for (;;) { IF_DEQUEUE(&sc->ifp->if_snd, m); if (m == NULL) break; m_freem(m); if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); } sc->tx_frameno = 0; sc->framelen = 0; sc->outpos = 0; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } static void send_frame_header(struct sbni_softc *sc, u_int32_t *crc_p) { u_int32_t crc; u_int len_field; u_char value; crc = *crc_p; len_field = sc->framelen + 6; /* CRC + frameno + reserved */ if (sc->state & FL_NEED_RESEND) len_field |= FRAME_RETRY; /* non-first attempt... */ if (sc->outpos == 0) len_field |= FRAME_FIRST; len_field |= (sc->state & FL_PREV_OK) ? FRAME_SENT_OK : FRAME_SENT_BAD; sbni_outb(sc, DAT, SBNI_SIG); value = (u_char)len_field; sbni_outb(sc, DAT, value); crc = CRC32(value, crc); value = (u_char)(len_field >> 8); sbni_outb(sc, DAT, value); crc = CRC32(value, crc); sbni_outb(sc, DAT, sc->tx_frameno); crc = CRC32(sc->tx_frameno, crc); sbni_outb(sc, DAT, 0); crc = CRC32(0, crc); *crc_p = crc; } /* * if frame tail not needed (incorrect number or received twice), * it won't store, but CRC will be calculated */ static int skip_tail(struct sbni_softc *sc, u_int tail_len, u_int32_t crc) { while (tail_len--) crc = CRC32(sbni_inb(sc, DAT), crc); return (crc == CRC32_REMAINDER); } static int check_fhdr(struct sbni_softc *sc, u_int *framelen, u_int *frameno, u_int *ack, u_int *is_first, u_int32_t *crc_p) { u_int32_t crc; u_char value; crc = *crc_p; if (sbni_inb(sc, DAT) != SBNI_SIG) return (0); value = sbni_inb(sc, DAT); *framelen = (u_int)value; crc = CRC32(value, crc); value = sbni_inb(sc, DAT); *framelen |= ((u_int)value) << 8; crc = CRC32(value, crc); *ack = *framelen & FRAME_ACK_MASK; *is_first = (*framelen & FRAME_FIRST) != 0; if ((*framelen &= FRAME_LEN_MASK) < 6 || *framelen > SBNI_MAX_FRAME - 3) return (0); value = sbni_inb(sc, DAT); *frameno = (u_int)value; crc = CRC32(value, crc); crc = CRC32(sbni_inb(sc, DAT), crc); /* reserved byte */ *framelen -= 2; *crc_p = crc; return (1); } static int get_rx_buf(struct sbni_softc *sc) { struct mbuf *m; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { if_printf(sc->ifp, "cannot allocate header mbuf\n"); return (0); } /* * We always put the received packet in a single buffer - * either with just an mbuf header or in a cluster attached * to the header. The +2 is to compensate for the alignment * fixup below. */ if (ETHER_MAX_LEN + 2 > MHLEN) { /* Attach an mbuf cluster */ if (!(MCLGET(m, M_NOWAIT))) { m_freem(m); return (0); } } m->m_pkthdr.len = m->m_len = ETHER_MAX_LEN + 2; /* * The +2 is to longword align the start of the real packet. * (sizeof ether_header == 14) * This is important for NFS. */ m_adj(m, 2); sc->rx_buf_p = m; return (1); } static void indicate_pkt(struct sbni_softc *sc) { struct ifnet *ifp = sc->ifp; struct mbuf *m; m = sc->rx_buf_p; m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = sc->inppos; sc->rx_buf_p = NULL; SBNI_UNLOCK(sc); (*ifp->if_input)(ifp, m); SBNI_LOCK(sc); } /* -------------------------------------------------------------------------- */ /* * Routine checks periodically wire activity and regenerates marker if * connect was inactive for a long time. */ static void sbni_timeout(void *xsc) { struct sbni_softc *sc; u_char csr0; sc = (struct sbni_softc *)xsc; SBNI_ASSERT_LOCKED(sc); csr0 = sbni_inb(sc, CSR0); if (csr0 & RC_CHK) { if (sc->timer_ticks) { if (csr0 & (RC_RDY | BU_EMP)) /* receiving not active */ sc->timer_ticks--; } else { sc->in_stats.timeout_number++; if (sc->delta_rxl) timeout_change_level(sc); sbni_outb(sc, CSR1, *(u_char *)&sc->csr1 | PR_RES); csr0 = sbni_inb(sc, CSR0); } } sbni_outb(sc, CSR0, csr0 | RC_CHK); callout_reset(&sc->wch, hz/SBNI_HZ, sbni_timeout, sc); } /* -------------------------------------------------------------------------- */ static void card_start(struct sbni_softc *sc) { sc->timer_ticks = CHANGE_LEVEL_START_TICKS; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); sc->state |= FL_PREV_OK; sc->inppos = 0; sc->wait_frameno = 0; sbni_outb(sc, CSR1, *(u_char *)&sc->csr1 | PR_RES); sbni_outb(sc, CSR0, EN_INT); } /* -------------------------------------------------------------------------- */ static u_char rxl_tab[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, 0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f }; #define SIZE_OF_TIMEOUT_RXL_TAB 4 static u_char timeout_rxl_tab[] = { 0x03, 0x05, 0x08, 0x0b }; static void set_initial_values(struct sbni_softc *sc, struct sbni_flags flags) { if (flags.fixed_rxl) { sc->delta_rxl = 0; /* disable receive level autodetection */ sc->cur_rxl_index = flags.rxl; } else { sc->delta_rxl = DEF_RXL_DELTA; sc->cur_rxl_index = DEF_RXL; } sc->csr1.rate = flags.fixed_rate ? flags.rate : DEFAULT_RATE; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index]; sc->maxframe = DEFAULT_FRAME_LEN; /* * generate Ethernet address (0x00ff01xxxxxx) */ *(u_int16_t *) sc->enaddr = htons(0x00ff); if (flags.mac_addr) { *(u_int32_t *) (sc->enaddr + 2) = htonl(flags.mac_addr | 0x01000000); } else { *(u_char *) (sc->enaddr + 2) = 0x01; read_random(sc->enaddr + 3, 3); } } #ifdef SBNI_DUAL_COMPOUND void sbni_add(struct sbni_softc *sc) { mtx_lock(&headlist_lock); sc->link = sbni_headlist; sbni_headlist = sc; mtx_unlock(&headlist_lock); } struct sbni_softc * connect_to_master(struct sbni_softc *sc) { struct sbni_softc *p, *p_prev; mtx_lock(&headlist_lock); for (p = sbni_headlist, p_prev = NULL; p; p_prev = p, p = p->link) { if (rman_get_start(p->io_res) == rman_get_start(sc->io_res) + 4 || rman_get_start(p->io_res) == rman_get_start(sc->io_res) - 4) { p->slave_sc = sc; if (p_prev) p_prev->link = p->link; else sbni_headlist = p->link; mtx_unlock(&headlist_lock); return p; } } mtx_unlock(&headlist_lock); return (NULL); } #endif /* SBNI_DUAL_COMPOUND */ /* Receive level auto-selection */ static void change_level(struct sbni_softc *sc) { if (sc->delta_rxl == 0) /* do not auto-negotiate RxL */ return; if (sc->cur_rxl_index == 0) sc->delta_rxl = 1; else if (sc->cur_rxl_index == 15) sc->delta_rxl = -1; else if (sc->cur_rxl_rcvd < sc->prev_rxl_rcvd) sc->delta_rxl = -sc->delta_rxl; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index += sc->delta_rxl]; sbni_inb(sc, CSR0); /* it needed for PCI cards */ sbni_outb(sc, CSR1, *(u_char *)&sc->csr1); sc->prev_rxl_rcvd = sc->cur_rxl_rcvd; sc->cur_rxl_rcvd = 0; } static void timeout_change_level(struct sbni_softc *sc) { sc->cur_rxl_index = timeout_rxl_tab[sc->timeout_rxl]; if (++sc->timeout_rxl >= 4) sc->timeout_rxl = 0; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index]; sbni_inb(sc, CSR0); sbni_outb(sc, CSR1, *(u_char *)&sc->csr1); sc->prev_rxl_rcvd = sc->cur_rxl_rcvd; sc->cur_rxl_rcvd = 0; } /* -------------------------------------------------------------------------- */ /* * Process an ioctl request. This code needs some work - it looks * pretty ugly. */ static int sbni_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct sbni_softc *sc; struct ifreq *ifr; struct thread *td; struct sbni_in_stats *in_stats; struct sbni_flags flags; int error; sc = ifp->if_softc; ifr = (struct ifreq *)data; td = curthread; error = 0; switch (command) { case SIOCSIFFLAGS: /* * If the interface is marked up and stopped, then start it. * If it is marked down and running, then stop it. */ SBNI_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) sbni_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { sbni_stop(sc); } } SBNI_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: /* * Multicast list has changed; set the hardware filter * accordingly. */ error = 0; /* if (ifr == NULL) error = EAFNOSUPPORT; */ break; /* * SBNI specific ioctl */ case SIOCGHWFLAGS: /* get flags */ SBNI_LOCK(sc); bcopy((caddr_t)IF_LLADDR(sc->ifp)+3, (caddr_t) &flags, 3); flags.rxl = sc->cur_rxl_index; flags.rate = sc->csr1.rate; flags.fixed_rxl = (sc->delta_rxl == 0); flags.fixed_rate = 1; SBNI_UNLOCK(sc); bcopy(&flags, &ifr->ifr_ifru, sizeof(flags)); break; case SIOCGINSTATS: in_stats = malloc(sizeof(struct sbni_in_stats), M_DEVBUF, M_WAITOK); SBNI_LOCK(sc); bcopy(&sc->in_stats, in_stats, sizeof(struct sbni_in_stats)); SBNI_UNLOCK(sc); error = copyout(in_stats, ifr_data_get_ptr(ifr), sizeof(struct sbni_in_stats)); free(in_stats, M_DEVBUF); break; case SIOCSHWFLAGS: /* set flags */ /* root only */ error = priv_check(td, PRIV_DRIVER); if (error) break; bcopy(&ifr->ifr_ifru, &flags, sizeof(flags)); SBNI_LOCK(sc); if (flags.fixed_rxl) { sc->delta_rxl = 0; sc->cur_rxl_index = flags.rxl; } else { sc->delta_rxl = DEF_RXL_DELTA; sc->cur_rxl_index = DEF_RXL; } sc->csr1.rxl = rxl_tab[sc->cur_rxl_index]; sc->csr1.rate = flags.fixed_rate ? flags.rate : DEFAULT_RATE; if (flags.mac_addr) bcopy((caddr_t) &flags, (caddr_t) IF_LLADDR(sc->ifp)+3, 3); /* Don't be afraid... */ sbni_outb(sc, CSR1, *(char*)(&sc->csr1) | PR_RES); SBNI_UNLOCK(sc); break; case SIOCRINSTATS: SBNI_LOCK(sc); if (!(error = priv_check(td, PRIV_DRIVER))) /* root only */ bzero(&sc->in_stats, sizeof(struct sbni_in_stats)); SBNI_UNLOCK(sc); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } /* -------------------------------------------------------------------------- */ static u_int32_t calc_crc32(u_int32_t crc, caddr_t p, u_int len) { while (len--) crc = CRC32(*p++, crc); return (crc); } static u_int32_t crc32tab[] __aligned(8) = { 0xD202EF8D, 0xA505DF1B, 0x3C0C8EA1, 0x4B0BBE37, 0xD56F2B94, 0xA2681B02, 0x3B614AB8, 0x4C667A2E, 0xDCD967BF, 0xABDE5729, 0x32D70693, 0x45D03605, 0xDBB4A3A6, 0xACB39330, 0x35BAC28A, 0x42BDF21C, 0xCFB5FFE9, 0xB8B2CF7F, 0x21BB9EC5, 0x56BCAE53, 0xC8D83BF0, 0xBFDF0B66, 0x26D65ADC, 0x51D16A4A, 0xC16E77DB, 0xB669474D, 0x2F6016F7, 0x58672661, 0xC603B3C2, 0xB1048354, 0x280DD2EE, 0x5F0AE278, 0xE96CCF45, 0x9E6BFFD3, 0x0762AE69, 0x70659EFF, 0xEE010B5C, 0x99063BCA, 0x000F6A70, 0x77085AE6, 0xE7B74777, 0x90B077E1, 0x09B9265B, 0x7EBE16CD, 0xE0DA836E, 0x97DDB3F8, 0x0ED4E242, 0x79D3D2D4, 0xF4DBDF21, 0x83DCEFB7, 0x1AD5BE0D, 0x6DD28E9B, 0xF3B61B38, 0x84B12BAE, 0x1DB87A14, 0x6ABF4A82, 0xFA005713, 0x8D076785, 0x140E363F, 0x630906A9, 0xFD6D930A, 0x8A6AA39C, 0x1363F226, 0x6464C2B0, 0xA4DEAE1D, 0xD3D99E8B, 0x4AD0CF31, 0x3DD7FFA7, 0xA3B36A04, 0xD4B45A92, 0x4DBD0B28, 0x3ABA3BBE, 0xAA05262F, 0xDD0216B9, 0x440B4703, 0x330C7795, 0xAD68E236, 0xDA6FD2A0, 0x4366831A, 0x3461B38C, 0xB969BE79, 0xCE6E8EEF, 0x5767DF55, 0x2060EFC3, 0xBE047A60, 0xC9034AF6, 0x500A1B4C, 0x270D2BDA, 0xB7B2364B, 0xC0B506DD, 0x59BC5767, 0x2EBB67F1, 0xB0DFF252, 0xC7D8C2C4, 0x5ED1937E, 0x29D6A3E8, 0x9FB08ED5, 0xE8B7BE43, 0x71BEEFF9, 0x06B9DF6F, 0x98DD4ACC, 0xEFDA7A5A, 0x76D32BE0, 0x01D41B76, 0x916B06E7, 0xE66C3671, 0x7F6567CB, 0x0862575D, 0x9606C2FE, 0xE101F268, 0x7808A3D2, 0x0F0F9344, 0x82079EB1, 0xF500AE27, 0x6C09FF9D, 0x1B0ECF0B, 0x856A5AA8, 0xF26D6A3E, 0x6B643B84, 0x1C630B12, 0x8CDC1683, 0xFBDB2615, 0x62D277AF, 0x15D54739, 0x8BB1D29A, 0xFCB6E20C, 0x65BFB3B6, 0x12B88320, 0x3FBA6CAD, 0x48BD5C3B, 0xD1B40D81, 0xA6B33D17, 0x38D7A8B4, 0x4FD09822, 0xD6D9C998, 0xA1DEF90E, 0x3161E49F, 0x4666D409, 0xDF6F85B3, 0xA868B525, 0x360C2086, 0x410B1010, 0xD80241AA, 0xAF05713C, 0x220D7CC9, 0x550A4C5F, 0xCC031DE5, 0xBB042D73, 0x2560B8D0, 0x52678846, 0xCB6ED9FC, 0xBC69E96A, 0x2CD6F4FB, 0x5BD1C46D, 0xC2D895D7, 0xB5DFA541, 0x2BBB30E2, 0x5CBC0074, 0xC5B551CE, 0xB2B26158, 0x04D44C65, 0x73D37CF3, 0xEADA2D49, 0x9DDD1DDF, 0x03B9887C, 0x74BEB8EA, 0xEDB7E950, 0x9AB0D9C6, 0x0A0FC457, 0x7D08F4C1, 0xE401A57B, 0x930695ED, 0x0D62004E, 0x7A6530D8, 0xE36C6162, 0x946B51F4, 0x19635C01, 0x6E646C97, 0xF76D3D2D, 0x806A0DBB, 0x1E0E9818, 0x6909A88E, 0xF000F934, 0x8707C9A2, 0x17B8D433, 0x60BFE4A5, 0xF9B6B51F, 0x8EB18589, 0x10D5102A, 0x67D220BC, 0xFEDB7106, 0x89DC4190, 0x49662D3D, 0x3E611DAB, 0xA7684C11, 0xD06F7C87, 0x4E0BE924, 0x390CD9B2, 0xA0058808, 0xD702B89E, 0x47BDA50F, 0x30BA9599, 0xA9B3C423, 0xDEB4F4B5, 0x40D06116, 0x37D75180, 0xAEDE003A, 0xD9D930AC, 0x54D13D59, 0x23D60DCF, 0xBADF5C75, 0xCDD86CE3, 0x53BCF940, 0x24BBC9D6, 0xBDB2986C, 0xCAB5A8FA, 0x5A0AB56B, 0x2D0D85FD, 0xB404D447, 0xC303E4D1, 0x5D677172, 0x2A6041E4, 0xB369105E, 0xC46E20C8, 0x72080DF5, 0x050F3D63, 0x9C066CD9, 0xEB015C4F, 0x7565C9EC, 0x0262F97A, 0x9B6BA8C0, 0xEC6C9856, 0x7CD385C7, 0x0BD4B551, 0x92DDE4EB, 0xE5DAD47D, 0x7BBE41DE, 0x0CB97148, 0x95B020F2, 0xE2B71064, 0x6FBF1D91, 0x18B82D07, 0x81B17CBD, 0xF6B64C2B, 0x68D2D988, 0x1FD5E91E, 0x86DCB8A4, 0xF1DB8832, 0x616495A3, 0x1663A535, 0x8F6AF48F, 0xF86DC419, 0x660951BA, 0x110E612C, 0x88073096, 0xFF000000 }; Index: head/sys/dev/xen/netback/netback.c =================================================================== --- head/sys/dev/xen/netback/netback.c (revision 357009) +++ head/sys/dev/xen/netback/netback.c (revision 357010) @@ -1,2515 +1,2515 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009-2011 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) * Alan Somers (Spectra Logic Corporation) * John Suykerbuyk (Spectra Logic Corporation) */ #include __FBSDID("$FreeBSD$"); /** * \file netback.c * * \brief Device driver supporting the vending of network access * from this FreeBSD domain to other domains. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if __FreeBSD_version >= 700000 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include /*--------------------------- Compile-time Tunables --------------------------*/ /*---------------------------------- Macros ----------------------------------*/ /** * Custom malloc type for all driver allocations. */ static MALLOC_DEFINE(M_XENNETBACK, "xnb", "Xen Net Back Driver Data"); #define XNB_SG 1 /* netback driver supports feature-sg */ #define XNB_GSO_TCPV4 0 /* netback driver supports feature-gso-tcpv4 */ #define XNB_RX_COPY 1 /* netback driver supports feature-rx-copy */ #define XNB_RX_FLIP 0 /* netback driver does not support feature-rx-flip */ #undef XNB_DEBUG #define XNB_DEBUG /* hardcode on during development */ #ifdef XNB_DEBUG #define DPRINTF(fmt, args...) \ printf("xnb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) #else #define DPRINTF(fmt, args...) do {} while (0) #endif /* Default length for stack-allocated grant tables */ #define GNTTAB_LEN (64) /* Features supported by all backends. TSO and LRO can be negotiated */ #define XNB_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) /** * Two argument version of the standard macro. Second argument is a tentative * value of req_cons */ #define RING_HAS_UNCONSUMED_REQUESTS_2(_r, cons) ({ \ unsigned int req = (_r)->sring->req_prod - cons; \ unsigned int rsp = RING_SIZE(_r) - \ (cons - (_r)->rsp_prod_pvt); \ req < rsp ? req : rsp; \ }) #define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT) #define virt_to_offset(x) ((x) & (PAGE_SIZE - 1)) /** * Predefined array type of grant table copy descriptors. Used to pass around * statically allocated memory structures. */ typedef struct gnttab_copy gnttab_copy_table[GNTTAB_LEN]; /*--------------------------- Forward Declarations ---------------------------*/ struct xnb_softc; struct xnb_pkt; static void xnb_attach_failed(struct xnb_softc *xnb, int err, const char *fmt, ...) __printflike(3,4); static int xnb_shutdown(struct xnb_softc *xnb); static int create_netdev(device_t dev); static int xnb_detach(device_t dev); static int xnb_ifmedia_upd(struct ifnet *ifp); static void xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); static void xnb_intr(void *arg); static int xnb_send(netif_rx_back_ring_t *rxb, domid_t otherend, const struct mbuf *mbufc, gnttab_copy_table gnttab); static int xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend, struct mbuf **mbufc, struct ifnet *ifnet, gnttab_copy_table gnttab); static int xnb_ring2pkt(struct xnb_pkt *pkt, const netif_tx_back_ring_t *tx_ring, RING_IDX start); static void xnb_txpkt2rsp(const struct xnb_pkt *pkt, netif_tx_back_ring_t *ring, int error); static struct mbuf *xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp); static int xnb_txpkt2gnttab(const struct xnb_pkt *pkt, struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_tx_back_ring_t *txb, domid_t otherend_id); static void xnb_update_mbufc(struct mbuf *mbufc, const gnttab_copy_table gnttab, int n_entries); static int xnb_mbufc2pkt(const struct mbuf *mbufc, struct xnb_pkt *pkt, RING_IDX start, int space); static int xnb_rxpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_rx_back_ring_t *rxb, domid_t otherend_id); static int xnb_rxpkt2rsp(const struct xnb_pkt *pkt, const gnttab_copy_table gnttab, int n_entries, netif_rx_back_ring_t *ring); static void xnb_stop(struct xnb_softc*); static int xnb_ioctl(struct ifnet*, u_long, caddr_t); static void xnb_start_locked(struct ifnet*); static void xnb_start(struct ifnet*); static void xnb_ifinit_locked(struct xnb_softc*); static void xnb_ifinit(void*); #ifdef XNB_DEBUG static int xnb_unit_test_main(SYSCTL_HANDLER_ARGS); static int xnb_dump_rings(SYSCTL_HANDLER_ARGS); #endif #if defined(INET) || defined(INET6) static void xnb_add_mbuf_cksum(struct mbuf *mbufc); #endif /*------------------------------ Data Structures -----------------------------*/ /** * Representation of a xennet packet. Simplified version of a packet as * stored in the Xen tx ring. Applicable to both RX and TX packets */ struct xnb_pkt{ /** * Array index of the first data-bearing (eg, not extra info) entry * for this packet */ RING_IDX car; /** * Array index of the second data-bearing entry for this packet. * Invalid if the packet has only one data-bearing entry. If the * packet has more than two data-bearing entries, then the second * through the last will be sequential modulo the ring size */ RING_IDX cdr; /** * Optional extra info. Only valid if flags contains * NETTXF_extra_info. Note that extra.type will always be * XEN_NETIF_EXTRA_TYPE_GSO. Currently, no known netfront or netback * driver will ever set XEN_NETIF_EXTRA_TYPE_MCAST_* */ netif_extra_info_t extra; /** Size of entire packet in bytes. */ uint16_t size; /** The size of the first entry's data in bytes */ uint16_t car_size; /** * Either NETTXF_ or NETRXF_ flags. Note that the flag values are * not the same for TX and RX packets */ uint16_t flags; /** * The number of valid data-bearing entries (either netif_tx_request's * or netif_rx_response's) in the packet. If this is 0, it means the * entire packet is invalid. */ uint16_t list_len; /** There was an error processing the packet */ uint8_t error; }; /** xnb_pkt method: initialize it */ static inline void xnb_pkt_initialize(struct xnb_pkt *pxnb) { bzero(pxnb, sizeof(*pxnb)); } /** xnb_pkt method: mark the packet as valid */ static inline void xnb_pkt_validate(struct xnb_pkt *pxnb) { pxnb->error = 0; }; /** xnb_pkt method: mark the packet as invalid */ static inline void xnb_pkt_invalidate(struct xnb_pkt *pxnb) { pxnb->error = 1; }; /** xnb_pkt method: Check whether the packet is valid */ static inline int xnb_pkt_is_valid(const struct xnb_pkt *pxnb) { return (! pxnb->error); } #ifdef XNB_DEBUG /** xnb_pkt method: print the packet's contents in human-readable format*/ static void __unused xnb_dump_pkt(const struct xnb_pkt *pkt) { if (pkt == NULL) { DPRINTF("Was passed a null pointer.\n"); return; } DPRINTF("pkt address= %p\n", pkt); DPRINTF("pkt->size=%d\n", pkt->size); DPRINTF("pkt->car_size=%d\n", pkt->car_size); DPRINTF("pkt->flags=0x%04x\n", pkt->flags); DPRINTF("pkt->list_len=%d\n", pkt->list_len); /* DPRINTF("pkt->extra"); TODO */ DPRINTF("pkt->car=%d\n", pkt->car); DPRINTF("pkt->cdr=%d\n", pkt->cdr); DPRINTF("pkt->error=%d\n", pkt->error); } #endif /* XNB_DEBUG */ static void xnb_dump_txreq(RING_IDX idx, const struct netif_tx_request *txreq) { if (txreq != NULL) { DPRINTF("netif_tx_request index =%u\n", idx); DPRINTF("netif_tx_request.gref =%u\n", txreq->gref); DPRINTF("netif_tx_request.offset=%hu\n", txreq->offset); DPRINTF("netif_tx_request.flags =%hu\n", txreq->flags); DPRINTF("netif_tx_request.id =%hu\n", txreq->id); DPRINTF("netif_tx_request.size =%hu\n", txreq->size); } } /** * \brief Configuration data for a shared memory request ring * used to communicate with the front-end client of this * this driver. */ struct xnb_ring_config { /** * Runtime structures for ring access. Unfortunately, TX and RX rings * use different data structures, and that cannot be changed since it * is part of the interdomain protocol. */ union{ netif_rx_back_ring_t rx_ring; netif_tx_back_ring_t tx_ring; } back_ring; /** * The device bus address returned by the hypervisor when * mapping the ring and required to unmap it when a connection * is torn down. */ uint64_t bus_addr; /** The pseudo-physical address where ring memory is mapped.*/ uint64_t gnt_addr; /** KVA address where ring memory is mapped. */ vm_offset_t va; /** * Grant table handles, one per-ring page, returned by the * hyperpervisor upon mapping of the ring and required to * unmap it when a connection is torn down. */ grant_handle_t handle; /** The number of ring pages mapped for the current connection. */ unsigned ring_pages; /** * The grant references, one per-ring page, supplied by the * front-end, allowing us to reference the ring pages in the * front-end's domain and to map these pages into our own domain. */ grant_ref_t ring_ref; }; /** * Per-instance connection state flags. */ typedef enum { /** Communication with the front-end has been established. */ XNBF_RING_CONNECTED = 0x01, /** * Front-end requests exist in the ring and are waiting for * xnb_xen_req objects to free up. */ XNBF_RESOURCE_SHORTAGE = 0x02, /** Connection teardown has started. */ XNBF_SHUTDOWN = 0x04, /** A thread is already performing shutdown processing. */ XNBF_IN_SHUTDOWN = 0x08 } xnb_flag_t; /** * Types of rings. Used for array indices and to identify a ring's control * data structure type */ typedef enum{ XNB_RING_TYPE_TX = 0, /* ID of TX rings, used for array indices */ XNB_RING_TYPE_RX = 1, /* ID of RX rings, used for array indices */ XNB_NUM_RING_TYPES } xnb_ring_type_t; /** * Per-instance configuration data. */ struct xnb_softc { /** NewBus device corresponding to this instance. */ device_t dev; /* Media related fields */ /** Generic network media state */ struct ifmedia sc_media; /** Media carrier info */ struct ifnet *xnb_ifp; /** Our own private carrier state */ unsigned carrier; /** Device MAC Address */ uint8_t mac[ETHER_ADDR_LEN]; /* Xen related fields */ /** * \brief The netif protocol abi in effect. * * There are situations where the back and front ends can * have a different, native abi (e.g. intel x86_64 and * 32bit x86 domains on the same machine). The back-end * always accommodates the front-end's native abi. That * value is pulled from the XenStore and recorded here. */ int abi; /** * Name of the bridge to which this VIF is connected, if any * This field is dynamically allocated by xenbus and must be free()ed * when no longer needed */ char *bridge; /** The interrupt driven even channel used to signal ring events. */ evtchn_port_t evtchn; /** Xen device handle.*/ long handle; /** Handle to the communication ring event channel. */ xen_intr_handle_t xen_intr_handle; /** * \brief Cached value of the front-end's domain id. * * This value is used at once for each mapped page in * a transaction. We cache it to avoid incuring the * cost of an ivar access every time this is needed. */ domid_t otherend_id; /** * Undocumented frontend feature. Has something to do with * scatter/gather IO */ uint8_t can_sg; /** Undocumented frontend feature */ uint8_t gso; /** Undocumented frontend feature */ uint8_t gso_prefix; /** Can checksum TCP/UDP over IPv4 */ uint8_t ip_csum; /* Implementation related fields */ /** * Preallocated grant table copy descriptor for RX operations. * Access must be protected by rx_lock */ gnttab_copy_table rx_gnttab; /** * Preallocated grant table copy descriptor for TX operations. * Access must be protected by tx_lock */ gnttab_copy_table tx_gnttab; /** * Resource representing allocated physical address space * associated with our per-instance kva region. */ struct resource *pseudo_phys_res; /** Resource id for allocated physical address space. */ int pseudo_phys_res_id; /** Ring mapping and interrupt configuration data. */ struct xnb_ring_config ring_configs[XNB_NUM_RING_TYPES]; /** * Global pool of kva used for mapping remote domain ring * and I/O transaction data. */ vm_offset_t kva; /** Pseudo-physical address corresponding to kva. */ uint64_t gnt_base_addr; /** Various configuration and state bit flags. */ xnb_flag_t flags; /** Mutex protecting per-instance data in the receive path. */ struct mtx rx_lock; /** Mutex protecting per-instance data in the softc structure. */ struct mtx sc_lock; /** Mutex protecting per-instance data in the transmit path. */ struct mtx tx_lock; /** The size of the global kva pool. */ int kva_size; /** Name of the interface */ char if_name[IFNAMSIZ]; }; /*---------------------------- Debugging functions ---------------------------*/ #ifdef XNB_DEBUG static void __unused xnb_dump_gnttab_copy(const struct gnttab_copy *entry) { if (entry == NULL) { printf("NULL grant table pointer\n"); return; } if (entry->flags & GNTCOPY_dest_gref) printf("gnttab dest ref=\t%u\n", entry->dest.u.ref); else printf("gnttab dest gmfn=\t%"PRI_xen_pfn"\n", entry->dest.u.gmfn); printf("gnttab dest offset=\t%hu\n", entry->dest.offset); printf("gnttab dest domid=\t%hu\n", entry->dest.domid); if (entry->flags & GNTCOPY_source_gref) printf("gnttab source ref=\t%u\n", entry->source.u.ref); else printf("gnttab source gmfn=\t%"PRI_xen_pfn"\n", entry->source.u.gmfn); printf("gnttab source offset=\t%hu\n", entry->source.offset); printf("gnttab source domid=\t%hu\n", entry->source.domid); printf("gnttab len=\t%hu\n", entry->len); printf("gnttab flags=\t%hu\n", entry->flags); printf("gnttab status=\t%hd\n", entry->status); } static int xnb_dump_rings(SYSCTL_HANDLER_ARGS) { static char results[720]; struct xnb_softc const* xnb = (struct xnb_softc*)arg1; netif_rx_back_ring_t const* rxb = &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring; netif_tx_back_ring_t const* txb = &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring; /* empty the result strings */ results[0] = 0; if ( !txb || !txb->sring || !rxb || !rxb->sring ) return (SYSCTL_OUT(req, results, strnlen(results, 720))); snprintf(results, 720, "\n\t%35s %18s\n" /* TX, RX */ "\t%16s %18d %18d\n" /* req_cons */ "\t%16s %18d %18d\n" /* nr_ents */ "\t%16s %18d %18d\n" /* rsp_prod_pvt */ "\t%16s %18p %18p\n" /* sring */ "\t%16s %18d %18d\n" /* req_prod */ "\t%16s %18d %18d\n" /* req_event */ "\t%16s %18d %18d\n" /* rsp_prod */ "\t%16s %18d %18d\n", /* rsp_event */ "TX", "RX", "req_cons", txb->req_cons, rxb->req_cons, "nr_ents", txb->nr_ents, rxb->nr_ents, "rsp_prod_pvt", txb->rsp_prod_pvt, rxb->rsp_prod_pvt, "sring", txb->sring, rxb->sring, "sring->req_prod", txb->sring->req_prod, rxb->sring->req_prod, "sring->req_event", txb->sring->req_event, rxb->sring->req_event, "sring->rsp_prod", txb->sring->rsp_prod, rxb->sring->rsp_prod, "sring->rsp_event", txb->sring->rsp_event, rxb->sring->rsp_event); return (SYSCTL_OUT(req, results, strnlen(results, 720))); } static void __unused xnb_dump_mbuf(const struct mbuf *m) { int len; uint8_t *d; if (m == NULL) return; printf("xnb_dump_mbuf:\n"); if (m->m_flags & M_PKTHDR) { printf(" flowid=%10d, csum_flags=%#8x, csum_data=%#8x, " "tso_segsz=%5hd\n", m->m_pkthdr.flowid, (int)m->m_pkthdr.csum_flags, m->m_pkthdr.csum_data, m->m_pkthdr.tso_segsz); printf(" rcvif=%16p, len=%19d\n", m->m_pkthdr.rcvif, m->m_pkthdr.len); } printf(" m_next=%16p, m_nextpk=%16p, m_data=%16p\n", m->m_next, m->m_nextpkt, m->m_data); printf(" m_len=%17d, m_flags=%#15x, m_type=%18u\n", m->m_len, m->m_flags, m->m_type); len = m->m_len; d = mtod(m, uint8_t*); while (len > 0) { int i; printf(" "); for (i = 0; (i < 16) && (len > 0); i++, len--) { printf("%02hhx ", *(d++)); } printf("\n"); } } #endif /* XNB_DEBUG */ /*------------------------ Inter-Domain Communication ------------------------*/ /** * Free dynamically allocated KVA or pseudo-physical address allocations. * * \param xnb Per-instance xnb configuration structure. */ static void xnb_free_communication_mem(struct xnb_softc *xnb) { if (xnb->kva != 0) { if (xnb->pseudo_phys_res != NULL) { xenmem_free(xnb->dev, xnb->pseudo_phys_res_id, xnb->pseudo_phys_res); xnb->pseudo_phys_res = NULL; } } xnb->kva = 0; xnb->gnt_base_addr = 0; } /** * Cleanup all inter-domain communication mechanisms. * * \param xnb Per-instance xnb configuration structure. */ static int xnb_disconnect(struct xnb_softc *xnb) { struct gnttab_unmap_grant_ref gnts[XNB_NUM_RING_TYPES]; int error; int i; if (xnb->xen_intr_handle != NULL) xen_intr_unbind(&xnb->xen_intr_handle); /* * We may still have another thread currently processing requests. We * must acquire the rx and tx locks to make sure those threads are done, * but we can release those locks as soon as we acquire them, because no * more interrupts will be arriving. */ mtx_lock(&xnb->tx_lock); mtx_unlock(&xnb->tx_lock); mtx_lock(&xnb->rx_lock); mtx_unlock(&xnb->rx_lock); mtx_lock(&xnb->sc_lock); /* Free malloc'd softc member variables */ if (xnb->bridge != NULL) { free(xnb->bridge, M_XENSTORE); xnb->bridge = NULL; } /* All request processing has stopped, so unmap the rings */ for (i=0; i < XNB_NUM_RING_TYPES; i++) { gnts[i].host_addr = xnb->ring_configs[i].gnt_addr; gnts[i].dev_bus_addr = xnb->ring_configs[i].bus_addr; gnts[i].handle = xnb->ring_configs[i].handle; } error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, gnts, XNB_NUM_RING_TYPES); KASSERT(error == 0, ("Grant table unmap op failed (%d)", error)); xnb_free_communication_mem(xnb); /* * Zero the ring config structs because the pointers, handles, and * grant refs contained therein are no longer valid. */ bzero(&xnb->ring_configs[XNB_RING_TYPE_TX], sizeof(struct xnb_ring_config)); bzero(&xnb->ring_configs[XNB_RING_TYPE_RX], sizeof(struct xnb_ring_config)); xnb->flags &= ~XNBF_RING_CONNECTED; mtx_unlock(&xnb->sc_lock); return (0); } /** * Map a single shared memory ring into domain local address space and * initialize its control structure * * \param xnb Per-instance xnb configuration structure * \param ring_type Array index of this ring in the xnb's array of rings * \return An errno */ static int xnb_connect_ring(struct xnb_softc *xnb, xnb_ring_type_t ring_type) { struct gnttab_map_grant_ref gnt; struct xnb_ring_config *ring = &xnb->ring_configs[ring_type]; int error; /* TX ring type = 0, RX =1 */ ring->va = xnb->kva + ring_type * PAGE_SIZE; ring->gnt_addr = xnb->gnt_base_addr + ring_type * PAGE_SIZE; gnt.host_addr = ring->gnt_addr; gnt.flags = GNTMAP_host_map; gnt.ref = ring->ring_ref; gnt.dom = xnb->otherend_id; error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &gnt, 1); if (error != 0) panic("netback: Ring page grant table op failed (%d)", error); if (gnt.status != 0) { ring->va = 0; error = EACCES; xenbus_dev_fatal(xnb->dev, error, "Ring shared page mapping failed. " "Status %d.", gnt.status); } else { ring->handle = gnt.handle; ring->bus_addr = gnt.dev_bus_addr; if (ring_type == XNB_RING_TYPE_TX) { BACK_RING_INIT(&ring->back_ring.tx_ring, (netif_tx_sring_t*)ring->va, ring->ring_pages * PAGE_SIZE); } else if (ring_type == XNB_RING_TYPE_RX) { BACK_RING_INIT(&ring->back_ring.rx_ring, (netif_rx_sring_t*)ring->va, ring->ring_pages * PAGE_SIZE); } else { xenbus_dev_fatal(xnb->dev, error, "Unknown ring type %d", ring_type); } } return error; } /** * Setup the shared memory rings and bind an interrupt to the event channel * used to notify us of ring changes. * * \param xnb Per-instance xnb configuration structure. */ static int xnb_connect_comms(struct xnb_softc *xnb) { int error; xnb_ring_type_t i; if ((xnb->flags & XNBF_RING_CONNECTED) != 0) return (0); /* * Kva for our rings are at the tail of the region of kva allocated * by xnb_alloc_communication_mem(). */ for (i=0; i < XNB_NUM_RING_TYPES; i++) { error = xnb_connect_ring(xnb, i); if (error != 0) return error; } xnb->flags |= XNBF_RING_CONNECTED; error = xen_intr_bind_remote_port(xnb->dev, xnb->otherend_id, xnb->evtchn, /*filter*/NULL, xnb_intr, /*arg*/xnb, - INTR_TYPE_BIO | INTR_MPSAFE, + INTR_TYPE_NET | INTR_MPSAFE, &xnb->xen_intr_handle); if (error != 0) { (void)xnb_disconnect(xnb); xenbus_dev_fatal(xnb->dev, error, "binding event channel"); return (error); } DPRINTF("rings connected!\n"); return (0); } /** * Size KVA and pseudo-physical address allocations based on negotiated * values for the size and number of I/O requests, and the size of our * communication ring. * * \param xnb Per-instance xnb configuration structure. * * These address spaces are used to dynamically map pages in the * front-end's domain into our own. */ static int xnb_alloc_communication_mem(struct xnb_softc *xnb) { xnb_ring_type_t i; xnb->kva_size = 0; for (i=0; i < XNB_NUM_RING_TYPES; i++) { xnb->kva_size += xnb->ring_configs[i].ring_pages * PAGE_SIZE; } /* * Reserve a range of pseudo physical memory that we can map * into kva. These pages will only be backed by machine * pages ("real memory") during the lifetime of front-end requests * via grant table operations. We will map the netif tx and rx rings * into this space. */ xnb->pseudo_phys_res_id = 0; xnb->pseudo_phys_res = xenmem_alloc(xnb->dev, &xnb->pseudo_phys_res_id, xnb->kva_size); if (xnb->pseudo_phys_res == NULL) { xnb->kva = 0; return (ENOMEM); } xnb->kva = (vm_offset_t)rman_get_virtual(xnb->pseudo_phys_res); xnb->gnt_base_addr = rman_get_start(xnb->pseudo_phys_res); return (0); } /** * Collect information from the XenStore related to our device and its frontend * * \param xnb Per-instance xnb configuration structure. */ static int xnb_collect_xenstore_info(struct xnb_softc *xnb) { /** * \todo Linux collects the following info. We should collect most * of this, too: * "feature-rx-notify" */ const char *otherend_path; const char *our_path; int err; unsigned int rx_copy, bridge_len; uint8_t no_csum_offload; otherend_path = xenbus_get_otherend_path(xnb->dev); our_path = xenbus_get_node(xnb->dev); /* Collect the critical communication parameters */ err = xs_gather(XST_NIL, otherend_path, "tx-ring-ref", "%l" PRIu32, &xnb->ring_configs[XNB_RING_TYPE_TX].ring_ref, "rx-ring-ref", "%l" PRIu32, &xnb->ring_configs[XNB_RING_TYPE_RX].ring_ref, "event-channel", "%" PRIu32, &xnb->evtchn, NULL); if (err != 0) { xenbus_dev_fatal(xnb->dev, err, "Unable to retrieve ring information from " "frontend %s. Unable to connect.", otherend_path); return (err); } /* Collect the handle from xenstore */ err = xs_scanf(XST_NIL, our_path, "handle", NULL, "%li", &xnb->handle); if (err != 0) { xenbus_dev_fatal(xnb->dev, err, "Error reading handle from frontend %s. " "Unable to connect.", otherend_path); } /* * Collect the bridgename, if any. We do not need bridge_len; we just * throw it away */ err = xs_read(XST_NIL, our_path, "bridge", &bridge_len, (void**)&xnb->bridge); if (err != 0) xnb->bridge = NULL; /* * Does the frontend request that we use rx copy? If not, return an * error because this driver only supports rx copy. */ err = xs_scanf(XST_NIL, otherend_path, "request-rx-copy", NULL, "%" PRIu32, &rx_copy); if (err == ENOENT) { err = 0; rx_copy = 0; } if (err < 0) { xenbus_dev_fatal(xnb->dev, err, "reading %s/request-rx-copy", otherend_path); return err; } /** * \todo: figure out the exact meaning of this feature, and when * the frontend will set it to true. It should be set to true * at some point */ /* if (!rx_copy)*/ /* return EOPNOTSUPP;*/ /** \todo Collect the rx notify feature */ /* Collect the feature-sg. */ if (xs_scanf(XST_NIL, otherend_path, "feature-sg", NULL, "%hhu", &xnb->can_sg) < 0) xnb->can_sg = 0; /* Collect remaining frontend features */ if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4", NULL, "%hhu", &xnb->gso) < 0) xnb->gso = 0; if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4-prefix", NULL, "%hhu", &xnb->gso_prefix) < 0) xnb->gso_prefix = 0; if (xs_scanf(XST_NIL, otherend_path, "feature-no-csum-offload", NULL, "%hhu", &no_csum_offload) < 0) no_csum_offload = 0; xnb->ip_csum = (no_csum_offload == 0); return (0); } /** * Supply information about the physical device to the frontend * via XenBus. * * \param xnb Per-instance xnb configuration structure. */ static int xnb_publish_backend_info(struct xnb_softc *xnb) { struct xs_transaction xst; const char *our_path; int error; our_path = xenbus_get_node(xnb->dev); do { error = xs_transaction_start(&xst); if (error != 0) { xenbus_dev_fatal(xnb->dev, error, "Error publishing backend info " "(start transaction)"); break; } error = xs_printf(xst, our_path, "feature-sg", "%d", XNB_SG); if (error != 0) break; error = xs_printf(xst, our_path, "feature-gso-tcpv4", "%d", XNB_GSO_TCPV4); if (error != 0) break; error = xs_printf(xst, our_path, "feature-rx-copy", "%d", XNB_RX_COPY); if (error != 0) break; error = xs_printf(xst, our_path, "feature-rx-flip", "%d", XNB_RX_FLIP); if (error != 0) break; error = xs_transaction_end(xst, 0); if (error != 0 && error != EAGAIN) { xenbus_dev_fatal(xnb->dev, error, "ending transaction"); break; } } while (error == EAGAIN); return (error); } /** * Connect to our netfront peer now that it has completed publishing * its configuration into the XenStore. * * \param xnb Per-instance xnb configuration structure. */ static void xnb_connect(struct xnb_softc *xnb) { int error; if (xenbus_get_state(xnb->dev) == XenbusStateConnected) return; if (xnb_collect_xenstore_info(xnb) != 0) return; xnb->flags &= ~XNBF_SHUTDOWN; /* Read front end configuration. */ /* Allocate resources whose size depends on front-end configuration. */ error = xnb_alloc_communication_mem(xnb); if (error != 0) { xenbus_dev_fatal(xnb->dev, error, "Unable to allocate communication memory"); return; } /* * Connect communication channel. */ error = xnb_connect_comms(xnb); if (error != 0) { /* Specific errors are reported by xnb_connect_comms(). */ return; } xnb->carrier = 1; /* Ready for I/O. */ xenbus_set_state(xnb->dev, XenbusStateConnected); } /*-------------------------- Device Teardown Support -------------------------*/ /** * Perform device shutdown functions. * * \param xnb Per-instance xnb configuration structure. * * Mark this instance as shutting down, wait for any active requests * to drain, disconnect from the front-end, and notify any waiters (e.g. * a thread invoking our detach method) that detach can now proceed. */ static int xnb_shutdown(struct xnb_softc *xnb) { /* * Due to the need to drop our mutex during some * xenbus operations, it is possible for two threads * to attempt to close out shutdown processing at * the same time. Tell the caller that hits this * race to try back later. */ if ((xnb->flags & XNBF_IN_SHUTDOWN) != 0) return (EAGAIN); xnb->flags |= XNBF_SHUTDOWN; xnb->flags |= XNBF_IN_SHUTDOWN; mtx_unlock(&xnb->sc_lock); /* Free the network interface */ xnb->carrier = 0; if (xnb->xnb_ifp != NULL) { ether_ifdetach(xnb->xnb_ifp); if_free(xnb->xnb_ifp); xnb->xnb_ifp = NULL; } xnb_disconnect(xnb); if (xenbus_get_state(xnb->dev) < XenbusStateClosing) xenbus_set_state(xnb->dev, XenbusStateClosing); mtx_lock(&xnb->sc_lock); xnb->flags &= ~XNBF_IN_SHUTDOWN; /* Indicate to xnb_detach() that is it safe to proceed. */ wakeup(xnb); return (0); } /** * Report an attach time error to the console and Xen, and cleanup * this instance by forcing immediate detach processing. * * \param xnb Per-instance xnb configuration structure. * \param err Errno describing the error. * \param fmt Printf style format and arguments */ static void xnb_attach_failed(struct xnb_softc *xnb, int err, const char *fmt, ...) { va_list ap; va_list ap_hotplug; va_start(ap, fmt); va_copy(ap_hotplug, ap); xs_vprintf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-error", fmt, ap_hotplug); va_end(ap_hotplug); (void)xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-status", "error"); xenbus_dev_vfatal(xnb->dev, err, fmt, ap); va_end(ap); (void)xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "online", "0"); xnb_detach(xnb->dev); } /*---------------------------- NewBus Entrypoints ----------------------------*/ /** * Inspect a XenBus device and claim it if is of the appropriate type. * * \param dev NewBus device object representing a candidate XenBus device. * * \return 0 for success, errno codes for failure. */ static int xnb_probe(device_t dev) { if (!strcmp(xenbus_get_type(dev), "vif")) { DPRINTF("Claiming device %d, %s\n", device_get_unit(dev), devclass_get_name(device_get_devclass(dev))); device_set_desc(dev, "Backend Virtual Network Device"); device_quiet(dev); return (0); } return (ENXIO); } /** * Setup sysctl variables to control various Network Back parameters. * * \param xnb Xen Net Back softc. * */ static void xnb_setup_sysctl(struct xnb_softc *xnb) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; sysctl_ctx = device_get_sysctl_ctx(xnb->dev); if (sysctl_ctx == NULL) return; sysctl_tree = device_get_sysctl_tree(xnb->dev); if (sysctl_tree == NULL) return; #ifdef XNB_DEBUG SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "unit_test_results", CTLTYPE_STRING | CTLFLAG_RD, xnb, 0, xnb_unit_test_main, "A", "Results of builtin unit tests"); SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "dump_rings", CTLTYPE_STRING | CTLFLAG_RD, xnb, 0, xnb_dump_rings, "A", "Xennet Back Rings"); #endif /* XNB_DEBUG */ } /** * Create a network device. * @param handle device handle */ int create_netdev(device_t dev) { struct ifnet *ifp; struct xnb_softc *xnb; int err = 0; uint32_t handle; xnb = device_get_softc(dev); mtx_init(&xnb->sc_lock, "xnb_softc", "xen netback softc lock", MTX_DEF); mtx_init(&xnb->tx_lock, "xnb_tx", "xen netback tx lock", MTX_DEF); mtx_init(&xnb->rx_lock, "xnb_rx", "xen netback rx lock", MTX_DEF); xnb->dev = dev; ifmedia_init(&xnb->sc_media, 0, xnb_ifmedia_upd, xnb_ifmedia_sts); ifmedia_add(&xnb->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); ifmedia_set(&xnb->sc_media, IFM_ETHER|IFM_MANUAL); /* * Set the MAC address to a dummy value (00:00:00:00:00), * if the MAC address of the host-facing interface is set * to the same as the guest-facing one (the value found in * xenstore), the bridge would stop delivering packets to * us because it would see that the destination address of * the packet is the same as the interface, and so the bridge * would expect the packet has already been delivered locally * (and just drop it). */ bzero(&xnb->mac[0], sizeof(xnb->mac)); /* The interface will be named using the following nomenclature: * * xnb. * * Where handle is the oder of the interface referred to the guest. */ err = xs_scanf(XST_NIL, xenbus_get_node(xnb->dev), "handle", NULL, "%" PRIu32, &handle); if (err != 0) return (err); snprintf(xnb->if_name, IFNAMSIZ, "xnb%" PRIu16 ".%" PRIu32, xenbus_get_otherend_id(dev), handle); if (err == 0) { /* Set up ifnet structure */ ifp = xnb->xnb_ifp = if_alloc(IFT_ETHER); ifp->if_softc = xnb; if_initname(ifp, xnb->if_name, IF_DUNIT_NONE); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = xnb_ioctl; ifp->if_start = xnb_start; ifp->if_init = xnb_ifinit; ifp->if_mtu = ETHERMTU; ifp->if_snd.ifq_maxlen = NET_RX_RING_SIZE - 1; ifp->if_hwassist = XNB_CSUM_FEATURES; ifp->if_capabilities = IFCAP_HWCSUM; ifp->if_capenable = IFCAP_HWCSUM; ether_ifattach(ifp, xnb->mac); xnb->carrier = 0; } return err; } /** * Attach to a XenBus device that has been claimed by our probe routine. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_attach(device_t dev) { struct xnb_softc *xnb; int error; xnb_ring_type_t i; error = create_netdev(dev); if (error != 0) { xenbus_dev_fatal(dev, error, "creating netdev"); return (error); } DPRINTF("Attaching to %s\n", xenbus_get_node(dev)); /* * Basic initialization. * After this block it is safe to call xnb_detach() * to clean up any allocated data for this instance. */ xnb = device_get_softc(dev); xnb->otherend_id = xenbus_get_otherend_id(dev); for (i=0; i < XNB_NUM_RING_TYPES; i++) { xnb->ring_configs[i].ring_pages = 1; } /* * Setup sysctl variables. */ xnb_setup_sysctl(xnb); /* Update hot-plug status to satisfy xend. */ error = xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-status", "connected"); if (error != 0) { xnb_attach_failed(xnb, error, "writing %s/hotplug-status", xenbus_get_node(xnb->dev)); return (error); } if ((error = xnb_publish_backend_info(xnb)) != 0) { /* * If we can't publish our data, we cannot participate * in this connection, and waiting for a front-end state * change will not help the situation. */ xnb_attach_failed(xnb, error, "Publishing backend status for %s", xenbus_get_node(xnb->dev)); return error; } /* Tell the front end that we are ready to connect. */ xenbus_set_state(dev, XenbusStateInitWait); return (0); } /** * Detach from a net back device instance. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. * * \note A net back device may be detached at any time in its life-cycle, * including part way through the attach process. For this reason, * initialization order and the initialization state checks in this * routine must be carefully coupled so that attach time failures * are gracefully handled. */ static int xnb_detach(device_t dev) { struct xnb_softc *xnb; DPRINTF("\n"); xnb = device_get_softc(dev); mtx_lock(&xnb->sc_lock); while (xnb_shutdown(xnb) == EAGAIN) { msleep(xnb, &xnb->sc_lock, /*wakeup prio unchanged*/0, "xnb_shutdown", 0); } mtx_unlock(&xnb->sc_lock); DPRINTF("\n"); mtx_destroy(&xnb->tx_lock); mtx_destroy(&xnb->rx_lock); mtx_destroy(&xnb->sc_lock); return (0); } /** * Prepare this net back device for suspension of this VM. * * \param dev NewBus device object representing this Xen net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_suspend(device_t dev) { return (0); } /** * Perform any processing required to recover from a suspended state. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_resume(device_t dev) { return (0); } /** * Handle state changes expressed via the XenStore by our front-end peer. * * \param dev NewBus device object representing this Xen * Net Back instance. * \param frontend_state The new state of the front-end. * * \return 0 for success, errno codes for failure. */ static void xnb_frontend_changed(device_t dev, XenbusState frontend_state) { struct xnb_softc *xnb; xnb = device_get_softc(dev); DPRINTF("frontend_state=%s, xnb_state=%s\n", xenbus_strstate(frontend_state), xenbus_strstate(xenbus_get_state(xnb->dev))); switch (frontend_state) { case XenbusStateInitialising: break; case XenbusStateInitialised: case XenbusStateConnected: xnb_connect(xnb); break; case XenbusStateClosing: case XenbusStateClosed: mtx_lock(&xnb->sc_lock); xnb_shutdown(xnb); mtx_unlock(&xnb->sc_lock); if (frontend_state == XenbusStateClosed) xenbus_set_state(xnb->dev, XenbusStateClosed); break; default: xenbus_dev_fatal(xnb->dev, EINVAL, "saw state %d at frontend", frontend_state); break; } } /*---------------------------- Request Processing ----------------------------*/ /** * Interrupt handler bound to the shared ring's event channel. * Entry point for the xennet transmit path in netback * Transfers packets from the Xen ring to the host's generic networking stack * * \param arg Callback argument registerd during event channel * binding - the xnb_softc for this instance. */ static void xnb_intr(void *arg) { struct xnb_softc *xnb; struct ifnet *ifp; netif_tx_back_ring_t *txb; RING_IDX req_prod_local; xnb = (struct xnb_softc *)arg; ifp = xnb->xnb_ifp; txb = &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring; mtx_lock(&xnb->tx_lock); do { int notify; req_prod_local = txb->sring->req_prod; xen_rmb(); for (;;) { struct mbuf *mbufc; int err; err = xnb_recv(txb, xnb->otherend_id, &mbufc, ifp, xnb->tx_gnttab); if (err || (mbufc == NULL)) break; /* Send the packet to the generic network stack */ (*xnb->xnb_ifp->if_input)(xnb->xnb_ifp, mbufc); } RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(txb, notify); if (notify != 0) xen_intr_signal(xnb->xen_intr_handle); txb->sring->req_event = txb->req_cons + 1; xen_mb(); } while (txb->sring->req_prod != req_prod_local) ; mtx_unlock(&xnb->tx_lock); xnb_start(ifp); } /** * Build a struct xnb_pkt based on netif_tx_request's from a netif tx ring. * Will read exactly 0 or 1 packets from the ring; never a partial packet. * \param[out] pkt The returned packet. If there is an error building * the packet, pkt.list_len will be set to 0. * \param[in] tx_ring Pointer to the Ring that is the input to this function * \param[in] start The ring index of the first potential request * \return The number of requests consumed to build this packet */ static int xnb_ring2pkt(struct xnb_pkt *pkt, const netif_tx_back_ring_t *tx_ring, RING_IDX start) { /* * Outline: * 1) Initialize pkt * 2) Read the first request of the packet * 3) Read the extras * 4) Set cdr * 5) Loop on the remainder of the packet * 6) Finalize pkt (stuff like car_size and list_len) */ int idx = start; int discard = 0; /* whether to discard the packet */ int more_data = 0; /* there are more request past the last one */ uint16_t cdr_size = 0; /* accumulated size of requests 2 through n */ xnb_pkt_initialize(pkt); /* Read the first request */ if (RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx); pkt->size = tx->size; pkt->flags = tx->flags & ~NETTXF_more_data; more_data = tx->flags & NETTXF_more_data; pkt->list_len++; pkt->car = idx; idx++; } /* Read the extra info */ if ((pkt->flags & NETTXF_extra_info) && RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_extra_info_t *ext = (netif_extra_info_t*) RING_GET_REQUEST(tx_ring, idx); pkt->extra.type = ext->type; switch (pkt->extra.type) { case XEN_NETIF_EXTRA_TYPE_GSO: pkt->extra.u.gso = ext->u.gso; break; default: /* * The reference Linux netfront driver will * never set any other extra.type. So we don't * know what to do with it. Let's print an * error, then consume and discard the packet */ printf("xnb(%s:%d): Unknown extra info type %d." " Discarding packet\n", __func__, __LINE__, pkt->extra.type); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); discard = 1; break; } pkt->extra.flags = ext->flags; if (ext->flags & XEN_NETIF_EXTRA_FLAG_MORE) { /* * The reference linux netfront driver never sets this * flag (nor does any other known netfront). So we * will discard the packet. */ printf("xnb(%s:%d): Request sets " "XEN_NETIF_EXTRA_FLAG_MORE, but we can't handle " "that\n", __func__, __LINE__); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); discard = 1; } idx++; } /* Set cdr. If there is not more data, cdr is invalid */ pkt->cdr = idx; /* Loop on remainder of packet */ while (more_data && RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx); pkt->list_len++; cdr_size += tx->size; if (tx->flags & ~NETTXF_more_data) { /* There should be no other flags set at this point */ printf("xnb(%s:%d): Request sets unknown flags %d " "after the 1st request in the packet.\n", __func__, __LINE__, tx->flags); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); } more_data = tx->flags & NETTXF_more_data; idx++; } /* Finalize packet */ if (more_data != 0) { /* The ring ran out of requests before finishing the packet */ xnb_pkt_invalidate(pkt); idx = start; /* tell caller that we consumed no requests */ } else { /* Calculate car_size */ pkt->car_size = pkt->size - cdr_size; } if (discard != 0) { xnb_pkt_invalidate(pkt); } return idx - start; } /** * Respond to all the requests that constituted pkt. Builds the responses and * writes them to the ring, but doesn't push them to the shared ring. * \param[in] pkt the packet that needs a response * \param[in] error true if there was an error handling the packet, such * as in the hypervisor copy op or mbuf allocation * \param[out] ring Responses go here */ static void xnb_txpkt2rsp(const struct xnb_pkt *pkt, netif_tx_back_ring_t *ring, int error) { /* * Outline: * 1) Respond to the first request * 2) Respond to the extra info reques * Loop through every remaining request in the packet, generating * responses that copy those requests' ids and sets the status * appropriately. */ netif_tx_request_t *tx; netif_tx_response_t *rsp; int i; uint16_t status; status = (xnb_pkt_is_valid(pkt) == 0) || error ? NETIF_RSP_ERROR : NETIF_RSP_OKAY; KASSERT((pkt->list_len == 0) || (ring->rsp_prod_pvt == pkt->car), ("Cannot respond to ring requests out of order")); if (pkt->list_len >= 1) { uint16_t id; tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt); id = tx->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = status; ring->rsp_prod_pvt++; if (pkt->flags & NETRXF_extra_info) { rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->status = NETIF_RSP_NULL; ring->rsp_prod_pvt++; } } for (i=0; i < pkt->list_len - 1; i++) { uint16_t id; tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt); id = tx->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = status; ring->rsp_prod_pvt++; } } /** * Create an mbuf chain to represent a packet. Initializes all of the headers * in the mbuf chain, but does not copy the data. The returned chain must be * free()'d when no longer needed * \param[in] pkt A packet to model the mbuf chain after * \return A newly allocated mbuf chain, possibly with clusters attached. * NULL on failure */ static struct mbuf* xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp) { /** * \todo consider using a memory pool for mbufs instead of * reallocating them for every packet */ /** \todo handle extra data */ struct mbuf *m; m = m_getm(NULL, pkt->size, M_NOWAIT, MT_DATA); if (m != NULL) { m->m_pkthdr.rcvif = ifp; if (pkt->flags & NETTXF_data_validated) { /* * We lie to the host OS and always tell it that the * checksums are ok, because the packet is unlikely to * get corrupted going across domains. */ m->m_pkthdr.csum_flags = ( CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR ); m->m_pkthdr.csum_data = 0xffff; } } return m; } /** * Build a gnttab_copy table that can be used to copy data from a pkt * to an mbufc. Does not actually perform the copy. Always uses gref's on * the packet side. * \param[in] pkt pkt's associated requests form the src for * the copy operation * \param[in] mbufc mbufc's storage forms the dest for the copy operation * \param[out] gnttab Storage for the returned grant table * \param[in] txb Pointer to the backend ring structure * \param[in] otherend_id The domain ID of the other end of the copy * \return The number of gnttab entries filled */ static int xnb_txpkt2gnttab(const struct xnb_pkt *pkt, struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_tx_back_ring_t *txb, domid_t otherend_id) { struct mbuf *mbuf = mbufc;/* current mbuf within the chain */ int gnt_idx = 0; /* index into grant table */ RING_IDX r_idx = pkt->car; /* index into tx ring buffer */ int r_ofs = 0; /* offset of next data within tx request's data area */ int m_ofs = 0; /* offset of next data within mbuf's data area */ /* size in bytes that still needs to be represented in the table */ uint16_t size_remaining = pkt->size; while (size_remaining > 0) { const netif_tx_request_t *txq = RING_GET_REQUEST(txb, r_idx); const size_t mbuf_space = M_TRAILINGSPACE(mbuf) - m_ofs; const size_t req_size = r_idx == pkt->car ? pkt->car_size : txq->size; const size_t pkt_space = req_size - r_ofs; /* * space is the largest amount of data that can be copied in the * grant table's next entry */ const size_t space = MIN(pkt_space, mbuf_space); /* TODO: handle this error condition without panicking */ KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short")); gnttab[gnt_idx].source.u.ref = txq->gref; gnttab[gnt_idx].source.domid = otherend_id; gnttab[gnt_idx].source.offset = txq->offset + r_ofs; gnttab[gnt_idx].dest.u.gmfn = virt_to_mfn( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].dest.offset = virt_to_offset( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].dest.domid = DOMID_SELF; gnttab[gnt_idx].len = space; gnttab[gnt_idx].flags = GNTCOPY_source_gref; gnt_idx++; r_ofs += space; m_ofs += space; size_remaining -= space; if (req_size - r_ofs <= 0) { /* Must move to the next tx request */ r_ofs = 0; r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1; } if (M_TRAILINGSPACE(mbuf) - m_ofs <= 0) { /* Must move to the next mbuf */ m_ofs = 0; mbuf = mbuf->m_next; } } return gnt_idx; } /** * Check the status of the grant copy operations, and update mbufs various * non-data fields to reflect the data present. * \param[in,out] mbufc mbuf chain to update. The chain must be valid and of * the correct length, and data should already be present * \param[in] gnttab A grant table for a just completed copy op * \param[in] n_entries The number of valid entries in the grant table */ static void xnb_update_mbufc(struct mbuf *mbufc, const gnttab_copy_table gnttab, int n_entries) { struct mbuf *mbuf = mbufc; int i; size_t total_size = 0; for (i = 0; i < n_entries; i++) { KASSERT(gnttab[i].status == GNTST_okay, ("Some gnttab_copy entry had error status %hd\n", gnttab[i].status)); mbuf->m_len += gnttab[i].len; total_size += gnttab[i].len; if (M_TRAILINGSPACE(mbuf) <= 0) { mbuf = mbuf->m_next; } } mbufc->m_pkthdr.len = total_size; #if defined(INET) || defined(INET6) xnb_add_mbuf_cksum(mbufc); #endif } /** * Dequeue at most one packet from the shared ring * \param[in,out] txb Netif tx ring. A packet will be removed from it, and * its private indices will be updated. But the indices * will not be pushed to the shared ring. * \param[in] ifnet Interface to which the packet will be sent * \param[in] otherend Domain ID of the other end of the ring * \param[out] mbufc The assembled mbuf chain, ready to send to the generic * networking stack * \param[in,out] gnttab Pointer to enough memory for a grant table. We make * this a function parameter so that we will take less * stack space. * \return An error code */ static int xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend, struct mbuf **mbufc, struct ifnet *ifnet, gnttab_copy_table gnttab) { struct xnb_pkt pkt; /* number of tx requests consumed to build the last packet */ int num_consumed; int nr_ents; *mbufc = NULL; num_consumed = xnb_ring2pkt(&pkt, txb, txb->req_cons); if (num_consumed == 0) return 0; /* Nothing to receive */ /* update statistics independent of errors */ if_inc_counter(ifnet, IFCOUNTER_IPACKETS, 1); /* * if we got here, then 1 or more requests was consumed, but the packet * is not necessarily valid. */ if (xnb_pkt_is_valid(&pkt) == 0) { /* got a garbage packet, respond and drop it */ xnb_txpkt2rsp(&pkt, txb, 1); txb->req_cons += num_consumed; DPRINTF("xnb_intr: garbage packet, num_consumed=%d\n", num_consumed); if_inc_counter(ifnet, IFCOUNTER_IERRORS, 1); return EINVAL; } *mbufc = xnb_pkt2mbufc(&pkt, ifnet); if (*mbufc == NULL) { /* * Couldn't allocate mbufs. Respond and drop the packet. Do * not consume the requests */ xnb_txpkt2rsp(&pkt, txb, 1); DPRINTF("xnb_intr: Couldn't allocate mbufs, num_consumed=%d\n", num_consumed); if_inc_counter(ifnet, IFCOUNTER_IQDROPS, 1); return ENOMEM; } nr_ents = xnb_txpkt2gnttab(&pkt, *mbufc, gnttab, txb, otherend); if (nr_ents > 0) { int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, gnttab, nr_ents); KASSERT(hv_ret == 0, ("HYPERVISOR_grant_table_op returned %d\n", hv_ret)); xnb_update_mbufc(*mbufc, gnttab, nr_ents); } xnb_txpkt2rsp(&pkt, txb, 0); txb->req_cons += num_consumed; return 0; } /** * Create an xnb_pkt based on the contents of an mbuf chain. * \param[in] mbufc mbuf chain to transform into a packet * \param[out] pkt Storage for the newly generated xnb_pkt * \param[in] start The ring index of the first available slot in the rx * ring * \param[in] space The number of free slots in the rx ring * \retval 0 Success * \retval EINVAL mbufc was corrupt or not convertible into a pkt * \retval EAGAIN There was not enough space in the ring to queue the * packet */ static int xnb_mbufc2pkt(const struct mbuf *mbufc, struct xnb_pkt *pkt, RING_IDX start, int space) { int retval = 0; if ((mbufc == NULL) || ( (mbufc->m_flags & M_PKTHDR) == 0) || (mbufc->m_pkthdr.len == 0)) { xnb_pkt_invalidate(pkt); retval = EINVAL; } else { int slots_required; xnb_pkt_validate(pkt); pkt->flags = 0; pkt->size = mbufc->m_pkthdr.len; pkt->car = start; pkt->car_size = mbufc->m_len; if (mbufc->m_pkthdr.csum_flags & CSUM_TSO) { pkt->flags |= NETRXF_extra_info; pkt->extra.u.gso.size = mbufc->m_pkthdr.tso_segsz; pkt->extra.u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; pkt->extra.u.gso.pad = 0; pkt->extra.u.gso.features = 0; pkt->extra.type = XEN_NETIF_EXTRA_TYPE_GSO; pkt->extra.flags = 0; pkt->cdr = start + 2; } else { pkt->cdr = start + 1; } if (mbufc->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_DELAY_DATA)) { pkt->flags |= (NETRXF_csum_blank | NETRXF_data_validated); } /* * Each ring response can have up to PAGE_SIZE of data. * Assume that we can defragment the mbuf chain efficiently * into responses so that each response but the last uses all * PAGE_SIZE bytes. */ pkt->list_len = howmany(pkt->size, PAGE_SIZE); if (pkt->list_len > 1) { pkt->flags |= NETRXF_more_data; } slots_required = pkt->list_len + (pkt->flags & NETRXF_extra_info ? 1 : 0); if (slots_required > space) { xnb_pkt_invalidate(pkt); retval = EAGAIN; } } return retval; } /** * Build a gnttab_copy table that can be used to copy data from an mbuf chain * to the frontend's shared buffers. Does not actually perform the copy. * Always uses gref's on the other end's side. * \param[in] pkt pkt's associated responses form the dest for the copy * operatoin * \param[in] mbufc The source for the copy operation * \param[out] gnttab Storage for the returned grant table * \param[in] rxb Pointer to the backend ring structure * \param[in] otherend_id The domain ID of the other end of the copy * \return The number of gnttab entries filled */ static int xnb_rxpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_rx_back_ring_t *rxb, domid_t otherend_id) { const struct mbuf *mbuf = mbufc;/* current mbuf within the chain */ int gnt_idx = 0; /* index into grant table */ RING_IDX r_idx = pkt->car; /* index into rx ring buffer */ int r_ofs = 0; /* offset of next data within rx request's data area */ int m_ofs = 0; /* offset of next data within mbuf's data area */ /* size in bytes that still needs to be represented in the table */ uint16_t size_remaining; size_remaining = (xnb_pkt_is_valid(pkt) != 0) ? pkt->size : 0; while (size_remaining > 0) { const netif_rx_request_t *rxq = RING_GET_REQUEST(rxb, r_idx); const size_t mbuf_space = mbuf->m_len - m_ofs; /* Xen shared pages have an implied size of PAGE_SIZE */ const size_t req_size = PAGE_SIZE; const size_t pkt_space = req_size - r_ofs; /* * space is the largest amount of data that can be copied in the * grant table's next entry */ const size_t space = MIN(pkt_space, mbuf_space); /* TODO: handle this error condition without panicing */ KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short")); gnttab[gnt_idx].dest.u.ref = rxq->gref; gnttab[gnt_idx].dest.domid = otherend_id; gnttab[gnt_idx].dest.offset = r_ofs; gnttab[gnt_idx].source.u.gmfn = virt_to_mfn( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].source.offset = virt_to_offset( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].source.domid = DOMID_SELF; gnttab[gnt_idx].len = space; gnttab[gnt_idx].flags = GNTCOPY_dest_gref; gnt_idx++; r_ofs += space; m_ofs += space; size_remaining -= space; if (req_size - r_ofs <= 0) { /* Must move to the next rx request */ r_ofs = 0; r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1; } if (mbuf->m_len - m_ofs <= 0) { /* Must move to the next mbuf */ m_ofs = 0; mbuf = mbuf->m_next; } } return gnt_idx; } /** * Generates responses for all the requests that constituted pkt. Builds * responses and writes them to the ring, but doesn't push the shared ring * indices. * \param[in] pkt the packet that needs a response * \param[in] gnttab The grant copy table corresponding to this packet. * Used to determine how many rsp->netif_rx_response_t's to * generate. * \param[in] n_entries Number of relevant entries in the grant table * \param[out] ring Responses go here * \return The number of RX requests that were consumed to generate * the responses */ static int xnb_rxpkt2rsp(const struct xnb_pkt *pkt, const gnttab_copy_table gnttab, int n_entries, netif_rx_back_ring_t *ring) { /* * This code makes the following assumptions: * * All entries in gnttab set GNTCOPY_dest_gref * * The entries in gnttab are grouped by their grefs: any two * entries with the same gref must be adjacent */ int error = 0; int gnt_idx, i; int n_responses = 0; grant_ref_t last_gref = GRANT_REF_INVALID; RING_IDX r_idx; KASSERT(gnttab != NULL, ("Received a null granttable copy")); /* * In the event of an error, we only need to send one response to the * netfront. In that case, we musn't write any data to the responses * after the one we send. So we must loop all the way through gnttab * looking for errors before we generate any responses * * Since we're looping through the grant table anyway, we'll count the * number of different gref's in it, which will tell us how many * responses to generate */ for (gnt_idx = 0; gnt_idx < n_entries; gnt_idx++) { int16_t status = gnttab[gnt_idx].status; if (status != GNTST_okay) { DPRINTF( "Got error %d for hypervisor gnttab_copy status\n", status); error = 1; break; } if (gnttab[gnt_idx].dest.u.ref != last_gref) { n_responses++; last_gref = gnttab[gnt_idx].dest.u.ref; } } if (error != 0) { uint16_t id; netif_rx_response_t *rsp; id = RING_GET_REQUEST(ring, ring->rsp_prod_pvt)->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = NETIF_RSP_ERROR; n_responses = 1; } else { gnt_idx = 0; const int has_extra = pkt->flags & NETRXF_extra_info; if (has_extra != 0) n_responses++; for (i = 0; i < n_responses; i++) { netif_rx_request_t rxq; netif_rx_response_t *rsp; r_idx = ring->rsp_prod_pvt + i; /* * We copy the structure of rxq instead of making a * pointer because it shares the same memory as rsp. */ rxq = *(RING_GET_REQUEST(ring, r_idx)); rsp = RING_GET_RESPONSE(ring, r_idx); if (has_extra && (i == 1)) { netif_extra_info_t *ext = (netif_extra_info_t*)rsp; ext->type = XEN_NETIF_EXTRA_TYPE_GSO; ext->flags = 0; ext->u.gso.size = pkt->extra.u.gso.size; ext->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; ext->u.gso.pad = 0; ext->u.gso.features = 0; } else { rsp->id = rxq.id; rsp->status = GNTST_okay; rsp->offset = 0; rsp->flags = 0; if (i < pkt->list_len - 1) rsp->flags |= NETRXF_more_data; if ((i == 0) && has_extra) rsp->flags |= NETRXF_extra_info; if ((i == 0) && (pkt->flags & NETRXF_data_validated)) { rsp->flags |= NETRXF_data_validated; rsp->flags |= NETRXF_csum_blank; } rsp->status = 0; for (; gnttab[gnt_idx].dest.u.ref == rxq.gref; gnt_idx++) { rsp->status += gnttab[gnt_idx].len; } } } } ring->req_cons += n_responses; ring->rsp_prod_pvt += n_responses; return n_responses; } #if defined(INET) || defined(INET6) /** * Add IP, TCP, and/or UDP checksums to every mbuf in a chain. The first mbuf * in the chain must start with a struct ether_header. * * XXX This function will perform incorrectly on UDP packets that are split up * into multiple ethernet frames. */ static void xnb_add_mbuf_cksum(struct mbuf *mbufc) { struct ether_header *eh; struct ip *iph; uint16_t ether_type; eh = mtod(mbufc, struct ether_header*); ether_type = ntohs(eh->ether_type); if (ether_type != ETHERTYPE_IP) { /* Nothing to calculate */ return; } iph = (struct ip*)(eh + 1); if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { iph->ip_sum = 0; iph->ip_sum = in_cksum_hdr(iph); } switch (iph->ip_p) { case IPPROTO_TCP: if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { size_t tcplen = ntohs(iph->ip_len) - sizeof(struct ip); struct tcphdr *th = (struct tcphdr*)(iph + 1); th->th_sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr, htons(IPPROTO_TCP + tcplen)); th->th_sum = in_cksum_skip(mbufc, sizeof(struct ether_header) + ntohs(iph->ip_len), sizeof(struct ether_header) + (iph->ip_hl << 2)); } break; case IPPROTO_UDP: if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { size_t udplen = ntohs(iph->ip_len) - sizeof(struct ip); struct udphdr *uh = (struct udphdr*)(iph + 1); uh->uh_sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr, htons(IPPROTO_UDP + udplen)); uh->uh_sum = in_cksum_skip(mbufc, sizeof(struct ether_header) + ntohs(iph->ip_len), sizeof(struct ether_header) + (iph->ip_hl << 2)); } break; default: break; } } #endif /* INET || INET6 */ static void xnb_stop(struct xnb_softc *xnb) { struct ifnet *ifp; mtx_assert(&xnb->sc_lock, MA_OWNED); ifp = xnb->xnb_ifp; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); } static int xnb_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct xnb_softc *xnb = ifp->if_softc; struct ifreq *ifr = (struct ifreq*) data; #ifdef INET struct ifaddr *ifa = (struct ifaddr*)data; #endif int error = 0; switch (cmd) { case SIOCSIFFLAGS: mtx_lock(&xnb->sc_lock); if (ifp->if_flags & IFF_UP) { xnb_ifinit_locked(xnb); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { xnb_stop(xnb); } } /* * Note: netfront sets a variable named xn_if_flags * here, but that variable is never read */ mtx_unlock(&xnb->sc_lock); break; case SIOCSIFADDR: #ifdef INET mtx_lock(&xnb->sc_lock); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); } arp_ifinit(ifp, ifa); mtx_unlock(&xnb->sc_lock); } else { mtx_unlock(&xnb->sc_lock); #endif error = ether_ioctl(ifp, cmd, data); #ifdef INET } #endif break; case SIOCSIFCAP: mtx_lock(&xnb->sc_lock); if (ifr->ifr_reqcap & IFCAP_TXCSUM) { ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist |= XNB_CSUM_FEATURES; } else { ifp->if_capenable &= ~(IFCAP_TXCSUM); ifp->if_hwassist &= ~(XNB_CSUM_FEATURES); } if ((ifr->ifr_reqcap & IFCAP_RXCSUM)) { ifp->if_capenable |= IFCAP_RXCSUM; } else { ifp->if_capenable &= ~(IFCAP_RXCSUM); } /* * TODO enable TSO4 and LRO once we no longer need * to calculate checksums in software */ #if 0 if (ifr->if_reqcap |= IFCAP_TSO4) { if (IFCAP_TXCSUM & ifp->if_capenable) { printf("xnb: Xen netif requires that " "TXCSUM be enabled in order " "to use TSO4\n"); error = EINVAL; } else { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; } } else { ifp->if_capenable &= ~(IFCAP_TSO4); ifp->if_hwassist &= ~(CSUM_TSO); } if (ifr->ifreqcap |= IFCAP_LRO) { ifp->if_capenable |= IFCAP_LRO; } else { ifp->if_capenable &= ~(IFCAP_LRO); } #endif mtx_unlock(&xnb->sc_lock); break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; xnb_ifinit(xnb); break; case SIOCADDMULTI: case SIOCDELMULTI: case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &xnb->sc_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void xnb_start_locked(struct ifnet *ifp) { netif_rx_back_ring_t *rxb; struct xnb_softc *xnb; struct mbuf *mbufc; RING_IDX req_prod_local; xnb = ifp->if_softc; rxb = &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring; if (!xnb->carrier) return; do { int out_of_space = 0; int notify; req_prod_local = rxb->sring->req_prod; xen_rmb(); for (;;) { int error; IF_DEQUEUE(&ifp->if_snd, mbufc); if (mbufc == NULL) break; error = xnb_send(rxb, xnb->otherend_id, mbufc, xnb->rx_gnttab); switch (error) { case EAGAIN: /* * Insufficient space in the ring. * Requeue pkt and send when space is * available. */ IF_PREPEND(&ifp->if_snd, mbufc); /* * Perhaps the frontend missed an IRQ * and went to sleep. Notify it to wake * it up. */ out_of_space = 1; break; case EINVAL: /* OS gave a corrupt packet. Drop it.*/ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); /* FALLTHROUGH */ default: /* Send succeeded, or packet had error. * Free the packet */ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (mbufc) m_freem(mbufc); break; } if (out_of_space != 0) break; } RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(rxb, notify); if ((notify != 0) || (out_of_space != 0)) xen_intr_signal(xnb->xen_intr_handle); rxb->sring->req_event = req_prod_local + 1; xen_mb(); } while (rxb->sring->req_prod != req_prod_local) ; } /** * Sends one packet to the ring. Blocks until the packet is on the ring * \param[in] mbufc Contains one packet to send. Caller must free * \param[in,out] rxb The packet will be pushed onto this ring, but the * otherend will not be notified. * \param[in] otherend The domain ID of the other end of the connection * \retval EAGAIN The ring did not have enough space for the packet. * The ring has not been modified * \param[in,out] gnttab Pointer to enough memory for a grant table. We make * this a function parameter so that we will take less * stack space. * \retval EINVAL mbufc was corrupt or not convertible into a pkt */ static int xnb_send(netif_rx_back_ring_t *ring, domid_t otherend, const struct mbuf *mbufc, gnttab_copy_table gnttab) { struct xnb_pkt pkt; int error, n_entries, n_reqs; RING_IDX space; space = ring->sring->req_prod - ring->req_cons; error = xnb_mbufc2pkt(mbufc, &pkt, ring->rsp_prod_pvt, space); if (error != 0) return error; n_entries = xnb_rxpkt2gnttab(&pkt, mbufc, gnttab, ring, otherend); if (n_entries != 0) { int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, gnttab, n_entries); KASSERT(hv_ret == 0, ("HYPERVISOR_grant_table_op returned %d\n", hv_ret)); } n_reqs = xnb_rxpkt2rsp(&pkt, gnttab, n_entries, ring); return 0; } static void xnb_start(struct ifnet *ifp) { struct xnb_softc *xnb; xnb = ifp->if_softc; mtx_lock(&xnb->rx_lock); xnb_start_locked(ifp); mtx_unlock(&xnb->rx_lock); } /* equivalent of network_open() in Linux */ static void xnb_ifinit_locked(struct xnb_softc *xnb) { struct ifnet *ifp; ifp = xnb->xnb_ifp; mtx_assert(&xnb->sc_lock, MA_OWNED); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; xnb_stop(xnb); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); } static void xnb_ifinit(void *xsc) { struct xnb_softc *xnb = xsc; mtx_lock(&xnb->sc_lock); xnb_ifinit_locked(xnb); mtx_unlock(&xnb->sc_lock); } /** * Callback used by the generic networking code to tell us when our carrier * state has changed. Since we don't have a physical carrier, we don't care */ static int xnb_ifmedia_upd(struct ifnet *ifp) { return (0); } /** * Callback used by the generic networking code to ask us what our carrier * state is. Since we don't have a physical carrier, this is very simple */ static void xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; } /*---------------------------- NewBus Registration ---------------------------*/ static device_method_t xnb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xnb_probe), DEVMETHOD(device_attach, xnb_attach), DEVMETHOD(device_detach, xnb_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xnb_suspend), DEVMETHOD(device_resume, xnb_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, xnb_frontend_changed), { 0, 0 } }; static driver_t xnb_driver = { "xnb", xnb_methods, sizeof(struct xnb_softc), }; devclass_t xnb_devclass; DRIVER_MODULE(xnb, xenbusb_back, xnb_driver, xnb_devclass, 0, 0); /*-------------------------- Unit Tests -------------------------------------*/ #ifdef XNB_DEBUG #include "netback_unit_tests.c" #endif Index: head/sys/mips/nlm/dev/net/xlpge.c =================================================================== --- head/sys/mips/nlm/dev/net/xlpge.c (revision 357009) +++ head/sys/mips/nlm/dev/net/xlpge.c (revision 357010) @@ -1,1543 +1,1544 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2012 Broadcom Corporation * All Rights Reserved * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY BROADCOM ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define __RMAN_RESOURCE_VISIBLE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for DELAY */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "miidevs.h" #include #include "miibus_if.h" #include #include /*#define XLP_DRIVER_LOOPBACK*/ static struct nae_port_config nae_port_config[64]; int poe_cl_tbl[MAX_POE_CLASSES] = { 0x0, 0x249249, 0x492492, 0x6db6db, 0x924924, 0xb6db6d, 0xdb6db6, 0xffffff }; /* #define DUMP_PACKET */ static uint64_t nlm_paddr_ld(uint64_t paddr) { uint64_t xkaddr = 0x9800000000000000 | paddr; return (nlm_load_dword_daddr(xkaddr)); } struct nlm_xlp_portdata ifp_ports[64]; static uma_zone_t nl_tx_desc_zone; /* This implementation will register the following tree of device * registration: * pcibus * | * xlpnae (1 instance - virtual entity) * | * xlpge * (18 sgmii / 4 xaui / 2 interlaken instances) * | * miibus */ static int nlm_xlpnae_probe(device_t); static int nlm_xlpnae_attach(device_t); static int nlm_xlpnae_detach(device_t); static int nlm_xlpnae_suspend(device_t); static int nlm_xlpnae_resume(device_t); static int nlm_xlpnae_shutdown(device_t); static device_method_t nlm_xlpnae_methods[] = { /* Methods from the device interface */ DEVMETHOD(device_probe, nlm_xlpnae_probe), DEVMETHOD(device_attach, nlm_xlpnae_attach), DEVMETHOD(device_detach, nlm_xlpnae_detach), DEVMETHOD(device_suspend, nlm_xlpnae_suspend), DEVMETHOD(device_resume, nlm_xlpnae_resume), DEVMETHOD(device_shutdown, nlm_xlpnae_shutdown), DEVMETHOD(bus_driver_added, bus_generic_driver_added), DEVMETHOD_END }; static driver_t nlm_xlpnae_driver = { "xlpnae", nlm_xlpnae_methods, sizeof(struct nlm_xlpnae_softc) }; static devclass_t nlm_xlpnae_devclass; static int nlm_xlpge_probe(device_t); static int nlm_xlpge_attach(device_t); static int nlm_xlpge_detach(device_t); static int nlm_xlpge_suspend(device_t); static int nlm_xlpge_resume(device_t); static int nlm_xlpge_shutdown(device_t); /* mii override functions */ static int nlm_xlpge_mii_read(device_t, int, int); static int nlm_xlpge_mii_write(device_t, int, int, int); static void nlm_xlpge_mii_statchg(device_t); static device_method_t nlm_xlpge_methods[] = { /* Methods from the device interface */ DEVMETHOD(device_probe, nlm_xlpge_probe), DEVMETHOD(device_attach, nlm_xlpge_attach), DEVMETHOD(device_detach, nlm_xlpge_detach), DEVMETHOD(device_suspend, nlm_xlpge_suspend), DEVMETHOD(device_resume, nlm_xlpge_resume), DEVMETHOD(device_shutdown, nlm_xlpge_shutdown), /* Methods from the nexus bus needed for explicitly * probing children when driver is loaded as a kernel module */ DEVMETHOD(miibus_readreg, nlm_xlpge_mii_read), DEVMETHOD(miibus_writereg, nlm_xlpge_mii_write), DEVMETHOD(miibus_statchg, nlm_xlpge_mii_statchg), /* Terminate method list */ DEVMETHOD_END }; static driver_t nlm_xlpge_driver = { "xlpge", nlm_xlpge_methods, sizeof(struct nlm_xlpge_softc) }; static devclass_t nlm_xlpge_devclass; DRIVER_MODULE(xlpnae, pci, nlm_xlpnae_driver, nlm_xlpnae_devclass, 0, 0); DRIVER_MODULE(xlpge, xlpnae, nlm_xlpge_driver, nlm_xlpge_devclass, 0, 0); DRIVER_MODULE(miibus, xlpge, miibus_driver, miibus_devclass, 0, 0); MODULE_DEPEND(pci, xlpnae, 1, 1, 1); MODULE_DEPEND(xlpnae, xlpge, 1, 1, 1); MODULE_DEPEND(xlpge, ether, 1, 1, 1); MODULE_DEPEND(xlpge, miibus, 1, 1, 1); #define SGMII_RCV_CONTEXT_WIDTH 8 /* prototypes */ static void nlm_xlpge_msgring_handler(int vc, int size, int code, int srcid, struct nlm_fmn_msg *msg, void *data); static void nlm_xlpge_submit_rx_free_desc(struct nlm_xlpge_softc *sc, int num); static void nlm_xlpge_init(void *addr); static void nlm_xlpge_port_disable(struct nlm_xlpge_softc *sc); static void nlm_xlpge_port_enable(struct nlm_xlpge_softc *sc); /* globals */ int dbg_on = 1; int cntx2port[524]; static __inline void atomic_incr_long(unsigned long *addr) { atomic_add_long(addr, 1); } /* * xlpnae driver implementation */ static int nlm_xlpnae_probe(device_t dev) { if (pci_get_vendor(dev) != PCI_VENDOR_NETLOGIC || pci_get_device(dev) != PCI_DEVICE_ID_NLM_NAE) return (ENXIO); return (BUS_PROBE_DEFAULT); } static void nlm_xlpnae_print_frin_desc_carving(struct nlm_xlpnae_softc *sc) { int intf; uint32_t value; int start, size; /* XXXJC: use max_ports instead of 20 ? */ for (intf = 0; intf < 20; intf++) { nlm_write_nae_reg(sc->base, NAE_FREE_IN_FIFO_CFG, (0x80000000 | intf)); value = nlm_read_nae_reg(sc->base, NAE_FREE_IN_FIFO_CFG); size = 2 * ((value >> 20) & 0x3ff); start = 2 * ((value >> 8) & 0x1ff); } } static void nlm_config_egress(struct nlm_xlpnae_softc *sc, int nblock, int context_base, int hwport, int max_channels) { int offset, num_channels; uint32_t data; num_channels = sc->portcfg[hwport].num_channels; data = (2048 << 12) | (hwport << 4) | 1; nlm_write_nae_reg(sc->base, NAE_TX_IF_BURSTMAX_CMD, data); data = ((context_base + num_channels - 1) << 22) | (context_base << 12) | (hwport << 4) | 1; nlm_write_nae_reg(sc->base, NAE_TX_DDR_ACTVLIST_CMD, data); config_egress_fifo_carvings(sc->base, hwport, context_base, num_channels, max_channels, sc->portcfg); config_egress_fifo_credits(sc->base, hwport, context_base, num_channels, max_channels, sc->portcfg); data = nlm_read_nae_reg(sc->base, NAE_DMA_TX_CREDIT_TH); data |= (1 << 25) | (1 << 24); nlm_write_nae_reg(sc->base, NAE_DMA_TX_CREDIT_TH, data); for (offset = 0; offset < num_channels; offset++) { nlm_write_nae_reg(sc->base, NAE_TX_SCHED_MAP_CMD1, NAE_DRR_QUANTA); data = (hwport << 15) | ((context_base + offset) << 5); if (sc->cmplx_type[nblock] == ILC) data |= (offset << 20); nlm_write_nae_reg(sc->base, NAE_TX_SCHED_MAP_CMD0, data | 1); nlm_write_nae_reg(sc->base, NAE_TX_SCHED_MAP_CMD0, data); } } static int xlpnae_get_maxchannels(struct nlm_xlpnae_softc *sc) { int maxchans = 0; int i; for (i = 0; i < sc->max_ports; i++) { if (sc->portcfg[i].type == UNKNOWN) continue; maxchans += sc->portcfg[i].num_channels; } return (maxchans); } static void nlm_setup_interface(struct nlm_xlpnae_softc *sc, int nblock, int port, uint32_t cur_flow_base, uint32_t flow_mask, int max_channels, int context) { uint64_t nae_base = sc->base; int mtu = 1536; /* XXXJC: don't hard code */ uint32_t ucore_mask; if (sc->cmplx_type[nblock] == XAUIC) nlm_config_xaui(nae_base, nblock, mtu, mtu, sc->portcfg[port].vlan_pri_en); nlm_config_freein_fifo_uniq_cfg(nae_base, port, sc->portcfg[port].free_desc_sizes); nlm_config_ucore_iface_mask_cfg(nae_base, port, sc->portcfg[port].ucore_mask); nlm_program_flow_cfg(nae_base, port, cur_flow_base, flow_mask); if (sc->cmplx_type[nblock] == SGMIIC) nlm_configure_sgmii_interface(nae_base, nblock, port, mtu, 0); nlm_config_egress(sc, nblock, context, port, max_channels); nlm_nae_init_netior(nae_base, sc->nblocks); nlm_nae_open_if(nae_base, nblock, sc->cmplx_type[nblock], port, sc->portcfg[port].free_desc_sizes); /* XXXJC: check mask calculation */ ucore_mask = (1 << sc->nucores) - 1; nlm_nae_init_ucore(nae_base, port, ucore_mask); } static void nlm_setup_interfaces(struct nlm_xlpnae_softc *sc) { uint64_t nae_base; uint32_t cur_slot, cur_slot_base; uint32_t cur_flow_base, port, flow_mask; int max_channels; int i, context; cur_slot = 0; cur_slot_base = 0; cur_flow_base = 0; nae_base = sc->base; flow_mask = nlm_get_flow_mask(sc->total_num_ports); /* calculate max_channels */ max_channels = xlpnae_get_maxchannels(sc); port = 0; context = 0; for (i = 0; i < sc->max_ports; i++) { if (sc->portcfg[i].type == UNKNOWN) continue; nlm_setup_interface(sc, sc->portcfg[i].block, i, cur_flow_base, flow_mask, max_channels, context); cur_flow_base += sc->per_port_num_flows; context += sc->portcfg[i].num_channels; } } static void nlm_xlpnae_init(int node, struct nlm_xlpnae_softc *sc) { uint64_t nae_base; uint32_t ucoremask = 0; uint32_t val; int i; nae_base = sc->base; nlm_nae_flush_free_fifo(nae_base, sc->nblocks); nlm_deflate_frin_fifo_carving(nae_base, sc->max_ports); nlm_reset_nae(node); for (i = 0; i < sc->nucores; i++) /* XXXJC: code repeated below */ ucoremask |= (0x1 << i); printf("Loading 0x%x ucores with microcode\n", ucoremask); nlm_ucore_load_all(nae_base, ucoremask, 1); val = nlm_set_device_frequency(node, DFS_DEVICE_NAE, sc->freq); printf("Setup NAE frequency to %dMHz\n", val); nlm_mdio_reset_all(nae_base); printf("Initialze SGMII PCS for blocks 0x%x\n", sc->sgmiimask); nlm_sgmii_pcs_init(nae_base, sc->sgmiimask); printf("Initialze XAUI PCS for blocks 0x%x\n", sc->xauimask); nlm_xaui_pcs_init(nae_base, sc->xauimask); /* clear NETIOR soft reset */ nlm_write_nae_reg(nae_base, NAE_LANE_CFG_SOFTRESET, 0x0); /* Disable RX enable bit in RX_CONFIG */ val = nlm_read_nae_reg(nae_base, NAE_RX_CONFIG); val &= 0xfffffffe; nlm_write_nae_reg(nae_base, NAE_RX_CONFIG, val); if (nlm_is_xlp8xx_ax() == 0) { val = nlm_read_nae_reg(nae_base, NAE_TX_CONFIG); val &= ~(1 << 3); nlm_write_nae_reg(nae_base, NAE_TX_CONFIG, val); } nlm_setup_poe_class_config(nae_base, MAX_POE_CLASSES, sc->ncontexts, poe_cl_tbl); nlm_setup_vfbid_mapping(nae_base); nlm_setup_flow_crc_poly(nae_base, sc->flow_crc_poly); nlm_setup_rx_cal_cfg(nae_base, sc->max_ports, sc->portcfg); /* note: xlp8xx Ax does not have Tx Calendering */ if (!nlm_is_xlp8xx_ax()) nlm_setup_tx_cal_cfg(nae_base, sc->max_ports, sc->portcfg); nlm_setup_interfaces(sc); nlm_config_poe(sc->poe_base, sc->poedv_base); if (sc->hw_parser_en) nlm_enable_hardware_parser(nae_base); if (sc->prepad_en) nlm_prepad_enable(nae_base, sc->prepad_size); if (sc->ieee_1588_en) nlm_setup_1588_timer(sc->base, sc->portcfg); } static void nlm_xlpnae_update_pde(void *dummy __unused) { struct nlm_xlpnae_softc *sc; uint32_t dv[NUM_WORDS_PER_DV]; device_t dev; int vec; dev = devclass_get_device(devclass_find("xlpnae"), 0); sc = device_get_softc(dev); nlm_write_poe_reg(sc->poe_base, POE_DISTR_EN, 0); for (vec = 0; vec < NUM_DIST_VEC; vec++) { if (nlm_get_poe_distvec(vec, dv) != 0) continue; nlm_write_poe_distvec(sc->poedv_base, vec, dv); } nlm_write_poe_reg(sc->poe_base, POE_DISTR_EN, 1); } SYSINIT(nlm_xlpnae_update_pde, SI_SUB_SMP, SI_ORDER_ANY, nlm_xlpnae_update_pde, NULL); /* configuration common for sgmii, xaui, ilaken goes here */ static void nlm_setup_portcfg(struct nlm_xlpnae_softc *sc, struct xlp_nae_ivars *naep, int block, int port) { int i; uint32_t ucore_mask = 0; struct xlp_block_ivars *bp; struct xlp_port_ivars *p; bp = &(naep->block_ivars[block]); p = &(bp->port_ivars[port & 0x3]); sc->portcfg[port].node = p->node; sc->portcfg[port].block = p->block; sc->portcfg[port].port = p->port; sc->portcfg[port].type = p->type; sc->portcfg[port].mdio_bus = p->mdio_bus; sc->portcfg[port].phy_addr = p->phy_addr; sc->portcfg[port].loopback_mode = p->loopback_mode; sc->portcfg[port].num_channels = p->num_channels; if (p->free_desc_sizes != MCLBYTES) { printf("[%d, %d] Error: free_desc_sizes %d != %d\n", block, port, p->free_desc_sizes, MCLBYTES); return; } sc->portcfg[port].free_desc_sizes = p->free_desc_sizes; for (i = 0; i < sc->nucores; i++) /* XXXJC: configure this */ ucore_mask |= (0x1 << i); sc->portcfg[port].ucore_mask = ucore_mask; sc->portcfg[port].vlan_pri_en = p->vlan_pri_en; sc->portcfg[port].num_free_descs = p->num_free_descs; sc->portcfg[port].iface_fifo_size = p->iface_fifo_size; sc->portcfg[port].rxbuf_size = p->rxbuf_size; sc->portcfg[port].rx_slots_reqd = p->rx_slots_reqd; sc->portcfg[port].tx_slots_reqd = p->tx_slots_reqd; sc->portcfg[port].pseq_fifo_size = p->pseq_fifo_size; sc->portcfg[port].stg2_fifo_size = p->stg2_fifo_size; sc->portcfg[port].eh_fifo_size = p->eh_fifo_size; sc->portcfg[port].frout_fifo_size = p->frout_fifo_size; sc->portcfg[port].ms_fifo_size = p->ms_fifo_size; sc->portcfg[port].pkt_fifo_size = p->pkt_fifo_size; sc->portcfg[port].pktlen_fifo_size = p->pktlen_fifo_size; sc->portcfg[port].max_stg2_offset = p->max_stg2_offset; sc->portcfg[port].max_eh_offset = p->max_eh_offset; sc->portcfg[port].max_frout_offset = p->max_frout_offset; sc->portcfg[port].max_ms_offset = p->max_ms_offset; sc->portcfg[port].max_pmem_offset = p->max_pmem_offset; sc->portcfg[port].stg1_2_credit = p->stg1_2_credit; sc->portcfg[port].stg2_eh_credit = p->stg2_eh_credit; sc->portcfg[port].stg2_frout_credit = p->stg2_frout_credit; sc->portcfg[port].stg2_ms_credit = p->stg2_ms_credit; sc->portcfg[port].ieee1588_inc_intg = p->ieee1588_inc_intg; sc->portcfg[port].ieee1588_inc_den = p->ieee1588_inc_den; sc->portcfg[port].ieee1588_inc_num = p->ieee1588_inc_num; sc->portcfg[port].ieee1588_userval = p->ieee1588_userval; sc->portcfg[port].ieee1588_ptpoff = p->ieee1588_ptpoff; sc->portcfg[port].ieee1588_tmr1 = p->ieee1588_tmr1; sc->portcfg[port].ieee1588_tmr2 = p->ieee1588_tmr2; sc->portcfg[port].ieee1588_tmr3 = p->ieee1588_tmr3; sc->total_free_desc += sc->portcfg[port].free_desc_sizes; sc->total_num_ports++; } static int nlm_xlpnae_attach(device_t dev) { struct xlp_nae_ivars *nae_ivars; struct nlm_xlpnae_softc *sc; device_t tmpd; uint32_t dv[NUM_WORDS_PER_DV]; int port, i, j, nchan, nblock, node, qstart, qnum; int offset, context, txq_base, rxvcbase; uint64_t poe_pcibase, nae_pcibase; node = pci_get_slot(dev) / 8; nae_ivars = &xlp_board_info.nodes[node].nae_ivars; sc = device_get_softc(dev); sc->xlpnae_dev = dev; sc->node = nae_ivars->node; sc->base = nlm_get_nae_regbase(sc->node); sc->poe_base = nlm_get_poe_regbase(sc->node); sc->poedv_base = nlm_get_poedv_regbase(sc->node); sc->portcfg = nae_port_config; sc->blockmask = nae_ivars->blockmask; sc->ilmask = nae_ivars->ilmask; sc->xauimask = nae_ivars->xauimask; sc->sgmiimask = nae_ivars->sgmiimask; sc->nblocks = nae_ivars->nblocks; sc->freq = nae_ivars->freq; /* flow table generation is done by CRC16 polynomial */ sc->flow_crc_poly = nae_ivars->flow_crc_poly; sc->hw_parser_en = nae_ivars->hw_parser_en; sc->prepad_en = nae_ivars->prepad_en; sc->prepad_size = nae_ivars->prepad_size; sc->ieee_1588_en = nae_ivars->ieee_1588_en; nae_pcibase = nlm_get_nae_pcibase(sc->node); sc->ncontexts = nlm_read_reg(nae_pcibase, XLP_PCI_DEVINFO_REG5); sc->nucores = nlm_num_uengines(nae_pcibase); for (nblock = 0; nblock < sc->nblocks; nblock++) { sc->cmplx_type[nblock] = nae_ivars->block_ivars[nblock].type; sc->portmask[nblock] = nae_ivars->block_ivars[nblock].portmask; } for (i = 0; i < sc->ncontexts; i++) cntx2port[i] = 18; /* 18 is an invalid port */ if (sc->nblocks == 5) sc->max_ports = 18; /* 8xx has a block 4 with 2 ports */ else sc->max_ports = sc->nblocks * PORTS_PER_CMPLX; for (i = 0; i < sc->max_ports; i++) sc->portcfg[i].type = UNKNOWN; /* Port Not Present */ /* * Now setup all internal fifo carvings based on * total number of ports in the system */ sc->total_free_desc = 0; sc->total_num_ports = 0; port = 0; context = 0; txq_base = nlm_qidstart(nae_pcibase); rxvcbase = txq_base + sc->ncontexts; for (i = 0; i < sc->nblocks; i++) { uint32_t portmask; if ((nae_ivars->blockmask & (1 << i)) == 0) { port += 4; continue; } portmask = nae_ivars->block_ivars[i].portmask; for (j = 0; j < PORTS_PER_CMPLX; j++, port++) { if ((portmask & (1 << j)) == 0) continue; nlm_setup_portcfg(sc, nae_ivars, i, port); nchan = sc->portcfg[port].num_channels; for (offset = 0; offset < nchan; offset++) cntx2port[context + offset] = port; sc->portcfg[port].txq = txq_base + context; sc->portcfg[port].rxfreeq = rxvcbase + port; context += nchan; } } poe_pcibase = nlm_get_poe_pcibase(sc->node); sc->per_port_num_flows = nlm_poe_max_flows(poe_pcibase) / sc->total_num_ports; /* zone for P2P descriptors */ nl_tx_desc_zone = uma_zcreate("NL Tx Desc", sizeof(struct xlpge_tx_desc), NULL, NULL, NULL, NULL, NAE_CACHELINE_SIZE, 0); /* NAE FMN messages have CMS src station id's in the * range of qstart to qnum. */ qstart = nlm_qidstart(nae_pcibase); qnum = nlm_qnum(nae_pcibase); if (register_msgring_handler(qstart, qstart + qnum - 1, nlm_xlpge_msgring_handler, sc)) { panic("Couldn't register NAE msgring handler\n"); } /* POE FMN messages have CMS src station id's in the * range of qstart to qnum. */ qstart = nlm_qidstart(poe_pcibase); qnum = nlm_qnum(poe_pcibase); if (register_msgring_handler(qstart, qstart + qnum - 1, nlm_xlpge_msgring_handler, sc)) { panic("Couldn't register POE msgring handler\n"); } nlm_xlpnae_init(node, sc); for (i = 0; i < sc->max_ports; i++) { char desc[32]; int block, port; if (sc->portcfg[i].type == UNKNOWN) continue; block = sc->portcfg[i].block; port = sc->portcfg[i].port; tmpd = device_add_child(dev, "xlpge", i); device_set_ivars(tmpd, &(nae_ivars->block_ivars[block].port_ivars[port])); sprintf(desc, "XLP NAE Port %d,%d", block, port); device_set_desc_copy(tmpd, desc); } nlm_setup_iface_fifo_cfg(sc->base, sc->max_ports, sc->portcfg); nlm_setup_rx_base_config(sc->base, sc->max_ports, sc->portcfg); nlm_setup_rx_buf_config(sc->base, sc->max_ports, sc->portcfg); nlm_setup_freein_fifo_cfg(sc->base, sc->portcfg); nlm_program_nae_parser_seq_fifo(sc->base, sc->max_ports, sc->portcfg); nlm_xlpnae_print_frin_desc_carving(sc); bus_generic_probe(dev); bus_generic_attach(dev); /* * Enable only boot cpu at this point, full distribution comes * only after SMP is started */ nlm_write_poe_reg(sc->poe_base, POE_DISTR_EN, 0); nlm_calc_poe_distvec(0x1, 0, 0, 0, 0x1 << XLPGE_RX_VC, dv); nlm_write_poe_distvec(sc->poedv_base, 0, dv); nlm_write_poe_reg(sc->poe_base, POE_DISTR_EN, 1); return (0); } static int nlm_xlpnae_detach(device_t dev) { /* TODO - free zone here */ return (0); } static int nlm_xlpnae_suspend(device_t dev) { return (0); } static int nlm_xlpnae_resume(device_t dev) { return (0); } static int nlm_xlpnae_shutdown(device_t dev) { return (0); } /* * xlpge driver implementation */ static void nlm_xlpge_mac_set_rx_mode(struct nlm_xlpge_softc *sc) { if (sc->if_flags & IFF_PROMISC) { if (sc->type == SGMIIC) nlm_nae_setup_rx_mode_sgmii(sc->base_addr, sc->block, sc->port, sc->type, 1 /* broadcast */, 1/* multicast */, 0 /* pause */, 1 /* promisc */); else nlm_nae_setup_rx_mode_xaui(sc->base_addr, sc->block, sc->port, sc->type, 1 /* broadcast */, 1/* multicast */, 0 /* pause */, 1 /* promisc */); } else { if (sc->type == SGMIIC) nlm_nae_setup_rx_mode_sgmii(sc->base_addr, sc->block, sc->port, sc->type, 1 /* broadcast */, 1/* multicast */, 0 /* pause */, 0 /* promisc */); else nlm_nae_setup_rx_mode_xaui(sc->base_addr, sc->block, sc->port, sc->type, 1 /* broadcast */, 1/* multicast */, 0 /* pause */, 0 /* promisc */); } } static int nlm_xlpge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct mii_data *mii; struct nlm_xlpge_softc *sc; struct ifreq *ifr; int error; sc = ifp->if_softc; error = 0; ifr = (struct ifreq *)data; switch (command) { case SIOCSIFFLAGS: XLPGE_LOCK(sc); sc->if_flags = ifp->if_flags; if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) nlm_xlpge_init(sc); else nlm_xlpge_port_enable(sc); nlm_xlpge_mac_set_rx_mode(sc); sc->link = NLM_LINK_UP; } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) nlm_xlpge_port_disable(sc); sc->link = NLM_LINK_DOWN; } XLPGE_UNLOCK(sc); error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: if (sc->mii_bus != NULL) { mii = device_get_softc(sc->mii_bus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); } break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int xlpge_tx(struct ifnet *ifp, struct mbuf *mbuf_chain) { struct nlm_fmn_msg msg; struct xlpge_tx_desc *p2p; struct nlm_xlpge_softc *sc; struct mbuf *m; vm_paddr_t paddr; int fbid, dst, pos, err; int ret = 0, tx_msgstatus, retries; err = 0; if (mbuf_chain == NULL) return (0); sc = ifp->if_softc; p2p = NULL; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING) || ifp->if_drv_flags & IFF_DRV_OACTIVE) { err = ENXIO; goto fail; } /* free a few in coming messages on the fb vc */ xlp_handle_msg_vc(1 << XLPGE_FB_VC, 2); /* vfb id table is setup to map cpu to vc 3 of the cpu */ fbid = nlm_cpuid(); dst = sc->txq; pos = 0; p2p = uma_zalloc(nl_tx_desc_zone, M_NOWAIT); if (p2p == NULL) { printf("alloc fail\n"); err = ENOBUFS; goto fail; } for (m = mbuf_chain; m != NULL; m = m->m_next) { vm_offset_t buf = (vm_offset_t) m->m_data; int len = m->m_len; int frag_sz; uint64_t desc; /*printf("m_data = %p len %d\n", m->m_data, len); */ while (len) { if (pos == XLP_NTXFRAGS - 3) { device_printf(sc->xlpge_dev, "packet defrag %d\n", m_length(mbuf_chain, NULL)); err = ENOBUFS; /* TODO fix error */ goto fail; } paddr = vtophys(buf); frag_sz = PAGE_SIZE - (buf & PAGE_MASK); if (len < frag_sz) frag_sz = len; desc = nae_tx_desc(P2D_NEOP, 0, 127, frag_sz, paddr); p2p->frag[pos] = htobe64(desc); pos++; len -= frag_sz; buf += frag_sz; } } KASSERT(pos != 0, ("Zero-length mbuf chain?\n")); /* Make the last one P2D EOP */ p2p->frag[pos-1] |= htobe64((uint64_t)P2D_EOP << 62); /* stash useful pointers in the desc */ p2p->frag[XLP_NTXFRAGS-3] = 0xf00bad; p2p->frag[XLP_NTXFRAGS-2] = (uintptr_t)p2p; p2p->frag[XLP_NTXFRAGS-1] = (uintptr_t)mbuf_chain; paddr = vtophys(p2p); msg.msg[0] = nae_tx_desc(P2P, 0, fbid, pos, paddr); for (retries = 16; retries > 0; retries--) { ret = nlm_fmn_msgsend(dst, 1, FMN_SWCODE_NAE, &msg); if (ret == 0) return (0); } fail: if (ret != 0) { tx_msgstatus = nlm_read_c2_txmsgstatus(); if ((tx_msgstatus >> 24) & 0x1) device_printf(sc->xlpge_dev, "Transmit queue full - "); if ((tx_msgstatus >> 3) & 0x1) device_printf(sc->xlpge_dev, "ECC error - "); if ((tx_msgstatus >> 2) & 0x1) device_printf(sc->xlpge_dev, "Pending Sync - "); if ((tx_msgstatus >> 1) & 0x1) device_printf(sc->xlpge_dev, "Insufficient input queue credits - "); if (tx_msgstatus & 0x1) device_printf(sc->xlpge_dev, "Insufficient output queue credits - "); } device_printf(sc->xlpge_dev, "Send failed! err = %d\n", err); if (p2p) uma_zfree(nl_tx_desc_zone, p2p); m_freem(mbuf_chain); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (err); } static int nlm_xlpge_gmac_config_speed(struct nlm_xlpge_softc *sc) { struct mii_data *mii; if (sc->type == XAUIC || sc->type == ILC) return (0); if (sc->mii_bus) { mii = device_get_softc(sc->mii_bus); mii_pollstat(mii); } return (0); } static void nlm_xlpge_port_disable(struct nlm_xlpge_softc *sc) { struct ifnet *ifp; ifp = sc->xlpge_if; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; callout_stop(&sc->xlpge_callout); nlm_mac_disable(sc->base_addr, sc->block, sc->type, sc->port); } static void nlm_mii_pollstat(void *arg) { struct nlm_xlpge_softc *sc = (struct nlm_xlpge_softc *)arg; struct mii_data *mii = NULL; if (sc->mii_bus) { mii = device_get_softc(sc->mii_bus); KASSERT(mii != NULL, ("mii ptr is NULL")); mii_pollstat(mii); callout_reset(&sc->xlpge_callout, hz, nlm_mii_pollstat, sc); } } static void nlm_xlpge_port_enable(struct nlm_xlpge_softc *sc) { if ((sc->type != SGMIIC) && (sc->type != XAUIC)) return; nlm_mac_enable(sc->base_addr, sc->block, sc->type, sc->port); nlm_mii_pollstat((void *)sc); } static void nlm_xlpge_init(void *addr) { struct nlm_xlpge_softc *sc; struct ifnet *ifp; struct mii_data *mii = NULL; sc = (struct nlm_xlpge_softc *)addr; ifp = sc->xlpge_if; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; if (sc->mii_bus) { mii = device_get_softc(sc->mii_bus); mii_mediachg(mii); } nlm_xlpge_gmac_config_speed(sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; nlm_xlpge_port_enable(sc); /* start the callout */ callout_reset(&sc->xlpge_callout, hz, nlm_mii_pollstat, sc); } /* * Read the MAC address from FDT or board eeprom. */ static void xlpge_read_mac_addr(struct nlm_xlpge_softc *sc) { xlpge_get_macaddr(sc->dev_addr); /* last octet is port specific */ sc->dev_addr[5] += (sc->block * 4) + sc->port; if (sc->type == SGMIIC) nlm_nae_setup_mac_addr_sgmii(sc->base_addr, sc->block, sc->port, sc->type, sc->dev_addr); else if (sc->type == XAUIC) nlm_nae_setup_mac_addr_xaui(sc->base_addr, sc->block, sc->port, sc->type, sc->dev_addr); } static int xlpge_mediachange(struct ifnet *ifp) { return (0); } static void xlpge_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) { struct nlm_xlpge_softc *sc; struct mii_data *md; md = NULL; sc = ifp->if_softc; if (sc->mii_bus) md = device_get_softc(sc->mii_bus); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (sc->link == NLM_LINK_DOWN) return; if (md != NULL) ifmr->ifm_active = md->mii_media.ifm_cur->ifm_media; ifmr->ifm_status |= IFM_ACTIVE; } static int nlm_xlpge_ifinit(struct nlm_xlpge_softc *sc) { struct ifnet *ifp; device_t dev; int port = sc->block * 4 + sc->port; dev = sc->xlpge_dev; ifp = sc->xlpge_if = if_alloc(IFT_ETHER); /*(sc->network_sc)->ifp_ports[port].xlpge_if = ifp;*/ ifp_ports[port].xlpge_if = ifp; if (ifp == NULL) { device_printf(dev, "cannot if_alloc()\n"); return (ENOSPC); } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | + IFF_NEEDSEPOCH; sc->if_flags = ifp->if_flags; /*ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_VLAN_HWTAGGING;*/ ifp->if_capabilities = 0; ifp->if_capenable = ifp->if_capabilities; ifp->if_ioctl = nlm_xlpge_ioctl; ifp->if_init = nlm_xlpge_init ; ifp->if_hwassist = 0; ifp->if_snd.ifq_drv_maxlen = NLM_XLPGE_TXQ_SIZE; /* TODO: make this a sysint */ IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&ifp->if_snd); ifmedia_init(&sc->xlpge_mii.mii_media, 0, xlpge_mediachange, xlpge_mediastatus); ifmedia_add(&sc->xlpge_mii.mii_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->xlpge_mii.mii_media, IFM_ETHER | IFM_AUTO); sc->xlpge_mii.mii_media.ifm_media = sc->xlpge_mii.mii_media.ifm_cur->ifm_media; xlpge_read_mac_addr(sc); ether_ifattach(ifp, sc->dev_addr); /* override if_transmit : per ifnet(9), do it after if_attach */ ifp->if_transmit = xlpge_tx; return (0); } static int nlm_xlpge_probe(device_t dev) { return (BUS_PROBE_DEFAULT); } static void * get_buf(void) { struct mbuf *m_new; uint64_t *md; #ifdef INVARIANTS vm_paddr_t temp1, temp2; #endif if ((m_new = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR)) == NULL) return (NULL); m_new->m_len = m_new->m_pkthdr.len = MCLBYTES; KASSERT(((uintptr_t)m_new->m_data & (NAE_CACHELINE_SIZE - 1)) == 0, ("m_new->m_data is not cacheline aligned")); md = (uint64_t *)m_new->m_data; md[0] = (intptr_t)m_new; /* Back Ptr */ md[1] = 0xf00bad; m_adj(m_new, NAE_CACHELINE_SIZE); #ifdef INVARIANTS temp1 = vtophys((vm_offset_t) m_new->m_data); temp2 = vtophys((vm_offset_t) m_new->m_data + 1536); KASSERT((temp1 + 1536) == temp2, ("Alloced buffer is not contiguous")); #endif return ((void *)m_new->m_data); } static void nlm_xlpge_mii_init(device_t dev, struct nlm_xlpge_softc *sc) { int error; error = mii_attach(dev, &sc->mii_bus, sc->xlpge_if, xlpge_mediachange, xlpge_mediastatus, BMSR_DEFCAPMASK, sc->phy_addr, MII_OFFSET_ANY, 0); if (error) { device_printf(dev, "attaching PHYs failed\n"); sc->mii_bus = NULL; } if (sc->mii_bus != NULL) { /* enable MDIO interrupts in the PHY */ /* XXXJC: TODO */ } } static int xlpge_stats_sysctl(SYSCTL_HANDLER_ARGS) { struct nlm_xlpge_softc *sc; uint32_t val; int reg, field; sc = arg1; field = arg2; reg = SGMII_STATS_MLR(sc->block, sc->port) + field; val = nlm_read_nae_reg(sc->base_addr, reg); return (sysctl_handle_int(oidp, &val, 0, req)); } static void nlm_xlpge_setup_stats_sysctl(device_t dev, struct nlm_xlpge_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *child; struct sysctl_oid *tree; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); #define XLPGE_STAT(name, offset, desc) \ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, name, \ CTLTYPE_UINT | CTLFLAG_RD, sc, offset, \ xlpge_stats_sysctl, "IU", desc) XLPGE_STAT("tr127", nlm_sgmii_stats_tr127, "TxRx 64 - 127 Bytes"); XLPGE_STAT("tr255", nlm_sgmii_stats_tr255, "TxRx 128 - 255 Bytes"); XLPGE_STAT("tr511", nlm_sgmii_stats_tr511, "TxRx 256 - 511 Bytes"); XLPGE_STAT("tr1k", nlm_sgmii_stats_tr1k, "TxRx 512 - 1023 Bytes"); XLPGE_STAT("trmax", nlm_sgmii_stats_trmax, "TxRx 1024 - 1518 Bytes"); XLPGE_STAT("trmgv", nlm_sgmii_stats_trmgv, "TxRx 1519 - 1522 Bytes"); XLPGE_STAT("rbyt", nlm_sgmii_stats_rbyt, "Rx Bytes"); XLPGE_STAT("rpkt", nlm_sgmii_stats_rpkt, "Rx Packets"); XLPGE_STAT("rfcs", nlm_sgmii_stats_rfcs, "Rx FCS Error"); XLPGE_STAT("rmca", nlm_sgmii_stats_rmca, "Rx Multicast Packets"); XLPGE_STAT("rbca", nlm_sgmii_stats_rbca, "Rx Broadcast Packets"); XLPGE_STAT("rxcf", nlm_sgmii_stats_rxcf, "Rx Control Frames"); XLPGE_STAT("rxpf", nlm_sgmii_stats_rxpf, "Rx Pause Frames"); XLPGE_STAT("rxuo", nlm_sgmii_stats_rxuo, "Rx Unknown Opcode"); XLPGE_STAT("raln", nlm_sgmii_stats_raln, "Rx Alignment Errors"); XLPGE_STAT("rflr", nlm_sgmii_stats_rflr, "Rx Framelength Errors"); XLPGE_STAT("rcde", nlm_sgmii_stats_rcde, "Rx Code Errors"); XLPGE_STAT("rcse", nlm_sgmii_stats_rcse, "Rx Carrier Sense Errors"); XLPGE_STAT("rund", nlm_sgmii_stats_rund, "Rx Undersize Packet Errors"); XLPGE_STAT("rovr", nlm_sgmii_stats_rovr, "Rx Oversize Packet Errors"); XLPGE_STAT("rfrg", nlm_sgmii_stats_rfrg, "Rx Fragments"); XLPGE_STAT("rjbr", nlm_sgmii_stats_rjbr, "Rx Jabber"); XLPGE_STAT("tbyt", nlm_sgmii_stats_tbyt, "Tx Bytes"); XLPGE_STAT("tpkt", nlm_sgmii_stats_tpkt, "Tx Packets"); XLPGE_STAT("tmca", nlm_sgmii_stats_tmca, "Tx Multicast Packets"); XLPGE_STAT("tbca", nlm_sgmii_stats_tbca, "Tx Broadcast Packets"); XLPGE_STAT("txpf", nlm_sgmii_stats_txpf, "Tx Pause Frame"); XLPGE_STAT("tdfr", nlm_sgmii_stats_tdfr, "Tx Deferral Packets"); XLPGE_STAT("tedf", nlm_sgmii_stats_tedf, "Tx Excessive Deferral Pkts"); XLPGE_STAT("tscl", nlm_sgmii_stats_tscl, "Tx Single Collisions"); XLPGE_STAT("tmcl", nlm_sgmii_stats_tmcl, "Tx Multiple Collisions"); XLPGE_STAT("tlcl", nlm_sgmii_stats_tlcl, "Tx Late Collision Pkts"); XLPGE_STAT("txcl", nlm_sgmii_stats_txcl, "Tx Excessive Collisions"); XLPGE_STAT("tncl", nlm_sgmii_stats_tncl, "Tx Total Collisions"); XLPGE_STAT("tjbr", nlm_sgmii_stats_tjbr, "Tx Jabber Frames"); XLPGE_STAT("tfcs", nlm_sgmii_stats_tfcs, "Tx FCS Errors"); XLPGE_STAT("txcf", nlm_sgmii_stats_txcf, "Tx Control Frames"); XLPGE_STAT("tovr", nlm_sgmii_stats_tovr, "Tx Oversize Frames"); XLPGE_STAT("tund", nlm_sgmii_stats_tund, "Tx Undersize Frames"); XLPGE_STAT("tfrg", nlm_sgmii_stats_tfrg, "Tx Fragments"); #undef XLPGE_STAT } static int nlm_xlpge_attach(device_t dev) { struct xlp_port_ivars *pv; struct nlm_xlpge_softc *sc; int port; pv = device_get_ivars(dev); sc = device_get_softc(dev); sc->xlpge_dev = dev; sc->mii_bus = NULL; sc->block = pv->block; sc->node = pv->node; sc->port = pv->port; sc->type = pv->type; sc->xlpge_if = NULL; sc->phy_addr = pv->phy_addr; sc->mdio_bus = pv->mdio_bus; sc->portcfg = nae_port_config; sc->hw_parser_en = pv->hw_parser_en; /* default settings */ sc->speed = NLM_SGMII_SPEED_10; sc->duplexity = NLM_SGMII_DUPLEX_FULL; sc->link = NLM_LINK_DOWN; sc->flowctrl = NLM_FLOWCTRL_DISABLED; sc->network_sc = device_get_softc(device_get_parent(dev)); sc->base_addr = sc->network_sc->base; sc->prepad_en = sc->network_sc->prepad_en; sc->prepad_size = sc->network_sc->prepad_size; callout_init(&sc->xlpge_callout, 1); XLPGE_LOCK_INIT(sc, device_get_nameunit(dev)); port = (sc->block*4)+sc->port; sc->nfree_desc = nae_port_config[port].num_free_descs; sc->txq = nae_port_config[port].txq; sc->rxfreeq = nae_port_config[port].rxfreeq; nlm_xlpge_submit_rx_free_desc(sc, sc->nfree_desc); if (sc->hw_parser_en) nlm_enable_hardware_parser_per_port(sc->base_addr, sc->block, sc->port); nlm_xlpge_ifinit(sc); ifp_ports[port].xlpge_sc = sc; nlm_xlpge_mii_init(dev, sc); nlm_xlpge_setup_stats_sysctl(dev, sc); return (0); } static int nlm_xlpge_detach(device_t dev) { return (0); } static int nlm_xlpge_suspend(device_t dev) { return (0); } static int nlm_xlpge_resume(device_t dev) { return (0); } static int nlm_xlpge_shutdown(device_t dev) { return (0); } /* * miibus function with custom implementation */ static int nlm_xlpge_mii_read(device_t dev, int phyaddr, int regidx) { struct nlm_xlpge_softc *sc; int val; sc = device_get_softc(dev); if (sc->type == SGMIIC) val = nlm_gmac_mdio_read(sc->base_addr, sc->mdio_bus, BLOCK_7, LANE_CFG, phyaddr, regidx); else val = 0xffff; return (val); } static int nlm_xlpge_mii_write(device_t dev, int phyaddr, int regidx, int val) { struct nlm_xlpge_softc *sc; sc = device_get_softc(dev); if (sc->type == SGMIIC) nlm_gmac_mdio_write(sc->base_addr, sc->mdio_bus, BLOCK_7, LANE_CFG, phyaddr, regidx, val); return (0); } static void nlm_xlpge_mii_statchg(device_t dev) { struct nlm_xlpge_softc *sc; struct mii_data *mii; char *speed, *duplexity; sc = device_get_softc(dev); if (sc->mii_bus == NULL) return; mii = device_get_softc(sc->mii_bus); if (mii->mii_media_status & IFM_ACTIVE) { if (IFM_SUBTYPE(mii->mii_media_active) == IFM_10_T) { sc->speed = NLM_SGMII_SPEED_10; speed = "10Mbps"; } else if (IFM_SUBTYPE(mii->mii_media_active) == IFM_100_TX) { sc->speed = NLM_SGMII_SPEED_100; speed = "100Mbps"; } else { /* default to 1G */ sc->speed = NLM_SGMII_SPEED_1000; speed = "1Gbps"; } if ((mii->mii_media_active & IFM_GMASK) == IFM_FDX) { sc->duplexity = NLM_SGMII_DUPLEX_FULL; duplexity = "full"; } else { sc->duplexity = NLM_SGMII_DUPLEX_HALF; duplexity = "half"; } printf("Port [%d, %d] setup with speed=%s duplex=%s\n", sc->block, sc->port, speed, duplexity); nlm_nae_setup_mac(sc->base_addr, sc->block, sc->port, 0, 1, 1, sc->speed, sc->duplexity); } } /* * xlpge support function implementations */ static void nlm_xlpge_release_mbuf(uint64_t paddr) { uint64_t mag, desc, mbuf; paddr += (XLP_NTXFRAGS - 3) * sizeof(uint64_t); mag = nlm_paddr_ld(paddr); desc = nlm_paddr_ld(paddr + sizeof(uint64_t)); mbuf = nlm_paddr_ld(paddr + 2 * sizeof(uint64_t)); if (mag != 0xf00bad) { /* somebody else packet Error - FIXME in intialization */ printf("cpu %d: ERR Tx packet paddr %jx, mag %jx, desc %jx mbuf %jx\n", nlm_cpuid(), (uintmax_t)paddr, (uintmax_t)mag, (intmax_t)desc, (uintmax_t)mbuf); return; } m_freem((struct mbuf *)(uintptr_t)mbuf); uma_zfree(nl_tx_desc_zone, (void *)(uintptr_t)desc); } static void nlm_xlpge_rx(struct nlm_xlpge_softc *sc, int port, vm_paddr_t paddr, int len) { struct ifnet *ifp; struct mbuf *m; vm_offset_t temp; unsigned long mag; int prepad_size; ifp = sc->xlpge_if; temp = nlm_paddr_ld(paddr - NAE_CACHELINE_SIZE); mag = nlm_paddr_ld(paddr - NAE_CACHELINE_SIZE + sizeof(uint64_t)); m = (struct mbuf *)(intptr_t)temp; if (mag != 0xf00bad) { /* somebody else packet Error - FIXME in intialization */ printf("cpu %d: ERR Rx packet paddr %jx, temp %p, mag %lx\n", nlm_cpuid(), (uintmax_t)paddr, (void *)temp, mag); return; } m->m_pkthdr.rcvif = ifp; #ifdef DUMP_PACKET { int i = 0, j = 64; unsigned char *buf = (char *)m->m_data; printf("(cpu_%d: nlge_rx, !RX_COPY) Rx Packet: length=%d\n", nlm_cpuid(), len); if (len < j) j = len; if (sc->prepad_en) j += ((sc->prepad_size + 1) * 16); for (i = 0; i < j; i++) { if (i && (i % 16) == 0) printf("\n"); printf("%02x ", buf[i]); } printf("\n"); } #endif if (sc->prepad_en) { prepad_size = ((sc->prepad_size + 1) * 16); m->m_data += prepad_size; m->m_pkthdr.len = m->m_len = (len - prepad_size); } else m->m_pkthdr.len = m->m_len = len; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); #ifdef XLP_DRIVER_LOOPBACK if (port == 16 || port == 17) (*ifp->if_input)(ifp, m); else xlpge_tx(ifp, m); #else (*ifp->if_input)(ifp, m); #endif } void nlm_xlpge_submit_rx_free_desc(struct nlm_xlpge_softc *sc, int num) { int i, size, ret, n; struct nlm_fmn_msg msg; void *ptr; for(i = 0; i < num; i++) { memset(&msg, 0, sizeof(msg)); ptr = get_buf(); if (!ptr) { device_printf(sc->xlpge_dev, "Cannot allocate mbuf\n"); break; } msg.msg[0] = vtophys(ptr); if (msg.msg[0] == 0) { printf("Bad ptr for %p\n", ptr); break; } size = 1; n = 0; while (1) { /* on success returns 1, else 0 */ ret = nlm_fmn_msgsend(sc->rxfreeq, size, 0, &msg); if (ret == 0) break; if (n++ > 10000) { printf("Too many credit fails for send free desc\n"); break; } } } } void nlm_xlpge_msgring_handler(int vc, int size, int code, int src_id, struct nlm_fmn_msg *msg, void *data) { uint64_t phys_addr; struct nlm_xlpnae_softc *sc; struct nlm_xlpge_softc *xlpge_sc; struct ifnet *ifp; uint32_t context; uint32_t port = 0; uint32_t length; sc = (struct nlm_xlpnae_softc *)data; KASSERT(sc != NULL, ("Null sc in msgring handler")); if (size == 1) { /* process transmit complete */ phys_addr = msg->msg[0] & 0xffffffffffULL; /* context is SGMII_RCV_CONTEXT_NUM + three bit vlan type * or vlan priority */ context = (msg->msg[0] >> 40) & 0x3fff; port = cntx2port[context]; if (port >= XLP_MAX_PORTS) { printf("%s:%d Bad port %d (context=%d)\n", __func__, __LINE__, port, context); return; } ifp = ifp_ports[port].xlpge_if; xlpge_sc = ifp_ports[port].xlpge_sc; nlm_xlpge_release_mbuf(phys_addr); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); } else if (size > 1) { /* Recieve packet */ phys_addr = msg->msg[1] & 0xffffffffc0ULL; length = (msg->msg[1] >> 40) & 0x3fff; length -= MAC_CRC_LEN; /* context is SGMII_RCV_CONTEXT_NUM + three bit vlan type * or vlan priority */ context = (msg->msg[1] >> 54) & 0x3ff; port = cntx2port[context]; if (port >= XLP_MAX_PORTS) { printf("%s:%d Bad port %d (context=%d)\n", __func__, __LINE__, port, context); return; } ifp = ifp_ports[port].xlpge_if; xlpge_sc = ifp_ports[port].xlpge_sc; nlm_xlpge_rx(xlpge_sc, port, phys_addr, length); /* return back a free descriptor to NA */ nlm_xlpge_submit_rx_free_desc(xlpge_sc, 1); } } Index: head/sys/net/if.h =================================================================== --- head/sys/net/if.h (revision 357009) +++ head/sys/net/if.h (revision 357010) @@ -1,620 +1,620 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_H_ #define _NET_IF_H_ #include #if __BSD_VISIBLE /* * does not depend on on most other systems. This * helps userland compatibility. (struct timeval ifi_lastchange) * The same holds for . (struct sockaddr ifru_addr) */ #ifndef _KERNEL #include #include #endif #endif /* * Length of interface external name, including terminating '\0'. * Note: this is the same size as a generic device's external name. */ #define IF_NAMESIZE 16 #if __BSD_VISIBLE #define IFNAMSIZ IF_NAMESIZE #define IF_MAXUNIT 0x7fff /* historical value */ #endif #if __BSD_VISIBLE /* * Structure used to query names of interface cloners. */ struct if_clonereq { int ifcr_total; /* total cloners (out) */ int ifcr_count; /* room for this many in user buffer */ char *ifcr_buffer; /* buffer for cloner names */ }; /* * Structure describing information about an interface * which may be of interest to management entities. */ struct if_data { /* generic interface information */ uint8_t ifi_type; /* ethernet, tokenring, etc */ uint8_t ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */ uint8_t ifi_addrlen; /* media address length */ uint8_t ifi_hdrlen; /* media header length */ uint8_t ifi_link_state; /* current link state */ uint8_t ifi_vhid; /* carp vhid */ uint16_t ifi_datalen; /* length of this data struct */ uint32_t ifi_mtu; /* maximum transmission unit */ uint32_t ifi_metric; /* routing metric (external only) */ uint64_t ifi_baudrate; /* linespeed */ /* volatile statistics */ uint64_t ifi_ipackets; /* packets received on interface */ uint64_t ifi_ierrors; /* input errors on interface */ uint64_t ifi_opackets; /* packets sent on interface */ uint64_t ifi_oerrors; /* output errors on interface */ uint64_t ifi_collisions; /* collisions on csma interfaces */ uint64_t ifi_ibytes; /* total number of octets received */ uint64_t ifi_obytes; /* total number of octets sent */ uint64_t ifi_imcasts; /* packets received via multicast */ uint64_t ifi_omcasts; /* packets sent via multicast */ uint64_t ifi_iqdrops; /* dropped on input */ uint64_t ifi_oqdrops; /* dropped on output */ uint64_t ifi_noproto; /* destined for unsupported protocol */ uint64_t ifi_hwassist; /* HW offload capabilities, see IFCAP */ /* Unions are here to make sizes MI. */ union { /* uptime at attach or stat reset */ time_t tt; uint64_t ph; } __ifi_epoch; #define ifi_epoch __ifi_epoch.tt union { /* time of last administrative change */ struct timeval tv; struct { uint64_t ph1; uint64_t ph2; } ph; } __ifi_lastchange; #define ifi_lastchange __ifi_lastchange.tv }; /*- * Interface flags are of two types: network stack owned flags, and driver * owned flags. Historically, these values were stored in the same ifnet * flags field, but with the advent of fine-grained locking, they have been * broken out such that the network stack is responsible for synchronizing * the stack-owned fields, and the device driver the device-owned fields. * Both halves can perform lockless reads of the other half's field, subject * to accepting the involved races. * * Both sets of flags come from the same number space, and should not be * permitted to conflict, as they are exposed to user space via a single * field. * * The following symbols identify read and write requirements for fields: * * (i) if_flags field set by device driver before attach, read-only there * after. * (n) if_flags field written only by the network stack, read by either the * stack or driver. * (d) if_drv_flags field written only by the device driver, read by either * the stack or driver. */ #define IFF_UP 0x1 /* (n) interface is up */ #define IFF_BROADCAST 0x2 /* (i) broadcast address valid */ #define IFF_DEBUG 0x4 /* (n) turn on debugging */ #define IFF_LOOPBACK 0x8 /* (i) is a loopback net */ #define IFF_POINTOPOINT 0x10 /* (i) is a point-to-point link */ -/* 0x20 was IFF_SMART */ +#define IFF_NEEDSEPOCH 0x20 /* (i) calls if_input w/o epoch */ #define IFF_DRV_RUNNING 0x40 /* (d) resources allocated */ #define IFF_NOARP 0x80 /* (n) no address resolution protocol */ #define IFF_PROMISC 0x100 /* (n) receive all packets */ #define IFF_ALLMULTI 0x200 /* (n) receive all multicast packets */ #define IFF_DRV_OACTIVE 0x400 /* (d) tx hardware queue is full */ #define IFF_SIMPLEX 0x800 /* (i) can't hear own transmissions */ #define IFF_LINK0 0x1000 /* per link layer defined bit */ #define IFF_LINK1 0x2000 /* per link layer defined bit */ #define IFF_LINK2 0x4000 /* per link layer defined bit */ #define IFF_ALTPHYS IFF_LINK2 /* use alternate physical connection */ #define IFF_MULTICAST 0x8000 /* (i) supports multicast */ #define IFF_CANTCONFIG 0x10000 /* (i) unconfigurable using ioctl(2) */ #define IFF_PPROMISC 0x20000 /* (n) user-requested promisc mode */ #define IFF_MONITOR 0x40000 /* (n) user-requested monitor mode */ #define IFF_STATICARP 0x80000 /* (n) static ARP */ #define IFF_DYING 0x200000 /* (n) interface is winding down */ #define IFF_RENAMING 0x400000 /* (n) interface is being renamed */ #define IFF_NOGROUP 0x800000 /* (n) interface is not part of any groups */ /* * Old names for driver flags so that user space tools can continue to use * the old (portable) names. */ #ifndef _KERNEL #define IFF_RUNNING IFF_DRV_RUNNING #define IFF_OACTIVE IFF_DRV_OACTIVE #endif /* flags set internally only: */ #define IFF_CANTCHANGE \ (IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_PROMISC|\ IFF_DYING|IFF_CANTCONFIG) /* * Values for if_link_state. */ #define LINK_STATE_UNKNOWN 0 /* link invalid/unknown */ #define LINK_STATE_DOWN 1 /* link is down */ #define LINK_STATE_UP 2 /* link is up */ /* * Some convenience macros used for setting ifi_baudrate. * XXX 1000 vs. 1024? --thorpej@netbsd.org */ #define IF_Kbps(x) ((uintmax_t)(x) * 1000) /* kilobits/sec. */ #define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */ #define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. */ /* * Capabilities that interfaces can advertise. * * struct ifnet.if_capabilities * contains the optional features & capabilities a particular interface * supports (not only the driver but also the detected hw revision). * Capabilities are defined by IFCAP_* below. * struct ifnet.if_capenable * contains the enabled (either by default or through ifconfig) optional * features & capabilities on this interface. * Capabilities are defined by IFCAP_* below. * struct if_data.ifi_hwassist in mbuf CSUM_ flag form, controlled by above * contains the enabled optional feature & capabilites that can be used * individually per packet and are specified in the mbuf pkthdr.csum_flags * field. IFCAP_* and CSUM_* do not match one to one and CSUM_* may be * more detailed or differenciated than IFCAP_*. * Hwassist features are defined CSUM_* in sys/mbuf.h * * Capabilities that cannot be arbitrarily changed with ifconfig/ioctl * are listed in IFCAP_CANTCHANGE, similar to IFF_CANTCHANGE. * This is not strictly necessary because the common code never * changes capabilities, and it is left to the individual driver * to do the right thing. However, having the filter here * avoids replication of the same code in all individual drivers. */ #define IFCAP_RXCSUM 0x00001 /* can offload checksum on RX */ #define IFCAP_TXCSUM 0x00002 /* can offload checksum on TX */ #define IFCAP_NETCONS 0x00004 /* can be a network console */ #define IFCAP_VLAN_MTU 0x00008 /* VLAN-compatible MTU */ #define IFCAP_VLAN_HWTAGGING 0x00010 /* hardware VLAN tag support */ #define IFCAP_JUMBO_MTU 0x00020 /* 9000 byte MTU supported */ #define IFCAP_POLLING 0x00040 /* driver supports polling */ #define IFCAP_VLAN_HWCSUM 0x00080 /* can do IFCAP_HWCSUM on VLANs */ #define IFCAP_TSO4 0x00100 /* can do TCP Segmentation Offload */ #define IFCAP_TSO6 0x00200 /* can do TCP6 Segmentation Offload */ #define IFCAP_LRO 0x00400 /* can do Large Receive Offload */ #define IFCAP_WOL_UCAST 0x00800 /* wake on any unicast frame */ #define IFCAP_WOL_MCAST 0x01000 /* wake on any multicast frame */ #define IFCAP_WOL_MAGIC 0x02000 /* wake on any Magic Packet */ #define IFCAP_TOE4 0x04000 /* interface can offload TCP */ #define IFCAP_TOE6 0x08000 /* interface can offload TCP6 */ #define IFCAP_VLAN_HWFILTER 0x10000 /* interface hw can filter vlan tag */ /* available 0x20000 */ #define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */ #define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */ #define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */ #define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */ #define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */ #define IFCAP_HWSTATS 0x800000 /* manages counters internally */ #define IFCAP_TXRTLMT 0x1000000 /* hardware supports TX rate limiting */ #define IFCAP_HWRXTSTMP 0x2000000 /* hardware rx timestamping */ #define IFCAP_NOMAP 0x4000000 /* can TX unmapped mbufs */ #define IFCAP_TXTLS4 0x8000000 /* can do TLS encryption and segmentation for TCP */ #define IFCAP_TXTLS6 0x10000000 /* can do TLS encryption and segmentation for TCP6 */ #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) #define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) #define IFCAP_WOL (IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC) #define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6) #define IFCAP_TXTLS (IFCAP_TXTLS4 | IFCAP_TXTLS6) #define IFCAP_CANTCHANGE (IFCAP_NETMAP) #define IFQ_MAXLEN 50 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ /* * Message format for use in obtaining information about interfaces * from getkerninfo and the routing socket * For the new, extensible interface see struct if_msghdrl below. */ struct if_msghdr { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ u_short _ifm_spare1; struct if_data ifm_data;/* statistics and other data about if */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifm_data_off or within ifm_data. Both the if_msghdr and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifm_data and the start of the socket address strucutres immediately following * struct if_msghdrl given a pointer to struct if_msghdrl. */ #define IF_MSGHDRL_IFM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifm_data_off) #define IF_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifm_len) struct if_msghdrl { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ u_short _ifm_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifm_len; /* length of if_msghdrl incl. if_data */ u_short ifm_data_off; /* offset of if_data from beginning */ int _ifm_spare2; struct if_data ifm_data;/* statistics and other data about if */ }; /* * Message format for use in obtaining information about interface addresses * from getkerninfo and the routing socket * For the new, extensible interface see struct ifa_msghdrl below. */ struct ifa_msghdr { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ u_short _ifam_spare1; int ifam_metric; /* value of ifa_ifp->if_metric */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifam_metric or within ifam_data. Both the ifa_msghdrl and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifm_data and the start of the socket address strucutres immediately following * struct ifa_msghdrl given a pointer to struct ifa_msghdrl. */ #define IFA_MSGHDRL_IFAM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifam_data_off) #define IFA_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifam_len) struct ifa_msghdrl { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ u_short _ifam_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifam_len; /* length of ifa_msghdrl incl. if_data */ u_short ifam_data_off; /* offset of if_data from beginning */ int ifam_metric; /* value of ifa_ifp->if_metric */ struct if_data ifam_data;/* statistics and other data about if or * address */ }; /* * Message format for use in obtaining information about multicast addresses * from the routing socket */ struct ifma_msghdr { u_short ifmam_msglen; /* to skip over non-understood messages */ u_char ifmam_version; /* future binary compatibility */ u_char ifmam_type; /* message type */ int ifmam_addrs; /* like rtm_addrs */ int ifmam_flags; /* value of ifa_flags */ u_short ifmam_index; /* index for associated ifp */ u_short _ifmam_spare1; }; /* * Message format announcing the arrival or departure of a network interface. */ struct if_announcemsghdr { u_short ifan_msglen; /* to skip over non-understood messages */ u_char ifan_version; /* future binary compatibility */ u_char ifan_type; /* message type */ u_short ifan_index; /* index for associated ifp */ char ifan_name[IFNAMSIZ]; /* if name, e.g. "en0" */ u_short ifan_what; /* what type of announcement */ }; #define IFAN_ARRIVAL 0 /* interface arrival */ #define IFAN_DEPARTURE 1 /* interface departure */ /* * Buffer with length to be used in SIOCGIFDESCR/SIOCSIFDESCR requests */ struct ifreq_buffer { size_t length; void *buffer; }; /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq { char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; caddr_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ #define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ #ifndef _KERNEL #define ifr_buffer ifr_ifru.ifru_buffer /* user supplied buffer with its length */ #endif #define ifr_flags ifr_ifru.ifru_flags[0] /* flags (low 16 bits) */ #define ifr_flagshigh ifr_ifru.ifru_flags[1] /* flags (high 16 bits) */ #define ifr_jid ifr_ifru.ifru_jid /* jail/vnet */ #define ifr_metric ifr_ifru.ifru_metric /* metric */ #define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ #define ifr_phys ifr_ifru.ifru_phys /* physical wire */ #define ifr_media ifr_ifru.ifru_media /* physical media */ #ifndef _KERNEL #define ifr_data ifr_ifru.ifru_data /* for use by interface */ #endif #define ifr_reqcap ifr_ifru.ifru_cap[0] /* requested capabilities */ #define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ #define ifr_index ifr_ifru.ifru_index /* interface index */ #define ifr_fib ifr_ifru.ifru_fib /* interface fib */ #define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ #define ifr_lan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ ((ifr).ifr_addr.sa_len > sizeof(struct sockaddr) ? \ (sizeof(struct ifreq) - sizeof(struct sockaddr) + \ (ifr).ifr_addr.sa_len) : sizeof(struct ifreq)) struct ifaliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; int ifra_vhid; }; /* 9.x compat */ struct oifaliasreq { char ifra_name[IFNAMSIZ]; struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; }; struct ifmediareq { char ifm_name[IFNAMSIZ]; /* if name, e.g. "en0" */ int ifm_current; /* current media options */ int ifm_mask; /* don't care mask */ int ifm_status; /* media status */ int ifm_active; /* active options */ int ifm_count; /* # entries in ifm_ulist array */ int *ifm_ulist; /* media words */ }; struct ifdrv { char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ unsigned long ifd_cmd; size_t ifd_len; void *ifd_data; }; /* * Structure used to retrieve aux status data from interfaces. * Kernel suppliers to this interface should respect the formatting * needed by ifconfig(8): each line starts with a TAB and ends with * a newline. The canonical example to copy and paste is in if_tun.c. */ #define IFSTATMAX 800 /* 10 lines of text */ struct ifstat { char ifs_name[IFNAMSIZ]; /* if name, e.g. "en0" */ char ascii[IFSTATMAX + 1]; }; /* * Structure used in SIOCGIFCONF request. * Used to retrieve interface configuration * for machine (useful for programs which * must know all networks accessible). */ struct ifconf { int ifc_len; /* size of associated buffer */ union { caddr_t ifcu_buf; struct ifreq *ifcu_req; } ifc_ifcu; #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */ }; /* * interface groups */ #define IFG_ALL "all" /* group contains all interfaces */ /* XXX: will we implement this? */ #define IFG_EGRESS "egress" /* if(s) default route(s) point to */ struct ifg_req { union { char ifgrqu_group[IFNAMSIZ]; char ifgrqu_member[IFNAMSIZ]; } ifgrq_ifgrqu; #define ifgrq_group ifgrq_ifgrqu.ifgrqu_group #define ifgrq_member ifgrq_ifgrqu.ifgrqu_member }; /* * Used to lookup groups for an interface */ struct ifgroupreq { char ifgr_name[IFNAMSIZ]; u_int ifgr_len; union { char ifgru_group[IFNAMSIZ]; struct ifg_req *ifgru_groups; } ifgr_ifgru; #ifndef _KERNEL #define ifgr_group ifgr_ifgru.ifgru_group #define ifgr_groups ifgr_ifgru.ifgru_groups #endif }; /* * Structure used to request i2c data * from interface transceivers. */ struct ifi2creq { uint8_t dev_addr; /* i2c address (0xA0, 0xA2) */ uint8_t offset; /* read offset */ uint8_t len; /* read length */ uint8_t spare0; uint32_t spare1; uint8_t data[8]; /* read buffer */ }; /* * RSS hash. */ #define RSS_FUNC_NONE 0 /* RSS disabled */ #define RSS_FUNC_PRIVATE 1 /* non-standard */ #define RSS_FUNC_TOEPLITZ 2 #define RSS_TYPE_IPV4 0x00000001 #define RSS_TYPE_TCP_IPV4 0x00000002 #define RSS_TYPE_IPV6 0x00000004 #define RSS_TYPE_IPV6_EX 0x00000008 #define RSS_TYPE_TCP_IPV6 0x00000010 #define RSS_TYPE_TCP_IPV6_EX 0x00000020 #define RSS_TYPE_UDP_IPV4 0x00000040 #define RSS_TYPE_UDP_IPV6 0x00000080 #define RSS_TYPE_UDP_IPV6_EX 0x00000100 #define RSS_KEYLEN 128 struct ifrsskey { char ifrk_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint8_t ifrk_func; /* RSS_FUNC_ */ uint8_t ifrk_spare0; uint16_t ifrk_keylen; uint8_t ifrk_key[RSS_KEYLEN]; }; struct ifrsshash { char ifrh_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint8_t ifrh_func; /* RSS_FUNC_ */ uint8_t ifrh_spare0; uint16_t ifrh_spare1; uint32_t ifrh_types; /* RSS_TYPE_ */ }; #define IFNET_PCP_NONE 0xff /* PCP disabled */ #define IFDR_MSG_SIZE 64 #define IFDR_REASON_MSG 1 #define IFDR_REASON_VENDOR 2 struct ifdownreason { char ifdr_name[IFNAMSIZ]; uint32_t ifdr_reason; uint32_t ifdr_vendor; char ifdr_msg[IFDR_MSG_SIZE]; }; #endif /* __BSD_VISIBLE */ #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IFADDR); MALLOC_DECLARE(M_IFMADDR); #endif #endif #ifndef _KERNEL struct if_nameindex { unsigned int if_index; /* 1, 2, ... */ char *if_name; /* null terminated name: "le0", ... */ }; __BEGIN_DECLS void if_freenameindex(struct if_nameindex *); char *if_indextoname(unsigned int, char *); struct if_nameindex *if_nameindex(void); unsigned int if_nametoindex(const char *); __END_DECLS #endif #endif /* !_NET_IF_H_ */